tb 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +60 -0
- data/bin/tb +1137 -0
- data/lib/tb.rb +35 -0
- data/lib/tb/basic.rb +1071 -0
- data/lib/tb/csv.rb +125 -0
- data/lib/tb/enumerable.rb +284 -0
- data/lib/tb/fieldset.rb +96 -0
- data/lib/tb/pathfinder.rb +569 -0
- data/lib/tb/qtsv.rb +93 -0
- data/lib/tb/reader.rb +213 -0
- data/lib/tb/record.rb +129 -0
- data/lib/tb/tsv.rb +93 -0
- data/sample/excel2csv +270 -0
- data/sample/poi-xls2csv.rb +397 -0
- data/sample/poi-xls2csv.sh +39 -0
- data/test-all.rb +7 -0
- data/test/test_basic.rb +290 -0
- data/test/test_csv.rb +78 -0
- data/test/test_enumerable.rb +122 -0
- data/test/test_record.rb +12 -0
- data/test/test_tsv.rb +41 -0
- metadata +73 -0
data/README
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
= tb - manipulation tool for tables: CSV, TSV, etc.
|
2
|
+
|
3
|
+
== Usage
|
4
|
+
|
5
|
+
% tb help
|
6
|
+
Usage:
|
7
|
+
tb csv [OPTS] [TABLE]
|
8
|
+
tb tsv [OPTS] [TABLE]
|
9
|
+
tb json [OPTS] [TABLE]
|
10
|
+
tb yaml [OPTS] [TABLE]
|
11
|
+
tb pp [OPTS] [TABLE]
|
12
|
+
tb grep [OPTS] REGEXP [TABLE]
|
13
|
+
tb gsub [OPTS] REGEXP STRING [TABLE]
|
14
|
+
tb sort [OPTS] [TABLE]
|
15
|
+
tb select [OPTS] FIELD,... [TABLE]
|
16
|
+
tb rename [OPTS] SRC,DST,... [TABLE]
|
17
|
+
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
|
18
|
+
tb cat [OPTS] [TABLE ...]
|
19
|
+
tb join [OPTS] [TABLE ...]
|
20
|
+
tb group [OPTS] [TABLE]
|
21
|
+
tb cross [OPTS] [TABLE]
|
22
|
+
tb shape [OPTS] [TABLE ...]
|
23
|
+
tb mheader [OPTS] [TABLE]
|
24
|
+
tb crop [OPTS] [TABLE]
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
== Install
|
29
|
+
|
30
|
+
gem install tb
|
31
|
+
|
32
|
+
== Author
|
33
|
+
|
34
|
+
Tanaka Akira <akr@fsij.org>
|
35
|
+
|
36
|
+
== License
|
37
|
+
|
38
|
+
Redistribution and use in source and binary forms, with or without
|
39
|
+
modification, are permitted provided that the following conditions are met:
|
40
|
+
|
41
|
+
(1) Redistributions of source code must retain the above copyright notice, this
|
42
|
+
list of conditions and the following disclaimer.
|
43
|
+
(2) Redistributions in binary form must reproduce the above copyright notice,
|
44
|
+
this list of conditions and the following disclaimer in the documentation
|
45
|
+
and/or other materials provided with the distribution.
|
46
|
+
(3) The name of the author may not be used to endorse or promote products
|
47
|
+
derived from this software without specific prior written permission.
|
48
|
+
|
49
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
50
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
51
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
52
|
+
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
53
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
54
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
55
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
56
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
57
|
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
58
|
+
OF SUCH DAMAGE.
|
59
|
+
|
60
|
+
(The modified BSD licence)
|
data/bin/tb
ADDED
@@ -0,0 +1,1137 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or without
|
6
|
+
# modification, are permitted provided that the following conditions are met:
|
7
|
+
#
|
8
|
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
9
|
+
# list of conditions and the following disclaimer.
|
10
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
# this list of conditions and the following disclaimer in the documentation
|
12
|
+
# and/or other materials provided with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote products
|
14
|
+
# derived from this software without specific prior written permission.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
17
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
18
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
19
|
+
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
20
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
21
|
+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
24
|
+
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
25
|
+
# OF SUCH DAMAGE.
|
26
|
+
|
27
|
+
require 'tb'
|
28
|
+
require 'optparse'
|
29
|
+
require 'enumerator'
|
30
|
+
|
31
|
+
# Command-line entry point.
#
# Removes the first element of argv and treats it as the subcommand name.
# 'help' and '-h' go to main_help; every other known name is routed to the
# method called main_<name>, which receives the remaining arguments.
# A missing or unknown subcommand is reported through err.
def main(argv)
  name = argv.shift
  if name.nil?
    err "Usage: tb subcommand args..."
  elsif name == 'help' || name == '-h'
    main_help(argv)
  elsif %w[csv tsv json yaml pp grep gsub sort select rename newfield
           cat join group cross shape mheader crop].include?(name)
    send("main_#{name}", argv)
  else
    err "unexpected subcommand: #{name.inspect}"
  end
end
|
59
|
+
|
60
|
+
# Prints the synopsis of every subcommand to standard output and then
# terminates the process.
#
# status - value handed to Kernel#exit (main_help passes true).
#
# The "group" and "cross" lines list their positional key-field arguments so
# that this overview agrees with the banners built by op_group and op_cross.
def usage(status)
  print <<'End'
Usage:
tb csv [OPTS] [TABLE]
tb tsv [OPTS] [TABLE]
tb json [OPTS] [TABLE]
tb yaml [OPTS] [TABLE]
tb pp [OPTS] [TABLE]
tb grep [OPTS] REGEXP [TABLE]
tb gsub [OPTS] REGEXP STRING [TABLE]
tb sort [OPTS] [TABLE]
tb select [OPTS] FIELD,... [TABLE]
tb rename [OPTS] SRC,DST,... [TABLE]
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
tb cat [OPTS] [TABLE ...]
tb join [OPTS] [TABLE ...]
tb group [OPTS] KEY-FIELD1,... [TABLE]
tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
tb shape [OPTS] [TABLE ...]
tb mheader [OPTS] [TABLE]
tb crop [OPTS] [TABLE]
End
  exit status
end
|
84
|
+
|
85
|
+
# Implements "tb help [SUBCOMMAND]".
#
# Without a subcommand the general usage is shown via usage(true).
# With a known subcommand, the option parser returned by op_<name> is
# printed with puts.  Anything else is reported through err.
def main_help(argv)
  topic = argv.shift
  return usage(true) if topic.nil?

  known = %w[csv tsv json yaml pp grep gsub sort select rename newfield
             cat join group cross shape mheader crop]
  if known.include?(topic)
    puts send("op_#{topic}")
  else
    err "unexpected subcommand: #{topic.inspect}"
  end
end
|
112
|
+
|
113
|
+
# Global option state written by the option parsers in this file.
$opt_N = nil
$opt_debug = 0
$opt_no_pager = nil

# Builds the OptionParser for "tb csv".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_csv
  OptionParser.new('Usage: tb csv [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
125
|
+
|
126
|
+
# Builds the OptionParser for "tb tsv".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_tsv
  OptionParser.new('Usage: tb tsv [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
134
|
+
|
135
|
+
# Builds the OptionParser for "tb json".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_json
  OptionParser.new('Usage: tb json [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
143
|
+
|
144
|
+
# Builds the OptionParser for "tb yaml".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_yaml
  OptionParser.new('Usage: tb yaml [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
152
|
+
|
153
|
+
# Builds the OptionParser for "tb pp".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_pp
  OptionParser.new('Usage: tb pp [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
161
|
+
|
162
|
+
# Option state for "tb grep", filled in by the parser built in op_grep.
$opt_grep_e = nil
$opt_grep_ruby = nil
$opt_grep_f = nil
$opt_grep_v = nil

# Builds the OptionParser for "tb grep".
#
# -e stores a regexp source in $opt_grep_e and --ruby stores a Ruby
# expression in $opt_grep_ruby; main_grep compiles the former with
# Regexp.new and evaluates the latter as a predicate, so the help texts
# below describe them that way (they were previously swapped).
def op_grep
  op = OptionParser.new
  op.banner = 'Usage: tb grep [OPTS] REGEXP [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_grep_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_grep_e = pattern }
  op.def_option('--ruby RUBY-EXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|ruby_exp| $opt_grep_ruby = ruby_exp }
  op.def_option('-v', 'output the records which don\'t match') { $opt_grep_v = true }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
|
178
|
+
|
179
|
+
# Option state for "tb gsub", filled in by the parser built in op_gsub.
$opt_gsub_e = nil
$opt_gsub_f = nil

# Builds the OptionParser for "tb gsub".
#
# -e stores a regexp source in $opt_gsub_e (main_gsub compiles it with
# Regexp.new), so its help text describes a regexp rather than a Ruby
# predicate.  -f stores the field to restrict the substitution to.
def op_gsub
  op = OptionParser.new
  op.banner = 'Usage: tb gsub [OPTS] REGEXP STRING [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_gsub_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_gsub_e = pattern }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
|
191
|
+
|
192
|
+
# Sort keys given with -f (raw argument string), or nil when not given.
$opt_sort_f = nil

# Builds the OptionParser for "tb sort".
# -f stores its argument in $opt_sort_f; -h, -N and --no-pager behave as in
# the other subcommand parsers.
def op_sort
  OptionParser.new('Usage: tb sort [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('-f FIELD,...', 'specify sort keys') { |keys| $opt_sort_f = keys }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
202
|
+
|
203
|
+
# True when -v (invert match) was given to "tb select".
$opt_select_v = nil

# Builds the OptionParser for "tb select".
# -v sets $opt_select_v; -h, -N and --no-pager behave as in the other
# subcommand parsers.
def op_select
  OptionParser.new('Usage: tb select [OPTS] FIELD,... [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('-v', 'invert match') { $opt_select_v = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
213
|
+
|
214
|
+
# Builds the OptionParser for "tb rename".
# Only -h (print help, exit 0) and --no-pager (sets $opt_no_pager) exist.
def op_rename
  OptionParser.new('Usage: tb rename [OPTS] SRC,DST,... [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
221
|
+
|
222
|
+
# Builds the OptionParser for "tb newfield".
# Only -h (print help, exit 0) and --no-pager (sets $opt_no_pager) exist.
def op_newfield
  OptionParser.new('Usage: tb newfield [OPTS] FIELD RUBY-EXP [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
229
|
+
|
230
|
+
# Builds the OptionParser for "tb cat".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_cat
  OptionParser.new('Usage: tb cat [OPTS] [TABLE ...]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
238
|
+
|
239
|
+
# Outer-join mode (:full, :left, :right or nil) and the fill value for
# missing cells, as selected on the "tb join" command line.
$opt_join_outer = nil
$opt_join_outer_missing = nil

# Builds the OptionParser for "tb join".
# Each -d/--debug increments $opt_debug.  --outer/--left/--right choose the
# outer-join mode; --outer-missing stores the fill value and selects :full
# when no mode has been chosen yet.
def op_join
  OptionParser.new('Usage: tb join [OPTS] [TABLE ...]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-d', '--debug', 'show debug message') { $opt_debug += 1 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--outer', 'outer join') { $opt_join_outer = :full }
    op.on('--left', 'left outer join') { $opt_join_outer = :left }
    op.on('--right', 'right outer join') { $opt_join_outer = :right }
    op.on('--outer-missing=DEFAULT', 'missing value for outer join') do |fill|
      $opt_join_outer ||= :full
      $opt_join_outer_missing = fill
    end
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
257
|
+
|
258
|
+
# Raw -a/--aggregate arguments for "tb group", in command-line order.
$opt_group_fields = []

# Builds the OptionParser for "tb group".
# Every -a/--aggregate argument is appended to $opt_group_fields.
def op_group
  OptionParser.new('Usage: tb group [OPTS] KEY-FIELD1,... [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
          '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') { |spec| $opt_group_fields << spec }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
268
|
+
|
269
|
+
# Raw -a/--aggregate arguments and the compact-format flag for "tb cross".
$opt_cross_fields = []
$opt_cross_compact = false

# Builds the OptionParser for "tb cross".
# Every -a/--aggregate argument is appended to $opt_cross_fields and
# -c/--compact sets $opt_cross_compact.
def op_cross
  OptionParser.new('Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
          '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') { |spec| $opt_cross_fields << spec }
    op.on('-c', '--compact', 'compact format') { $opt_cross_compact = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
281
|
+
|
282
|
+
# Builds the OptionParser for "tb shape".
# -h prints the parser's help and exits with status 0, -N sets $opt_N and
# --no-pager sets $opt_no_pager.
def op_shape
  OptionParser.new('Usage: tb shape [OPTS] [TABLE ...]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-N', 'use numeric field name') { $opt_N = true }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
290
|
+
|
291
|
+
# Number of header records given with -c to "tb mheader" (nil if absent).
$opt_mheader_count = nil

# Builds the OptionParser for "tb mheader".
# -c stores its argument, converted with to_i, in $opt_mheader_count.
def op_mheader
  OptionParser.new('Usage: tb mheader [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-c N', 'number of header records') { |n| $opt_mheader_count = n.to_i }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
300
|
+
|
301
|
+
# Raw range string given with -r to "tb crop" (nil if absent).
$opt_crop_range = nil

# Builds the OptionParser for "tb crop".
# -r stores its argument unchanged in $opt_crop_range.
def op_crop
  OptionParser.new('Usage: tb crop [OPTS] [TABLE]') do |op|
    op.on('-h', 'show help message') { puts op; exit 0 }
    op.on('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') { |range| $opt_crop_range = range }
    op.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
310
|
+
|
311
|
+
# Reports a fatal problem: writes msg to STDERR, then ends the process
# with exit status 1.
def err(msg)
  STDERR.puts(msg)
  exit(1)
end
|
315
|
+
|
316
|
+
# Converts a cell value into an Array that can be ordered with <=>.
#
# v - nil, a Numeric, or a String.
#
# Returns
#   []           for nil,
#   [0, number]  for a Numeric or a String that looks like a number,
#   otherwise a flat list of [0, integer] / [1, text] pairs, one pair per
#   run of digits / non-digits in the string.
# Raises ArgumentError for any other kind of object.
#
# Integer-looking strings are read as decimal with String#to_i: Kernel#Integer
# honours radix prefixes, so "010" would become 8 and "08" would raise.
# A float written with a bare trailing point ("1.", "1.e3") is accepted by the
# pattern but rejected by Kernel#Float on Ruby versions before 3.4, so a "0"
# is inserted after the point before converting.
def comparison_value(v)
  case v
  when nil
    []
  when Numeric
    [0, v]
  when String
    case v
    when /\A\s*(-?\d+)\s*\z/
      [0, $1.to_i]
    when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)\s*\z/
      [0, Float($1.sub(/\.(?=[eE]|\z)/, '.0'))]
    else
      parts = []
      v.scan(/\d+|\D+/) do |token|
        if /\A\d/ =~ token
          parts << 0 << token.to_i
        else
          parts << 1 << token
        end
      end
      parts
    end
  else
    raise ArgumentError, "unexpected: #{v.inspect}"
  end
end
|
343
|
+
|
344
|
+
# Parses a String as a number after stripping surrounding whitespace.
#
# Returns an Integer for an optionally signed run of digits, a Float for a
# decimal/exponent notation, and raises RuntimeError for anything else.
def conv_to_numeric(v)
  text = v.strip
  case text
  when /\A-?\d+\z/
    text.to_i
  when /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/
    text.to_f
  else
    raise "numeric value expected: #{text.inspect}"
  end
end
|
355
|
+
|
356
|
+
# Aggregator that counts how many times update was called; the value passed
# to update is ignored.
class CountAggregator
  def initialize
    @result = 0
  end

  # Returns the count after this call.
  def update(v)
    @result += 1
  end

  def finish
    @result
  end
end
|
361
|
+
|
362
|
+
# Aggregator that adds up the numeric value of every non-blank input.
# nil and the empty string are skipped; other values go through
# conv_to_numeric.
class SumAggregator
  def initialize
    @result = 0
  end

  # Returns the running total, or nil when v was skipped.
  def update(v)
    return nil if v.nil? || v == ''
    @result += conv_to_numeric(v)
  end

  def finish
    @result
  end
end
|
367
|
+
|
368
|
+
# Aggregator that computes an arithmetic mean as a Float.
#
# Every call to update increments the divisor, while nil and the empty
# string contribute nothing to the sum.
# NOTE(review): blank values therefore lower the average, whereas
# SumAggregator simply skips them - confirm this is intended.
class AvgAggregator
  def initialize
    @sum = 0
    @count = 0
  end

  # Returns the running sum, or nil when v was blank.
  def update(v)
    @count += 1
    return nil if v.nil? || v == ''
    @sum += conv_to_numeric(v)
  end

  def finish
    @sum / @count.to_f
  end
end
|
373
|
+
|
374
|
+
# Aggregator that remembers the input whose comparison_value is greatest.
# On ties the earliest value is kept.
class MaxAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  # Returns [v, key] when v became the new maximum, nil otherwise.
  def update(v)
    key = comparison_value(v)
    if @cmp.nil? || (@cmp <=> key) < 0
      @v, @cmp = v, key
    end
  end

  def finish
    @v
  end
end
|
386
|
+
|
387
|
+
# Aggregator that remembers the input whose comparison_value is smallest.
# On ties the earliest value is kept.
class MinAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  # Returns [v, key] when v became the new minimum, nil otherwise.
  def update(v)
    key = comparison_value(v)
    if @cmp.nil? || (@cmp <=> key) > 0
      @v, @cmp = v, key
    end
  end

  def finish
    @v
  end
end
|
399
|
+
|
400
|
+
# Aggregator that collects every truthy input, in arrival order, and
# reports them joined with commas.
class ValuesAggregator
  def initialize
    @result = []
  end

  # Returns the collected list, or nil when v was nil/false.
  def update(v)
    @result.push(v) if v
  end

  def finish
    @result.join(",")
  end
end
|
405
|
+
|
406
|
+
# Aggregator that collects every truthy input and reports the distinct
# values, in first-seen order, joined with commas.
class UniqueValuesAggregator
  def initialize
    @result = []
  end

  # Returns the collected list, or nil when v was nil/false.
  def update(v)
    @result.push(v) if v
  end

  def finish
    @result.uniq.join(",")
  end
end
|
411
|
+
|
412
|
+
# Adapter that feeds one column of each record array to an aggregator.
#
# i          - index of the column to extract.
# aggregator - object responding to update(value) and finish.
class Selector
  def initialize(i, aggregator)
    @i = i
    @agg = aggregator
  end

  # Passes ary[i] to the wrapped aggregator and returns its result.
  def update(ary)
    @agg.update(ary[@i])
  end

  def finish
    @agg.finish
  end
end
|
417
|
+
|
418
|
+
# Creates the aggregator object described by spec.
#
# spec - 'count', or NAME(FIELD) where NAME is one of sum, avg, max, min,
#        values, uniquevalues.
# fs   - list of field names; FIELD is looked up in it with index.
#
# Returns a CountAggregator for 'count', otherwise a Selector that applies
# the matching aggregator to FIELD's column.
# Raises ArgumentError when FIELD is not in fs or spec has no known form.
def make_aggregator(spec, fs)
  return CountAggregator.new if spec == 'count'

  m = /\A(sum|avg|max|min|values|uniquevalues)\((.*)\)\z/.match(spec)
  raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}" unless m

  kind, field = m[1], m[2]
  i = fs.index(field)
  raise ArgumentError, "field not found: #{field.inspect}" if !i

  inner =
    case kind
    when 'sum' then SumAggregator.new
    when 'avg' then AvgAggregator.new
    when 'max' then MaxAggregator.new
    when 'min' then MinAggregator.new
    when 'values' then ValuesAggregator.new
    else UniqueValuesAggregator.new
    end
  Selector.new(i, inner)
end
|
456
|
+
|
457
|
+
# Runs a single aggregation over every record of table and returns the
# aggregator's final value.
#
# spec  - aggregation spec understood by make_aggregator.
# table - object responding to list_fields and each; each record must
#         respond to values_at(*fields).
#
# make_aggregator returns one object with update/finish methods, so it is
# used directly; the earlier code destructured it into two callables and
# referenced an undefined local for the field list.
def aggregate(spec, table)
  fields = table.list_fields
  aggregator = make_aggregator(spec, fields)
  table.each {|rec|
    aggregator.update(rec.values_at(*fields))
  }
  aggregator.finish
end
|
464
|
+
|
465
|
+
# Implements "tb csv": parses the options, then writes every table named in
# argv as CSV to the stream provided by with_output.
def main_csv(argv)
  op_csv.parse!(argv)
  each_table_file(argv) do |tbl|
    with_output { |out| tbl_generate_csv(tbl, out) }
  end
end
|
473
|
+
|
474
|
+
# Implements "tb tsv": parses the options, then writes every table named in
# argv as TSV to the stream provided by with_output.
def main_tsv(argv)
  op_tsv.parse!(argv)
  each_table_file(argv) do |tbl|
    with_output { |out| tbl_generate_tsv(tbl, out) }
  end
end
|
482
|
+
|
483
|
+
# Implements "tb json": prints all records of the given tables (standard
# input, written as '-', when none is named) as one JSON array of objects.
#
# Each record becomes an object keyed by the reader's header; nil cells are
# left out.  Records are separated by ",\n"; the first record of a later
# file is preceded by ",\n\n" instead.
def main_json(argv)
  require 'json'
  op_json.parse!(argv)
  argv = ['-'] if argv.empty?
  with_output do |out|
    out.print "["
    sep = nil
    argv.each do |filename|
      # Once something has been printed, widen the separator at a file boundary.
      sep = ",\n\n" if sep
      tablereader_open(filename) do |tblreader|
        tblreader.each do |ary|
          out.print sep if sep
          names = tblreader.header
          obj = {}
          ary.each_with_index do |value, idx|
            obj[names[idx]] = value unless value.nil?
          end
          out.print JSON.pretty_generate(obj)
          sep = ",\n"
        end
      end
    end
    out.puts "]"
  end
end
|
508
|
+
|
509
|
+
# Implements "tb yaml": for every table named in argv, dumps the list of its
# records (each converted with to_h) as YAML, followed by an empty line.
def main_yaml(argv)
  require 'yaml'
  op_yaml.parse!(argv)
  each_table_file(argv) do |tbl|
    records = tbl.map { |rec| rec.to_h }
    with_output do |out|
      YAML.dump(records, out)
      out.puts
    end
  end
end
|
520
|
+
|
521
|
+
# Implements "tb pp": pretty-prints every record of the given tables
# (standard input, written as '-', when none is named) as a Hash from field
# name to value.  nil cells are left out.
#
# The pp library is loaded here, like json/yaml in main_json/main_yaml,
# because the PP constant is otherwise only defined if something else
# happened to require it.
def main_pp(argv)
  require 'pp'
  op_pp.parse!(argv)
  argv.unshift '-' if argv.empty?
  with_output {|out|
    argv.each {|filename|
      tablereader_open(filename) {|tblreader|
        tblreader.each {|ary|
          h = {}
          ary.each_with_index {|v, i|
            next if v.nil?
            h[tblreader.field_from_index_ex(i)] = v
          }
          PP.pp h, out
        }
      }
    }
  }
end
|
539
|
+
|
540
|
+
# Implements "tb grep": copies the records that satisfy a predicate from
# each given table (standard input, written as '-', when none is named).
#
# The predicate is, in order of precedence:
#   --ruby EXP  the expression, evaluated with the record Hash bound to _;
#   -e REGEXP   or else the first positional argument, compiled as a regexp
#               and matched against the -f field, or against every value
#               (converted with to_s) when -f is absent.
# -v inverts the selection.  A missing REGEXP is reported through err
# instead of surfacing as a TypeError from Regexp.new(nil).
def main_grep(argv)
  op_grep.parse!(argv)
  if $opt_grep_ruby
    # The expression comes straight from the command line and is evaluated
    # as Ruby code by design; never pass untrusted text here.
    pred = eval("lambda {|_| #{$opt_grep_ruby} }")
  else
    pattern = $opt_grep_e || argv.shift
    err "tb grep: no regexp given." if pattern.nil?
    re = Regexp.new(pattern)
    pred = if $opt_grep_f
             lambda {|rec| re =~ rec[$opt_grep_f] }
           else
             lambda {|rec| rec.any? {|_field, v| re =~ v.to_s } }
           end
  end
  invert = $opt_grep_v ? true : false
  argv.unshift '-' if argv.empty?
  argv.each {|filename|
    tablereader_open(filename) {|tblreader|
      with_table_stream_output {|gen|
        gen.output_header tblreader.header
        tblreader.each {|ary|
          rec = {}
          ary.each_with_index {|str, i|
            rec[tblreader.field_from_index_ex(i)] = str
          }
          matched = pred.call(rec) ? true : false
          gen << ary if invert ^ matched
        }
      }
    }
  }
end
|
573
|
+
|
574
|
+
# Implements "tb gsub": rewrites cell values with String#gsub.
#
# The regexp comes from -e or else from the first positional argument; the
# next positional argument is the replacement string and the one after it
# the table (standard input, written as '-', when absent).  With -f only
# that field is rewritten, otherwise every cell is.  nil cells are treated
# as '' wherever the substitution is applied.
#
# Missing REGEXP or STRING arguments are reported through err instead of
# surfacing as a TypeError from Regexp.new or gsub.
def main_gsub(argv)
  op_gsub.parse!(argv)
  pattern = $opt_gsub_e || argv.shift
  err "tb gsub: no regexp given." if pattern.nil?
  re = Regexp.new(pattern)
  repl = argv.shift
  err "tb gsub: no substitution string given." if repl.nil?
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    with_table_stream_output {|gen|
      gen.output_header tblreader.header
      tblreader.each {|ary|
        if $opt_gsub_f
          ary2 = []
          ary.each_with_index {|str, i|
            if tblreader.field_from_index_ex(i) == $opt_gsub_f
              ary2 << (str || '').gsub(re, repl)
            else
              ary2 << str
            end
          }
        else
          ary2 = ary.map {|s| (s || '').gsub(re, repl) }
        end
        gen << ary2
      }
    }
  }
end
|
610
|
+
|
611
|
+
# Implements "tb sort": loads one table (standard input, written as '-',
# when none is named), reorders its records and prints the result as CSV.
#
# With -f the records are ordered by the comparison_value of the listed
# fields; without it, by the comparison_value of every value in the record.
def main_sort(argv)
  op_sort.parse!(argv)
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  fs = $opt_sort_f ? split_field_list_argument($opt_sort_f) : nil
  tbl = load_table(filename)
  sort_key =
    if fs
      lambda { |rec| fs.map { |f| comparison_value(rec[f]) } }
    else
      lambda { |rec| rec.map { |_field, v| comparison_value(v) } }
    end
  sorted = tbl.reorder_records_by(&sort_key)
  with_output { |out| tbl_generate_csv(sorted, out) }
end
|
631
|
+
|
632
|
+
# Implements "tb select": outputs a subset of the columns of one table
# (standard input, written as '-', when none is named).
#
# The first positional argument is the field list.  Normally exactly those
# fields are output, in the order given.  With -v the listed fields are
# dropped instead and the rest keep their original order; in that mode no
# header is passed on (nil) when -N is in effect.
def main_select(argv)
  op_select.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) do |tblreader|
    if $opt_select_v
      dropped = {}
      fs.each { |f| dropped[tblreader.index_from_field(f)] = true }
      header = nil
      unless $opt_N
        header = []
        tblreader.header.each_with_index { |f, i| header << f unless dropped[i] }
      end
      with_table_stream_output do |gen|
        gen.output_header(header)
        tblreader.each do |ary|
          kept = []
          ary.each_with_index { |v, i| kept << v unless dropped[i] }
          gen << kept
        end
      end
    else
      # The header is fetched here as in the earlier code although its value
      # is unused; presumably harmless - TODO confirm whether it can go.
      _header = tblreader.header
      is = fs.map { |f| tblreader.index_from_field(f) }
      with_table_stream_output do |gen|
        gen.output_header(is.map { |i| tblreader.field_from_index_ex(i) })
        tblreader.each { |ary| gen << ary.values_at(*is) }
      end
    end
  end
end
|
671
|
+
|
672
|
+
# Implements "tb rename": renames header fields of one table (standard
# input, written as '-', when none is named) and copies the records
# unchanged.
#
# The first positional argument is a flat list SRC,DST,SRC,DST,...
# Raises RuntimeError when a SRC is not present in the header.
def main_rename(argv)
  op_rename.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  renames = {}
  fs.each_slice(2) { |src, dst| renames[src] = dst }
  tablereader_open(filename) do |tblreader|
    header = tblreader.header
    renames.each_key do |src|
      raise "field not defined: #{src.inspect}" unless header.include?(src)
    end
    renamed_header = tblreader.header.map { |f| renames.fetch(f, f) }
    with_table_stream_output do |gen|
      gen.output_header(renamed_header)
      tblreader.each { |ary| gen << ary }
    end
  end
end
|
695
|
+
|
696
|
+
# Implements "tb newfield": prepends a computed column to one table
# (standard input, written as '-', when none is named).
#
# Positional arguments: the new field's name, then a Ruby expression.  The
# expression is evaluated once per record with the record Hash (field name
# to value) bound to _, and its result becomes the first cell of the row.
#
# Options are parsed with op_newfield so that -h shows this subcommand's
# own banner (op_rename was used before).
def main_newfield(argv)
  op_newfield.parse!(argv)
  field = argv.shift
  rubyexp = argv.shift
  # The expression comes straight from the command line and is evaluated as
  # Ruby code by design; never pass untrusted text here.
  pr = eval("lambda {|_| #{rubyexp} }")
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    renamed_header = [field] + tblreader.header
    with_table_stream_output {|gen|
      gen.output_header(renamed_header)
      tblreader.each {|ary|
        h = {}
        ary.each_with_index {|str, i|
          f = tblreader.field_from_index_ex(i)
          h[f] = str
        }
        gen << [pr.call(h), *ary]
      }
    }
  }
end
|
718
|
+
|
719
|
+
# Implements "tb cat": concatenates the records of the given tables
# (standard input, written as '-', when none is named).
#
# With -N every file is copied row by row with no header handling.
# Otherwise the output header is the union of all field names in
# first-seen order, and each value is placed in the column of its field.
# A row longer than its reader's header gets extra field names from
# field_from_index_ex, which are added to the union as they appear.
def main_cat(argv)
  op_cat.parse!(argv)
  argv = ['-'] if argv.empty?
  if $opt_N
    argv.each do |filename|
      with_table_stream_output do |gen|
        tablereader_open(filename) do |tblreader|
          tblreader.each { |ary| gen << ary }
        end
      end
    end
  else
    readers = []
    column_of = {}  # field name => output column index
    argv.each do |filename|
      reader = tablereader_open(filename)
      readers << reader
      reader.header.each { |f| column_of[f] ||= column_of.size }
    end
    with_table_stream_output do |gen|
      gen.output_header column_of.keys.sort_by { |f| column_of[f] }
      readers.each do |reader|
        header = reader.header.dup
        reader.each do |ary|
          while header.length < ary.length
            extra = reader.field_from_index_ex(header.length)
            header << extra
            column_of[extra] ||= column_of.size
          end
          row = []
          ary.each_with_index do |v, i|
            row[column_of.fetch(reader.field_from_index(i))] = v
          end
          gen << row
        end
      end
    end
  end
end
|
764
|
+
|
765
|
+
# Implements "tb join": folds the given tables into one with natjoin2, or
# with natjoin2_outer when an outer-join mode was selected, and prints the
# result as CSV.
#
# $opt_join_outer picks which side's records are retained (:full both,
# :left, :right).  With $opt_debug >= 1 the fields shared with each
# incoming table are written to STDERR.
# Raises RuntimeError for an unknown $opt_join_outer value.
def main_join(argv)
  op_join.parse!(argv)
  result = Tb.new([], [])
  retain_left, retain_right =
    case $opt_join_outer
    when :full then [true, true]
    when :left then [true, false]
    when :right then [false, true]
    when nil then [false, false]
    else raise "unexpected $opt_join_outer: #{$opt_join_outer.inspect}"
    end
  each_table_file(argv) do |tbl|
    STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= $opt_debug
    result =
      if $opt_join_outer
        result.natjoin2_outer(tbl, $opt_join_outer_missing, retain_left, retain_right)
      else
        result.natjoin2(tbl)
      end
  end
  with_output { |out| tbl_generate_csv(result, out) }
end
|
797
|
+
|
798
|
+
# Implements the "group" subcommand: groups the records of one table by the
# values of the key fields and writes one aggregated record per group as CSV.
#
# argv - command-line arguments.  After op_group.parse! the first entry is the
#        comma-separated list of key fields and the optional second entry is
#        the table filename ('-' when omitted).  Further entries only produce
#        a warning.
#
# Each entry of $opt_group_fields is "AGGREGATION_SPEC[,NEW_FIELD]"; the
# result field is named after the spec itself when NEW_FIELD is not given.
def main_group(argv)
  op_group.parse!(argv)
  key_fields = split_field_list_argument(argv.shift)
  aggregations = $opt_group_fields.map {|arg|
    spec, out_field = split_field_list_argument(arg)
    out_field ||= spec
    # The aggregator is built lazily because it needs the reader's header.
    [out_field, lambda {|fields| make_aggregator(spec, fields) }]
  }
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  groups = {}
  tablereader_open(filename) {|tblreader|
    key_indexes = key_fields.map {|f| tblreader.index_from_field(f) }
    result_fields = key_fields + aggregations.map {|out_field, _maker| out_field }
    tblreader.each {|row|
      key = row.values_at(*key_indexes)
      ags = groups[key]
      if ags
        ags.each {|ag| ag.update(row) }
      else
        # First record of this group: create its aggregators and feed them.
        groups[key] = aggregations.map {|_out_field, maker|
          ag = maker.call(tblreader.header)
          ag.update(row)
          ag
        }
      end
    }
    result = Tb.new(result_fields)
    sorted_keys = groups.keys.sort_by {|key| key.map {|v| comparison_value(v) } }
    sorted_keys.each {|key|
      finished = groups[key].map {|ag| ag.finish }
      result.insert_values result_fields, key + finished
    }
    with_output {|out| tbl_generate_csv(result, out) }
  }
end
|
832
|
+
|
833
|
+
# Implements the "cross" subcommand: builds a cross tabulation of one table
# and writes it as CSV.
#
# argv - command-line arguments.  After op_cross.parse! the first entry lists
#        the fields whose values are spread across the output columns (hkfs),
#        the second lists the fields whose values label the output rows
#        (vkfs), and the optional third is the table filename ('-' when
#        omitted).  Further entries only produce a warning.
#
# Each entry of $opt_cross_fields is "AGGREGATION_SPEC[,NEW_FIELD]"; without
# any entry a single 'count' aggregation is used.  With $opt_cross_compact the
# header row for the last horizontal key field is skipped and its values are
# put in the row that starts with the vertical field names instead of the
# aggregation names.
def main_cross(argv)
  op_cross.parse!(argv)
  hkfs = split_field_list_argument(argv.shift)
  vkfs = split_field_list_argument(argv.shift)
  cross_fields =
    if $opt_cross_fields.empty?
      [['count', 'count']]
    else
      $opt_cross_fields.map {|arg|
        spec, label = split_field_list_argument(arg)
        [spec, label || spec]
      }
    end
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    vkis = vkfs.map {|f| tblreader.index_from_field(f) }
    hkis = hkfs.map {|f| tblreader.index_from_field(f) }
    vseen = {}
    hseen = {}
    cells = {}
    tblreader.each {|rec|
      vkey = rec.values_at(*vkis)
      hkey = rec.values_at(*hkis)
      vseen[vkey] = true
      hseen[hkey] = true
      cell_key = [vkey, hkey]
      ags = cells[cell_key]
      if ags
        ags.each {|ag| ag.update(rec) }
      else
        # First record for this (row key, column key) pair.
        cells[cell_key] = cross_fields.map {|spec, _label|
          ag = make_aggregator(spec, tblreader.header)
          ag.update(rec)
          ag
        }
      end
    }
    vary = vseen.keys.sort_by {|key| key.map {|v| comparison_value(v) } }
    hary = hseen.keys.sort_by {|key| key.map {|v| comparison_value(v) } }
    width = cross_fields.length
    with_output {|out|
      Tb.csv_stream_output(out) {|gen|
        # One header row per horizontal key field; each key value is repeated
        # once per aggregation so it lines up with the data columns.
        hkfs.each_with_index {|hkf, i|
          next if $opt_cross_compact && i == hkfs.length - 1
          row = [nil] * (vkfs.length - 1) + [hkf]
          hary.each {|hkey| row.concat([hkey[i]] * width) }
          gen << row
        }
        # The row naming the vertical key fields.
        r = vkfs.dup
        if $opt_cross_compact
          hary.each {|hkey| r.concat([hkey[-1]] * width) }
        else
          labels = cross_fields.map {|_spec, label| label }
          hary.each { r.concat(labels) }
        end
        gen << r
        # Data rows: cells without any record are filled with nil.
        vary.each {|vkey|
          row = vkey.dup
          hary.each {|hkey|
            ags = cells[[vkey, hkey]]
            if ags
              ags.each {|ag| row << ag.finish }
            else
              row.concat([nil] * width)
            end
          }
          gen << row
        }
      }
    }
  }
end
|
906
|
+
|
907
|
+
# Implements the "shape" subcommand: for every input table reports the number
# of header fields, the smallest and largest number of fields found in a
# record, the number of records and the filename, and writes the report as
# CSV.
#
# argv - command-line arguments; after op_shape.parse! the remaining entries
#        are the table filenames ('-' alone when there are none).
#
# For a table without records the minimum and maximum stay nil.
def main_shape(argv)
  op_shape.parse!(argv)
  filenames = argv.empty? ? ['-'] : argv
  result = Tb.new(%w[header_fields min_fields max_fields records filename])
  filenames.each {|filename|
    tablereader_open(filename) {|tblreader|
      header_width = tblreader.header.length
      narrowest = nil
      widest = nil
      count = 0
      tblreader.each {|rec|
        count += 1
        width = rec.length
        narrowest = width if narrowest.nil? || width < narrowest
        widest = width if widest.nil? || widest < width
      }
      result.insert({'header_fields'=>header_width,
                     'min_fields'=>narrowest,
                     'max_fields'=>widest,
                     'records'=>count,
                     'filename'=>filename})
    }
  }
  with_output {|out|
    # tbl_generate_csv() is deliberately not used: the header of this report
    # must always be output.
    result.generate_csv(out)
  }
end
|
939
|
+
|
940
|
+
# Implements the "mheader" subcommand: collapses several leading header rows
# into a single header row and copies the remaining rows unchanged.
#
# argv - command-line arguments; after op_mheader.parse! the optional first
#        entry is the table filename ('-' when omitted).  Further entries only
#        produce a warning.
#
# The values seen in each column are collected row by row (a value equal to
# the previous one in that column is not repeated) and joined with a space.
# With $opt_mheader_count the header ends after that many rows; otherwise it
# ends at the first row after which the joined column names are all distinct.
# A warning is printed when the input ends before the header is complete.
def main_mheader(argv)
  op_mheader.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  # columns[i] holds the header parts of column i; it becomes nil once the
  # merged header has been written.
  columns = []
  merged = lambda { columns.map {|parts| parts.compact.join(' ').strip } }
  if $opt_mheader_count
    remaining = $opt_mheader_count
    header_end_p = lambda {
      remaining -= 1
      remaining == 0 ? merged.call : nil
    }
  else
    header_end_p = lambda {
      names = merged.call.uniq
      columns.length == names.length ? names : nil
    }
  end
  with_table_stream_output {|gen|
    Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
      tblreader.each {|row|
        if columns.nil?
          gen << row
        else
          row.each_with_index {|v, i|
            parts = (columns[i] ||= [])
            parts << v if parts.empty? || parts.last != v
          }
          names = header_end_p.call
          if names
            gen << names
            columns = nil
          end
        end
      }
    }
  }
  warn "no header found." if columns
end
|
980
|
+
|
981
|
+
# Implements the "crop" subcommand: writes a rectangular part of a table.
#
# argv - command-line arguments; after op_crop.parse! the optional first entry
#        is the table filename ('-' when omitted).  Further entries only
#        produce a warning.
#
# With $opt_crop_range the rectangle is given explicitly, either as
# "COL1,ROW1-COL2,ROW2" in numbers or as "A1:C3"-style addressing, both
# 1-based and inclusive; anything else raises ArgumentError.  Rows are then
# processed as they are read.  Without $opt_crop_range the whole table is read
# and the blank border is removed: trailing blank cells of each row, blank
# rows at the top and bottom, and leading columns that are blank in every row
# (blank means nil or the empty string).
def main_crop(argv)
  op_crop.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  range = nil
  if $opt_crop_range
    case $opt_crop_range
    when /\A(\d+),(\d+)-(\d+),(\d+)\z/ # 1-based
      range = [$1.to_i, $2.to_i, $3.to_i, $4.to_i]
    when /\A([A-Z]+)(\d+):([A-Z]+)(\d+)\z/ # 1-based
      range = [decode_a1_addressing_col($1), $2.to_i,
               decode_a1_addressing_col($3), $4.to_i]
    else
      raise ArgumentError, "unexpected range argument: #{$opt_crop_range.inspect}"
    end
  end
  if range
    col1, row1, col2, row2 = range
    with_table_stream_output {|gen|
      Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
        rownum = 0
        tblreader.each {|row|
          rownum += 1
          # Nothing below the last requested row is needed.
          break if row2 < rownum
          if row1 <= rownum
            # Cut the right side first so the left cut does not shift it.
            row.slice!(col2..-1) if col2 < row.length
            row.slice!(0, col1 - 1) if 1 < col1
            gen << row
          end
        }
      }
    }
  else
    blank = lambda {|v| v.nil? || v == '' }
    rows = []
    Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
      tblreader.each {|row|
        row.pop while !row.empty? && blank.call(row.last)
        rows << row
      }
    }
    rows.pop while !rows.empty? && rows.last.all? {|v| blank.call(v) }
    rows.shift while !rows.empty? && rows.first.all? {|v| blank.call(v) }
    if !rows.empty?
      # Drop the first column for as long as it is blank in every row.
      while rows.all? {|row| row.empty? || blank.call(row.first) }
        rows.each {|row| row.shift }
      end
    end
    with_table_stream_output {|gen|
      rows.each {|row| gen << row }
    }
  end
end
|
1045
|
+
|
1046
|
+
# Converts a spreadsheet-style ("A1" addressing) column name into its 1-based
# column number: "A" => 1, "Z" => 26, "AA" => 27, "AZ" => 52, "BA" => 53.
#
# str - the column letters; upper and lower case are both accepted.
#
# Returns the column number as an Integer.
# Raises ArgumentError when str is not a non-empty run of ASCII letters.
# Without this check such input would silently be turned into a meaningless
# number instead of being reported.
def decode_a1_addressing_col(str)
  name = str.to_s
  if name !~ /\A[A-Za-z]+\z/
    raise ArgumentError, "unexpected column name: #{str.inspect}"
  end
  # Column names are bijective base 26: every letter is a digit from 1 to 26.
  name.upcase.each_char.inject(0) {|col, ch| col * 26 + (ch.ord - 'A'.ord + 1) }
end
|
1049
|
+
|
1050
|
+
# Splits a comma-separated field list given on the command line.
#
# arg - the argument text, parsed by split_csv_argument.
#
# Returns the array of fields; an element that split_csv_argument reports as
# nil (or false) is replaced by the empty string.
def split_field_list_argument(arg)
  fields = split_csv_argument(arg)
  fields.map {|field| field ? field : '' }
end
|
1053
|
+
|
1054
|
+
# Parses a command-line argument as CSV and returns its first row.
#
# arg - the argument text, handed to Tb.csv_stream_input.
#
# Returns the first row yielded by Tb.csv_stream_input, or an empty array when
# it yields no row at all.
def split_csv_argument(arg)
  first_row = []
  Tb.csv_stream_input(arg) {|ary|
    first_row = ary
    # Only the first row is wanted; stop reading.
    break
  }
  first_row
end
|
1058
|
+
|
1059
|
+
# Loads each named table with load_table and yields it to the block.
#
# argv - array of table filenames; when it is empty the single name '-' is
#        loaded instead.
def each_table_file(argv)
  filenames = argv.empty? ? ['-'] : argv
  filenames.each {|filename| yield load_table(filename) }
end
|
1069
|
+
|
1070
|
+
# Reads a whole table file into a new Tb object.
#
# filename - name passed to tablereader_open.
#
# All records are read before the reader's header is fetched.  Records shorter
# than the header are padded with nil so that every record supplies one value
# per header field.
#
# Returns whatever tablereader_open returns for the block, whose value is the
# filled Tb object.
def load_table(filename)
  tablereader_open(filename) {|tblreader|
    rows = []
    tblreader.each {|row| rows << row }
    header = tblreader.header
    tbl = Tb.new(header)
    rows.each {|row|
      missing = header.length - row.length
      row.concat([nil] * missing) if 0 < missing
      tbl.insert_values header, row
    }
    tbl
  }
end
|
1085
|
+
|
1086
|
+
# Opens a table file with Tb::Reader.open, passing the global $opt_N as the
# reader's :numeric option.
#
# filename - name of the table file.
# b        - optional block, forwarded unchanged to Tb::Reader.open.
#
# Returns the result of Tb::Reader.open.
def tablereader_open(filename, &b)
  reader_options = {:numeric=>$opt_N}
  Tb::Reader.open(filename, reader_options, &b)
end
|
1089
|
+
|
1090
|
+
# Yields a CSV row generator that writes to the command's output.
#
# The output IO comes from with_output and the generator from
# Tb.csv_stream_output.  Before it is yielded, the generator gains a singleton
# method output_header(header) which appends the header row unless $opt_N is
# set.
def with_table_stream_output
  with_output {|out|
    Tb.csv_stream_output(out) {|gen|
      class << gen
        def output_header(header)
          self << header if !$opt_N
        end
      end
      yield gen
    }
  }
end
|
1100
|
+
|
1101
|
+
# Writes a table to out as CSV.
#
# tbl - the table; must respond to generate_csv, list_fields and each.
# out - the IO to write to.
#
# Normally the table writes itself through tbl.generate_csv.  When $opt_N is
# set, only the values of each record (in list_fields order) are streamed
# through Tb.csv_stream_output -- presumably so that no header line is written.
def tbl_generate_csv(tbl, out)
  return tbl.generate_csv(out) if !$opt_N
  fields = tbl.list_fields
  Tb.csv_stream_output(out) {|gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  }
end
|
1113
|
+
|
1114
|
+
# Writes a table to out as TSV.
#
# tbl - the table; must respond to generate_tsv, list_fields and each.
# out - the IO to write to.
#
# Normally the table writes itself through tbl.generate_tsv.  When $opt_N is
# set, only the values of each record (in list_fields order) are streamed
# through Tb.tsv_stream_output -- presumably so that no header line is written.
def tbl_generate_tsv(tbl, out)
  return tbl.generate_tsv(out) if !$opt_N
  fields = tbl.list_fields
  Tb.tsv_stream_output(out) {|gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  }
end
|
1126
|
+
|
1127
|
+
# Yields the IO that command output should be written to.
#
# When standard output is a terminal and $opt_no_pager is not set, a pager
# process is started and its standard input is yielded.  The pager command is
# taken from the PAGER environment variable, falling back to 'more' when the
# variable is unset or empty.  Otherwise STDOUT itself is yielded.
#
# Returns the value of the block, or nil when the pager went away before all
# output was written.
def with_output
  if STDOUT.tty? && !$opt_no_pager
    pager_command = ENV['PAGER']
    # An empty PAGER cannot be executed; treat it like an unset one.
    pager_command = 'more' if pager_command.nil? || pager_command.empty?
    begin
      IO.popen(pager_command, 'w') {|pager|
        yield pager
      }
    rescue Errno::EPIPE
      # The user quit the pager before reading everything.  That is a normal
      # way to stop viewing, not an error, so the remaining output is dropped.
      nil
    end
  else
    yield STDOUT
  end
end
|
1136
|
+
|
1137
|
+
# Script entry point: hand the command-line arguments to main (defined outside
# this section of the file).
main ARGV
|