tb 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,60 @@
1
+ = tb - manipulation tool for table: CSV, TSV, etc.
2
+
3
+ == Usage
4
+
5
+ % tb help
6
+ Usage:
7
+ tb csv [OPTS] [TABLE]
8
+ tb tsv [OPTS] [TABLE]
9
+ tb json [OPTS] [TABLE]
10
+ tb yaml [OPTS] [TABLE]
11
+ tb pp [OPTS] [TABLE]
12
+ tb grep [OPTS] REGEXP [TABLE]
13
+ tb gsub [OPTS] REGEXP STRING [TABLE]
14
+ tb sort [OPTS] [TABLE]
15
+ tb select [OPTS] FIELD,... [TABLE]
16
+ tb rename [OPTS] SRC,DST,... [TABLE]
17
+ tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
18
+ tb cat [OPTS] [TABLE ...]
19
+ tb join [OPTS] [TABLE ...]
20
+ tb group [OPTS] [TABLE]
21
+ tb cross [OPTS] [TABLE]
22
+ tb shape [OPTS] [TABLE ...]
23
+ tb mheader [OPTS] [TABLE]
24
+ tb crop [OPTS] [TABLE]
25
+
26
+
27
+
28
+ == Install
29
+
30
+ gem install tb
31
+
32
+ == Author
33
+
34
+ Tanaka Akira <akr@fsij.org>
35
+
36
+ == License
37
+
38
+ Redistribution and use in source and binary forms, with or without
39
+ modification, are permitted provided that the following conditions are met:
40
+
41
+ (1) Redistributions of source code must retain the above copyright notice, this
42
+ list of conditions and the following disclaimer.
43
+ (2) Redistributions in binary form must reproduce the above copyright notice,
44
+ this list of conditions and the following disclaimer in the documentation
45
+ and/or other materials provided with the distribution.
46
+ (3) The name of the author may not be used to endorse or promote products
47
+ derived from this software without specific prior written permission.
48
+
49
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
50
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
51
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
52
+ EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
53
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
54
+ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
57
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
58
+ OF SUCH DAMAGE.
59
+
60
+ (The modified BSD licence)
data/bin/tb ADDED
@@ -0,0 +1,1137 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ require 'tb'
28
+ require 'optparse'
29
+ require 'enumerator'
30
+
31
# Entry point of the command: removes the subcommand name from the front of
# +argv+ and passes the remaining arguments to the matching main_* method.
# A missing or unrecognized subcommand is reported through +err+.
def main(argv)
  name = argv.shift
  return err("Usage: tb subcommand args...") if name.nil?
  # 'help' and '-h' are two spellings of the same subcommand.
  return main_help(argv) if name == 'help' || name == '-h'
  known = %w[csv tsv json yaml pp grep gsub sort select rename newfield
             cat join group cross shape mheader crop]
  if known.include?(name)
    send("main_#{name}", argv)
  else
    err "unexpected subcommand: #{name.inspect}"
  end
end
59
+
60
# Prints the list of subcommands to standard output, then terminates the
# process with +status+ (anything Kernel#exit accepts).
#
# The "group" and "cross" lines list the positional key-field arguments that
# main_group and main_cross take from the command line, matching the banners
# of op_group and op_cross.
def usage(status)
  print <<'End'
Usage:
  tb csv [OPTS] [TABLE]
  tb tsv [OPTS] [TABLE]
  tb json [OPTS] [TABLE]
  tb yaml [OPTS] [TABLE]
  tb pp [OPTS] [TABLE]
  tb grep [OPTS] REGEXP [TABLE]
  tb gsub [OPTS] REGEXP STRING [TABLE]
  tb sort [OPTS] [TABLE]
  tb select [OPTS] FIELD,... [TABLE]
  tb rename [OPTS] SRC,DST,... [TABLE]
  tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
  tb cat [OPTS] [TABLE ...]
  tb join [OPTS] [TABLE ...]
  tb group [OPTS] KEY-FIELD1,... [TABLE]
  tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
  tb shape [OPTS] [TABLE ...]
  tb mheader [OPTS] [TABLE]
  tb crop [OPTS] [TABLE]
End
  exit status
end
84
+
85
# Implements "tb help [SUBCOMMAND]".  With a subcommand name it prints the
# option parser built by the matching op_* method; with no name it calls
# usage(true); any other name is reported through +err+.
def main_help(argv)
  topic = argv.shift
  return usage(true) if topic.nil?
  topics = %w[csv tsv json yaml pp grep gsub sort select rename newfield
              cat join group cross shape mheader crop]
  if topics.include?(topic)
    puts send("op_#{topic}")
  else
    err "unexpected subcommand: #{topic.inspect}"
  end
end
112
+
113
# Option state shared by several subcommands; the op_* parsers below assign
# these from their option handlers.
$opt_N = nil
$opt_debug = 0
$opt_no_pager = nil

# Builds the option parser for "tb csv": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_csv
  OptionParser.new('Usage: tb csv [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
125
+
126
# Builds the option parser for "tb tsv": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_tsv
  OptionParser.new('Usage: tb tsv [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
134
+
135
# Builds the option parser for "tb json": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_json
  OptionParser.new('Usage: tb json [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
143
+
144
# Builds the option parser for "tb yaml": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_yaml
  OptionParser.new('Usage: tb yaml [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
152
+
153
# Builds the option parser for "tb pp": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_pp
  OptionParser.new('Usage: tb pp [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
161
+
162
# Option state of "tb grep", filled in by the parser built in op_grep.
$opt_grep_e = nil
$opt_grep_ruby = nil
$opt_grep_f = nil
$opt_grep_v = nil

# Builds the option parser for "tb grep".
#
# -e stores a regexp source in $opt_grep_e and --ruby stores a Ruby predicate
# in $opt_grep_ruby; main_grep compiles the former with Regexp.new and evals
# the latter, so the help texts describe them in that way.
def op_grep
  op = OptionParser.new
  op.banner = 'Usage: tb grep [OPTS] REGEXP [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_grep_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_grep_e = pattern }
  op.def_option('--ruby RUBY-EXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|ruby_exp| $opt_grep_ruby = ruby_exp }
  op.def_option('-v', 'output the records which don\'t match') { $opt_grep_v = true }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
178
+
179
# Option state of "tb gsub", filled in by the parser built in op_gsub.
$opt_gsub_e = nil
$opt_gsub_f = nil

# Builds the option parser for "tb gsub".
#
# -e stores a regexp source in $opt_gsub_e; main_gsub compiles it with
# Regexp.new in place of the positional REGEXP argument, so the help text
# describes it as a regexp.
def op_gsub
  op = OptionParser.new
  op.banner = 'Usage: tb gsub [OPTS] REGEXP STRING [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_gsub_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_gsub_e = pattern }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
191
+
192
# Sort key list given with "tb sort -f"; nil when the option is absent.
$opt_sort_f = nil

# Builds the option parser for "tb sort": -h, -N, -f FIELD,... and --no-pager.
def op_sort
  OptionParser.new('Usage: tb sort [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('-f FIELD,...', 'specify sort keys') { |keys| $opt_sort_f = keys }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
202
+
203
# Set to true by "tb select -v".
$opt_select_v = nil

# Builds the option parser for "tb select": -h, -N, -v and --no-pager.
def op_select
  OptionParser.new('Usage: tb select [OPTS] FIELD,... [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('-v', 'invert match') { $opt_select_v = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
213
+
214
# Builds the option parser for "tb rename": -h prints the parser and exits 0,
# --no-pager sets $opt_no_pager.
def op_rename
  OptionParser.new('Usage: tb rename [OPTS] SRC,DST,... [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
221
+
222
# Builds the option parser for "tb newfield": -h prints the parser and exits
# 0, --no-pager sets $opt_no_pager.
def op_newfield
  OptionParser.new('Usage: tb newfield [OPTS] FIELD RUBY-EXP [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
229
+
230
# Builds the option parser for "tb cat": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_cat
  OptionParser.new('Usage: tb cat [OPTS] [TABLE ...]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
238
+
239
# Option state of "tb join": the outer-join mode (:full, :left, :right or
# nil) and the filler given with --outer-missing.
$opt_join_outer = nil
$opt_join_outer_missing = nil

# Builds the option parser for "tb join".  --outer-missing also switches the
# mode to :full when no mode has been chosen yet.
def op_join
  OptionParser.new('Usage: tb join [OPTS] [TABLE ...]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-d', '--debug', 'show debug message') { $opt_debug += 1 }
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--outer', 'outer join') { $opt_join_outer = :full }
    parser.on('--left', 'left outer join') { $opt_join_outer = :left }
    parser.on('--right', 'right outer join') { $opt_join_outer = :right }
    parser.on('--outer-missing=DEFAULT', 'missing value for outer join') do |filler|
      $opt_join_outer ||= :full
      $opt_join_outer_missing = filler
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
257
+
258
# Raw -a/--aggregate arguments of "tb group", in command-line order.
$opt_group_fields = []

# Builds the option parser for "tb group": -h, repeatable -a/--aggregate and
# --no-pager.
def op_group
  OptionParser.new('Usage: tb group [OPTS] KEY-FIELD1,... [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
              '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') do |spec|
      $opt_group_fields << spec
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
268
+
269
# Option state of "tb cross": raw -a/--aggregate arguments and the
# -c/--compact flag.
$opt_cross_fields = []
$opt_cross_compact = false

# Builds the option parser for "tb cross": -h, repeatable -a/--aggregate,
# -c/--compact and --no-pager.
def op_cross
  OptionParser.new('Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
              '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') do |spec|
      $opt_cross_fields << spec
    end
    parser.on('-c', '--compact', 'compact format') { $opt_cross_compact = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
281
+
282
# Builds the option parser for "tb shape": -h prints the parser and exits 0,
# -N sets $opt_N, --no-pager sets $opt_no_pager.
def op_shape
  OptionParser.new('Usage: tb shape [OPTS] [TABLE ...]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
290
+
291
# Number of header records given with "tb mheader -c"; nil when absent.
$opt_mheader_count = nil

# Builds the option parser for "tb mheader": -h, -c N (stored via to_i) and
# --no-pager.
def op_mheader
  OptionParser.new('Usage: tb mheader [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-c N', 'number of header records') { |count| $opt_mheader_count = count.to_i }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
300
+
301
# Range text given with "tb crop -r"; nil when the option is absent.
$opt_crop_range = nil

# Builds the option parser for "tb crop": -h, -r RANGE and --no-pager.
def op_crop
  OptionParser.new('Usage: tb crop [OPTS] [TABLE]') do |parser|
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') { |range| $opt_crop_range = range }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
310
+
311
# Reports +msg+ on STDERR and terminates the process with exit status 1.
def err(msg)
  STDERR.puts(msg)
  exit(1)
end
315
+
316
# Converts a cell value into an array that can be ordered with <=>.
#
# * nil becomes [].
# * a Numeric becomes [0, v].
# * a String that looks like an integer or a float becomes [0, number].
# * any other String is split into digit and non-digit runs; a digit run
#   contributes 0 followed by its integer value, any other run contributes 1
#   followed by the text itself.
#
# Raises ArgumentError for any other kind of value.
def comparison_value(v)
  case v
  when nil
    []
  when Numeric
    [0, v]
  when String
    case v
    when /\A\s*-?\d+\s*\z/
      # String#to_i always reads base 10.  Integer(v) would take a leading
      # zero as an octal prefix: "010" would become 8 and "09" would raise.
      [0, v.to_i]
    when /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
      # The pattern accepts a bare trailing dot ("1." or "1.e5"), which
      # Float() rejects on Ruby versions before 3.4, so complete the
      # fraction before converting.
      [0, v.strip.sub(/\.(?=[eE]|\z)/, '.0').to_f]
    else
      a = []
      v.scan(/(\d+)|\D+/) {
        if $1
          a << 0 << $1.to_i
        else
          a << 1 << $&
        end
      }
      a
    end
  else
    raise ArgumentError, "unexpected: #{v.inspect}"
  end
end
343
+
344
# Converts the string +v+ to a number after stripping surrounding
# whitespace: an Integer for plain digits, a Float for decimal or exponent
# notation.  Raises RuntimeError for any other text.
def conv_to_numeric(v)
  text = v.strip
  return text.to_i if /\A-?\d+\z/ =~ text
  return text.to_f if /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/ =~ text
  raise "numeric value expected: #{text.inspect}"
end
355
+
356
# Aggregator that counts how many times +update+ was called; the value
# passed to +update+ is ignored.
class CountAggregator
  def initialize
    @result = 0
  end

  def update(v)
    @result += 1
  end

  def finish
    @result
  end
end
361
+
362
# Aggregator that adds up the numeric value of each input (converted with
# conv_to_numeric).  nil and the empty string are skipped.
class SumAggregator
  def initialize
    @result = 0
  end

  def update(v)
    return if v.nil? || v == ''
    @result += conv_to_numeric(v)
  end

  def finish
    @result
  end
end
367
+
368
# Aggregator that divides the sum of the numeric inputs by the number of
# +update+ calls.  nil and the empty string add nothing to the sum but are
# still counted in the divisor.
class AvgAggregator
  def initialize
    @sum = 0
    @count = 0
  end

  def update(v)
    @count += 1
    return if v.nil? || v == ''
    @sum += conv_to_numeric(v)
  end

  def finish
    @sum / @count.to_f
  end
end
373
+
374
# Aggregator that keeps the input whose comparison_value is greatest.  When
# two inputs compare equal the earlier one is kept.
class MaxAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  def update(v)
    candidate = comparison_value(v)
    # The first input always wins; later ones only when strictly greater.
    if @cmp.nil? || (@cmp <=> candidate) < 0
      @v, @cmp = v, candidate
    end
  end

  def finish
    @v
  end
end
386
+
387
# Aggregator that keeps the input whose comparison_value is smallest.  When
# two inputs compare equal the earlier one is kept.
class MinAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  def update(v)
    candidate = comparison_value(v)
    # The first input always wins; later ones only when strictly smaller.
    if @cmp.nil? || (@cmp <=> candidate) > 0
      @v, @cmp = v, candidate
    end
  end

  def finish
    @v
  end
end
399
+
400
# Aggregator that collects every truthy input and returns them joined with
# commas, in input order.
class ValuesAggregator
  def initialize
    @result = []
  end

  def update(v)
    return unless v
    @result << v
  end

  def finish
    @result.join(",")
  end
end
405
+
406
# Aggregator that collects every truthy input and returns the distinct
# values joined with commas, in order of first appearance.
class UniqueValuesAggregator
  def initialize
    @result = []
  end

  def update(v)
    return unless v
    @result << v
  end

  def finish
    @result.uniq.join(",")
  end
end
411
+
412
# Adapter that feeds element +i+ of each record array to the wrapped
# aggregator and returns that aggregator's result.
class Selector
  def initialize(i, aggregator)
    @i = i
    @agg = aggregator
  end

  def update(ary)
    @agg.update(ary[@i])
  end

  def finish
    @agg.finish
  end
end
417
+
418
# Creates the aggregator described by +spec+.
#
# 'count' yields a CountAggregator.  'NAME(FIELD)', where NAME is one of sum,
# avg, max, min, values or uniquevalues, yields a Selector that feeds the
# column at FIELD's position in +fs+ to the matching aggregator.
#
# Raises ArgumentError when FIELD is not in +fs+ or when +spec+ has any
# other form.
def make_aggregator(spec, fs)
  return CountAggregator.new if 'count' === spec
  unless /\A(sum|avg|max|min|values|uniquevalues)\((.*)\)\z/ === spec
    raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
  end
  kind, field = $1, $2
  i = fs.index(field)
  raise ArgumentError, "field not found: #{field.inspect}" if !i
  aggregator_class =
    case kind
    when 'sum' then SumAggregator
    when 'avg' then AvgAggregator
    when 'max' then MaxAggregator
    when 'min' then MinAggregator
    when 'values' then ValuesAggregator
    else UniqueValuesAggregator
    end
  Selector.new(i, aggregator_class.new)
end
456
+
457
# Runs the aggregation described by +spec+ over every record of +table+ and
# returns the aggregator's final value.
#
# make_aggregator returns one object that responds to +update+ and +finish+
# (not a pair of procs), and it resolves field positions against the list it
# is given, so each record is flattened with values_at in that same
# table.list_fields order.
def aggregate(spec, table)
  fs = table.list_fields
  aggregator = make_aggregator(spec, fs)
  table.each {|rec|
    aggregator.update(rec.values_at(*fs))
  }
  aggregator.finish
end
464
+
465
# "tb csv": parses the csv options, then passes each table yielded by
# each_table_file to tbl_generate_csv together with the output stream
# yielded by with_output.
def main_csv(argv)
  op_csv.parse!(argv)
  each_table_file(argv) do |table|
    with_output do |out|
      tbl_generate_csv(table, out)
    end
  end
end
473
+
474
# "tb tsv": parses the tsv options, then passes each table yielded by
# each_table_file to tbl_generate_tsv together with the output stream
# yielded by with_output.
def main_tsv(argv)
  op_tsv.parse!(argv)
  each_table_file(argv) do |table|
    with_output do |out|
      tbl_generate_tsv(table, out)
    end
  end
end
482
+
483
# "tb json": prints the records of all input tables as one JSON array of
# objects.  Each record becomes an object keyed by the reader's header; nil
# values are left out.  Reads "-" when no file is given.
def main_json(argv)
  require 'json'
  op_json.parse!(argv)
  argv = ['-'] if argv.empty?
  with_output do |out|
    out.print "["
    separator = nil
    argv.each do |filename|
      # Once something has been printed, separate files with a blank line.
      separator = ",\n\n" if separator
      tablereader_open(filename) do |reader|
        reader.each do |values|
          out.print separator if separator
          names = reader.header
          record = {}
          values.each_with_index do |value, idx|
            record[names[idx]] = value unless value.nil?
          end
          out.print JSON.pretty_generate(record)
          separator = ",\n"
        end
      end
    end
    out.puts "]"
  end
end
508
+
509
# "tb yaml": for each table yielded by each_table_file, dumps the array of
# its records (each converted with to_h) as YAML, followed by an empty line.
def main_yaml(argv)
  require 'yaml'
  op_yaml.parse!(argv)
  each_table_file(argv) do |table|
    records = table.map { |rec| rec.to_h }
    with_output do |out|
      YAML.dump(records, out)
      out.puts
    end
  end
end
520
+
521
# "tb pp": pretty-prints every record of the input tables as a Hash from
# field name to value; nil values are left out.  Reads "-" when no file is
# given.
def main_pp(argv)
  # The PP constant is only defined once the pp library is loaded; require
  # it here, the same way main_json and main_yaml load their libraries.
  require 'pp'
  op_pp.parse!(argv)
  argv.unshift '-' if argv.empty?
  with_output {|out|
    argv.each {|filename|
      tablereader_open(filename) {|tblreader|
        tblreader.each {|ary|
          h = {}
          ary.each_with_index {|v, i|
            next if v.nil?
            h[tblreader.field_from_index_ex(i)] = v
          }
          PP.pp h, out
        }
      }
    }
  }
end
539
+
540
# "tb grep": copies the records that satisfy a predicate (or, with -v, those
# that do not) from each input table to the output, after that table's
# header.
#
# The predicate is the --ruby expression when given (evaluated as Ruby code
# taken from the command line, with the record hash bound to _); otherwise
# it is a regexp from -e or from the first positional argument, matched
# against the -f field or against every value.
def main_grep(argv)
  op_grep.parse!(argv)
  if $opt_grep_ruby
    pred = eval("lambda {|_| #{$opt_grep_ruby} }")
  else
    # -e takes the place of the positional REGEXP argument.
    re = Regexp.new($opt_grep_e || argv.shift)
    pred = if $opt_grep_f
             lambda {|_| re =~ _[$opt_grep_f] }
           else
             lambda {|_| _.any? {|k, v| re =~ v.to_s } }
           end
  end
  opt_v = $opt_grep_v ? true : false
  argv.unshift '-' if argv.empty?
  argv.each do |filename|
    tablereader_open(filename) do |tblreader|
      with_table_stream_output do |gen|
        gen.output_header(tblreader.header)
        tblreader.each do |ary|
          record = {}
          ary.each_with_index do |str, i|
            record[tblreader.field_from_index_ex(i)] = str
          end
          # XOR with the -v flag inverts the selection.
          gen << ary if opt_v ^ !!pred.call(record)
        end
      end
    end
  end
end
573
+
574
# "tb gsub": rewrites field values with String#gsub.  The regexp comes from
# -e or from the first positional argument, followed by the replacement and
# an optional file name ("-" by default).  With -f only that field is
# rewritten; otherwise every value is.  A nil value is treated as the empty
# string wherever the substitution is applied.
def main_gsub(argv)
  op_gsub.parse!(argv)
  re = Regexp.new($opt_gsub_e || argv.shift)
  repl = argv.shift
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  tablereader_open(filename) do |reader|
    with_table_stream_output do |gen|
      gen.output_header(reader.header)
      reader.each do |ary|
        rewritten = []
        if $opt_gsub_f
          ary.each_with_index do |str, i|
            if reader.field_from_index_ex(i) == $opt_gsub_f
              rewritten << (str || '').gsub(re, repl)
            else
              rewritten << str
            end
          end
        else
          ary.each { |str| rewritten << (str || '').gsub(re, repl) }
        end
        gen << rewritten
      end
    end
  end
end
610
+
611
# "tb sort": loads the table, reorders its records by the comparison_value
# of the -f fields (or of every field when -f is absent) and writes the
# result as CSV.
def main_sort(argv)
  op_sort.parse!(argv)
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  keys = $opt_sort_f ? split_field_list_argument($opt_sort_f) : nil
  table = load_table(filename)
  sort_key = if keys
               lambda {|rec| keys.map {|f| comparison_value(rec[f]) } }
             else
               lambda {|rec| rec.map {|k, v| comparison_value(v) } }
             end
  sorted = table.reorder_records_by(&sort_key)
  with_output do |out|
    tbl_generate_csv(sorted, out)
  end
end
631
+
632
# "tb select": writes only the listed fields, in the listed order; with -v
# it writes every field except the listed ones, in their original order.
# The first argument is the field list, the second an optional file name
# ("-" by default).
def main_select(argv)
  op_select.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    if $opt_select_v
      # Indexes of the columns to drop.
      excluded = {}
      fs.each {|f| excluded[tblreader.index_from_field(f)] = true }
      # With -N the header passed to output_header stays nil.
      header = nil
      if !$opt_N
        header = []
        tblreader.header.each_with_index {|f, i|
          header << f if !excluded[i]
        }
      end
      with_table_stream_output {|gen|
        gen.output_header(header)
        tblreader.each {|ary|
          values = []
          ary.each_with_index {|v, i|
            values << v if !excluded[i]
          }
          gen << values
        }
      }
    else
      is = fs.map {|f| tblreader.index_from_field(f) }
      with_table_stream_output {|gen|
        gen.output_header(is.map {|i| tblreader.field_from_index_ex(i) })
        tblreader.each {|ary|
          gen << ary.values_at(*is)
        }
      }
    end
  }
end
671
+
672
# "tb rename": renames header fields.  The first argument is a list of
# SRC,DST pairs, the second an optional file name ("-" by default).  Raises
# RuntimeError when a SRC field is not in the header; records are copied
# unchanged.
def main_rename(argv)
  op_rename.parse!(argv)
  pairs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  mapping = {}
  pairs.each_slice(2) { |src, dst| mapping[src] = dst }
  tablereader_open(filename) do |reader|
    header = reader.header
    mapping.each_key do |src|
      raise "field not defined: #{src.inspect}" unless header.include?(src)
    end
    renamed_header = reader.header.map { |f| mapping.fetch(f, f) }
    with_table_stream_output do |gen|
      gen.output_header(renamed_header)
      reader.each { |ary| gen << ary }
    end
  end
end
695
+
696
# "tb newfield": prepends a new field to every record.  The arguments are
# the new field's name, a Ruby expression and an optional file name ("-" by
# default).  For each record the expression is called with the record as a
# Hash from field name to value (bound to _), and its result becomes the
# first value of the output record.
def main_newfield(argv)
  # Use this subcommand's own parser so that -h shows the newfield banner.
  op_newfield.parse!(argv)
  field = argv.shift
  rubyexp = argv.shift
  # NOTE: evaluates Ruby code taken verbatim from the command line.
  pr = eval("lambda {|_| #{rubyexp} }")
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    renamed_header = [field] + tblreader.header
    with_table_stream_output {|gen|
      gen.output_header(renamed_header)
      tblreader.each {|ary|
        h = {}
        ary.each_with_index {|str, i|
          f = tblreader.field_from_index_ex(i)
          h[f] = str
        }
        gen << [pr.call(h), *ary]
      }
    }
  }
end
718
+
719
# "tb cat": concatenates the input tables ("-" when none is given).
#
# With -N every record is copied as-is, one output stream per file.
# Otherwise the output header is the union of all input headers in order of
# first appearance, and each value is placed in the column of its field.
def main_cat(argv)
  op_cat.parse!(argv)
  argv = ['-'] if argv.empty?
  if $opt_N
    argv.each do |filename|
      with_table_stream_output do |gen|
        tablereader_open(filename) do |reader|
          reader.each { |ary| gen << ary }
        end
      end
    end
  else
    readers = []
    # Maps a field name to its column position in the output.
    positions = {}
    argv.each do |filename|
      reader = tablereader_open(filename)
      readers << reader
      reader.header.each { |f| positions[f] ||= positions.size }
    end
    with_table_stream_output do |gen|
      gen.output_header(positions.keys.sort_by { |k| positions[k] })
      readers.each do |reader|
        header = reader.header.dup
        reader.each do |ary|
          # A record longer than the header gets extra field names from the
          # reader; register each of them as a new output column.
          while header.length < ary.length
            f = reader.field_from_index_ex(header.length)
            header << f
            positions[f] ||= positions.size
          end
          merged = []
          ary.each_with_index do |v, i|
            merged[positions.fetch(reader.field_from_index(i))] = v
          end
          gen << merged
        end
      end
    end
  end
end
764
+
765
# "tb join": folds all input tables into one, starting from an empty Tb,
# with natjoin2 (or natjoin2_outer when an outer mode was selected), then
# writes the result as CSV.  With -d the field names shared by the
# accumulated result and the next table are printed on STDERR before each
# join.
def main_join(argv)
  op_join.parse!(argv)
  result = Tb.new([], [])
  mode = $opt_join_outer
  unless [nil, :full, :left, :right].include?(mode)
    raise "unexpected $opt_join_outer: #{$opt_join_outer.inspect}"
  end
  retain_left = mode == :full || mode == :left
  retain_right = mode == :full || mode == :right
  each_table_file(argv) do |tbl|
    if 1 <= $opt_debug
      STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}"
    end
    result = if $opt_join_outer
               result.natjoin2_outer(tbl, $opt_join_outer_missing, retain_left, retain_right)
             else
               result.natjoin2(tbl)
             end
  end
  with_output do |out|
    tbl_generate_csv(result, out)
  end
end
797
+
798
# "tb group": groups records by the key fields given as the first argument
# and writes one CSV record per group: the key values followed by the
# result of every -a aggregation.  Groups are ordered by the
# comparison_value of their key values.
def main_group(argv)
  op_group.parse!(argv)
  key_fields = split_field_list_argument(argv.shift)
  # Pairs of [output field name, factory building a fresh aggregator].
  aggregations = $opt_group_fields.map do |arg|
    spec, name = split_field_list_argument(arg)
    [name || spec, lambda {|fields| make_aggregator(spec, fields) }]
  end
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  groups = {}
  tablereader_open(filename) do |reader|
    key_indexes = key_fields.map {|f| reader.index_from_field(f) }
    result_fields = key_fields + aggregations.map {|name, _maker| name }
    reader.each do |ary|
      key = ary.values_at(*key_indexes)
      if groups.include?(key)
        groups[key].each {|ag| ag.update(ary) }
      else
        groups[key] = aggregations.map do |_name, maker|
          ag = maker.call(reader.header)
          ag.update(ary)
          ag
        end
      end
    end
    result = Tb.new(result_fields)
    sorted_keys = groups.keys.sort_by {|k| k.map {|v| comparison_value(v) } }
    sorted_keys.each do |k|
      result.insert_values(result_fields, k + groups[k].map {|ag| ag.finish })
    end
    with_output do |out|
      tbl_generate_csv(result, out)
    end
  end
end
832
+
833
# "tb cross": builds a cross tabulation.  The first argument lists the
# fields whose values run across the top, the second the fields whose
# values run down the side.  Each cell holds the result of every -a
# aggregation ("count" when none is given) over the records sharing both
# keys; a combination that never occurs is filled with nil.
def main_cross(argv)
  op_cross.parse!(argv)
  hkfs = split_field_list_argument(argv.shift)
  vkfs = split_field_list_argument(argv.shift)
  if $opt_cross_fields.empty?
    aggs = [['count', 'count']]
  else
    aggs = $opt_cross_fields.map do |arg|
      spec, name = split_field_list_argument(arg)
      [spec, name || spec]
    end
  end
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  tablereader_open(filename) do |reader|
    vkis = vkfs.map {|f| reader.index_from_field(f) }
    hkis = hkfs.map {|f| reader.index_from_field(f) }
    vset = {}
    hset = {}
    cells = {}
    reader.each do |ary|
      vkey = ary.values_at(*vkis)
      hkey = ary.values_at(*hkis)
      vset[vkey] = true unless vset.include?(vkey)
      hset[hkey] = true unless hset.include?(hkey)
      cell_key = [vkey, hkey]
      if cells.include?(cell_key)
        cells[cell_key].each {|ag| ag.update(ary) }
      else
        cells[cell_key] = aggs.map do |spec, _name|
          ag = make_aggregator(spec, reader.header)
          ag.update(ary)
          ag
        end
      end
    end
    vary = vset.keys.sort_by {|a| a.map {|v| comparison_value(v) } }
    hary = hset.keys.sort_by {|a| a.map {|v| comparison_value(v) } }
    with_output do |out|
      Tb.csv_stream_output(out) do |gen|
        # One heading row per horizontal key field; in compact mode the last
        # one is merged into the row that follows.
        hkfs.each_with_index do |hkf, i|
          next if $opt_cross_compact && i == hkfs.length - 1
          row = [nil] * (vkfs.length - 1) + [hkf]
          hary.each {|hkey| aggs.length.times { row << hkey[i] } }
          gen << row
        end
        heading = vkfs.dup
        if $opt_cross_compact
          hary.each {|hkey| heading.concat([hkey[-1]] * aggs.length) }
        else
          names = aggs.map {|_spec, name| name }
          hary.each { heading.concat(names) }
        end
        gen << heading
        vary.each do |vkey|
          row = vkey.dup
          hary.each do |hkey|
            ags = cells[[vkey, hkey]]
            if ags
              ags.each {|ag| row << ag.finish }
            else
              aggs.length.times { row << nil }
            end
          end
          gen << row
        end
      end
    end
  end
end
906
+
907
# "tb shape": for every input table ("-" when none is given) records the
# number of header fields, the smallest and largest record length, the
# number of records and the file name, then writes that summary as CSV.
# min_fields and max_fields stay nil for a table without records.
def main_shape(argv)
  op_shape.parse!(argv)
  filenames = argv.empty? ? ['-'] : argv
  result = Tb.new(%w[header_fields min_fields max_fields records filename])
  filenames.each do |filename|
    tablereader_open(filename) do |reader|
      header_width = reader.header.length
      narrowest = nil
      widest = nil
      count = 0
      reader.each do |ary|
        count += 1
        width = ary.length
        narrowest = width if narrowest.nil? || width < narrowest
        widest = width if widest.nil? || widest < width
      end
      result.insert({'header_fields'=>header_width,
                     'min_fields'=>narrowest,
                     'max_fields'=>widest,
                     'records'=>count,
                     'filename'=>filename})
    end
  end
  with_output do |out|
    # don't use tbl_generate_csv() because the header should always be output.
    result.generate_csv(out)
  end
end
939
+
940
# "tb mheader": collapses several leading header records into one.
#
# Leading records are accumulated column by column (a value equal to the
# previous one in its column is not repeated).  With -c N the header ends
# after N records; otherwise it ends as soon as the merged names are all
# distinct.  The merged header is emitted once, later records are copied
# unchanged, and a warning is printed when no header was ever completed.
def main_mheader(argv)
  op_mheader.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  # Per-column lists of header parts; set to nil once the header is emitted.
  header = []
  if $opt_mheader_count
    remaining = $opt_mheader_count
    header_end_p = lambda {
      remaining -= 1
      if remaining == 0
        header.map {|parts| parts.compact.join(' ').strip }
      else
        nil
      end
    }
  else
    header_end_p = lambda {
      distinct = header.map {|parts| parts.compact.join(' ').strip }.uniq
      header.length == distinct.length ? distinct : nil
    }
  end
  with_table_stream_output do |gen|
    Tb::Reader.open(filename, {:numeric=>true}) do |reader|
      reader.each do |ary|
        if header
          ary.each_with_index do |v, i|
            header[i] ||= []
            header[i] << v if header[i].empty? || header[i].last != v
          end
          merged = header_end_p.call
          if merged
            gen << merged
            header = nil
          end
        else
          gen << ary
        end
      end
    end
  end
  warn "no header found." if header
end
980
+
981
# Entry point of the "crop" subcommand.
#
# With $opt_crop_range, only the cells inside the given rectangle are
# output.  The range is either "COL,ROW-COL,ROW" with decimal numbers or
# spreadsheet style "A1:B2"; both are 1-based.  Any other range string
# raises ArgumentError.
#
# Without a range, the table is trimmed automatically: blank (nil or '')
# cells at the end of each row, blank rows at the top and bottom, and
# leading columns that are blank in every row are removed.
def main_crop(argv)
  op_crop.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  range_spec = $opt_crop_range
  if range_spec
    case range_spec
    when /\A(\d+),(\d+)-(\d+),(\d+)\z/ # 1-based
      range_col1, range_row1, range_col2, range_row2 =
        Regexp.last_match.captures.map {|digits| digits.to_i }
    when /\A([A-Z]+)(\d+):([A-Z]+)(\d+)\z/ # 1-based
      left, top, right, bottom = Regexp.last_match.captures
      range_col1 = decode_a1_addressing_col(left)
      range_row1 = top.to_i
      range_col2 = decode_a1_addressing_col(right)
      range_row2 = bottom.to_i
    else
      raise ArgumentError, "unexpected range argument: #{$opt_crop_range.inspect}"
    end
    # An explicit rectangle can be cropped row by row while streaming.
    with_table_stream_output do |gen|
      Tb::Reader.open(filename, {:numeric=>true}) do |tblreader|
        rownum = 1
        tblreader.each do |ary|
          # Rows below the rectangle are never needed; stop reading.
          break if range_row2 < rownum
          if range_row1 <= rownum
            # Cut the right side first so the left cut does not shift it.
            ary.slice!(range_col2..-1) if range_col2 < ary.length
            ary.shift(range_col1 - 1) if 1 < range_col1
            gen << ary
          end
          rownum += 1
        end
      end
    end
  else
    blank = lambda {|v| v.nil? || v == '' }
    rows = []
    Tb::Reader.open(filename, {:numeric=>true}) do |tblreader|
      tblreader.each do |row|
        row.pop while !row.empty? && blank.call(row.last)
        rows << row
      end
    end
    rows.pop while !rows.empty? && rows.last.all? {|v| blank.call(v) }
    rows.shift while !rows.empty? && rows.first.all? {|v| blank.call(v) }
    if !rows.empty?
      # Drop leading columns for as long as the first cell of every row is
      # blank (or the row has no cells left).
      while rows.all? {|row| row.empty? || blank.call(row.first) }
        rows.each {|row| row.shift }
      end
    end
    with_table_stream_output do |gen|
      rows.each {|row| gen << row }
    end
  end
end
1045
+
1046
# Converts a spreadsheet-style column name into its 1-based column number:
# "A" => 1, "Z" => 26, "AA" => 27, "ZZ" => 702, "AAA" => 703.
#
# str:: column name made of ASCII letters; lowercase is accepted and
#       treated like uppercase.
#
# Returns the column number as an Integer.
# Raises ArgumentError when str is empty or contains anything other than
# the letters A-Z, instead of silently returning an unrelated number.
def decode_a1_addressing_col(str)
  letters = str.to_s.upcase
  unless letters =~ /\A[A-Z]+\z/
    raise ArgumentError, "invalid column name: #{str.inspect}"
  end
  # (26**n - 1) / 25 equals 1 + 26 + ... + 26**(n-1): one more than the
  # number of names shorter than n letters, i.e. the number of the first
  # n-letter name ("A", "AA", "AAA", ...).
  first_of_length = (26**letters.length - 1) / 25
  # Mapping A-Z onto the base-26 digits 0-9A-P lets String#to_i compute the
  # offset of this name among the names of the same length.
  first_of_length + letters.tr("A-Z", "0-9A-P").to_i(26)
end
1049
+
1050
# Splits a comma separated field-list argument with split_csv_argument and
# replaces every nil (or false) element by an empty string.
def split_field_list_argument(arg)
  fields = split_csv_argument(arg)
  fields.map {|field| field ? field : '' }
end
1053
+
1054
# Parses +arg+ as CSV and returns the first row yielded by
# Tb.csv_stream_input, or an empty array when no row is yielded.
def split_csv_argument(arg)
  Tb.csv_stream_input(arg) do |first_row|
    # Only the first row matters; leave the method immediately.
    return first_row
  end
  []
end
1058
+
1059
# Yields one loaded table per filename in +argv+.  When +argv+ is empty a
# single table is loaded from '-' instead.
def each_table_file(argv)
  return yield(load_table('-')) if argv.empty?
  argv.each do |filename|
    yield load_table(filename)
  end
end
1069
+
1070
# Reads the whole table stored in +filename+ and returns it as a Tb object.
# Rows shorter than the header are padded with nil before insertion.
def load_table(filename)
  tablereader_open(filename) do |tblreader|
    rows = []
    tblreader.each {|row| rows << row }
    # NOTE(review): the header is fetched only after every row has been
    # read - presumably the reader may adjust it while reading; confirm.
    fields = tblreader.header
    table = Tb.new(fields)
    rows.each do |row|
      missing = fields.length - row.length
      row.concat([nil] * missing) if missing > 0
      table.insert_values fields, row
    end
    table
  end
end
1085
+
1086
# Opens +filename+ with Tb::Reader, passing the global $opt_N setting as
# the reader's :numeric option, and forwards the given block.
def tablereader_open(filename, &b)
  reader_options = {:numeric=>$opt_N}
  Tb::Reader.open(filename, reader_options, &b)
end
1089
+
1090
# Yields a CSV stream generator that writes to the destination chosen by
# with_output.  The generator is extended with an output_header method
# which appends the header row unless $opt_N is set.
def with_table_stream_output
  with_output do |out|
    Tb.csv_stream_output(out) do |gen|
      class << gen
        def output_header(header)
          self << header unless $opt_N
        end
      end
      yield gen
    end
  end
end
1100
+
1101
# Writes +tbl+ to +out+ as CSV.  When $opt_N is set only the record values
# are written, in the order of tbl.list_fields and without a header row;
# otherwise the table's own generate_csv is used.
def tbl_generate_csv(tbl, out)
  return tbl.generate_csv(out) unless $opt_N
  fields = tbl.list_fields
  Tb.csv_stream_output(out) do |gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  end
end
1113
+
1114
# Writes +tbl+ to +out+ as TSV.  When $opt_N is set only the record values
# are written, in the order of tbl.list_fields and without a header row;
# otherwise the table's own generate_tsv is used.
def tbl_generate_tsv(tbl, out)
  return tbl.generate_tsv(out) unless $opt_N
  fields = tbl.list_fields
  Tb.tsv_stream_output(out) do |gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  end
end
1126
+
1127
# Yields the IO object that command output should be written to.
#
# When STDOUT is a terminal and $opt_no_pager is not set, output is piped
# into a pager: the command named by the PAGER environment variable, or
# 'more' when PAGER is unset or blank.  Otherwise STDOUT itself is yielded.
#
# Returns the value of the block, or nil when the pager was closed before
# all output had been written.
def with_output
  if STDOUT.tty? && !$opt_no_pager
    pager_command = ENV['PAGER']
    # An empty PAGER cannot be executed; treat it like an unset one.
    pager_command = 'more' if pager_command.nil? || pager_command.strip.empty?
    begin
      IO.popen(pager_command, 'w') {|pager|
        yield pager
      }
    rescue Errno::EPIPE
      # The user quit the pager early.  The remaining output is not wanted,
      # so this is not reported as an error.
      nil
    end
  else
    yield STDOUT
  end
end
1136
+
1137
# Script entry point: hand the command line arguments to main.
main(ARGV)