tb 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,60 @@
1
+ = tb - manipulation tool for table: CSV, TSV, etc.
2
+
3
+ == Usage
4
+
5
+ % tb help
6
+ Usage:
7
+ tb csv [OPTS] [TABLE]
8
+ tb tsv [OPTS] [TABLE]
9
+ tb json [OPTS] [TABLE]
10
+ tb yaml [OPTS] [TABLE]
11
+ tb pp [OPTS] [TABLE]
12
+ tb grep [OPTS] REGEXP [TABLE]
13
+ tb gsub [OPTS] REGEXP STRING [TABLE]
14
+ tb sort [OPTS] [TABLE]
15
+ tb select [OPTS] FIELD,... [TABLE]
16
+ tb rename [OPTS] SRC,DST,... [TABLE]
17
+ tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
18
+ tb cat [OPTS] [TABLE ...]
19
+ tb join [OPTS] [TABLE ...]
20
+ tb group [OPTS] KEY-FIELD1,... [TABLE]
21
+ tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
22
+ tb shape [OPTS] [TABLE ...]
23
+ tb mheader [OPTS] [TABLE]
24
+ tb crop [OPTS] [TABLE]
25
+
26
+
27
+
28
+ == Install
29
+
30
+ gem install tb
31
+
32
+ == Author
33
+
34
+ Tanaka Akira <akr@fsij.org>
35
+
36
+ == License
37
+
38
+ Redistribution and use in source and binary forms, with or without
39
+ modification, are permitted provided that the following conditions are met:
40
+
41
+ (1) Redistributions of source code must retain the above copyright notice, this
42
+ list of conditions and the following disclaimer.
43
+ (2) Redistributions in binary form must reproduce the above copyright notice,
44
+ this list of conditions and the following disclaimer in the documentation
45
+ and/or other materials provided with the distribution.
46
+ (3) The name of the author may not be used to endorse or promote products
47
+ derived from this software without specific prior written permission.
48
+
49
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
50
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
51
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
52
+ EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
53
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
54
+ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
57
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
58
+ OF SUCH DAMAGE.
59
+
60
+ (The modified BSD licence)
data/bin/tb ADDED
@@ -0,0 +1,1137 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ require 'tb'
28
+ require 'optparse'
29
+ require 'enumerator'
30
+
31
# Entry point: removes the subcommand name from the front of argv and hands
# the remaining arguments to the matching main_* method.
#
# argv - array of command line words; its first element is consumed.
#
# A missing or unknown subcommand is reported through err.
def main(argv)
  subcommand = argv.shift
  handlers = %w[csv tsv json yaml pp grep gsub sort select rename newfield
                cat join group cross shape mheader crop]
  if subcommand == 'help' || subcommand == '-h'
    main_help(argv)
  elsif handlers.include?(subcommand)
    # every listed name has a main_<name> method taking the remaining argv
    send("main_#{subcommand}", argv)
  elsif subcommand.nil?
    err "Usage: tb subcommand args..."
  else
    err "unexpected subcommand: #{subcommand.inspect}"
  end
end
59
+
60
# Prints the list of subcommands to standard output and terminates the
# process.
#
# status - value handed to Kernel#exit (true means success).
#
# The "group" and "cross" lines list the positional key-field arguments that
# the banners of op_group and op_cross announce.
def usage(status)
  print <<'End'
Usage:
  tb csv [OPTS] [TABLE]
  tb tsv [OPTS] [TABLE]
  tb json [OPTS] [TABLE]
  tb yaml [OPTS] [TABLE]
  tb pp [OPTS] [TABLE]
  tb grep [OPTS] REGEXP [TABLE]
  tb gsub [OPTS] REGEXP STRING [TABLE]
  tb sort [OPTS] [TABLE]
  tb select [OPTS] FIELD,... [TABLE]
  tb rename [OPTS] SRC,DST,... [TABLE]
  tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
  tb cat [OPTS] [TABLE ...]
  tb join [OPTS] [TABLE ...]
  tb group [OPTS] KEY-FIELD1,... [TABLE]
  tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
  tb shape [OPTS] [TABLE ...]
  tb mheader [OPTS] [TABLE]
  tb crop [OPTS] [TABLE]
End
  exit status
end
84
+
85
# Implements "tb help [SUBCOMMAND]".
#
# With a known subcommand name the option parser returned by the matching
# op_* method is printed; with no argument the overall usage is shown via
# usage(true); anything else is reported through err.
def main_help(argv)
  subcommand = argv.shift
  documented = %w[csv tsv json yaml pp grep gsub sort select rename newfield
                  cat join group cross shape mheader crop]
  if documented.include?(subcommand)
    puts send("op_#{subcommand}")
  elsif subcommand.nil?
    usage(true)
  else
    err "unexpected subcommand: #{subcommand.inspect}"
  end
end
112
+
113
# Option state shared by several subcommands.
$opt_N = $opt_no_pager = nil  # each becomes true when -N / --no-pager is given
$opt_debug = 0                # incremented once per -d/--debug (see op_join)

# Builds the option parser for "tb csv".
def op_csv
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb csv [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
125
+
126
# Builds the option parser for "tb tsv".
def op_tsv
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb tsv [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
134
+
135
# Builds the option parser for "tb json".
def op_json
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb json [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
143
+
144
# Builds the option parser for "tb yaml".
def op_yaml
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb yaml [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
152
+
153
# Builds the option parser for "tb pp".
def op_pp
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb pp [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
161
+
162
# Option state for "tb grep".
$opt_grep_e = nil     # regexp source given with -e
$opt_grep_ruby = nil  # predicate source given with --ruby
$opt_grep_f = nil     # field name given with -f
$opt_grep_v = nil     # true when -v was given

# Builds the option parser for "tb grep".
#
# The help texts of -e and --ruby describe what main_grep does with them:
# -e supplies the regexp, --ruby supplies Ruby code evaluated as a predicate.
def op_grep
  op = OptionParser.new
  op.banner = 'Usage: tb grep [OPTS] REGEXP [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_grep_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_grep_e = pattern }
  op.def_option('--ruby RUBY-EXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|ruby_exp| $opt_grep_ruby = ruby_exp }
  op.def_option('-v', 'output the records which don\'t match') { $opt_grep_v = true }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
178
+
179
# Option state for "tb gsub".
$opt_gsub_e = nil  # regexp source given with -e
$opt_gsub_f = nil  # field name given with -f

# Builds the option parser for "tb gsub".
#
# -e supplies the regexp that main_gsub otherwise takes from the first
# positional argument, so its help text says so.
def op_gsub
  op = OptionParser.new
  op.banner = 'Usage: tb gsub [OPTS] REGEXP STRING [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_gsub_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_gsub_e = pattern }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
191
+
192
# Comma separated sort keys given with -f (nil when the option is absent).
$opt_sort_f = nil

# Builds the option parser for "tb sort".
def op_sort
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb sort [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('-f FIELD,...', 'specify sort keys') { |keys| $opt_sort_f = keys }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
202
+
203
# True when "tb select" was given -v.
$opt_select_v = nil

# Builds the option parser for "tb select".
def op_select
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb select [OPTS] FIELD,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('-v', 'invert match') { $opt_select_v = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
213
+
214
# Builds the option parser for "tb rename".
def op_rename
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb rename [OPTS] SRC,DST,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
221
+
222
# Builds the option parser for "tb newfield".
def op_newfield
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb newfield [OPTS] FIELD RUBY-EXP [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
229
+
230
# Builds the option parser for "tb cat".
def op_cat
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb cat [OPTS] [TABLE ...]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
238
+
239
# Option state for "tb join".
$opt_join_outer = nil          # :full, :left or :right once an outer join is requested
$opt_join_outer_missing = nil  # value given with --outer-missing

# Builds the option parser for "tb join".
def op_join
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb join [OPTS] [TABLE ...]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-d', '--debug', 'show debug message') { $opt_debug += 1 }
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--outer', 'outer join') { $opt_join_outer = :full }
    parser.on('--left', 'left outer join') { $opt_join_outer = :left }
    parser.on('--right', 'right outer join') { $opt_join_outer = :right }
    parser.on('--outer-missing=DEFAULT', 'missing value for outer join') do |missing|
      # a missing-value default implies a full outer join unless a join kind is already set
      $opt_join_outer ||= :full
      $opt_join_outer_missing = missing
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
257
+
258
# Raw -a/--aggregate arguments for "tb group", in command line order.
$opt_group_fields = []

# Builds the option parser for "tb group".
def op_group
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb group [OPTS] KEY-FIELD1,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
              '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') do |arg|
      $opt_group_fields << arg
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
268
+
269
# Option state for "tb cross".
$opt_cross_fields = []      # raw -a/--aggregate arguments, in command line order
$opt_cross_compact = false  # true when -c/--compact was given

# Builds the option parser for "tb cross".
def op_cross
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
              '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') do |arg|
      $opt_cross_fields << arg
    end
    parser.on('-c', '--compact', 'compact format') { $opt_cross_compact = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
281
+
282
# Builds the option parser for "tb shape".
def op_shape
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb shape [OPTS] [TABLE ...]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
290
+
291
# Number of header records given with -c (nil when the option is absent).
$opt_mheader_count = nil

# Builds the option parser for "tb mheader".
def op_mheader
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb mheader [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-c N', 'number of header records') { |count| $opt_mheader_count = count.to_i }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
300
+
301
# Range text given with -r (nil when the option is absent).
$opt_crop_range = nil

# Builds the option parser for "tb crop".
def op_crop
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb crop [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') { |range| $opt_crop_range = range }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
310
+
311
# Writes msg to STDERR and terminates the process with exit status 1.
def err(msg)
  STDERR.puts(msg)
  Kernel.exit(1)
end
315
+
316
# Converts a cell value into an array that can be ordered with Array#<=>.
#
# v - nil, a Numeric, or a String.
#
# Returns
#   []                  for nil,
#   [0, number]         for a Numeric or a string that is entirely a number,
#   [1, text, 0, n, ..] for any other string: runs of digits become 0, Integer
#                       pairs and runs of non-digits become 1, String pairs.
#
# Raises ArgumentError for any other kind of object.
def comparison_value(v)
  case v
  when nil
    []
  when Numeric
    [0, v]
  when String
    case v
    when /\A\s*-?\d+\s*\z/
      # String#to_i always reads decimal. Integer(v) honours radix prefixes, so
      # "010" would become 8 and "08" would raise ArgumentError.
      [0, v.strip.to_i]
    when /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
      [0, Float(v)]
    else
      a = []
      v.scan(/(\d+)|\D+/) {
        if $1
          a << 0 << $1.to_i
        else
          a << 1 << $&
        end
      }
      a
    end
  else
    raise ArgumentError, "unexpected: #{v.inspect}"
  end
end
343
+
344
# Parses a string holding a number, ignoring surrounding whitespace.
#
# v - the text to convert.
#
# Returns an Integer for plain (optionally negative) digit strings and a
# Float for decimal or exponent notation.
# Raises RuntimeError when the stripped text is neither.
def conv_to_numeric(v)
  text = v.strip
  case text
  when /\A-?\d+\z/
    text.to_i
  when /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/
    text.to_f
  else
    raise "numeric value expected: #{text.inspect}"
  end
end
355
+
356
# Aggregator that counts how many times update was called; the value passed
# to update is ignored.
class CountAggregator
  def initialize
    @result = 0
  end

  # Returns the running count after this call.
  def update(v)
    @result += 1
  end

  def finish
    @result
  end
end
361
+
362
# Aggregator that adds up the numeric interpretation of each value
# (via conv_to_numeric); nil and the empty string are skipped.
class SumAggregator
  def initialize
    @result = 0
  end

  # Returns the running sum, or nil when the value was skipped.
  def update(v)
    return if v.nil? || v == ''
    @result += conv_to_numeric(v)
  end

  def finish
    @result
  end
end
367
+
368
# Aggregator that divides the sum of the numeric values by the number of
# update calls. Every call counts toward the divisor, including calls with
# nil or an empty string, which add nothing to the sum.
class AvgAggregator
  def initialize
    @sum = 0
    @count = 0
  end

  # Returns the running sum, or nil when the value was blank.
  def update(v)
    @count += 1
    unless v.nil? || v == ''
      @sum += conv_to_numeric(v)
    end
  end

  # Always a Float; NaN when update was never called.
  def finish
    divisor = @count.to_f
    @sum / divisor
  end
end
373
+
374
# Aggregator that remembers the value whose comparison_value key is the
# largest seen so far; on ties the earlier value is kept.
class MaxAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  # Returns [v, key] when v became the new maximum, nil otherwise.
  def update(v)
    key = comparison_value(v)
    if @cmp.nil? || (@cmp <=> key) < 0
      @v, @cmp = v, key
    end
  end

  def finish
    @v
  end
end
386
+
387
# Aggregator that remembers the value whose comparison_value key is the
# smallest seen so far; on ties the earlier value is kept.
class MinAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  # Returns [v, key] when v became the new minimum, nil otherwise.
  def update(v)
    key = comparison_value(v)
    if @cmp.nil? || (@cmp <=> key) > 0
      @v, @cmp = v, key
    end
  end

  def finish
    @v
  end
end
399
+
400
# Aggregator that collects every truthy value in arrival order and reports
# them joined with commas.
class ValuesAggregator
  def initialize
    @result = []
  end

  # Returns the collected array, or nil when v was nil/false.
  def update(v)
    if v
      @result.push(v)
    end
  end

  def finish
    @result.join(",")
  end
end
405
+
406
# Aggregator that collects every truthy value and reports the distinct ones,
# in order of first appearance, joined with commas.
class UniqueValuesAggregator
  def initialize
    @result = []
  end

  # Returns the collected array, or nil when v was nil/false.
  def update(v)
    if v
      @result.push(v)
    end
  end

  def finish
    distinct = @result.uniq
    distinct.join(",")
  end
end
411
+
412
# Adapter that feeds one element of each record array to another aggregator.
class Selector
  # i          - index of the element to pick from each array.
  # aggregator - object responding to update(value) and finish.
  def initialize(i, aggregator)
    @i = i
    @agg = aggregator
  end

  def update(ary)
    picked = ary[@i]
    @agg.update(picked)
  end

  def finish
    @agg.finish
  end
end
417
+
418
# Creates the aggregator object described by spec.
#
# spec - 'count', or one of sum(F), avg(F), max(F), min(F), values(F),
#        uniquevalues(F) where F is a field name.
# fs   - array of field names; the position of F in it selects the element
#        the aggregator reads from each record array.
#
# Returns a CountAggregator for 'count', otherwise a Selector wrapping the
# matching aggregator.
# Raises ArgumentError when F is not in fs or spec has an unknown form.
def make_aggregator(spec, fs)
  case spec
  when 'count'
    CountAggregator.new
  when /\A(sum|avg|max|min|values|uniquevalues)\((.*)\)\z/
    kind = $1
    field = $2
    i = fs.index(field)
    raise ArgumentError, "field not found: #{field.inspect}" if !i
    klass =
      case kind
      when 'sum' then SumAggregator
      when 'avg' then AvgAggregator
      when 'max' then MaxAggregator
      when 'min' then MinAggregator
      when 'values' then ValuesAggregator
      else UniqueValuesAggregator
      end
    Selector.new(i, klass.new)
  else
    raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
  end
end
456
+
457
# Runs a single aggregation over every record of a table.
#
# spec  - aggregation spec understood by make_aggregator (e.g. 'count').
# table - object responding to list_fields and each; each record must respond
#         to values_at.
#
# make_aggregator returns one object with update/finish methods, so that
# object is driven directly and the field list is kept in a local for
# values_at.
#
# Returns whatever the aggregator's finish returns.
def aggregate(spec, table)
  fs = table.list_fields
  aggregator = make_aggregator(spec, fs)
  table.each {|rec|
    aggregator.update(rec.values_at(*fs))
  }
  aggregator.finish
end
464
+
465
# Implements "tb csv": each table obtained from each_table_file is written
# with tbl_generate_csv to the stream yielded by with_output.
def main_csv(argv)
  op_csv.parse!(argv)
  each_table_file(argv) do |table|
    with_output do |io|
      tbl_generate_csv(table, io)
    end
  end
end
473
+
474
# Implements "tb tsv": each table obtained from each_table_file is written
# with tbl_generate_tsv to the stream yielded by with_output.
def main_tsv(argv)
  op_tsv.parse!(argv)
  each_table_file(argv) do |table|
    with_output do |io|
      tbl_generate_tsv(table, io)
    end
  end
end
482
+
483
# Implements "tb json": prints all records of all given tables as one JSON
# array of objects.
#
# Each record becomes an object keyed by the reader's header; nil elements
# are left out. Records are separated by ",\n", and the first record of each
# later file is preceded by ",\n\n" instead.
def main_json(argv)
  require 'json'
  op_json.parse!(argv)
  argv = ['-'] if argv.empty?
  with_output do |out|
    out.print "["
    sep = nil
    argv.each do |filename|
      # widen the separator at a file boundary, but only once something was printed
      sep = ",\n\n" if sep
      tablereader_open(filename) do |tblreader|
        tblreader.each do |ary|
          out.print sep if sep
          names = tblreader.header
          record = {}
          ary.each_with_index do |value, idx|
            next if value.nil?
            record[names[idx]] = value
          end
          out.print JSON.pretty_generate(record)
          sep = ",\n"
        end
      end
    end
    out.puts "]"
  end
end
508
+
509
# Implements "tb yaml": for each table, dumps the array of its records
# (converted with to_h) as YAML, followed by an empty line.
def main_yaml(argv)
  require 'yaml'
  op_yaml.parse!(argv)
  each_table_file(argv) do |tbl|
    records = tbl.map { |rec| rec.to_h }
    with_output do |out|
      YAML.dump(records, out)
      out.puts
    end
  end
end
520
+
521
# Implements "tb pp": pretty-prints every record of every given table as a
# Hash from field name to value; nil elements are left out.
#
# The pp library is loaded here, in the same way main_json and main_yaml load
# theirs, because the PP constant is not defined until 'pp' is required.
def main_pp(argv)
  require 'pp'
  op_pp.parse!(argv)
  argv.unshift '-' if argv.empty?
  with_output {|out|
    argv.each {|filename|
      tablereader_open(filename) {|tblreader|
        tblreader.each {|ary|
          h = {}
          ary.each_with_index {|v, i|
            next if v.nil?
            h[tblreader.field_from_index_ex(i)] = v
          }
          PP.pp h, out
        }
      }
    }
  }
end
539
+
540
# Implements "tb grep": copies the header and the matching records of each
# table to the table stream output.
#
# The predicate comes from, in order of precedence:
#   --ruby RUBY-EXP  Ruby code evaluated with the record hash bound to _,
#   -e REGEXP        a regexp (no positional REGEXP is consumed),
#   REGEXP           the first positional argument.
# With -f FIELD the regexp is matched against that field only, otherwise
# against the string form of every value. -v inverts the selection.
#
# A missing REGEXP is reported through err instead of failing inside
# Regexp.new.
def main_grep(argv)
  op_grep.parse!(argv)
  if $opt_grep_ruby
    # NOTE(review): this evaluates command line text as Ruby code. That is the
    # documented purpose of --ruby, but it must never receive untrusted input.
    pred = eval("lambda {|_| #{$opt_grep_ruby} }")
  else
    pattern = $opt_grep_e || argv.shift
    err "no regexp given." if pattern.nil?
    re = Regexp.new(pattern)
    pred = $opt_grep_f ? lambda {|_| re =~ _[$opt_grep_f] } :
                         lambda {|_| _.any? {|k, v| re =~ v.to_s } }
  end
  opt_v = $opt_grep_v ? true : false
  argv.unshift '-' if argv.empty?
  argv.each {|filename|
    tablereader_open(filename) {|tblreader|
      with_table_stream_output {|gen|
        gen.output_header tblreader.header
        tblreader.each {|ary|
          h = {}
          ary.each_with_index {|str, i|
            f = tblreader.field_from_index_ex(i)
            h[f] = str
          }
          found = pred.call(h)
          # normalise to a boolean before applying the -v inversion
          found = opt_v ^ !!(found)
          gen << ary if found
        }
      }
    }
  }
end
573
+
574
# Implements "tb gsub": writes the table with every match of REGEXP replaced
# by STRING.
#
# The regexp is taken from -e when given, otherwise from the first positional
# argument; the replacement is the next positional argument. With -f FIELD
# only that field is rewritten and the other elements pass through untouched;
# without it every element is rewritten. nil elements that are rewritten are
# treated as empty strings.
#
# A missing REGEXP or STRING is reported through err instead of failing with
# a TypeError inside Regexp.new / String#gsub.
def main_gsub(argv)
  op_gsub.parse!(argv)
  pattern = $opt_gsub_e || argv.shift
  err "no regexp given." if pattern.nil?
  re = Regexp.new(pattern)
  repl = argv.shift
  err "no substitution given." if repl.nil?
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    with_table_stream_output {|gen|
      gen.output_header tblreader.header
      tblreader.each {|ary|
        if $opt_gsub_f
          ary2 = []
          ary.each_with_index {|str, i|
            f = tblreader.field_from_index_ex(i)
            if f == $opt_gsub_f
              str ||= ''
              ary2 << str.gsub(re, repl)
            else
              ary2 << str
            end
          }
        else
          ary2 = ary.map {|s|
            s ||= ''
            s.gsub(re, repl)
          }
        end
        gen << ary2
      }
    }
  }
end
610
+
611
# Implements "tb sort": loads one table, reorders its records and writes the
# result with tbl_generate_csv.
#
# With -f the records are keyed by the comparison_value of the listed fields;
# otherwise by the comparison_value of every value in the record.
def main_sort(argv)
  op_sort.parse!(argv)
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  fs = $opt_sort_f ? split_field_list_argument($opt_sort_f) : nil
  tbl = load_table(filename)
  key_of =
    if fs
      lambda { |rec| fs.map { |f| comparison_value(rec[f]) } }
    else
      lambda { |rec| rec.map { |k, v| comparison_value(v) } }
    end
  sorted = tbl.reorder_records_by(&key_of)
  with_output do |out|
    tbl_generate_csv(sorted, out)
  end
end
631
+
632
# Implements "tb select": writes only the listed fields, in the listed order,
# or - with -v - every field except the listed ones.
#
# In -v mode no header names are passed to output_header (nil is passed)
# when -N is in effect.
def main_select(argv)
  op_select.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  tablereader_open(filename) do |tblreader|
    if $opt_select_v
      dropped = {}
      fs.each { |f| dropped[tblreader.index_from_field(f)] = true }
      header = nil
      unless $opt_N
        header = []
        tblreader.header.each_with_index do |name, idx|
          header << name unless dropped[idx]
        end
      end
      with_table_stream_output do |gen|
        gen.output_header(header)
        tblreader.each do |ary|
          kept = []
          ary.each_with_index do |value, idx|
            kept << value unless dropped[idx]
          end
          gen << kept
        end
      end
    else
      # The result is unused; the call is kept from the original
      # (presumably it makes the reader load its header - TODO confirm).
      tblreader.header
      is = fs.map { |f| tblreader.index_from_field(f) }
      with_table_stream_output do |gen|
        gen.output_header(is.map { |idx| tblreader.field_from_index_ex(idx) })
        tblreader.each do |ary|
          gen << ary.values_at(*is)
        end
      end
    end
  end
end
671
+
672
# Implements "tb rename": rewrites header names according to the
# SRC,DST,... pairs and copies all records unchanged.
#
# Raises RuntimeError when a SRC name is not in the table header.
def main_rename(argv)
  op_rename.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  renames = {}
  fs.each_slice(2) { |src, dst| renames[src] = dst }
  tablereader_open(filename) do |tblreader|
    header = tblreader.header
    renames.each do |src, dst|
      next if header.include?(src)
      raise "field not defined: #{src.inspect}"
    end
    # names without a mapping are kept as they are
    renamed_header = tblreader.header.map { |name| renames.fetch(name, name) }
    with_table_stream_output do |gen|
      gen.output_header(renamed_header)
      tblreader.each do |ary|
        gen << ary
      end
    end
  end
end
695
+
696
# Implements "tb newfield": prepends a field whose value is computed, for
# each record, by evaluating RUBY-EXP with the record hash bound to _.
#
# Options are parsed with op_newfield, so that -h prints the newfield banner
# rather than the one for rename.
def main_newfield(argv)
  op_newfield.parse!(argv)
  field = argv.shift
  rubyexp = argv.shift
  # NOTE(review): this evaluates command line text as Ruby code. That is the
  # purpose of the subcommand, but it must never receive untrusted input.
  pr = eval("lambda {|_| #{rubyexp} }")
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    renamed_header = [field] + tblreader.header
    with_table_stream_output {|gen|
      gen.output_header(renamed_header)
      tblreader.each {|ary|
        h = {}
        ary.each_with_index {|str, i|
          f = tblreader.field_from_index_ex(i)
          h[f] = str
        }
        gen << [pr.call(h), *ary]
      }
    }
  }
end
718
+
719
# Implements "tb cat": concatenates the records of all given tables.
#
# With -N the record arrays are copied through as they are, one output
# stream per file. Otherwise all readers are opened first to build a combined
# header (field names in order of first appearance), and each record is
# re-laid-out so that every value lands in the column of its field name.
def main_cat(argv)
  op_cat.parse!(argv)
  argv = ['-'] if argv.empty?
  if $opt_N
    argv.each do |filename|
      with_table_stream_output do |gen|
        tablereader_open(filename) do |tblreader|
          tblreader.each { |row| gen << row }
        end
      end
    end
  else
    readers = []
    positions = {}  # field name => output column index
    argv.each do |filename|
      reader = tablereader_open(filename)
      readers << reader
      reader.header.each { |name| positions[name] ||= positions.size }
    end
    with_table_stream_output do |gen|
      gen.output_header(positions.keys.sort_by { |name| positions[name] })
      readers.each do |reader|
        known = reader.header.dup
        reader.each do |row|
          # a record longer than the header: ask the reader for names of the
          # extra columns and give them output columns too
          while known.length < row.length
            extra = reader.field_from_index_ex(known.length)
            known << extra
            positions[extra] ||= positions.size
          end
          merged = []
          row.each_with_index do |value, idx|
            name = reader.field_from_index(idx)
            merged[positions.fetch(name)] = value
          end
          gen << merged
        end
      end
    end
  end
end
764
+
765
# Implements "tb join": folds all given tables into one, starting from an
# empty Tb, using natjoin2 (or natjoin2_outer when an outer join was
# requested), then writes the result with tbl_generate_csv.
#
# With -d the field names shared by the accumulated result and the next
# table are printed to STDERR before each join.
#
# Raises RuntimeError when $opt_join_outer holds an unknown value.
def main_join(argv)
  op_join.parse!(argv)
  result = Tb.new([], [])
  retain_left, retain_right =
    case $opt_join_outer
    when :full then [true, true]
    when :left then [true, false]
    when :right then [false, true]
    when nil then [false, false]
    else
      raise "unexpected $opt_join_outer: #{$opt_join_outer.inspect}"
    end
  each_table_file(argv) do |tbl|
    STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= $opt_debug
    result =
      if $opt_join_outer
        result.natjoin2_outer(tbl, $opt_join_outer_missing, retain_left, retain_right)
      else
        result.natjoin2(tbl)
      end
  end
  with_output do |out|
    tbl_generate_csv(result, out)
  end
end
797
+
798
# Implements "tb group": groups records by the KEY-FIELD values and emits one
# record per group holding the key values followed by one column per
# -a/--aggregate option.
#
# Each -a argument is "SPEC" or "SPEC,NEW-FIELD"; the column is named
# NEW-FIELD, or SPEC itself when no name is given. Groups are written in
# comparison_value order of their key values.
def main_group(argv)
  op_group.parse!(argv)
  kfs = split_field_list_argument(argv.shift)
  makers = $opt_group_fields.map do |arg|
    aggregation_spec, new_field = split_field_list_argument(arg)
    new_field ||= aggregation_spec
    [new_field, lambda { |fields| make_aggregator(aggregation_spec, fields) }]
  end
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  groups = {}  # key values => aggregators of that group
  tablereader_open(filename) do |tblreader|
    kis = kfs.map { |f| tblreader.index_from_field(f) }
    result_fields = kfs + makers.map { |pair| pair[0] }
    tblreader.each do |ary|
      key = ary.values_at(*kis)
      if groups.include?(key)
        groups[key].each { |ag| ag.update(ary) }
      else
        # first record of a group: create its aggregators and feed them right away
        groups[key] = makers.map do |_name, maker|
          ag = maker.call(tblreader.header)
          ag.update(ary)
          ag
        end
      end
    end
    result = Tb.new(result_fields)
    ordered_keys = groups.keys.sort_by { |k| k.map { |v| comparison_value(v) } }
    ordered_keys.each do |k|
      finished = groups[k].map { |ag| ag.finish }
      result.insert_values result_fields, k + finished
    end
    with_output do |out|
      tbl_generate_csv(result, out)
    end
  end
end
832
+
833
# Implements "tb cross": builds a cross tabulation.
#
# The first positional argument lists the fields whose values spread
# horizontally (one column group per distinct combination), the second lists
# the fields whose values run vertically (one output row per distinct
# combination). Each cell holds the results of the -a/--aggregate specs
# ('count' when none is given) over the records sharing both combinations.
#
# Output layout: one heading row per horizontal key field, then a label row
# (aggregate names, or with -c the last horizontal key's values, in which
# case that key's own heading row is omitted), then the data rows.
def main_cross(argv)
  op_cross.parse!(argv)
  hkfs = split_field_list_argument(argv.shift)
  vkfs = split_field_list_argument(argv.shift)
  opt_cross_fields =
    if $opt_cross_fields.empty?
      [['count', 'count']]
    else
      $opt_cross_fields.map do |arg|
        agg_spec, new_field = split_field_list_argument(arg)
        new_field ||= agg_spec
        [agg_spec, new_field]
      end
    end
  per_cell = opt_cross_fields.length
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  tablereader_open(filename) do |tblreader|
    vkis = vkfs.map { |f| tblreader.index_from_field(f) }
    hkis = hkfs.map { |f| tblreader.index_from_field(f) }
    vset = {}   # distinct vertical key combinations
    hset = {}   # distinct horizontal key combinations
    cells = {}  # [vertical keys, horizontal keys] => aggregators
    tblreader.each do |ary|
      vkvs = ary.values_at(*vkis)
      hkvs = ary.values_at(*hkis)
      vset[vkvs] ||= true
      hset[hkvs] ||= true
      cell_key = [vkvs, hkvs]
      if cells.include?(cell_key)
        cells[cell_key].each { |ag| ag.update(ary) }
      else
        cells[cell_key] = opt_cross_fields.map do |agg_spec, _name|
          ag = make_aggregator(agg_spec, tblreader.header)
          ag.update(ary)
          ag
        end
      end
    end
    vary = vset.keys.sort_by { |a| a.map { |v| comparison_value(v) } }
    hary = hset.keys.sort_by { |a| a.map { |v| comparison_value(v) } }
    with_output do |out|
      Tb.csv_stream_output(out) do |gen|
        # heading rows: the key field name sits above the last vertical key column
        hkfs.each_with_index do |hkf, i|
          next if $opt_cross_compact && i == hkfs.length - 1
          row = [nil] * (vkfs.length - 1) + [hkf]
          hary.each { |hkvs| row.concat([hkvs[i]] * per_cell) }
          gen << row
        end
        labels = vkfs.dup
        hary.each do |hkvs|
          if $opt_cross_compact
            labels.concat([hkvs[-1]] * per_cell)
          else
            labels.concat(opt_cross_fields.map { |_spec, new_field| new_field })
          end
        end
        gen << labels
        vary.each do |vkvs|
          row = vkvs.dup
          hary.each do |hkvs|
            ags = cells[[vkvs, hkvs]]
            if ags
              ags.each { |ag| row << ag.finish }
            else
              # no record had this combination: leave the cell's columns empty
              per_cell.times { row << nil }
            end
          end
          gen << row
        end
      end
    end
  end
end
906
+
907
# Implements "tb shape": for each given table reports the number of header
# fields, the smallest and largest record length, the number of records and
# the file name. min_fields and max_fields stay nil for a table without
# records.
def main_shape(argv)
  op_shape.parse!(argv)
  filenames = argv.empty? ? ['-'] : argv
  result = Tb.new(%w[header_fields min_fields max_fields records filename])
  filenames.each do |filename|
    tablereader_open(filename) do |tblreader|
      num_header_fields = tblreader.header.length
      min_num_fields = nil
      max_num_fields = nil
      num_records = 0
      tblreader.each do |ary|
        num_records += 1
        width = ary.length
        min_num_fields = width if min_num_fields.nil? || width < min_num_fields
        max_num_fields = width if max_num_fields.nil? || max_num_fields < width
      end
      result.insert({'header_fields'=>num_header_fields,
                     'min_fields'=>min_num_fields,
                     'max_fields'=>max_num_fields,
                     'records'=>num_records,
                     'filename'=>filename})
    end
  end
  with_output do |out|
    # tbl_generate_csv() is not used here because the header should always be output.
    result.generate_csv(out)
  end
end
939
+
940
# Implements "tb mheader": collapses several leading header records into a
# single header record and copies the remaining records unchanged.
#
# For every column the header cells are collected top to bottom (a cell equal
# to the one directly above it is skipped) and joined with spaces. With -c N
# exactly the first N records form the header; otherwise records are consumed
# until the joined names are all distinct. Warns "no header found." when the
# input ends before the header is complete.
def main_mheader(argv)
  op_mheader.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  header = []  # per column: the header cells seen so far; nil once the header was emitted
  joined_names = lambda { header.map { |cells| cells.compact.join(' ').strip } }
  if $opt_mheader_count
    remaining = $opt_mheader_count
    header_end_p = lambda do
      remaining -= 1
      remaining == 0 ? joined_names.call : nil
    end
  else
    header_end_p = lambda do
      distinct = joined_names.call.uniq
      header.length == distinct.length ? distinct : nil
    end
  end
  with_table_stream_output do |gen|
    Tb::Reader.open(filename, {:numeric=>true}) do |tblreader|
      tblreader.each do |ary|
        if header
          ary.each_with_index do |value, idx|
            cells = (header[idx] ||= [])
            cells << value if cells.empty? || cells.last != value
          end
          finished = header_end_p.call
          if finished
            gen << finished
            header = nil
          end
        else
          gen << ary
        end
      end
    end
  end
  if header
    warn "no header found."
  end
end
980
+
981
# Implements the "crop" subcommand.
#
# With $opt_crop_range set, only the cells inside the given rectangle are
# output.  Two 1-based notations are accepted: "COL,ROW-COL,ROW" using
# decimal numbers, and "A1:B2" style where the column letters are decoded by
# decode_a1_addressing_col.  Any other value raises ArgumentError.
#
# Without a range, the whole table is read; trailing blank (nil or '') cells
# are removed from every row, blank rows are dropped from the top and the
# bottom, and leading columns that are blank in every row are removed.
#
# argv - command-line arguments; options are consumed by op_crop.parse!,
#        the next argument is the filename ('-' when absent).
def main_crop(argv)
  op_crop.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  if $opt_crop_range
    case $opt_crop_range
    when /\A(\d+),(\d+)-(\d+),(\d+)\z/ # 1-based
      range_col1, range_row1, range_col2, range_row2 =
        $1.to_i, $2.to_i, $3.to_i, $4.to_i
    when /\A([A-Z]+)(\d+):([A-Z]+)(\d+)\z/ # 1-based
      range_col1, range_row1, range_col2, range_row2 =
        decode_a1_addressing_col($1), $2.to_i, decode_a1_addressing_col($3), $4.to_i
    else
      raise ArgumentError, "unexpected range argument: #{$opt_crop_range.inspect}"
    end
    # The rectangle is known up front, so rows can be cropped while streaming.
    with_table_stream_output {|gen|
      Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
        rownum = 0
        tblreader.each {|ary|
          rownum += 1
          break if range_row2 < rownum
          next if rownum < range_row1
          # Cut the right side first so the left cut does not shift its index.
          ary[range_col2..-1] = [] if range_col2 < ary.length
          ary[0...(range_col1-1)] = [] if 1 < range_col1
          gen << ary
        }
      }
    }
  else
    blank = lambda {|v| v.nil? || v == '' }
    arys = []
    Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
      tblreader.each {|a|
        a.pop while !a.empty? && blank.call(a.last)
        arys << a
      }
    }
    arys.pop while !arys.empty? && arys.last.all? {|v| blank.call(v) }
    arys.shift while !arys.empty? && arys.first.all? {|v| blank.call(v) }
    # Drop the first column for as long as no row starts with a non-blank cell.
    until arys.empty? || arys.any? {|a| !a.empty? && !blank.call(a.first) }
      arys.each {|a| a.shift }
    end
    with_table_stream_output {|gen|
      arys.each {|a| gen << a }
    }
  end
end
1045
+
1046
# Converts a spreadsheet-style column name to its 1-based column number
# ("A" => 1, "Z" => 26, "AA" => 27, ...).
#
# str - column name; callers in this file pass strings matching [A-Z]+.
#
# Returns an Integer.
def decode_a1_addressing_col(str)
  # Number of column names shorter than str, plus one: 1 + 26 + 26**2 + ...
  shorter_names = (26 ** str.length - 1) / 25
  # Map A..Z onto the base-26 digits 0..9, A..P so to_i(26) can read it.
  base26_digits = str.tr("A-Z", "0-9A-P")
  shorter_names + base26_digits.to_i(26)
end
1049
+
1050
# Splits a field-list argument with split_csv_argument and replaces every
# nil or false element of the result with an empty string.
#
# arg - the raw argument string.
#
# Returns an Array.
def split_field_list_argument(arg)
  fields = split_csv_argument(arg)
  fields.map {|field| field ? field : '' }
end
1053
+
1054
# Feeds arg to Tb.csv_stream_input and returns the first array it yields.
# Returns an empty array when nothing is yielded.
def split_csv_argument(arg)
  Tb.csv_stream_input(arg) {|first_row|
    # Only the first row is wanted, so leave the method right away.
    return first_row
  }
  []
end
1058
+
1059
# Loads each file named in argv with load_table and yields the resulting
# table.  When argv is empty, the single table loaded from '-' is yielded.
def each_table_file(argv)
  return yield(load_table('-')) if argv.empty?
  argv.each {|filename| yield load_table(filename) }
end
1069
+
1070
# Reads the whole table from filename into a new Tb object.
#
# All rows are collected first and the header is taken from the reader only
# afterwards.  Rows shorter than the header are padded with nil before they
# are inserted.
#
# Returns the value of the tablereader_open block, i.e. the Tb object.
def load_table(filename)
  tablereader_open(filename) {|tblreader|
    rows = []
    tblreader.each {|row| rows << row }
    # Ask for the header only after every row has been read.
    fields = tblreader.header
    table = Tb.new(fields)
    rows.each {|row|
      missing = fields.length - row.length
      row.concat([nil] * missing) if missing > 0
      table.insert_values fields, row
    }
    table
  }
end
1085
+
1086
# Opens filename with Tb::Reader.open, passing $opt_N as the :numeric
# option, and hands the given block on to it.
def tablereader_open(filename, &b)
  reader_options = {:numeric=>$opt_N}
  Tb::Reader.open(filename, reader_options, &b)
end
1089
+
1090
# Yields a CSV stream generator (from Tb.csv_stream_output) that writes to
# the stream provided by with_output.
#
# The generator gets an extra singleton method, output_header(header), which
# appends the header row unless $opt_N is set.
def with_table_stream_output
  with_output {|out|
    Tb.csv_stream_output(out) {|gen|
      class << gen
        def output_header(header)
          self << header if !$opt_N
        end
      end
      yield gen
    }
  }
end
1100
+
1101
# Writes tbl to out as CSV.
#
# Without $opt_N this simply delegates to tbl.generate_csv(out).  With
# $opt_N set, the records are streamed through Tb.csv_stream_output as plain
# value rows in the order of tbl.list_fields, and no header row is written
# by this method.
def tbl_generate_csv(tbl, out)
  return tbl.generate_csv(out) unless $opt_N
  fields = tbl.list_fields
  Tb.csv_stream_output(out) {|gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  }
end
1113
+
1114
# Writes tbl to out as TSV.
#
# Without $opt_N this simply delegates to tbl.generate_tsv(out).  With
# $opt_N set, the records are streamed through Tb.tsv_stream_output as plain
# value rows in the order of tbl.list_fields, and no header row is written
# by this method.
def tbl_generate_tsv(tbl, out)
  return tbl.generate_tsv(out) unless $opt_N
  fields = tbl.list_fields
  Tb.tsv_stream_output(out) {|gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  }
end
1126
+
1127
# Yields the stream that output should be written to.
#
# When STDOUT is a terminal and $opt_no_pager is not set, a pager process is
# started (the command in ENV['PAGER'], or 'more' when that is unset) and
# its input pipe is yielded.  Otherwise STDOUT itself is yielded.
#
# Returns the value of the block.
def with_output
  use_pager = STDOUT.tty? && !$opt_no_pager
  return yield(STDOUT) unless use_pager
  pager_command = ENV['PAGER'] || 'more'
  IO.popen(pager_command, 'w') {|pager| yield pager }
end
1136
+
1137
# Script entry point: run main with the command-line arguments.
main(ARGV)