tb 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +60 -0
- data/bin/tb +1137 -0
- data/lib/tb.rb +35 -0
- data/lib/tb/basic.rb +1071 -0
- data/lib/tb/csv.rb +125 -0
- data/lib/tb/enumerable.rb +284 -0
- data/lib/tb/fieldset.rb +96 -0
- data/lib/tb/pathfinder.rb +569 -0
- data/lib/tb/qtsv.rb +93 -0
- data/lib/tb/reader.rb +213 -0
- data/lib/tb/record.rb +129 -0
- data/lib/tb/tsv.rb +93 -0
- data/sample/excel2csv +270 -0
- data/sample/poi-xls2csv.rb +397 -0
- data/sample/poi-xls2csv.sh +39 -0
- data/test-all.rb +7 -0
- data/test/test_basic.rb +290 -0
- data/test/test_csv.rb +78 -0
- data/test/test_enumerable.rb +122 -0
- data/test/test_record.rb +12 -0
- data/test/test_tsv.rb +41 -0
- metadata +73 -0
data/README
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
= tb - manipulation tool for table: CSV, TSV, etc.
|
2
|
+
|
3
|
+
== Usage
|
4
|
+
|
5
|
+
% tb help
|
6
|
+
Usage:
|
7
|
+
tb csv [OPTS] [TABLE]
|
8
|
+
tb tsv [OPTS] [TABLE]
|
9
|
+
tb json [OPTS] [TABLE]
|
10
|
+
tb yaml [OPTS] [TABLE]
|
11
|
+
tb pp [OPTS] [TABLE]
|
12
|
+
tb grep [OPTS] REGEXP [TABLE]
|
13
|
+
tb gsub [OPTS] REGEXP STRING [TABLE]
|
14
|
+
tb sort [OPTS] [TABLE]
|
15
|
+
tb select [OPTS] FIELD,... [TABLE]
|
16
|
+
tb rename [OPTS] SRC,DST,... [TABLE]
|
17
|
+
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
|
18
|
+
tb cat [OPTS] [TABLE ...]
|
19
|
+
tb join [OPTS] [TABLE ...]
|
20
|
+
tb group [OPTS] KEY-FIELD1,... [TABLE]
|
21
|
+
tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
|
22
|
+
tb shape [OPTS] [TABLE ...]
|
23
|
+
tb mheader [OPTS] [TABLE]
|
24
|
+
tb crop [OPTS] [TABLE]
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
== Install
|
29
|
+
|
30
|
+
gem install tb
|
31
|
+
|
32
|
+
== Author
|
33
|
+
|
34
|
+
Tanaka Akira <akr@fsij.org>
|
35
|
+
|
36
|
+
== License
|
37
|
+
|
38
|
+
Redistribution and use in source and binary forms, with or without
|
39
|
+
modification, are permitted provided that the following conditions are met:
|
40
|
+
|
41
|
+
(1) Redistributions of source code must retain the above copyright notice, this
|
42
|
+
list of conditions and the following disclaimer.
|
43
|
+
(2) Redistributions in binary form must reproduce the above copyright notice,
|
44
|
+
this list of conditions and the following disclaimer in the documentation
|
45
|
+
and/or other materials provided with the distribution.
|
46
|
+
(3) The name of the author may not be used to endorse or promote products
|
47
|
+
derived from this software without specific prior written permission.
|
48
|
+
|
49
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
50
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
51
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
52
|
+
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
53
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
54
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
55
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
56
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
57
|
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
58
|
+
OF SUCH DAMAGE.
|
59
|
+
|
60
|
+
(The modified BSD licence)
|
data/bin/tb
ADDED
@@ -0,0 +1,1137 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or without
|
6
|
+
# modification, are permitted provided that the following conditions are met:
|
7
|
+
#
|
8
|
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
9
|
+
# list of conditions and the following disclaimer.
|
10
|
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
# this list of conditions and the following disclaimer in the documentation
|
12
|
+
# and/or other materials provided with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote products
|
14
|
+
# derived from this software without specific prior written permission.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
17
|
+
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
18
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
19
|
+
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
20
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
21
|
+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
24
|
+
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
25
|
+
# OF SUCH DAMAGE.
|
26
|
+
|
27
|
+
require 'tb'
|
28
|
+
require 'optparse'
|
29
|
+
require 'enumerator'
|
30
|
+
|
31
|
+
# Entry point: removes the subcommand name from the front of +argv+ and
# hands the remaining arguments to the matching main_<subcommand> method.
# "-h" is treated as "help". A missing or unknown subcommand is reported
# through err.
def main(argv)
  subcommand = argv.shift
  if subcommand.nil?
    err "Usage: tb subcommand args..."
  else
    known = %w[help csv tsv json yaml pp grep gsub sort select rename
               newfield cat join group cross shape mheader crop]
    target = (subcommand == '-h') ? 'help' : subcommand
    if known.include?(target)
      send("main_#{target}", argv)
    else
      err "unexpected subcommand: #{subcommand.inspect}"
    end
  end
end
|
59
|
+
|
60
|
+
# Prints the subcommand synopsis to standard output and terminates the
# process with +status+.
#
# The "group" and "cross" lines list the key-field arguments those
# subcommands require, matching the banners of op_group and op_cross.
def usage(status)
  print <<'End'
Usage:
tb csv [OPTS] [TABLE]
tb tsv [OPTS] [TABLE]
tb json [OPTS] [TABLE]
tb yaml [OPTS] [TABLE]
tb pp [OPTS] [TABLE]
tb grep [OPTS] REGEXP [TABLE]
tb gsub [OPTS] REGEXP STRING [TABLE]
tb sort [OPTS] [TABLE]
tb select [OPTS] FIELD,... [TABLE]
tb rename [OPTS] SRC,DST,... [TABLE]
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
tb cat [OPTS] [TABLE ...]
tb join [OPTS] [TABLE ...]
tb group [OPTS] KEY-FIELD1,... [TABLE]
tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
tb shape [OPTS] [TABLE ...]
tb mheader [OPTS] [TABLE]
tb crop [OPTS] [TABLE]
End
  exit status
end
|
84
|
+
|
85
|
+
# Implements "tb help [SUBCOMMAND]".
# With no subcommand the general usage is shown via usage(true); with a
# known subcommand the option parser returned by op_<subcommand> is
# printed; anything else is reported through err.
def main_help(argv)
  subcommand = argv.shift
  topics = %w[csv tsv json yaml pp grep gsub sort select rename
              newfield cat join group cross shape mheader crop]
  if subcommand.nil?
    usage(true)
  elsif topics.include?(subcommand)
    puts send("op_#{subcommand}")
  else
    err "unexpected subcommand: #{subcommand.inspect}"
  end
end
|
112
|
+
|
113
|
+
# Option state shared by the subcommands; each is set by the switch
# handlers defined in the op_* parser builders.
$opt_N = nil
$opt_debug = 0
$opt_no_pager = nil

# Builds the option parser for "tb csv".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_csv
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb csv [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
125
|
+
|
126
|
+
# Builds the option parser for "tb tsv".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_tsv
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb tsv [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
134
|
+
|
135
|
+
# Builds the option parser for "tb json".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_json
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb json [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
143
|
+
|
144
|
+
# Builds the option parser for "tb yaml".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_yaml
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb yaml [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
152
|
+
|
153
|
+
# Builds the option parser for "tb pp".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_pp
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb pp [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
161
|
+
|
162
|
+
# Option state for "tb grep", filled in by the parser built in op_grep.
$opt_grep_e = nil
$opt_grep_ruby = nil
$opt_grep_f = nil
$opt_grep_v = nil

# Builds the option parser for "tb grep".
#
# -e stores a regexp source in $opt_grep_e and --ruby stores a Ruby
# expression in $opt_grep_ruby; main_grep compiles the former with
# Regexp.new and evaluates the latter as a predicate. The help texts of
# these two switches were previously swapped; they now describe what each
# switch actually does.
def op_grep
  op = OptionParser.new
  op.banner = 'Usage: tb grep [OPTS] REGEXP [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_grep_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_grep_e = pattern }
  op.def_option('--ruby RUBY-EXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|ruby_exp| $opt_grep_ruby = ruby_exp }
  op.def_option('-v', 'output the records which don\'t match') { $opt_grep_v = true }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
|
178
|
+
|
179
|
+
# Option state for "tb gsub", filled in by the parser built in op_gsub.
$opt_gsub_e = nil
$opt_gsub_f = nil

# Builds the option parser for "tb gsub".
#
# -e stores a regexp source in $opt_gsub_e, which main_gsub compiles with
# Regexp.new. Its help text previously described a Ruby predicate, which
# does not apply to this subcommand.
def op_gsub
  op = OptionParser.new
  op.banner = 'Usage: tb gsub [OPTS] REGEXP STRING [TABLE]'
  op.def_option('-h', 'show help message') { puts op; exit 0 }
  op.def_option('-N', 'use numeric field name') { $opt_N = true }
  op.def_option('-f FIELD', 'search field') {|field| $opt_gsub_f = field }
  op.def_option('-e REGEXP', 'specify a regexp. no usual regexp argument.') {|pattern| $opt_gsub_e = pattern }
  op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
  op
end
|
191
|
+
|
192
|
+
# Raw value of the -f switch of "tb sort" (nil when not given).
$opt_sort_f = nil

# Builds the option parser for "tb sort".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# -f stores its argument in $opt_sort_f, --no-pager sets $opt_no_pager.
def op_sort
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb sort [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('-f FIELD,...', 'specify sort keys') { |keys| $opt_sort_f = keys }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
202
|
+
|
203
|
+
# True once the -v switch of "tb select" has been seen.
$opt_select_v = nil

# Builds the option parser for "tb select".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# -v sets $opt_select_v, --no-pager sets $opt_no_pager.
def op_select
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb select [OPTS] FIELD,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('-v', 'invert match') { $opt_select_v = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
213
|
+
|
214
|
+
# Builds the option parser for "tb rename".
# Switches: -h prints the parser and exits 0, --no-pager sets $opt_no_pager.
def op_rename
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb rename [OPTS] SRC,DST,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
221
|
+
|
222
|
+
# Builds the option parser for "tb newfield".
# Switches: -h prints the parser and exits 0, --no-pager sets $opt_no_pager.
def op_newfield
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb newfield [OPTS] FIELD RUBY-EXP [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
229
|
+
|
230
|
+
# Builds the option parser for "tb cat".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_cat
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb cat [OPTS] [TABLE ...]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
238
|
+
|
239
|
+
# Option state for "tb join": the outer-join mode (:full, :left, :right or
# nil) and the value given to --outer-missing.
$opt_join_outer = nil
$opt_join_outer_missing = nil

# Builds the option parser for "tb join".
# -d/--debug increments $opt_debug; --outer/--left/--right select the
# outer-join mode; --outer-missing records the given value and selects
# :full only when no mode has been chosen yet.
def op_join
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb join [OPTS] [TABLE ...]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-d', '--debug', 'show debug message') { $opt_debug += 1 }
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--outer', 'outer join') { $opt_join_outer = :full }
    parser.on('--left', 'left outer join') { $opt_join_outer = :left }
    parser.on('--right', 'right outer join') { $opt_join_outer = :right }
    parser.on('--outer-missing=DEFAULT', 'missing value for outer join') do |missing|
      $opt_join_outer ||= :full
      $opt_join_outer_missing = missing
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
257
|
+
|
258
|
+
# Arguments of every -a/--aggregate switch given to "tb group", in order.
$opt_group_fields = []

# Builds the option parser for "tb group".
# -a/--aggregate appends its raw argument to $opt_group_fields;
# --no-pager sets $opt_no_pager.
def op_group
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb group [OPTS] KEY-FIELD1,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
              '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') do |spec|
      $opt_group_fields << spec
    end
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
268
|
+
|
269
|
+
# Option state for "tb cross": the raw -a/--aggregate arguments and the
# compact-format flag.
$opt_cross_fields = []
$opt_cross_compact = false

# Builds the option parser for "tb cross".
# -a/--aggregate appends its raw argument to $opt_cross_fields;
# -c/--compact sets $opt_cross_compact; --no-pager sets $opt_no_pager.
def op_cross
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-a AGGREGATION-SPEC[,NEW-FIELD]',
              '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') do |spec|
      $opt_cross_fields << spec
    end
    parser.on('-c', '--compact', 'compact format') { $opt_cross_compact = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
281
|
+
|
282
|
+
# Builds the option parser for "tb shape".
# Switches: -h prints the parser and exits 0, -N sets $opt_N,
# --no-pager sets $opt_no_pager.
def op_shape
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb shape [OPTS] [TABLE ...]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-N', 'use numeric field name') { $opt_N = true }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
290
|
+
|
291
|
+
# Integer value of the -c switch of "tb mheader" (nil when not given).
$opt_mheader_count = nil

# Builds the option parser for "tb mheader".
# -c stores its argument, converted with to_i, in $opt_mheader_count;
# --no-pager sets $opt_no_pager.
def op_mheader
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb mheader [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-c N', 'number of header records') { |count| $opt_mheader_count = count.to_i }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
300
|
+
|
301
|
+
# Raw value of the -r switch of "tb crop" (nil when not given).
$opt_crop_range = nil

# Builds the option parser for "tb crop".
# -r stores its argument unchanged in $opt_crop_range;
# --no-pager sets $opt_no_pager.
def op_crop
  OptionParser.new do |parser|
    parser.banner = 'Usage: tb crop [OPTS] [TABLE]'
    parser.on('-h', 'show help message') do
      puts parser
      exit 0
    end
    parser.on('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') { |range| $opt_crop_range = range }
    parser.on('--no-pager', "don't use pager") { $opt_no_pager = true }
  end
end
|
310
|
+
|
311
|
+
# Reports +msg+ on STDERR and terminates the process with exit status 1.
def err(msg)
  STDERR.puts(msg)
  exit(1)
end
|
315
|
+
|
316
|
+
# Converts a cell value into an array usable as a sort key.
#
# * nil                -> []
# * Numeric            -> [0, v]
# * numeric-looking String (surrounding whitespace allowed)
#                      -> [0, number]
# * any other String   -> alternating tags and chunks: 0 followed by an
#                         Integer for each run of digits, 1 followed by the
#                         text for each run of non-digits
#
# Integer strings are always read as decimal. The previous Integer(v)
# call treated a leading zero as an octal prefix, so "010" became 8 and
# "08"/"09" raised ArgumentError. A mantissa ending in "." (for example
# "1." or "1.e2") is accepted by the regexp, so it is normalised before
# conversion instead of being passed to Float(), which rejects it on
# older Rubies.
#
# Raises ArgumentError for any other kind of object.
def comparison_value(v)
  case v
  when nil
    []
  when Numeric
    [0, v]
  when String
    case v
    when /\A\s*(-?\d+)\s*\z/
      [0, $1.to_i]
    when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)\s*\z/
      # "1." -> "1.0", "1.e2" -> "1.0e2"; other forms are left untouched.
      [0, $1.sub(/\.(?=[eE]|\z)/, '.0').to_f]
    else
      key = []
      v.scan(/\d+|\D+/) {|chunk|
        if /\A\d/ =~ chunk
          key << 0 << chunk.to_i
        else
          key << 1 << chunk
        end
      }
      key
    end
  else
    raise ArgumentError, "unexpected: #{v.inspect}"
  end
end
|
343
|
+
|
344
|
+
# Converts the string +v+ to a number after stripping surrounding
# whitespace: an Integer for an optionally signed run of digits, a Float
# for decimal/exponent notation. Raises RuntimeError for anything else.
def conv_to_numeric(v)
  text = v.strip
  return text.to_i if /\A-?\d+\z/ =~ text
  return text.to_f if /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/ =~ text
  raise "numeric value expected: #{text.inspect}"
end
|
355
|
+
|
356
|
+
# Aggregator that counts how many times update is called; the value
# passed to update is ignored.
class CountAggregator
  def initialize
    @result = 0
  end

  # Returns the running count after incrementing it.
  def update(v)
    @result += 1
  end

  def finish
    @result
  end
end
|
361
|
+
|
362
|
+
# Aggregator that adds up the values it receives, converting each with
# conv_to_numeric. nil and the empty string are skipped.
class SumAggregator
  def initialize
    @result = 0
  end

  # Returns the running sum, or nil when +v+ was skipped.
  def update(v)
    @result += conv_to_numeric(v) unless v.nil? || v == ''
  end

  def finish
    @result
  end
end
|
367
|
+
|
368
|
+
# Aggregator that computes sum / count as a Float.
#
# NOTE(review): every call to update increases the count, while nil and
# empty-string values are left out of the sum, so blank cells pull the
# average towards zero. SumAggregator simply skips blanks; confirm that
# counting them here is intended.
class AvgAggregator
  def initialize
    @sum = 0
    @count = 0
  end

  # Returns the running sum, or nil when +v+ was blank.
  def update(v)
    @count += 1
    @sum += conv_to_numeric(v) unless v.nil? || v == ''
  end

  def finish
    @sum / @count.to_f
  end
end
|
373
|
+
|
374
|
+
# Aggregator that keeps the value whose comparison_value key is greatest.
# The first value seen is always taken; on ties the earlier value stays.
class MaxAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  def update(v)
    key = comparison_value(v)
    if @cmp.nil? || (@cmp <=> key) < 0
      @v, @cmp = v, key
    end
  end

  def finish
    @v
  end
end
|
386
|
+
|
387
|
+
# Aggregator that keeps the value whose comparison_value key is smallest.
# The first value seen is always taken; on ties the earlier value stays.
class MinAggregator
  def initialize
    @v = nil
    @cmp = nil
  end

  def update(v)
    key = comparison_value(v)
    if @cmp.nil? || (@cmp <=> key) > 0
      @v, @cmp = v, key
    end
  end

  def finish
    @v
  end
end
|
399
|
+
|
400
|
+
# Aggregator that collects every truthy value and joins them with ","
# when finished.
class ValuesAggregator
  def initialize
    @result = []
  end

  # nil (and false) values are not collected.
  def update(v)
    @result.push(v) if v
  end

  def finish
    @result.join(",")
  end
end
|
405
|
+
|
406
|
+
# Aggregator that collects every truthy value and, when finished, joins
# the distinct ones with "," in first-seen order.
class UniqueValuesAggregator
  def initialize
    @result = []
  end

  # nil (and false) values are not collected.
  def update(v)
    @result.push(v) if v
  end

  def finish
    distinct = @result.uniq
    distinct.join(",")
  end
end
|
411
|
+
|
412
|
+
# Adapter that feeds one element of each record array (the one at index
# +i+) to the wrapped aggregator.
class Selector
  def initialize(i, aggregator)
    @i = i
    @agg = aggregator
  end

  def update(ary)
    @agg.update(ary[@i])
  end

  def finish
    @agg.finish
  end
end
|
417
|
+
|
418
|
+
# Builds the aggregator described by +spec+.
#
# "count" yields a CountAggregator. "sum(F)", "avg(F)", "max(F)",
# "min(F)", "values(F)" and "uniquevalues(F)" yield a Selector that picks
# the position of field F within +fs+ and wraps the corresponding
# aggregator.
#
# Raises ArgumentError when F is not in +fs+ or when +spec+ has any other
# form.
def make_aggregator(spec, fs)
  case spec
  when 'count'
    CountAggregator.new
  when /\A(sum|avg|max|min|values|uniquevalues)\((.*)\)\z/
    kind, field = $1, $2
    i = fs.index(field)
    raise ArgumentError, "field not found: #{field.inspect}" if !i
    aggregator =
      case kind
      when 'sum' then SumAggregator.new
      when 'avg' then AvgAggregator.new
      when 'max' then MaxAggregator.new
      when 'min' then MinAggregator.new
      when 'values' then ValuesAggregator.new
      else UniqueValuesAggregator.new
      end
    Selector.new(i, aggregator)
  else
    raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
  end
end
|
456
|
+
|
457
|
+
# Runs the aggregation described by +spec+ over every record of +table+
# and returns the aggregator's final value.
#
# make_aggregator returns a single object responding to update/finish,
# so it is used directly. The previous code destructured it into two
# callables and referenced an undefined local +fs+, which raised as soon
# as the method was called.
#
# Each record is reduced to its values in table.list_fields order, which
# is the order the Selector indexes from make_aggregator refer to.
# NOTE(review): assumes records respond to values_at with field names —
# confirm against the table's record class.
def aggregate(spec, table)
  fs = table.list_fields
  aggregator = make_aggregator(spec, fs)
  table.each {|rec|
    aggregator.update(rec.values_at(*fs))
  }
  aggregator.finish
end
|
464
|
+
|
465
|
+
# Implements "tb csv": parses the options, then passes each table yielded
# by each_table_file to tbl_generate_csv together with the stream
# provided by with_output.
def main_csv(argv)
  op_csv.parse!(argv)
  each_table_file(argv) do |table|
    with_output do |io|
      tbl_generate_csv(table, io)
    end
  end
end
|
473
|
+
|
474
|
+
# Implements "tb tsv": parses the options, then passes each table yielded
# by each_table_file to tbl_generate_tsv together with the stream
# provided by with_output.
def main_tsv(argv)
  op_tsv.parse!(argv)
  each_table_file(argv) do |table|
    with_output do |io|
      tbl_generate_tsv(table, io)
    end
  end
end
|
482
|
+
|
483
|
+
# Implements "tb json": prints the records of all given tables ("-" when
# none is named) as one JSON array of objects. nil cells are left out of
# the objects.
def main_json(argv)
  require 'json'
  op_json.parse!(argv)
  argv = ['-'] if argv.empty?
  with_output do |out|
    out.print "["
    separator = nil
    argv.each do |filename|
      # Once something has been printed, the first record of a later
      # file is preceded by a wider separator.
      separator = ",\n\n" if separator
      tablereader_open(filename) do |reader|
        reader.each do |values|
          out.print separator if separator
          fields = reader.header
          record = {}
          values.each_with_index do |value, idx|
            record[fields[idx]] = value unless value.nil?
          end
          out.print JSON.pretty_generate(record)
          separator = ",\n"
        end
      end
    end
    out.puts "]"
  end
end
|
508
|
+
|
509
|
+
# Implements "tb yaml": for each table yielded by each_table_file, dumps
# the array of its records (converted with to_h) as YAML, followed by an
# empty line.
def main_yaml(argv)
  require 'yaml'
  op_yaml.parse!(argv)
  each_table_file(argv) do |table|
    records = table.map {|rec| rec.to_h }
    with_output do |out|
      YAML.dump(records, out)
      out.puts
    end
  end
end
|
520
|
+
|
521
|
+
# Implements "tb pp": pretty-prints every record of the given tables
# ("-" when none is named) as a Hash keyed by field, omitting nil cells.
#
# The pp library is required here, in the same way main_json and
# main_yaml load their libraries, so the PP constant is defined even if
# nothing else has loaded it.
def main_pp(argv)
  require 'pp'
  op_pp.parse!(argv)
  argv.unshift '-' if argv.empty?
  with_output {|out|
    argv.each {|filename|
      tablereader_open(filename) {|tblreader|
        tblreader.each {|ary|
          h = {}
          ary.each_with_index {|v, i|
            next if v.nil?
            h[tblreader.field_from_index_ex(i)] = v
          }
          PP.pp h, out
        }
      }
    }
  }
end
|
539
|
+
|
540
|
+
# Implements "tb grep": copies to the output the records for which the
# predicate holds (or does not hold, with -v).
#
# The predicate is, in order of precedence:
# * the Ruby expression given with --ruby, evaluated with the record Hash
#   bound to _ (this deliberately evaluates code from the command line);
# * the regexp given with -e;
# * the regexp taken from the first positional argument.
# With -f only that field is matched; otherwise any field may match.
#
# A missing regexp is reported through err; previously it reached
# Regexp.new(nil) and ended in a TypeError backtrace.
def main_grep(argv)
  op_grep.parse!(argv)
  if $opt_grep_ruby
    pred = eval("lambda {|_| #{$opt_grep_ruby} }")
  else
    pattern = $opt_grep_e || argv.shift
    err "tb grep: REGEXP is not given" if pattern.nil?
    re = Regexp.new(pattern)
    pred = $opt_grep_f ? lambda {|_| re =~ _[$opt_grep_f] } :
                         lambda {|_| _.any? {|k, v| re =~ v.to_s } }
  end
  opt_v = $opt_grep_v ? true : false
  argv.unshift '-' if argv.empty?
  argv.each {|filename|
    tablereader_open(filename) {|tblreader|
      with_table_stream_output {|gen|
        gen.output_header tblreader.header
        tblreader.each {|ary|
          h = {}
          ary.each_with_index {|str, i|
            f = tblreader.field_from_index_ex(i)
            h[f] = str
          }
          found = pred.call(h)
          # XOR with -v inverts the selection.
          found = opt_v ^ !!(found)
          gen << ary if found
        }
      }
    }
  }
end
|
573
|
+
|
574
|
+
# Implements "tb gsub": replaces every match of a regexp in the cells of
# a table and writes the result.
#
# The regexp comes from -e, or else from the first positional argument;
# the replacement string is the next positional argument. With -f only
# the named field is rewritten; other cells pass through unchanged.
# nil cells that are rewritten are treated as empty strings.
#
# A missing regexp or replacement is reported through err; previously it
# ended in a TypeError backtrace.
def main_gsub(argv)
  op_gsub.parse!(argv)
  pattern = $opt_gsub_e || argv.shift
  err "tb gsub: REGEXP is not given" if pattern.nil?
  re = Regexp.new(pattern)
  repl = argv.shift
  err "tb gsub: replacement STRING is not given" if repl.nil?
  filename = argv.empty? ? '-' : argv.shift
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    with_table_stream_output {|gen|
      gen.output_header tblreader.header
      tblreader.each {|ary|
        ary2 = []
        ary.each_with_index {|str, i|
          # Without -f every cell is rewritten; field_from_index_ex is
          # consulted only when a target field was given.
          if !$opt_gsub_f || tblreader.field_from_index_ex(i) == $opt_gsub_f
            ary2 << (str || '').gsub(re, repl)
          else
            ary2 << str
          end
        }
        gen << ary2
      }
    }
  }
end
|
610
|
+
|
611
|
+
# Implements "tb sort": loads one table ("-" when none is named), orders
# its records and writes the result as CSV.
# The sort key is built with comparison_value from the fields listed with
# -f, or from every field of the record when -f is absent.
def main_sort(argv)
  op_sort.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  fs = $opt_sort_f ? split_field_list_argument($opt_sort_f) : nil
  tbl = load_table(filename)
  key_of =
    if fs
      lambda {|rec| fs.map {|f| comparison_value(rec[f]) } }
    else
      lambda {|rec| rec.map {|k, v| comparison_value(v) } }
    end
  sorted = tbl.reorder_records_by(&key_of)
  with_output do |out|
    tbl_generate_csv(sorted, out)
  end
end
|
631
|
+
|
632
|
+
# Implements "tb select": writes only the listed fields of a table, or,
# with -v, every field except the listed ones.
def main_select(argv)
  op_select.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  tablereader_open(filename) do |reader|
    if $opt_select_v
      # Column indexes to drop.
      dropped = {}
      fs.each {|f| dropped[reader.index_from_field(f)] = true }
      header = nil
      unless $opt_N
        header = []
        reader.header.each_with_index do |f, i|
          header << f unless dropped[i]
        end
      end
      with_table_stream_output do |gen|
        gen.output_header(header)
        reader.each do |ary|
          kept = []
          ary.each_with_index do |v, i|
            kept << v unless dropped[i]
          end
          gen << kept
        end
      end
    else
      # The result is not used; the call is kept in case reading the
      # header has an effect on the reader — TODO confirm and remove.
      reader.header
      indexes = fs.map {|f| reader.index_from_field(f) }
      with_table_stream_output do |gen|
        gen.output_header(indexes.map {|i| reader.field_from_index_ex(i) })
        reader.each do |ary|
          gen << ary.values_at(*indexes)
        end
      end
    end
  end
end
|
671
|
+
|
672
|
+
# Implements "tb rename": the first argument is a list SRC,DST,... of
# field-name pairs; the table is written with each SRC header replaced by
# its DST. Raises RuntimeError when a SRC is not in the header.
def main_rename(argv)
  op_rename.parse!(argv)
  fs = split_field_list_argument(argv.shift)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  mapping = {}
  fs.each_slice(2) {|src, dst| mapping[src] = dst }
  tablereader_open(filename) do |reader|
    header = reader.header
    mapping.each_key do |src|
      raise "field not defined: #{src.inspect}" unless header.include?(src)
    end
    renamed_header = reader.header.map {|f| mapping.fetch(f, f) }
    with_table_stream_output do |gen|
      gen.output_header(renamed_header)
      reader.each do |ary|
        gen << ary
      end
    end
  end
end
|
695
|
+
|
696
|
+
# Implements "tb newfield": prepends a field named FIELD whose value is
# computed for each record by RUBY-EXP, evaluated with the record Hash
# bound to _ (this deliberately evaluates code from the command line).
#
# Options are parsed with op_newfield. The previous code used op_rename,
# so "tb newfield -h" printed the rename usage banner.
def main_newfield(argv)
  op_newfield.parse!(argv)
  field = argv.shift
  rubyexp = argv.shift
  pr = eval("lambda {|_| #{rubyexp} }")
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    renamed_header = [field] + tblreader.header
    with_table_stream_output {|gen|
      gen.output_header(renamed_header)
      tblreader.each {|ary|
        h = {}
        ary.each_with_index {|str, i|
          f = tblreader.field_from_index_ex(i)
          h[f] = str
        }
        gen << [pr.call(h), *ary]
      }
    }
  }
end
|
718
|
+
|
719
|
+
# Implements "tb cat": concatenates the given tables ("-" when none is
# named).
#
# With -N the records of each file are copied through unchanged.
# Otherwise one combined header is built from all files and every value
# is placed in the column assigned to its field.
#
# NOTE(review): in the header-merging branch the readers are obtained
# from tablereader_open without a block and are not explicitly closed
# here — confirm whether that matters.
def main_cat(argv)
  op_cat.parse!(argv)
  argv = ['-'] if argv.empty?
  if $opt_N
    argv.each do |filename|
      with_table_stream_output do |gen|
        tablereader_open(filename) do |reader|
          reader.each {|ary| gen << ary }
        end
      end
    end
  else
    readers = []
    # Field name -> output column, assigned in order of first appearance.
    column_of = {}
    argv.each do |filename|
      reader = tablereader_open(filename)
      readers << reader
      reader.header.each do |f|
        column_of[f] = column_of.size unless column_of[f]
      end
    end
    with_table_stream_output do |gen|
      gen.output_header column_of.keys.sort_by {|k| column_of[k] }
      readers.each do |reader|
        header = reader.header.dup
        reader.each do |ary|
          # A record longer than the header gets extra fields registered.
          while header.length < ary.length
            f = reader.field_from_index_ex(header.length)
            header << f
            column_of[f] = column_of.size unless column_of[f]
          end
          row = []
          ary.each_with_index do |v, i|
            row[column_of.fetch(reader.field_from_index(i))] = v
          end
          gen << row
        end
      end
    end
  end
end
|
764
|
+
|
765
|
+
# Implements "tb join": folds every table yielded by each_table_file into
# a result table, starting from Tb.new([], []), and writes it as CSV.
# natjoin2_outer is used when an outer-join mode is selected in
# $opt_join_outer, natjoin2 otherwise. With -d the fields shared by the
# current result and the next table are printed to STDERR.
def main_join(argv)
  op_join.parse!(argv)
  result = Tb.new([], [])
  outer = $opt_join_outer
  unless [nil, :full, :left, :right].include?(outer)
    raise "unexpected $opt_join_outer: #{$opt_join_outer.inspect}"
  end
  retain_left = (outer == :full || outer == :left)
  retain_right = (outer == :full || outer == :right)
  each_table_file(argv) do |tbl|
    STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= $opt_debug
    if outer
      result = result.natjoin2_outer(tbl, $opt_join_outer_missing, retain_left, retain_right)
    else
      result = result.natjoin2(tbl)
    end
  end
  with_output do |out|
    tbl_generate_csv(result, out)
  end
end
|
797
|
+
|
798
|
+
# "group" subcommand: groups the records of one table by the key fields
# given as the first argument and prints one CSV row per group holding
# the key values followed by the finished aggregates.
#
# argv - remaining command-line arguments: KEYFIELD,... [TABLE].
#        TABLE defaults to '-'.
def main_group(argv)
  op_group.parse!(argv)
  key_fields = split_field_list_argument(argv.shift)
  # Each entry is [output field name, factory building a fresh aggregator].
  aggregations = $opt_group_fields.map {|arg|
    spec, label = split_field_list_argument(arg)
    [label || spec, lambda {|fields| make_aggregator(spec, fields) }]
  }
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  groups = {}   # key values => array of aggregators, one per aggregation
  tablereader_open(filename) {|reader|
    key_indexes = key_fields.map {|f| reader.index_from_field(f) }
    result_fields = key_fields + aggregations.map {|label, _| label }
    reader.each {|row|
      key = row.values_at(*key_indexes)
      if groups.key?(key)
        groups[key].each {|ag| ag.update(row) }
      else
        # First record of this group: create its aggregators and feed
        # them the record right away.
        groups[key] = aggregations.map {|_, maker|
          ag = maker.call(reader.header)
          ag.update(row)
          ag
        }
      end
    }
    result = Tb.new(result_fields)
    sorted_keys = groups.keys.sort_by {|key| key.map {|v| comparison_value(v) } }
    sorted_keys.each {|key|
      finished = groups[key].map {|ag| ag.finish }
      result.insert_values(result_fields, key + finished)
    }
    with_output {|out| tbl_generate_csv(result, out) }
  }
end
|
832
|
+
|
833
|
+
# "cross" subcommand: builds a cross table. Records are bucketed by the
# pair (vertical key values, horizontal key values); every bucket gets
# one aggregator per entry of $opt_cross_fields (a single 'count' when
# none is given). Output is CSV: header rows for the horizontal keys,
# then one row per vertical key.
#
# argv - remaining command-line arguments: HKEYS VKEYS [TABLE], in that
#        order. TABLE defaults to '-'.
def main_cross(argv)
  op_cross.parse!(argv)
  hkfs = split_field_list_argument(argv.shift)
  vkfs = split_field_list_argument(argv.shift)
  # Each entry is [aggregation spec, label shown in the header].
  cross_specs =
    if $opt_cross_fields.empty?
      [['count', 'count']]
    else
      $opt_cross_fields.map {|arg|
        spec, label = split_field_list_argument(arg)
        [spec, label || spec]
      }
    end
  width = cross_specs.length
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  tablereader_open(filename) {|reader|
    vkis = vkfs.map {|f| reader.index_from_field(f) }
    hkis = hkfs.map {|f| reader.index_from_field(f) }
    vseen = {}   # distinct vertical key tuples
    hseen = {}   # distinct horizontal key tuples
    cells = {}   # [vertical tuple, horizontal tuple] => aggregators
    reader.each {|rec|
      vkey = rec.values_at(*vkis)
      hkey = rec.values_at(*hkis)
      vseen[vkey] = true
      hseen[hkey] = true
      cell = [vkey, hkey]
      ags = cells[cell]
      if ags
        ags.each {|ag| ag.update(rec) }
      else
        cells[cell] = cross_specs.map {|spec, _|
          ag = make_aggregator(spec, reader.header)
          ag.update(rec)
          ag
        }
      end
    }
    vary = vseen.keys.sort_by {|key| key.map {|v| comparison_value(v) } }
    hary = hseen.keys.sort_by {|key| key.map {|v| comparison_value(v) } }
    with_output {|out|
      Tb.csv_stream_output(out) {|gen|
        # One header row per horizontal key field. In compact mode the
        # last one is merged into the title row below instead.
        hkfs.each_with_index {|hkf, i|
          next if $opt_cross_compact && i == hkfs.length - 1
          line = [nil] * (vkfs.length - 1) + [hkf]
          hary.each {|hkvs| line.concat([hkvs[i]] * width) }
          gen << line
        }
        # Title row: vertical key field names, then per horizontal key
        # either its last key value (compact) or the aggregate labels.
        labels = cross_specs.map {|_, label| label }
        title = vkfs.dup
        hary.each {|hkvs|
          title.concat($opt_cross_compact ? [hkvs[-1]] * width : labels)
        }
        gen << title
        # Data rows; combinations that never occurred are filled with nil.
        vary.each {|vkvs|
          line = vkvs.dup
          hary.each {|hkvs|
            ags = cells[[vkvs, hkvs]]
            if ags
              ags.each {|ag| line << ag.finish }
            else
              width.times { line << nil }
            end
          }
          gen << line
        }
      }
    }
  }
end
|
906
|
+
|
907
|
+
# "shape" subcommand: for every input table reports the number of header
# fields, the smallest and largest record width, the record count and
# the file name, as one CSV row per file.
#
# argv - remaining command-line arguments: [TABLE ...]; '-' is used when
#        no table is named.
def main_shape(argv)
  op_shape.parse!(argv)
  filenames = argv.empty? ? ['-'] : argv
  result = Tb.new(%w[header_fields min_fields max_fields records filename])
  filenames.each {|filename|
    tablereader_open(filename) {|reader|
      header_width = reader.header.length
      narrowest = nil   # stays nil when the table has no records
      widest = nil
      count = 0
      reader.each {|rec|
        count += 1
        width = rec.length
        narrowest = width if narrowest.nil? || width < narrowest
        widest = width if widest.nil? || widest < width
      }
      result.insert({'header_fields'=>header_width,
                     'min_fields'=>narrowest,
                     'max_fields'=>widest,
                     'records'=>count,
                     'filename'=>filename})
    }
  }
  with_output {|out|
    # don't use tbl_generate_csv() because the header should always be output.
    result.generate_csv(out)
  }
end
|
939
|
+
|
940
|
+
# "mheader" subcommand: collapses a multi-row header into a single
# header row. Header rows are accumulated column by column until the
# header is considered complete; the joined header is then emitted and
# all following rows are passed through unchanged.
#
# The header is complete after $opt_mheader_count rows when that option
# is set, otherwise as soon as the joined column names are all distinct.
#
# argv - remaining command-line arguments: [TABLE], default '-'.
def main_mheader(argv)
  op_mheader.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  # Per-column list of header fragments; set to nil once the header has
  # been written.
  pending = []
  joined_names = lambda { pending.map {|parts| parts.compact.join(' ').strip } }
  if $opt_mheader_count
    remaining = $opt_mheader_count
    header_end_p = lambda {
      remaining -= 1
      remaining == 0 ? joined_names.call : nil
    }
  else
    header_end_p = lambda {
      names = joined_names.call.uniq
      pending.length == names.length ? names : nil
    }
  end
  with_table_stream_output {|gen|
    Tb::Reader.open(filename, {:numeric=>true}) {|reader|
      reader.each {|row|
        unless pending
          gen << row
          next
        end
        row.each_with_index {|v, i|
          parts = (pending[i] ||= [])
          # Skip a fragment that merely repeats the one directly above it.
          parts << v if parts.empty? || parts.last != v
        }
        names = header_end_p.call
        if names
          gen << names
          pending = nil
        end
      }
    }
  }
  warn "no header found." if pending
end
|
980
|
+
|
981
|
+
# "crop" subcommand: cuts a rectangular region out of a table.
#
# With $opt_crop_range set, the range is either "COL,ROW-COL,ROW" or
# A1-style "A1:B2" (both 1-based) and rows are cropped while streaming.
# Without it, the table is read completely and surrounding empty cells
# (nil or '') are trimmed: trailing cells of each row, empty rows at the
# top and bottom, and leading columns that are empty in every row.
#
# argv - remaining command-line arguments: [TABLE], default '-'.
#
# Raises ArgumentError when $opt_crop_range matches neither syntax.
def main_crop(argv)
  op_crop.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" unless argv.empty?
  if $opt_crop_range
    case $opt_crop_range
    when /\A(\d+),(\d+)-(\d+),(\d+)\z/ # 1-based
      left = $1.to_i
      top = $2.to_i
      right = $3.to_i
      bottom = $4.to_i
    when /\A([A-Z]+)(\d+):([A-Z]+)(\d+)\z/ # 1-based
      left = decode_a1_addressing_col($1)
      top = $2.to_i
      right = decode_a1_addressing_col($3)
      bottom = $4.to_i
    else
      raise ArgumentError, "unexpected range argument: #{$opt_crop_range.inspect}"
    end
    with_table_stream_output {|gen|
      Tb::Reader.open(filename, {:numeric=>true}) {|reader|
        rownum = 0
        reader.each {|row|
          rownum += 1
          break if bottom < rownum    # past the last wanted row
          next if rownum < top        # before the first wanted row
          # Drop the columns right of the range first so that the
          # indexes used for the left cut are still the original ones.
          row[right..-1] = [] if right < row.length
          row[0...(left-1)] = [] if 1 < left
          gen << row
        }
      }
    }
  else
    blank = lambda {|v| v.nil? || v == '' }
    rows = []
    Tb::Reader.open(filename, {:numeric=>true}) {|reader|
      reader.each {|row|
        row.pop while !row.empty? && blank.call(row.last)
        rows << row
      }
    }
    rows.pop while !rows.empty? && rows.last.all? {|v| blank.call(v) }
    rows.shift while !rows.empty? && rows.first.all? {|v| blank.call(v) }
    # The emptiness guard matters: all? is true for an empty list, which
    # would make the loop below spin forever.
    unless rows.empty?
      while rows.all? {|row| row.empty? || blank.call(row.first) }
        rows.each {|row| row.shift }
      end
    end
    with_table_stream_output {|gen|
      rows.each {|row| gen << row }
    }
  end
end
|
1045
|
+
|
1046
|
+
# Converts a spreadsheet column name ("A", "Z", "AA", ...) into its
# 1-based column number ("A" => 1, "Z" => 26, "AA" => 27).
#
# str - column letters; the caller only passes strings matching [A-Z]+.
def decode_a1_addressing_col(str)
  # Number of column names shorter than str, plus one:
  # 1 + 26 + 26**2 + ... + 26**(len-1) == (26**len - 1) / 25.
  first_of_length = (26 ** str.length - 1) / 25
  # Re-spell the letters as base-26 digits (A..J -> 0..9, K..Z -> A..P)
  # so that Integer parsing gives the offset among names of this length.
  base26_digits = str.tr("A-Z", "0-9A-P")
  first_of_length + base26_digits.to_i(26)
end
|
1049
|
+
|
1050
|
+
# Splits a comma-separated field list argument via split_csv_argument
# and replaces missing (nil) entries with empty strings.
#
# Returns an Array of field names.
def split_field_list_argument(arg)
  fields = split_csv_argument(arg)
  fields.map {|field| field ? field : '' }
end
|
1053
|
+
|
1054
|
+
# Parses arg as CSV text and returns the first row that
# Tb.csv_stream_input yields, or an empty Array when it yields nothing.
def split_csv_argument(arg)
  Tb.csv_stream_input(arg) do |first_row|
    # Only the first row is of interest; leave the method immediately.
    return first_row
  end
  []
end
|
1058
|
+
|
1059
|
+
# Loads each table named in argv with load_table and yields it.
# When argv is empty, the single table named '-' is loaded instead.
def each_table_file(argv)
  filenames = argv.empty? ? ['-'] : argv
  filenames.each {|filename| yield load_table(filename) }
end
|
1069
|
+
|
1070
|
+
# Reads the whole table from filename into a new Tb object.
#
# All rows are collected before the header is fetched from the reader;
# rows shorter than the header are padded with nil (in place).
#
# Returns the Tb built from the header and rows.
def load_table(filename)
  tablereader_open(filename) do |reader|
    rows = []
    reader.each {|row| rows << row }
    fields = reader.header
    table = Tb.new(fields)
    rows.each do |row|
      missing = fields.length - row.length
      row.concat([nil] * missing) if missing > 0
      table.insert_values(fields, row)
    end
    table
  end
end
|
1085
|
+
|
1086
|
+
# Opens filename with Tb::Reader, passing the global $opt_N as the
# reader's :numeric option, and forwards the given block.
def tablereader_open(filename, &b)
  reader_options = {:numeric => $opt_N}
  Tb::Reader.open(filename, reader_options, &b)
end
|
1089
|
+
|
1090
|
+
# Yields a CSV stream generator writing to the destination chosen by
# with_output. The generator is given an extra singleton method,
# output_header(header), which appends the header row unless $opt_N is
# set.
def with_table_stream_output
  with_output do |out|
    Tb.csv_stream_output(out) do |gen|
      class << gen
        def output_header(header)
          self << header unless $opt_N
        end
      end
      yield gen
    end
  end
end
|
1100
|
+
|
1101
|
+
# Writes tbl to out as CSV. When $opt_N is set, only the records are
# streamed (values in tbl.list_fields order, no header row); otherwise
# the table's own generate_csv is used.
def tbl_generate_csv(tbl, out)
  return tbl.generate_csv(out) unless $opt_N
  fields = tbl.list_fields
  Tb.csv_stream_output(out) do |gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  end
end
|
1113
|
+
|
1114
|
+
# Writes tbl to out as TSV. When $opt_N is set, only the records are
# streamed (values in tbl.list_fields order, no header row); otherwise
# the table's own generate_tsv is used.
def tbl_generate_tsv(tbl, out)
  return tbl.generate_tsv(out) unless $opt_N
  fields = tbl.list_fields
  Tb.tsv_stream_output(out) do |gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  end
end
|
1126
|
+
|
1127
|
+
# Yields the IO that command output should be written to.
#
# When STDOUT is a terminal and $opt_no_pager is not set, output is
# piped through the pager named by the PAGER environment variable
# ('more' when PAGER is unset or blank). Otherwise STDOUT itself is
# yielded.
def with_output
  if STDOUT.tty? && !$opt_no_pager
    pager_command = ENV['PAGER']
    # An empty PAGER would make IO.popen try to run an empty command.
    pager_command = 'more' if pager_command.nil? || pager_command.strip.empty?
    begin
      IO.popen(pager_command, 'w') {|pager|
        yield pager
      }
    rescue Errno::EPIPE
      # The pager exited before reading everything (e.g. the user quit
      # it); the remaining output is simply not wanted.
    end
  else
    yield STDOUT
  end
end
|
1136
|
+
|
1137
|
+
# Script entry point: run `main` with the command-line arguments.
main ARGV
|