tb 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +156 -5
- data/bin/tb +2 -1110
- data/lib/tb.rb +4 -2
- data/lib/tb/catreader.rb +131 -0
- data/lib/tb/cmd_cat.rb +65 -0
- data/lib/tb/cmd_consecutive.rb +79 -0
- data/lib/tb/cmd_crop.rb +105 -0
- data/lib/tb/cmd_cross.rb +119 -0
- data/lib/tb/cmd_csv.rb +42 -0
- data/lib/tb/cmd_cut.rb +77 -0
- data/lib/tb/cmd_grep.rb +76 -0
- data/lib/tb/cmd_group.rb +82 -0
- data/lib/tb/cmd_gsub.rb +77 -0
- data/lib/tb/cmd_help.rb +98 -0
- data/lib/tb/cmd_join.rb +81 -0
- data/lib/tb/cmd_json.rb +60 -0
- data/lib/tb/cmd_ls.rb +273 -0
- data/lib/tb/cmd_mheader.rb +77 -0
- data/lib/tb/cmd_newfield.rb +59 -0
- data/lib/tb/cmd_pnm.rb +43 -0
- data/lib/tb/cmd_pp.rb +70 -0
- data/lib/tb/cmd_rename.rb +58 -0
- data/lib/tb/cmd_shape.rb +67 -0
- data/lib/tb/cmd_sort.rb +58 -0
- data/lib/tb/cmd_svn_log.rb +158 -0
- data/lib/tb/cmd_tsv.rb +43 -0
- data/lib/tb/cmd_yaml.rb +47 -0
- data/lib/tb/cmdmain.rb +45 -0
- data/lib/tb/cmdtop.rb +58 -0
- data/lib/tb/cmdutil.rb +327 -0
- data/lib/tb/csv.rb +30 -6
- data/lib/tb/fieldset.rb +39 -41
- data/lib/tb/pager.rb +132 -0
- data/lib/tb/pnm.rb +357 -0
- data/lib/tb/reader.rb +18 -128
- data/lib/tb/record.rb +3 -3
- data/lib/tb/ropen.rb +70 -0
- data/lib/tb/{pathfinder.rb → search.rb} +69 -34
- data/lib/tb/tsv.rb +29 -1
- data/sample/colors.ppm +0 -0
- data/sample/gradation.pgm +0 -0
- data/sample/langs.csv +46 -0
- data/sample/tbplot +293 -0
- data/test-all-cov.rb +65 -0
- data/test-all.rb +5 -0
- data/test/test_basic.rb +99 -2
- data/test/test_catreader.rb +27 -0
- data/test/test_cmd_cat.rb +118 -0
- data/test/test_cmd_consecutive.rb +90 -0
- data/test/test_cmd_crop.rb +101 -0
- data/test/test_cmd_cross.rb +113 -0
- data/test/test_cmd_csv.rb +129 -0
- data/test/test_cmd_cut.rb +100 -0
- data/test/test_cmd_grep.rb +89 -0
- data/test/test_cmd_group.rb +181 -0
- data/test/test_cmd_gsub.rb +103 -0
- data/test/test_cmd_help.rb +190 -0
- data/test/test_cmd_join.rb +197 -0
- data/test/test_cmd_json.rb +75 -0
- data/test/test_cmd_ls.rb +203 -0
- data/test/test_cmd_mheader.rb +86 -0
- data/test/test_cmd_newfield.rb +63 -0
- data/test/test_cmd_pnm.rb +35 -0
- data/test/test_cmd_pp.rb +62 -0
- data/test/test_cmd_rename.rb +91 -0
- data/test/test_cmd_shape.rb +50 -0
- data/test/test_cmd_sort.rb +105 -0
- data/test/test_cmd_tsv.rb +67 -0
- data/test/test_cmd_yaml.rb +55 -0
- data/test/test_cmdtty.rb +154 -0
- data/test/test_cmdutil.rb +43 -0
- data/test/test_csv.rb +10 -0
- data/test/test_fieldset.rb +42 -0
- data/test/test_pager.rb +142 -0
- data/test/test_pnm.rb +374 -0
- data/test/test_reader.rb +147 -0
- data/test/test_record.rb +49 -0
- data/test/test_search.rb +575 -0
- data/test/test_tsv.rb +7 -0
- metadata +108 -5
- data/lib/tb/qtsv.rb +0 -93
data/README
CHANGED
@@ -1,9 +1,133 @@
|
|
1
1
|
= tb - manipulation tool for table: CSV, TSV, etc.
|
2
2
|
|
3
|
-
|
3
|
+
tb provides a command and a library for manipulating tables:
|
4
|
+
Unix filter like operations (grep, sort, cat, cut, ls, etc.),
|
5
|
+
SQL like operations (join, group, etc.),
|
6
|
+
and more.
|
7
|
+
|
8
|
+
== Example
|
9
|
+
|
10
|
+
There is a CSV file for programming languages and their birth year in
|
11
|
+
sample/ directory in tb package.
|
12
|
+
|
13
|
+
% head sample/langs.csv
|
14
|
+
language,year
|
15
|
+
FORTRAN,1955
|
16
|
+
LISP,1958
|
17
|
+
COBOL,1959
|
18
|
+
ALGOL 58,1958
|
19
|
+
APL,1962
|
20
|
+
Simula,1962
|
21
|
+
SNOBOL,1962
|
22
|
+
BASIC,1964
|
23
|
+
PL/I,1964
|
24
|
+
|
25
|
+
"tb" command has many subcommands.
|
26
|
+
"sort" subcommand sorts a CSV file.
|
27
|
+
You don't need to care about the header: the header is retained as is.
|
28
|
+
|
29
|
+
% tb sort sample/langs.csv|head
|
30
|
+
language,year
|
31
|
+
ALGOL 58,1958
|
32
|
+
APL,1962
|
33
|
+
Ada,1983
|
34
|
+
B,1969
|
35
|
+
BASIC,1964
|
36
|
+
BCPL,1967
|
37
|
+
C,1972
|
38
|
+
C#,2001
|
39
|
+
C++,1980
|
40
|
+
|
41
|
+
"sort" subcommand takes -f option to specify a field to sort.
|
42
|
+
You don't need to count the position of the field.
|
43
|
+
Also, the comparison method used in tb is smart enough to sort numbers correctly.
|
44
|
+
|
45
|
+
% tb sort -f year sample/langs.csv|head
|
46
|
+
language,year
|
47
|
+
FORTRAN,1955
|
48
|
+
LISP,1958
|
49
|
+
ALGOL 58,1958
|
50
|
+
COBOL,1959
|
51
|
+
APL,1962
|
52
|
+
SNOBOL,1962
|
53
|
+
Simula,1962
|
54
|
+
BASIC,1964
|
55
|
+
PL/I,1964
|
56
|
+
|
57
|
+
"grep" subcommand searches a CSV file.
|
58
|
+
|
59
|
+
% tb grep R sample/langs.csv
|
60
|
+
language,year
|
61
|
+
FORTRAN,1955
|
62
|
+
Ruby,1993
|
63
|
+
|
64
|
+
"grep" subcommand takes -f with field name and -v to show non-matching rows.
|
65
|
+
You don't need to care about field separators (commas) to match.
|
66
|
+
The following example searches for languages whose names contain a non-alphabetic character.
|
67
|
+
|
68
|
+
% tb grep -vf language '\A[A-Za-z]*\z' sample/langs.csv |cat
|
69
|
+
language,year
|
70
|
+
ALGOL 58,1958
|
71
|
+
PL/I,1964
|
72
|
+
C++,1980
|
73
|
+
Objective-C,1983
|
74
|
+
Common Lisp,1984
|
75
|
+
Visual Basic,1991
|
76
|
+
C#,2001
|
77
|
+
F#,2002
|
78
|
+
|
79
|
+
"grep" subcommand can take a Ruby expression instead of a regexp.
|
80
|
+
|
81
|
+
% tb grep --ruby '(1990..1999).include?(_["year"].to_i)' sample/langs.csv
|
82
|
+
language,year
|
83
|
+
Haskell,1990
|
84
|
+
Python,1991
|
85
|
+
Visual Basic,1991
|
86
|
+
Ruby,1993
|
87
|
+
Lua,1993
|
88
|
+
CLOS,1994
|
89
|
+
Java,1995
|
90
|
+
Delphi,1995
|
91
|
+
JavaScript,1995
|
92
|
+
PHP,1995
|
93
|
+
D,1999
|
94
|
+
|
95
|
+
"cut" subcommand extracts one or more fields.
|
96
|
+
This is similar to the "cut" command of Unix and to projection in relational algebra.
|
97
|
+
|
98
|
+
% tb cut language sample/langs.csv |head
|
99
|
+
language
|
100
|
+
FORTRAN
|
101
|
+
LISP
|
102
|
+
COBOL
|
103
|
+
ALGOL 58
|
104
|
+
APL
|
105
|
+
Simula
|
106
|
+
SNOBOL
|
107
|
+
BASIC
|
108
|
+
PL/I
|
109
|
+
|
110
|
+
"group" subcommand groups rows by the specified field.
|
111
|
+
The -a option specifies an aggregation expression to aggregate the grouped rows.
|
112
|
+
|
113
|
+
% tb group year -a count -a 'values(language)' sample/langs.csv |head
|
114
|
+
year,count,values(language)
|
115
|
+
1955,1,FORTRAN
|
116
|
+
1958,2,"LISP,ALGOL 58"
|
117
|
+
1959,1,COBOL
|
118
|
+
1962,3,"APL,Simula,SNOBOL"
|
119
|
+
1964,2,"BASIC,PL/I"
|
120
|
+
1967,1,BCPL
|
121
|
+
1968,1,Logo
|
122
|
+
1969,1,B
|
123
|
+
1970,2,"Pascal,Forth"
|
124
|
+
|
125
|
+
There are more subcommands.
|
126
|
+
"help" subcommand shows the list of subcommands.
|
4
127
|
|
5
128
|
% tb help
|
6
129
|
Usage:
|
130
|
+
tb help [OPTS] [SUBCOMMAND]
|
7
131
|
tb csv [OPTS] [TABLE]
|
8
132
|
tb tsv [OPTS] [TABLE]
|
9
133
|
tb json [OPTS] [TABLE]
|
@@ -12,23 +136,50 @@
|
|
12
136
|
tb grep [OPTS] REGEXP [TABLE]
|
13
137
|
tb gsub [OPTS] REGEXP STRING [TABLE]
|
14
138
|
tb sort [OPTS] [TABLE]
|
15
|
-
tb
|
139
|
+
tb cut [OPTS] FIELD,... [TABLE]
|
16
140
|
tb rename [OPTS] SRC,DST,... [TABLE]
|
17
141
|
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
|
18
142
|
tb cat [OPTS] [TABLE ...]
|
19
143
|
tb join [OPTS] [TABLE ...]
|
20
|
-
tb group [OPTS] [TABLE]
|
21
|
-
tb cross [OPTS] [TABLE]
|
144
|
+
tb group [OPTS] KEY-FIELD1,... [TABLE]
|
145
|
+
tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
|
22
146
|
tb shape [OPTS] [TABLE ...]
|
23
147
|
tb mheader [OPTS] [TABLE]
|
24
148
|
tb crop [OPTS] [TABLE]
|
25
149
|
|
26
|
-
|
150
|
+
== Command Line Tool
|
151
|
+
|
152
|
+
tb command has many subcommands.
|
153
|
+
|
154
|
+
help : show help message of tb command.
|
155
|
+
csv : convert a table to CSV (Comma Separated Value).
|
156
|
+
tsv : convert a table to TSV (Tab Separated Value).
|
157
|
+
json : convert a table to JSON (JavaScript Object Notation).
|
158
|
+
yaml : convert a table to YAML (YAML Ain't a Markup Language).
|
159
|
+
pp : convert a table to pretty printed format.
|
160
|
+
grep : search rows using regexp or ruby expression.
|
161
|
+
gsub : substitute cells.
|
162
|
+
sort : sort rows.
|
163
|
+
cut : select columns.
|
164
|
+
rename : rename field names.
|
165
|
+
newfield : add a field.
|
166
|
+
cat : concatenate tables vertically.
|
167
|
+
join : concatenate tables horizontally as left/right/full natural join.
|
168
|
+
group : group and aggregate rows.
|
169
|
+
cross : create a contingency table.
|
170
|
+
shape : show table size.
|
171
|
+
mheader : collapse multi rows header.
|
172
|
+
crop : extract rectangle in a table.
|
27
173
|
|
28
174
|
== Install
|
29
175
|
|
30
176
|
gem install tb
|
31
177
|
|
178
|
+
== Links
|
179
|
+
|
180
|
+
* ((<source repository on github|URL:https://github.com/akr/tb>))
|
181
|
+
* ((<tb on rubygems.org|URL:http://rubygems.org/gems/tb>))
|
182
|
+
|
32
183
|
== Author
|
33
184
|
|
34
185
|
Tanaka Akira <akr@fsij.org>
|
data/bin/tb
CHANGED
@@ -24,1114 +24,6 @@
|
|
24
24
|
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
25
25
|
# OF SUCH DAMAGE.
|
26
26
|
|
27
|
-
require 'tb'
|
28
|
-
require 'optparse'
|
29
|
-
require 'enumerator'
|
27
|
+
require 'tb/cmdtop'
|
30
28
|
|
31
|
-
|
32
|
-
subcommand = argv.shift
|
33
|
-
case subcommand
|
34
|
-
when 'help', '-h' then main_help(argv)
|
35
|
-
when 'csv' then main_csv(argv)
|
36
|
-
when 'tsv' then main_tsv(argv)
|
37
|
-
when 'json' then main_json(argv)
|
38
|
-
when 'yaml' then main_yaml(argv)
|
39
|
-
when 'pp' then main_pp(argv)
|
40
|
-
when 'grep' then main_grep(argv)
|
41
|
-
when 'gsub' then main_gsub(argv)
|
42
|
-
when 'sort' then main_sort(argv)
|
43
|
-
when 'select' then main_select(argv)
|
44
|
-
when 'rename' then main_rename(argv)
|
45
|
-
when 'newfield' then main_newfield(argv)
|
46
|
-
when 'cat' then main_cat(argv)
|
47
|
-
when 'join' then main_join(argv)
|
48
|
-
when 'group' then main_group(argv)
|
49
|
-
when 'cross' then main_cross(argv)
|
50
|
-
when 'shape' then main_shape(argv)
|
51
|
-
when 'mheader' then main_mheader(argv)
|
52
|
-
when 'crop' then main_crop(argv)
|
53
|
-
when nil
|
54
|
-
err "Usage: tb subcommand args..."
|
55
|
-
else
|
56
|
-
err "unexpected subcommand: #{subcommand.inspect}"
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def usage(status)
|
61
|
-
print <<'End'
|
62
|
-
Usage:
|
63
|
-
tb csv [OPTS] [TABLE]
|
64
|
-
tb tsv [OPTS] [TABLE]
|
65
|
-
tb json [OPTS] [TABLE]
|
66
|
-
tb yaml [OPTS] [TABLE]
|
67
|
-
tb pp [OPTS] [TABLE]
|
68
|
-
tb grep [OPTS] REGEXP [TABLE]
|
69
|
-
tb gsub [OPTS] REGEXP STRING [TABLE]
|
70
|
-
tb sort [OPTS] [TABLE]
|
71
|
-
tb select [OPTS] FIELD,... [TABLE]
|
72
|
-
tb rename [OPTS] SRC,DST,... [TABLE]
|
73
|
-
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
|
74
|
-
tb cat [OPTS] [TABLE ...]
|
75
|
-
tb join [OPTS] [TABLE ...]
|
76
|
-
tb group [OPTS] [TABLE]
|
77
|
-
tb cross [OPTS] [TABLE]
|
78
|
-
tb shape [OPTS] [TABLE ...]
|
79
|
-
tb mheader [OPTS] [TABLE]
|
80
|
-
tb crop [OPTS] [TABLE]
|
81
|
-
End
|
82
|
-
exit status
|
83
|
-
end
|
84
|
-
|
85
|
-
def main_help(argv)
|
86
|
-
subcommand = argv.shift
|
87
|
-
case subcommand
|
88
|
-
when 'csv' then puts op_csv
|
89
|
-
when 'tsv' then puts op_tsv
|
90
|
-
when 'json' then puts op_json
|
91
|
-
when 'yaml' then puts op_yaml
|
92
|
-
when 'pp' then puts op_pp
|
93
|
-
when 'grep' then puts op_grep
|
94
|
-
when 'gsub' then puts op_gsub
|
95
|
-
when 'sort' then puts op_sort
|
96
|
-
when 'select' then puts op_select
|
97
|
-
when 'rename' then puts op_rename
|
98
|
-
when 'newfield' then puts op_newfield
|
99
|
-
when 'cat' then puts op_cat
|
100
|
-
when 'join' then puts op_join
|
101
|
-
when 'group' then puts op_group
|
102
|
-
when 'cross' then puts op_cross
|
103
|
-
when 'shape' then puts op_shape
|
104
|
-
when 'mheader' then puts op_mheader
|
105
|
-
when 'crop' then puts op_crop
|
106
|
-
when nil
|
107
|
-
usage(true)
|
108
|
-
else
|
109
|
-
err "unexpected subcommand: #{subcommand.inspect}"
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
$opt_N = nil
|
114
|
-
$opt_debug = 0
|
115
|
-
$opt_no_pager = nil
|
116
|
-
|
117
|
-
def op_csv
|
118
|
-
op = OptionParser.new
|
119
|
-
op.banner = 'Usage: tb csv [OPTS] [TABLE]'
|
120
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
121
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
122
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
123
|
-
op
|
124
|
-
end
|
125
|
-
|
126
|
-
def op_tsv
|
127
|
-
op = OptionParser.new
|
128
|
-
op.banner = 'Usage: tb tsv [OPTS] [TABLE]'
|
129
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
130
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
131
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
132
|
-
op
|
133
|
-
end
|
134
|
-
|
135
|
-
def op_json
|
136
|
-
op = OptionParser.new
|
137
|
-
op.banner = 'Usage: tb json [OPTS] [TABLE]'
|
138
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
139
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
140
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
141
|
-
op
|
142
|
-
end
|
143
|
-
|
144
|
-
def op_yaml
|
145
|
-
op = OptionParser.new
|
146
|
-
op.banner = 'Usage: tb yaml [OPTS] [TABLE]'
|
147
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
148
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
149
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
150
|
-
op
|
151
|
-
end
|
152
|
-
|
153
|
-
def op_pp
|
154
|
-
op = OptionParser.new
|
155
|
-
op.banner = 'Usage: tb pp [OPTS] [TABLE]'
|
156
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
157
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
158
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
159
|
-
op
|
160
|
-
end
|
161
|
-
|
162
|
-
$opt_grep_e = nil
|
163
|
-
$opt_grep_ruby = nil
|
164
|
-
$opt_grep_f = nil
|
165
|
-
$opt_grep_v = nil
|
166
|
-
def op_grep
|
167
|
-
op = OptionParser.new
|
168
|
-
op.banner = 'Usage: tb grep [OPTS] REGEXP [TABLE]'
|
169
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
170
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
171
|
-
op.def_option('-f FIELD', 'search field') {|field| $opt_grep_f = field }
|
172
|
-
op.def_option('-e REGEXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|pattern| $opt_grep_e = pattern }
|
173
|
-
op.def_option('--ruby RUBY-EXP', 'specify a regexp. no usual regexp argument.') {|ruby_exp| $opt_grep_ruby = ruby_exp }
|
174
|
-
op.def_option('-v', 'ouput the records which doesn\'t match') { $opt_grep_v = true }
|
175
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
176
|
-
op
|
177
|
-
end
|
178
|
-
|
179
|
-
$opt_gsub_e = nil
|
180
|
-
$opt_gsub_f = nil
|
181
|
-
def op_gsub
|
182
|
-
op = OptionParser.new
|
183
|
-
op.banner = 'Usage: tb gsub [OPTS] REGEXP STRING [TABLE]'
|
184
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
185
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
186
|
-
op.def_option('-f FIELD', 'search field') {|field| $opt_gsub_f = field }
|
187
|
-
op.def_option('-e REGEXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|pattern| $opt_gsub_e = pattern }
|
188
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
189
|
-
op
|
190
|
-
end
|
191
|
-
|
192
|
-
$opt_sort_f = nil
|
193
|
-
def op_sort
|
194
|
-
op = OptionParser.new
|
195
|
-
op.banner = 'Usage: tb sort [OPTS] [TABLE]'
|
196
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
197
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
198
|
-
op.def_option('-f FIELD,...', 'specify sort keys') {|fs| $opt_sort_f = fs }
|
199
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
200
|
-
op
|
201
|
-
end
|
202
|
-
|
203
|
-
$opt_select_v = nil
|
204
|
-
def op_select
|
205
|
-
op = OptionParser.new
|
206
|
-
op.banner = 'Usage: tb select [OPTS] FIELD,... [TABLE]'
|
207
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
208
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
209
|
-
op.def_option('-v', 'invert match') { $opt_select_v = true }
|
210
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
211
|
-
op
|
212
|
-
end
|
213
|
-
|
214
|
-
def op_rename
|
215
|
-
op = OptionParser.new
|
216
|
-
op.banner = 'Usage: tb rename [OPTS] SRC,DST,... [TABLE]'
|
217
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
218
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
219
|
-
op
|
220
|
-
end
|
221
|
-
|
222
|
-
def op_newfield
|
223
|
-
op = OptionParser.new
|
224
|
-
op.banner = 'Usage: tb newfield [OPTS] FIELD RUBY-EXP [TABLE]'
|
225
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
226
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
227
|
-
op
|
228
|
-
end
|
229
|
-
|
230
|
-
def op_cat
|
231
|
-
op = OptionParser.new
|
232
|
-
op.banner = 'Usage: tb cat [OPTS] [TABLE ...]'
|
233
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
234
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
235
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
236
|
-
op
|
237
|
-
end
|
238
|
-
|
239
|
-
$opt_join_outer = nil
|
240
|
-
$opt_join_outer_missing = nil
|
241
|
-
def op_join
|
242
|
-
op = OptionParser.new
|
243
|
-
op.banner = 'Usage: tb join [OPTS] [TABLE ...]'
|
244
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
245
|
-
op.def_option('-d', '--debug', 'show debug message') { $opt_debug += 1 }
|
246
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
247
|
-
op.def_option('--outer', 'outer join') { $opt_join_outer = :full }
|
248
|
-
op.def_option('--left', 'left outer join') { $opt_join_outer = :left }
|
249
|
-
op.def_option('--right', 'right outer join') { $opt_join_outer = :right }
|
250
|
-
op.def_option('--outer-missing=DEFAULT', 'missing value for outer join') {|missing|
|
251
|
-
$opt_join_outer ||= :full
|
252
|
-
$opt_join_outer_missing = missing
|
253
|
-
}
|
254
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
255
|
-
op
|
256
|
-
end
|
257
|
-
|
258
|
-
$opt_group_fields = []
|
259
|
-
def op_group
|
260
|
-
op = OptionParser.new
|
261
|
-
op.banner = 'Usage: tb group [OPTS] KEY-FIELD1,... [TABLE]'
|
262
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
263
|
-
op.def_option('-a AGGREGATION-SPEC[,NEW-FIELD]',
|
264
|
-
'--aggregate AGGREGATION-SPEC[,NEW-FIELD]') {|arg| $opt_group_fields << arg }
|
265
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
266
|
-
op
|
267
|
-
end
|
268
|
-
|
269
|
-
$opt_cross_fields = []
|
270
|
-
$opt_cross_compact = false
|
271
|
-
def op_cross
|
272
|
-
op = OptionParser.new
|
273
|
-
op.banner = 'Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]'
|
274
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
275
|
-
op.def_option('-a AGGREGATION-SPEC[,NEW-FIELD]',
|
276
|
-
'--aggregate AGGREGATION-SPEC[,NEW-FIELD]') {|arg| $opt_cross_fields << arg }
|
277
|
-
op.def_option('-c', '--compact', 'compact format') { $opt_cross_compact = true }
|
278
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
279
|
-
op
|
280
|
-
end
|
281
|
-
|
282
|
-
def op_shape
|
283
|
-
op = OptionParser.new
|
284
|
-
op.banner = 'Usage: tb shape [OPTS] [TABLE ...]'
|
285
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
286
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
287
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
288
|
-
op
|
289
|
-
end
|
290
|
-
|
291
|
-
$opt_mheader_count = nil
|
292
|
-
def op_mheader
|
293
|
-
op = OptionParser.new
|
294
|
-
op.banner = 'Usage: tb mheader [OPTS] [TABLE]'
|
295
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
296
|
-
op.def_option('-c N', 'number of header records') {|arg| $opt_mheader_count = arg.to_i }
|
297
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
298
|
-
op
|
299
|
-
end
|
300
|
-
|
301
|
-
$opt_crop_range = nil
|
302
|
-
def op_crop
|
303
|
-
op = OptionParser.new
|
304
|
-
op.banner = 'Usage: tb crop [OPTS] [TABLE]'
|
305
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
306
|
-
op.def_option('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') {|arg| $opt_crop_range = arg }
|
307
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
308
|
-
op
|
309
|
-
end
|
310
|
-
|
311
|
-
def err(msg)
|
312
|
-
STDERR.puts msg
|
313
|
-
exit 1
|
314
|
-
end
|
315
|
-
|
316
|
-
def comparison_value(v)
|
317
|
-
case v
|
318
|
-
when nil
|
319
|
-
[]
|
320
|
-
when Numeric
|
321
|
-
[0, v]
|
322
|
-
when String
|
323
|
-
case v
|
324
|
-
when /\A\s*-?\d+\s*\z/
|
325
|
-
[0, Integer(v)]
|
326
|
-
when /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
|
327
|
-
[0, Float(v)]
|
328
|
-
else
|
329
|
-
a = []
|
330
|
-
v.scan(/(\d+)|\D+/) {
|
331
|
-
if $1
|
332
|
-
a << 0 << $1.to_i
|
333
|
-
else
|
334
|
-
a << 1 << $&
|
335
|
-
end
|
336
|
-
}
|
337
|
-
a
|
338
|
-
end
|
339
|
-
else
|
340
|
-
raise ArgumentError, "unexpected: #{v.inspect}"
|
341
|
-
end
|
342
|
-
end
|
343
|
-
|
344
|
-
def conv_to_numeric(v)
|
345
|
-
v = v.strip
|
346
|
-
if /\A-?\d+\z/ =~ v
|
347
|
-
v = v.to_i
|
348
|
-
elsif /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/ =~ v
|
349
|
-
v = v.to_f
|
350
|
-
else
|
351
|
-
raise "numeric value expected: #{v.inspect}"
|
352
|
-
end
|
353
|
-
v
|
354
|
-
end
|
355
|
-
|
356
|
-
class CountAggregator
|
357
|
-
def initialize() @result = 0 end
|
358
|
-
def update(v) @result += 1 end
|
359
|
-
def finish() @result end
|
360
|
-
end
|
361
|
-
|
362
|
-
class SumAggregator
|
363
|
-
def initialize() @result = 0 end
|
364
|
-
def update(v) @result += conv_to_numeric(v) if !(v.nil? || v == '') end
|
365
|
-
def finish() @result end
|
366
|
-
end
|
367
|
-
|
368
|
-
class AvgAggregator
|
369
|
-
def initialize() @sum = 0; @count = 0 end
|
370
|
-
def update(v) @count += 1; @sum += conv_to_numeric(v) if !(v.nil? || v == '') end
|
371
|
-
def finish() @sum / @count.to_f end
|
372
|
-
end
|
373
|
-
|
374
|
-
class MaxAggregator
|
375
|
-
def initialize() @v = nil; @cmp = nil end
|
376
|
-
def update(v)
|
377
|
-
cmp = comparison_value(v)
|
378
|
-
if @cmp == nil
|
379
|
-
@v, @cmp = v, cmp
|
380
|
-
else
|
381
|
-
@v, @cmp = v, cmp if (@cmp <=> cmp) < 0
|
382
|
-
end
|
383
|
-
end
|
384
|
-
def finish() @v end
|
385
|
-
end
|
386
|
-
|
387
|
-
class MinAggregator
|
388
|
-
def initialize() @v = @cmp = nil end
|
389
|
-
def update(v)
|
390
|
-
cmp = comparison_value(v)
|
391
|
-
if @cmp == nil
|
392
|
-
@v, @cmp = v, cmp
|
393
|
-
else
|
394
|
-
@v, @cmp = v, cmp if (@cmp <=> cmp) > 0
|
395
|
-
end
|
396
|
-
end
|
397
|
-
def finish() @v end
|
398
|
-
end
|
399
|
-
|
400
|
-
class ValuesAggregator
|
401
|
-
def initialize() @result = [] end
|
402
|
-
def update(v) @result << v if v end
|
403
|
-
def finish() @result.join(",") end
|
404
|
-
end
|
405
|
-
|
406
|
-
class UniqueValuesAggregator
|
407
|
-
def initialize() @result = [] end
|
408
|
-
def update(v) @result << v if v end
|
409
|
-
def finish() @result.uniq.join(",") end
|
410
|
-
end
|
411
|
-
|
412
|
-
class Selector
|
413
|
-
def initialize(i, aggregator) @i = i; @agg = aggregator end
|
414
|
-
def update(ary) @agg.update(ary[@i]) end
|
415
|
-
def finish() @agg.finish end
|
416
|
-
end
|
417
|
-
|
418
|
-
def make_aggregator(spec, fs)
|
419
|
-
case spec
|
420
|
-
when 'count'
|
421
|
-
CountAggregator.new
|
422
|
-
when /\Asum\((.*)\)\z/
|
423
|
-
field = $1
|
424
|
-
i = fs.index(field)
|
425
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
426
|
-
Selector.new(i, SumAggregator.new)
|
427
|
-
when /\Aavg\((.*)\)\z/
|
428
|
-
field = $1
|
429
|
-
i = fs.index(field)
|
430
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
431
|
-
Selector.new(i, AvgAggregator.new)
|
432
|
-
when /\Amax\((.*)\)\z/
|
433
|
-
field = $1
|
434
|
-
i = fs.index(field)
|
435
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
436
|
-
Selector.new(i, MaxAggregator.new)
|
437
|
-
when /\Amin\((.*)\)\z/
|
438
|
-
field = $1
|
439
|
-
i = fs.index(field)
|
440
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
441
|
-
Selector.new(i, MinAggregator.new)
|
442
|
-
when /\Avalues\((.*)\)\z/
|
443
|
-
field = $1
|
444
|
-
i = fs.index(field)
|
445
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
446
|
-
Selector.new(i, ValuesAggregator.new)
|
447
|
-
when /\Auniquevalues\((.*)\)\z/
|
448
|
-
field = $1
|
449
|
-
i = fs.index(field)
|
450
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
451
|
-
Selector.new(i, UniqueValuesAggregator.new)
|
452
|
-
else
|
453
|
-
raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
def aggregate(spec, table)
|
458
|
-
update, finish = make_aggregator(spec, table.list_fields)
|
459
|
-
table.each {|rec|
|
460
|
-
update.call(rec.values_at(*fs))
|
461
|
-
}
|
462
|
-
finish.call
|
463
|
-
end
|
464
|
-
|
465
|
-
def main_csv(argv)
|
466
|
-
op_csv.parse!(argv)
|
467
|
-
each_table_file(argv) {|tbl|
|
468
|
-
with_output {|out|
|
469
|
-
tbl_generate_csv(tbl, out)
|
470
|
-
}
|
471
|
-
}
|
472
|
-
end
|
473
|
-
|
474
|
-
def main_tsv(argv)
|
475
|
-
op_tsv.parse!(argv)
|
476
|
-
each_table_file(argv) {|tbl|
|
477
|
-
with_output {|out|
|
478
|
-
tbl_generate_tsv(tbl, out)
|
479
|
-
}
|
480
|
-
}
|
481
|
-
end
|
482
|
-
|
483
|
-
def main_json(argv)
|
484
|
-
require 'json'
|
485
|
-
op_json.parse!(argv)
|
486
|
-
argv = ['-'] if argv.empty?
|
487
|
-
with_output {|out|
|
488
|
-
out.print "["
|
489
|
-
sep = nil
|
490
|
-
argv.each {|filename|
|
491
|
-
sep = ",\n\n" if sep
|
492
|
-
tablereader_open(filename) {|tblreader|
|
493
|
-
tblreader.each {|ary|
|
494
|
-
out.print sep if sep
|
495
|
-
header = tblreader.header
|
496
|
-
h = {}
|
497
|
-
ary.each_with_index {|e, i|
|
498
|
-
h[header[i]] = e if !e.nil?
|
499
|
-
}
|
500
|
-
out.print JSON.pretty_generate(h)
|
501
|
-
sep = ",\n"
|
502
|
-
}
|
503
|
-
}
|
504
|
-
}
|
505
|
-
out.puts "]"
|
506
|
-
}
|
507
|
-
end
|
508
|
-
|
509
|
-
def main_yaml(argv)
|
510
|
-
require 'yaml'
|
511
|
-
op_yaml.parse!(argv)
|
512
|
-
each_table_file(argv) {|tbl|
|
513
|
-
ary = tbl.map {|rec| rec.to_h }
|
514
|
-
with_output {|out|
|
515
|
-
YAML.dump(ary, out)
|
516
|
-
out.puts
|
517
|
-
}
|
518
|
-
}
|
519
|
-
end
|
520
|
-
|
521
|
-
def main_pp(argv)
|
522
|
-
op_pp.parse!(argv)
|
523
|
-
argv.unshift '-' if argv.empty?
|
524
|
-
with_output {|out|
|
525
|
-
argv.each {|filename|
|
526
|
-
tablereader_open(filename) {|tblreader|
|
527
|
-
tblreader.each {|ary|
|
528
|
-
h = {}
|
529
|
-
ary.each_with_index {|v, i|
|
530
|
-
next if v.nil?
|
531
|
-
h[tblreader.field_from_index_ex(i)] = v
|
532
|
-
}
|
533
|
-
PP.pp h, out
|
534
|
-
}
|
535
|
-
}
|
536
|
-
}
|
537
|
-
}
|
538
|
-
end
|
539
|
-
|
540
|
-
def main_grep(argv)
|
541
|
-
op_grep.parse!(argv)
|
542
|
-
if $opt_grep_ruby
|
543
|
-
pred = eval("lambda {|_| #{$opt_grep_ruby} }")
|
544
|
-
elsif $opt_grep_e
|
545
|
-
re = Regexp.new($opt_grep_e)
|
546
|
-
pred = $opt_grep_f ? lambda {|_| re =~ _[$opt_grep_f] } :
|
547
|
-
lambda {|_| _.any? {|k, v| re =~ v.to_s } }
|
548
|
-
else
|
549
|
-
re = Regexp.new(argv.shift)
|
550
|
-
pred = $opt_grep_f ? lambda {|_| re =~ _[$opt_grep_f] } :
|
551
|
-
lambda {|_| _.any? {|k, v| re =~ v.to_s } }
|
552
|
-
end
|
553
|
-
opt_v = $opt_grep_v ? true : false
|
554
|
-
argv.unshift '-' if argv.empty?
|
555
|
-
argv.each {|filename|
|
556
|
-
tablereader_open(filename) {|tblreader|
|
557
|
-
with_table_stream_output {|gen|
|
558
|
-
gen.output_header tblreader.header
|
559
|
-
tblreader.each {|ary|
|
560
|
-
h = {}
|
561
|
-
ary.each_with_index {|str, i|
|
562
|
-
f = tblreader.field_from_index_ex(i)
|
563
|
-
h[f] = str
|
564
|
-
}
|
565
|
-
found = pred.call(h)
|
566
|
-
found = opt_v ^ !!(found)
|
567
|
-
gen << ary if found
|
568
|
-
}
|
569
|
-
}
|
570
|
-
}
|
571
|
-
}
|
572
|
-
end
|
573
|
-
|
574
|
-
def main_gsub(argv)
|
575
|
-
op_gsub.parse!(argv)
|
576
|
-
if $opt_gsub_e
|
577
|
-
re = Regexp.new($opt_gsub_e)
|
578
|
-
else
|
579
|
-
re = Regexp.new(argv.shift)
|
580
|
-
end
|
581
|
-
repl = argv.shift
|
582
|
-
filename = argv.empty? ? '-' : argv.shift
|
583
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
584
|
-
tablereader_open(filename) {|tblreader|
|
585
|
-
with_table_stream_output {|gen|
|
586
|
-
gen.output_header tblreader.header
|
587
|
-
tblreader.each {|ary|
|
588
|
-
if $opt_gsub_f
|
589
|
-
ary2 = []
|
590
|
-
ary.each_with_index {|str, i|
|
591
|
-
f = tblreader.field_from_index_ex(i)
|
592
|
-
if f == $opt_gsub_f
|
593
|
-
str ||= ''
|
594
|
-
ary2 << str.gsub(re, repl)
|
595
|
-
else
|
596
|
-
ary2 << str
|
597
|
-
end
|
598
|
-
}
|
599
|
-
else
|
600
|
-
ary2 = ary.map {|s|
|
601
|
-
s ||= ''
|
602
|
-
s.gsub(re, repl)
|
603
|
-
}
|
604
|
-
end
|
605
|
-
gen << ary2
|
606
|
-
}
|
607
|
-
}
|
608
|
-
}
|
609
|
-
end
|
610
|
-
|
611
|
-
def main_sort(argv)
|
612
|
-
op_sort.parse!(argv)
|
613
|
-
filename = argv.empty? ? '-' : argv.shift
|
614
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
615
|
-
if $opt_sort_f
|
616
|
-
fs = split_field_list_argument($opt_sort_f)
|
617
|
-
else
|
618
|
-
fs = nil
|
619
|
-
end
|
620
|
-
tbl = load_table(filename)
|
621
|
-
if fs
|
622
|
-
blk = lambda {|rec| fs.map {|f| comparison_value(rec[f]) } }
|
623
|
-
else
|
624
|
-
blk = lambda {|rec| rec.map {|k, v| comparison_value(v) } }
|
625
|
-
end
|
626
|
-
tbl2 = tbl.reorder_records_by(&blk)
|
627
|
-
with_output {|out|
|
628
|
-
tbl_generate_csv(tbl2, out)
|
629
|
-
}
|
630
|
-
end
|
631
|
-
|
632
|
-
def main_select(argv)
|
633
|
-
op_select.parse!(argv)
|
634
|
-
fs = split_field_list_argument(argv.shift)
|
635
|
-
filename = argv.shift || '-'
|
636
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
637
|
-
tablereader_open(filename) {|tblreader|
|
638
|
-
if $opt_select_v
|
639
|
-
h = {}
|
640
|
-
fs.each {|f| h[tblreader.index_from_field(f)] = true }
|
641
|
-
header = nil
|
642
|
-
if !$opt_N
|
643
|
-
header = []
|
644
|
-
tblreader.header.each_with_index {|f, i|
|
645
|
-
header << f if !h[i]
|
646
|
-
}
|
647
|
-
end
|
648
|
-
with_table_stream_output {|gen|
|
649
|
-
gen.output_header(header)
|
650
|
-
tblreader.each {|ary|
|
651
|
-
values = []
|
652
|
-
ary.each_with_index {|v, i|
|
653
|
-
values << v if !h[i]
|
654
|
-
}
|
655
|
-
gen << values
|
656
|
-
}
|
657
|
-
}
|
658
|
-
else
|
659
|
-
header = tblreader.header
|
660
|
-
is = []
|
661
|
-
is = fs.map {|f| tblreader.index_from_field(f) }
|
662
|
-
with_table_stream_output {|gen|
|
663
|
-
gen.output_header(is.map {|i| tblreader.field_from_index_ex(i) })
|
664
|
-
tblreader.each {|ary|
|
665
|
-
gen << ary.values_at(*is)
|
666
|
-
}
|
667
|
-
}
|
668
|
-
end
|
669
|
-
}
|
670
|
-
end
|
671
|
-
|
672
|
-
def main_rename(argv)
|
673
|
-
op_rename.parse!(argv)
|
674
|
-
fs = split_field_list_argument(argv.shift)
|
675
|
-
filename = argv.shift || '-'
|
676
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
677
|
-
h = {}
|
678
|
-
fs.each_slice(2) {|sf, df| h[sf] = df }
|
679
|
-
tablereader_open(filename) {|tblreader|
|
680
|
-
header = tblreader.header
|
681
|
-
h.each {|sf, df|
|
682
|
-
unless header.include? sf
|
683
|
-
raise "field not defined: #{sf.inspect}"
|
684
|
-
end
|
685
|
-
}
|
686
|
-
renamed_header = tblreader.header.map {|f| h.fetch(f, f) }
|
687
|
-
with_table_stream_output {|gen|
|
688
|
-
gen.output_header(renamed_header)
|
689
|
-
tblreader.each {|ary|
|
690
|
-
gen << ary
|
691
|
-
}
|
692
|
-
}
|
693
|
-
}
|
694
|
-
end
|
695
|
-
|
696
|
-
# "newfield" subcommand: prepends a computed field to every row.
#
# argv - options, the new field name, a Ruby expression, and an optional
#        table filename (defaults to '-').
#
# The expression is evaluated once per row with `_` bound to a Hash mapping
# field names to that row's values; its result becomes the first column.
#
# NOTE(review): options are parsed with op_rename, not a newfield-specific
# parser; this looks like a copy/paste slip -- confirm.
def main_newfield(argv)
  op_rename.parse!(argv)
  field = argv.shift
  rubyexp = argv.shift
  # NOTE: evaluates arbitrary Ruby code taken from the command line.
  pr = eval("lambda {|_| #{rubyexp} }")
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    new_header = [field] + tblreader.header
    with_table_stream_output {|gen|
      gen.output_header(new_header)
      tblreader.each {|ary|
        record = {}
        ary.each_index {|i| record[tblreader.field_from_index_ex(i)] = ary[i] }
        gen << ([pr.call(record)] + ary)
      }
    }
  }
end
|
718
|
-
|
719
|
-
# "cat" subcommand: concatenates the rows of several tables.
#
# argv - options (consumed by op_cat) followed by table filenames;
#        a single '-' is used when no filename is given.
#
# With $opt_N the rows of every file are copied through unchanged.
# Otherwise the headers of all files are merged (first-seen order) and each
# row is rearranged so its values land under the merged header's columns.
def main_cat(argv)
  op_cat.parse!(argv)
  argv = ['-'] if argv.empty?
  if $opt_N
    # Open the output once for all files; opening it per file would start a
    # separate output stream (and, on a tty, a separate pager) for each one.
    with_table_stream_output {|gen|
      argv.each {|filename|
        tablereader_open(filename) {|tblreader|
          tblreader.each {|ary| gen << ary }
        }
      }
    }
  else
    # NOTE(review): these readers are opened without a block and are not
    # explicitly closed here -- confirm whether Tb::Reader needs that.
    readers = []
    h = {} # field name => output column index
    argv.each {|filename|
      r = tablereader_open(filename)
      readers << r
      r.header.each {|f| h[f] = h.size if !h[f] }
    }
    with_table_stream_output {|gen|
      gen.output_header h.keys.sort_by {|k| h[k] }
      readers.each {|r|
        header = r.header.dup
        r.each {|ary|
          # A row longer than the known header: register the extra fields.
          while header.length < ary.length
            f = r.field_from_index_ex(header.length)
            header << f
            h[f] = h.size if !h[f]
          end
          ary2 = []
          ary.each_with_index {|v, i|
            ary2[h.fetch(r.field_from_index(i))] = v
          }
          gen << ary2
        }
      }
    }
  end
end
|
764
|
-
|
765
|
-
# "join" subcommand: natural-joins all given tables and writes CSV.
#
# argv - options (consumed by op_join) followed by table filenames.
#
# $opt_join_outer selects the join type: nil for an inner join, or
# :left / :right / :full for an outer join retaining unmatched rows of the
# corresponding side(s).  Any other value raises RuntimeError.
def main_join(argv)
  op_join.parse!(argv)
  result = Tb.new([], [])
  # join type => [retain_left, retain_right]
  retain = {
    :full => [true, true],
    :left => [true, false],
    :right => [false, true],
    nil => [false, false]
  }
  unless retain.key?($opt_join_outer)
    raise "unexpected $opt_join_outer: #{$opt_join_outer.inspect}"
  end
  retain_left, retain_right = retain[$opt_join_outer]
  each_table_file(argv) {|tbl|
    STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= $opt_debug
    if $opt_join_outer
      result = result.natjoin2_outer(tbl, $opt_join_outer_missing, retain_left, retain_right)
    else
      result = result.natjoin2(tbl)
    end
  }
  with_output {|out| tbl_generate_csv(result, out) }
end
|
797
|
-
|
798
|
-
# "group" subcommand: groups rows by key fields and aggregates each group.
#
# argv - options (consumed by op_group), a comma-separated list of key
#        fields, and an optional table filename (defaults to '-').
#
# Each entry of $opt_group_fields is "AGGREGATION_SPEC[,NEW_FIELD]"; the
# result field is named after the spec when NEW_FIELD is omitted.  Groups are
# output sorted by their key values (via comparison_value) as CSV.
def main_group(argv)
  op_group.parse!(argv)
  kfs = split_field_list_argument(argv.shift)
  # [result field name, factory building a fresh aggregator for a header]
  opt_group_fields = $opt_group_fields.map {|arg|
    aggregation_spec, new_field = split_field_list_argument(arg)
    [new_field || aggregation_spec,
     lambda {|fields| make_aggregator(aggregation_spec, fields) }]
  }
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  groups = {} # key values => aggregators, one per requested field
  tablereader_open(filename) {|tblreader|
    kis = kfs.map {|f| tblreader.index_from_field(f) }
    result_fields = kfs + opt_group_fields.map {|nf, _maker| nf }
    tblreader.each {|ary|
      kvs = ary.values_at(*kis)
      if groups.include?(kvs)
        groups[kvs].each {|ag| ag.update(ary) }
      else
        groups[kvs] = opt_group_fields.map {|_nf, maker|
          ag = maker.call(tblreader.header)
          ag.update(ary)
          ag
        }
      end
    }
    result = Tb.new(result_fields)
    sorted_keys = groups.keys.sort_by {|k| k.map {|v| comparison_value(v) } }
    sorted_keys.each {|k|
      result.insert_values result_fields, k + groups[k].map {|ag| ag.finish }
    }
    with_output {|out| tbl_generate_csv(result, out) }
  }
end
|
832
|
-
|
833
|
-
# "cross" subcommand: builds a cross tabulation and writes it as CSV.
#
# argv - options (consumed by op_cross), two comma-separated key field
#        lists (named hkfs and vkfs below), and an optional table filename
#        (defaults to '-').
#
# Distinct vkfs value combinations become output rows and distinct hkfs
# value combinations become output column groups; each cell holds the
# finished aggregators of $opt_cross_fields ("count" when none are given).
# With $opt_cross_compact the last hkfs header row is merged into the row
# that carries the vkfs field names.
#
# NOTE(review): `[nil] * (vkfs.length - 1)` raises ArgumentError when vkfs
# is empty -- confirm whether an empty key list can reach this point.
def main_cross(argv)
  op_cross.parse!(argv)
  hkfs = split_field_list_argument(argv.shift)
  vkfs = split_field_list_argument(argv.shift)
  # Pairs of [aggregation spec, output field name].
  opt_cross_fields =
    if $opt_cross_fields.empty?
      [['count', 'count']]
    else
      $opt_cross_fields.map {|arg|
        agg_spec, new_field = split_field_list_argument(arg)
        [agg_spec, new_field || agg_spec]
      }
    end
  num_aggs = opt_cross_fields.length
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  tablereader_open(filename) {|tblreader|
    vkis = vkfs.map {|f| tblreader.index_from_field(f) }
    hkis = hkfs.map {|f| tblreader.index_from_field(f) }
    vset = {}
    hset = {}
    cells = {} # [vkvs, hkvs] => aggregators
    tblreader.each {|ary|
      vkvs = ary.values_at(*vkis)
      hkvs = ary.values_at(*hkis)
      vset[vkvs] ||= true
      hset[hkvs] ||= true
      cell_key = [vkvs, hkvs]
      if cells.include?(cell_key)
        cells[cell_key].each {|ag| ag.update(ary) }
      else
        cells[cell_key] = opt_cross_fields.map {|agg_spec, _nf|
          ag = make_aggregator(agg_spec, tblreader.header)
          ag.update(ary)
          ag
        }
      end
    }
    by_comparison_value = lambda {|a| a.map {|v| comparison_value(v) } }
    vary = vset.keys.sort_by {|a| by_comparison_value.call(a) }
    hary = hset.keys.sort_by {|a| by_comparison_value.call(a) }
    with_output {|out|
      Tb.csv_stream_output(out) {|gen|
        # One header row per hkfs field, each value repeated per aggregator.
        last_h = hkfs.length - 1
        hkfs.each_with_index {|hkf, i|
          next if $opt_cross_compact && i == last_h
          row = [nil] * (vkfs.length - 1) + [hkf]
          hary.each {|hkvs| row.concat([hkvs[i]] * num_aggs) }
          gen << row
        }
        # Row carrying the vkfs field names.
        new_fields = opt_cross_fields.map {|_agg_spec, new_field| new_field }
        label_row = vkfs.dup
        hary.each {|hkvs|
          label_row.concat($opt_cross_compact ? [hkvs[-1]] * num_aggs : new_fields)
        }
        gen << label_row
        # Data rows; cells with no input rows are filled with nil.
        vary.each {|vkvs|
          row = vkvs.dup
          hary.each {|hkvs|
            ags = cells[[vkvs, hkvs]]
            row.concat(ags ? ags.map {|ag| ag.finish } : [nil] * num_aggs)
          }
          gen << row
        }
      }
    }
  }
end
|
906
|
-
|
907
|
-
# "shape" subcommand: reports the dimensions of each table.
#
# argv - options (consumed by op_shape) followed by table filenames;
#        '-' is used when none is given.
#
# For every file one record is output with the number of header fields, the
# minimum and maximum number of fields per row (nil when the table has no
# rows), the number of rows, and the filename.
def main_shape(argv)
  op_shape.parse!(argv)
  filenames = argv
  filenames = ['-'] if filenames.empty?
  result = Tb.new(%w[header_fields min_fields max_fields records filename])
  filenames.each {|filename|
    tablereader_open(filename) {|tblreader|
      num_header_fields = tblreader.header.length
      min_num_fields = max_num_fields = nil
      num_records = 0
      tblreader.each {|ary|
        num_records += 1
        n = ary.length
        min_num_fields = n if min_num_fields.nil? || n < min_num_fields
        max_num_fields = n if max_num_fields.nil? || max_num_fields < n
      }
      shape = {
        'header_fields' => num_header_fields,
        'min_fields' => min_num_fields,
        'max_fields' => max_num_fields,
        'records' => num_records,
        'filename' => filename
      }
      result.insert(shape)
    }
  }
  with_output {|out|
    # tbl_generate_csv() is not used here because the header should always
    # be output.
    result.generate_csv(out)
  }
end
|
939
|
-
|
940
|
-
# "mheader" subcommand: collapses a multi-row header into a single row.
#
# argv - options (consumed by op_mheader) and an optional table filename
#        (defaults to '-').
#
# Leading rows are accumulated column by column; consecutive repeats of the
# same value in a column are stored once.  The header ends after
# $opt_mheader_count rows when that option is set, otherwise as soon as the
# joined per-column names are all distinct.  The joined names are output as
# one header row and every later row is copied through.  A warning is
# printed when the input ends before a header was completed.
def main_mheader(argv)
  op_mheader.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  # header[i] collects the header parts seen so far for column i;
  # it becomes nil once the header row has been output.
  header = []
  merged_names = lambda { header.map {|parts| parts.compact.join(' ').strip } }
  if $opt_mheader_count
    remaining = $opt_mheader_count
    header_end_p = lambda {
      remaining -= 1
      remaining == 0 ? merged_names.call : nil
    }
  else
    header_end_p = lambda {
      unique_names = merged_names.call.uniq
      unique_names.length == header.length ? unique_names : nil
    }
  end
  with_table_stream_output {|gen|
    Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
      tblreader.each {|ary|
        if header.nil?
          gen << ary
        else
          ary.each_with_index {|v, i|
            parts = (header[i] ||= [])
            parts << v if parts.empty? || parts.last != v
          }
          names = header_end_p.call
          if names
            gen << names
            header = nil
          end
        end
      }
    }
  }
  warn "no header found." if header
end
|
980
|
-
|
981
|
-
# "crop" subcommand: extracts a rectangular region of a table.
#
# argv - options (consumed by op_crop) and an optional table filename
#        (defaults to '-').
#
# $opt_crop_range, when set, gives the 1-based region either as
# "COL1,ROW1-COL2,ROW2" (decimal) or as "A1:B2" style addressing; anything
# else raises ArgumentError.  Rows are then streamed and reading stops after
# the last requested row.
#
# Without a range the whole table is read and surrounding blank (nil or '')
# cells are trimmed: trailing blank cells of each row, leading and trailing
# blank rows, and leading columns that are blank in every row.
def main_crop(argv)
  op_crop.parse!(argv)
  filename = argv.shift || '-'
  warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
  blank = lambda {|v| v.nil? || v == '' }
  if $opt_crop_range
    case $opt_crop_range
    when /\A(\d+),(\d+)-(\d+),(\d+)\z/ # 1-based
      range_col1, range_row1, range_col2, range_row2 = [$1, $2, $3, $4].map {|s| s.to_i }
    when /\A([A-Z]+)(\d+):([A-Z]+)(\d+)\z/ # 1-based
      col_names = [$1, $3]
      range_row1, range_row2 = $2.to_i, $4.to_i
      range_col1, range_col2 = col_names.map {|s| decode_a1_addressing_col(s) }
    else
      raise ArgumentError, "unexpected range argument: #{$opt_crop_range.inspect}"
    end
    with_table_stream_output {|gen|
      Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
        rownum = 1
        tblreader.each {|ary|
          break if range_row2 < rownum
          if range_row1 <= rownum
            # Cut the columns right of the range first, so the left cut
            # does not shift the right boundary.
            ary.slice!(range_col2..-1) if range_col2 < ary.length
            ary.shift(range_col1 - 1) if 1 < range_col1
            gen << ary
          end
          rownum += 1
        }
      }
    }
  else
    arys = []
    Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
      tblreader.each {|a|
        a.pop while !a.empty? && blank.call(a.last)
        arys << a
      }
    }
    blank_row = lambda {|a| a.all? {|v| blank.call(v) } }
    arys.pop while !arys.empty? && blank_row.call(arys.last)
    arys.shift while !arys.empty? && blank_row.call(arys.first)
    # Drop leading columns for as long as every row starts with a blank cell.
    while !arys.empty? && arys.all? {|a| a.empty? || blank.call(a.first) }
      arys.each {|a| a.shift }
    end
    with_table_stream_output {|gen|
      arys.each {|a| gen << a }
    }
  end
end
|
1045
|
-
|
1046
|
-
# Converts a spreadsheet-style column name to its 1-based column number
# ("A" => 1, "Z" => 26, "AA" => 27, ...).
#
# str - column name; the caller passes uppercase letters A-Z only.
def decode_a1_addressing_col(str)
  # Number of names shorter than str, plus one: 26**0 + 26**1 + ... +
  # 26**(len-1), which equals (26**len - 1) / 25.
  offset = (0...str.length).inject(0) {|sum, k| sum + 26**k }
  # Map A..Z onto the base-26 digits 0..9, A..P and parse as a number.
  base26_digits = str.tr("A-Z", "0-9A-P")
  offset + base26_digits.to_i(26)
end
|
1049
|
-
|
1050
|
-
# Splits a field-list argument with split_csv_argument and replaces every
# nil (or false) field with an empty string.
def split_field_list_argument(arg)
  fields = split_csv_argument(arg)
  fields.map {|field| field ? field : '' }
end
|
1053
|
-
|
1054
|
-
# Parses arg with Tb.csv_stream_input and returns the first row it yields;
# returns [] when no row is yielded.  Parsing stops after the first row.
def split_csv_argument(arg)
  rows = []
  Tb.csv_stream_input(arg) {|row|
    rows << row
    break
  }
  rows.empty? ? [] : rows.first
end
|
1058
|
-
|
1059
|
-
# Yields the table loaded (via load_table) from each filename in argv, in
# order.  When argv is empty a single table is loaded from '-'.
def each_table_file(argv)
  return yield(load_table('-')) if argv.empty?
  argv.each {|filename| yield load_table(filename) }
end
|
1069
|
-
|
1070
|
-
# Reads the whole table from filename into a new Tb object and returns the
# value of the tablereader_open block (the table).
#
# All rows are read before the header is fetched; rows shorter than the
# header are padded with nil before being inserted.
def load_table(filename)
  tablereader_open(filename) {|tblreader|
    rows = []
    tblreader.each {|row| rows.push(row) }
    header = tblreader.header
    table = Tb.new(header)
    rows.each {|row|
      missing = header.length - row.length
      row.concat([nil] * missing) if missing > 0
      table.insert_values header, row
    }
    table
  }
end
|
1085
|
-
|
1086
|
-
# Opens filename with Tb::Reader.open, passing $opt_N as the :numeric
# option and forwarding the optional block.
def tablereader_open(filename, &b)
  reader_options = {:numeric => $opt_N}
  Tb::Reader.open(filename, reader_options, &b)
end
|
1089
|
-
|
1090
|
-
# Yields a CSV stream generator (from Tb.csv_stream_output) writing to the
# output chosen by with_output.
#
# The generator is given an extra singleton method, output_header(header),
# which emits the header row unless $opt_N is set.
def with_table_stream_output
  with_output {|out|
    Tb.csv_stream_output(out) {|gen|
      class << gen
        def output_header(header)
          self << header if !$opt_N
        end
      end
      yield gen
    }
  }
end
|
1100
|
-
|
1101
|
-
# Writes tbl to out as CSV.
#
# Without $opt_N this is tbl.generate_csv(out).  With $opt_N only the record
# values are written, in tbl.list_fields order, with no header row.
def tbl_generate_csv(tbl, out)
  return tbl.generate_csv(out) if !$opt_N
  fields = tbl.list_fields
  Tb.csv_stream_output(out) {|gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  }
end
|
1113
|
-
|
1114
|
-
# Writes tbl to out as TSV.
#
# Without $opt_N this is tbl.generate_tsv(out).  With $opt_N only the record
# values are written, in tbl.list_fields order, with no header row.
def tbl_generate_tsv(tbl, out)
  return tbl.generate_tsv(out) if !$opt_N
  fields = tbl.list_fields
  Tb.tsv_stream_output(out) {|gen|
    tbl.each {|rec| gen << rec.values_at(*fields) }
  }
end
|
1126
|
-
|
1127
|
-
# Yields the IO that command output should be written to.
#
# When STDOUT is a terminal and $opt_no_pager is not set, a pager process
# ($PAGER, or 'more' when unset) is started and its input is yielded;
# otherwise STDOUT itself is yielded.
def with_output
  return yield(STDOUT) if !STDOUT.tty? || $opt_no_pager
  pager_command = ENV['PAGER'] || 'more'
  IO.popen(pager_command, 'w') {|pager| yield pager }
end
|
1136
|
-
|
1137
|
-
# Script entry point: hand the command-line arguments to main.
main ARGV
|
29
|
+
# Script entry point: pass the command-line arguments to Tb::Cmd.main.
Tb::Cmd.main(ARGV)
|