tb 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +156 -5
- data/bin/tb +2 -1110
- data/lib/tb.rb +4 -2
- data/lib/tb/catreader.rb +131 -0
- data/lib/tb/cmd_cat.rb +65 -0
- data/lib/tb/cmd_consecutive.rb +79 -0
- data/lib/tb/cmd_crop.rb +105 -0
- data/lib/tb/cmd_cross.rb +119 -0
- data/lib/tb/cmd_csv.rb +42 -0
- data/lib/tb/cmd_cut.rb +77 -0
- data/lib/tb/cmd_grep.rb +76 -0
- data/lib/tb/cmd_group.rb +82 -0
- data/lib/tb/cmd_gsub.rb +77 -0
- data/lib/tb/cmd_help.rb +98 -0
- data/lib/tb/cmd_join.rb +81 -0
- data/lib/tb/cmd_json.rb +60 -0
- data/lib/tb/cmd_ls.rb +273 -0
- data/lib/tb/cmd_mheader.rb +77 -0
- data/lib/tb/cmd_newfield.rb +59 -0
- data/lib/tb/cmd_pnm.rb +43 -0
- data/lib/tb/cmd_pp.rb +70 -0
- data/lib/tb/cmd_rename.rb +58 -0
- data/lib/tb/cmd_shape.rb +67 -0
- data/lib/tb/cmd_sort.rb +58 -0
- data/lib/tb/cmd_svn_log.rb +158 -0
- data/lib/tb/cmd_tsv.rb +43 -0
- data/lib/tb/cmd_yaml.rb +47 -0
- data/lib/tb/cmdmain.rb +45 -0
- data/lib/tb/cmdtop.rb +58 -0
- data/lib/tb/cmdutil.rb +327 -0
- data/lib/tb/csv.rb +30 -6
- data/lib/tb/fieldset.rb +39 -41
- data/lib/tb/pager.rb +132 -0
- data/lib/tb/pnm.rb +357 -0
- data/lib/tb/reader.rb +18 -128
- data/lib/tb/record.rb +3 -3
- data/lib/tb/ropen.rb +70 -0
- data/lib/tb/{pathfinder.rb → search.rb} +69 -34
- data/lib/tb/tsv.rb +29 -1
- data/sample/colors.ppm +0 -0
- data/sample/gradation.pgm +0 -0
- data/sample/langs.csv +46 -0
- data/sample/tbplot +293 -0
- data/test-all-cov.rb +65 -0
- data/test-all.rb +5 -0
- data/test/test_basic.rb +99 -2
- data/test/test_catreader.rb +27 -0
- data/test/test_cmd_cat.rb +118 -0
- data/test/test_cmd_consecutive.rb +90 -0
- data/test/test_cmd_crop.rb +101 -0
- data/test/test_cmd_cross.rb +113 -0
- data/test/test_cmd_csv.rb +129 -0
- data/test/test_cmd_cut.rb +100 -0
- data/test/test_cmd_grep.rb +89 -0
- data/test/test_cmd_group.rb +181 -0
- data/test/test_cmd_gsub.rb +103 -0
- data/test/test_cmd_help.rb +190 -0
- data/test/test_cmd_join.rb +197 -0
- data/test/test_cmd_json.rb +75 -0
- data/test/test_cmd_ls.rb +203 -0
- data/test/test_cmd_mheader.rb +86 -0
- data/test/test_cmd_newfield.rb +63 -0
- data/test/test_cmd_pnm.rb +35 -0
- data/test/test_cmd_pp.rb +62 -0
- data/test/test_cmd_rename.rb +91 -0
- data/test/test_cmd_shape.rb +50 -0
- data/test/test_cmd_sort.rb +105 -0
- data/test/test_cmd_tsv.rb +67 -0
- data/test/test_cmd_yaml.rb +55 -0
- data/test/test_cmdtty.rb +154 -0
- data/test/test_cmdutil.rb +43 -0
- data/test/test_csv.rb +10 -0
- data/test/test_fieldset.rb +42 -0
- data/test/test_pager.rb +142 -0
- data/test/test_pnm.rb +374 -0
- data/test/test_reader.rb +147 -0
- data/test/test_record.rb +49 -0
- data/test/test_search.rb +575 -0
- data/test/test_tsv.rb +7 -0
- metadata +108 -5
- data/lib/tb/qtsv.rb +0 -93
data/README
CHANGED
@@ -1,9 +1,133 @@
|
|
1
1
|
= tb - manipulation tool for table: CSV, TSV, etc.
|
2
2
|
|
3
|
-
|
3
|
+
tb provides a command and a library for manipulating tables:
|
4
|
+
Unix filter like operations (grep, sort, cat, cut, ls, etc.),
|
5
|
+
SQL like operations (join, group, etc.),
|
6
|
+
and more.
|
7
|
+
|
8
|
+
== Example
|
9
|
+
|
10
|
+
There is a CSV file for programming languages and their birth year in
|
11
|
+
the sample/ directory of the tb package.
|
12
|
+
|
13
|
+
% head sample/langs.csv
|
14
|
+
language,year
|
15
|
+
FORTRAN,1955
|
16
|
+
LISP,1958
|
17
|
+
COBOL,1959
|
18
|
+
ALGOL 58,1958
|
19
|
+
APL,1962
|
20
|
+
Simula,1962
|
21
|
+
SNOBOL,1962
|
22
|
+
BASIC,1964
|
23
|
+
PL/I,1964
|
24
|
+
|
25
|
+
"tb" command has many subcommands.
|
26
|
+
"sort" subcommand sorts a CSV file.
|
27
|
+
You don't need to care about the header: it is retained as is.
|
28
|
+
|
29
|
+
% tb sort sample/langs.csv|head
|
30
|
+
language,year
|
31
|
+
ALGOL 58,1958
|
32
|
+
APL,1962
|
33
|
+
Ada,1983
|
34
|
+
B,1969
|
35
|
+
BASIC,1964
|
36
|
+
BCPL,1967
|
37
|
+
C,1972
|
38
|
+
C#,2001
|
39
|
+
C++,1980
|
40
|
+
|
41
|
+
"sort" subcommand takes the -f option to specify a field to sort by.
|
42
|
+
You don't need to count the position of the field.
|
43
|
+
Also, the comparison method used in tb is smart enough to sort numbers correctly.
|
44
|
+
|
45
|
+
% tb sort -f year sample/langs.csv|head
|
46
|
+
language,year
|
47
|
+
FORTRAN,1955
|
48
|
+
LISP,1958
|
49
|
+
ALGOL 58,1958
|
50
|
+
COBOL,1959
|
51
|
+
APL,1962
|
52
|
+
SNOBOL,1962
|
53
|
+
Simula,1962
|
54
|
+
BASIC,1964
|
55
|
+
PL/I,1964
|
56
|
+
|
57
|
+
"grep" subcommand searches a CSV file.
|
58
|
+
|
59
|
+
% tb grep R sample/langs.csv
|
60
|
+
language,year
|
61
|
+
FORTRAN,1955
|
62
|
+
Ruby,1993
|
63
|
+
|
64
|
+
"grep" subcommand takes -f with field name and -v to show non-matching rows.
|
65
|
+
You don't need to care about field separators (commas) when matching.
|
66
|
+
The following example searches for languages whose names contain a non-alphabetic character.
|
67
|
+
|
68
|
+
% tb grep -vf language '\A[A-Za-z]*\z' sample/langs.csv |cat
|
69
|
+
language,year
|
70
|
+
ALGOL 58,1958
|
71
|
+
PL/I,1964
|
72
|
+
C++,1980
|
73
|
+
Objective-C,1983
|
74
|
+
Common Lisp,1984
|
75
|
+
Visual Basic,1991
|
76
|
+
C#,2001
|
77
|
+
F#,2002
|
78
|
+
|
79
|
+
"grep" subcommand can take a Ruby expression instead of a regexp.
|
80
|
+
|
81
|
+
% tb grep --ruby '(1990..1999).include?(_["year"].to_i)' sample/langs.csv
|
82
|
+
language,year
|
83
|
+
Haskell,1990
|
84
|
+
Python,1991
|
85
|
+
Visual Basic,1991
|
86
|
+
Ruby,1993
|
87
|
+
Lua,1993
|
88
|
+
CLOS,1994
|
89
|
+
Java,1995
|
90
|
+
Delphi,1995
|
91
|
+
JavaScript,1995
|
92
|
+
PHP,1995
|
93
|
+
D,1999
|
94
|
+
|
95
|
+
"cut" subcommand extracts one or more fields.
|
96
|
+
This is similar to the "cut" command of Unix and the projection operation of relational algebra.
|
97
|
+
|
98
|
+
% tb cut language sample/langs.csv |head
|
99
|
+
language
|
100
|
+
FORTRAN
|
101
|
+
LISP
|
102
|
+
COBOL
|
103
|
+
ALGOL 58
|
104
|
+
APL
|
105
|
+
Simula
|
106
|
+
SNOBOL
|
107
|
+
BASIC
|
108
|
+
PL/I
|
109
|
+
|
110
|
+
"group" subcommand groups rows by the specified field.
|
111
|
+
The -a option specifies an aggregation expression used to aggregate the grouped rows.
|
112
|
+
|
113
|
+
% tb group year -a count -a 'values(language)' sample/langs.csv |head
|
114
|
+
year,count,values(language)
|
115
|
+
1955,1,FORTRAN
|
116
|
+
1958,2,"LISP,ALGOL 58"
|
117
|
+
1959,1,COBOL
|
118
|
+
1962,3,"APL,Simula,SNOBOL"
|
119
|
+
1964,2,"BASIC,PL/I"
|
120
|
+
1967,1,BCPL
|
121
|
+
1968,1,Logo
|
122
|
+
1969,1,B
|
123
|
+
1970,2,"Pascal,Forth"
|
124
|
+
|
125
|
+
There are more subcommands.
|
126
|
+
"help" subcommand shows the list of subcommands.
|
4
127
|
|
5
128
|
% tb help
|
6
129
|
Usage:
|
130
|
+
tb help [OPTS] [SUBCOMMAND]
|
7
131
|
tb csv [OPTS] [TABLE]
|
8
132
|
tb tsv [OPTS] [TABLE]
|
9
133
|
tb json [OPTS] [TABLE]
|
@@ -12,23 +136,50 @@
|
|
12
136
|
tb grep [OPTS] REGEXP [TABLE]
|
13
137
|
tb gsub [OPTS] REGEXP STRING [TABLE]
|
14
138
|
tb sort [OPTS] [TABLE]
|
15
|
-
tb
|
139
|
+
tb cut [OPTS] FIELD,... [TABLE]
|
16
140
|
tb rename [OPTS] SRC,DST,... [TABLE]
|
17
141
|
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
|
18
142
|
tb cat [OPTS] [TABLE ...]
|
19
143
|
tb join [OPTS] [TABLE ...]
|
20
|
-
tb group [OPTS] [TABLE]
|
21
|
-
tb cross [OPTS] [TABLE]
|
144
|
+
tb group [OPTS] KEY-FIELD1,... [TABLE]
|
145
|
+
tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]
|
22
146
|
tb shape [OPTS] [TABLE ...]
|
23
147
|
tb mheader [OPTS] [TABLE]
|
24
148
|
tb crop [OPTS] [TABLE]
|
25
149
|
|
26
|
-
|
150
|
+
== Command Line Tool
|
151
|
+
|
152
|
+
tb command has many subcommands.
|
153
|
+
|
154
|
+
help : show help message of tb command.
|
155
|
+
csv : convert a table to CSV (Comma Separated Value).
|
156
|
+
tsv : convert a table to TSV (Tab Separated Value).
|
157
|
+
json : convert a table to JSON (JavaScript Object Notation).
|
158
|
+
yaml : convert a table to YAML (YAML Ain't Markup Language).
|
159
|
+
pp : convert a table to pretty printed format.
|
160
|
+
grep : search rows using regexp or ruby expression.
|
161
|
+
gsub : substitute cells.
|
162
|
+
sort : sort rows.
|
163
|
+
cut : select columns.
|
164
|
+
rename : rename field names.
|
165
|
+
newfield : add a field.
|
166
|
+
cat : concatenate tables vertically.
|
167
|
+
join : concatenate tables horizontally as left/right/full natural join.
|
168
|
+
group : group and aggregate rows.
|
169
|
+
cross : create a contingency table.
|
170
|
+
shape : show table size.
|
171
|
+
mheader : collapse multi-row header.
|
172
|
+
crop : extract a rectangle from a table.
|
27
173
|
|
28
174
|
== Install
|
29
175
|
|
30
176
|
gem install tb
|
31
177
|
|
178
|
+
== Links
|
179
|
+
|
180
|
+
* ((<source repository on github|URL:https://github.com/akr/tb>))
|
181
|
+
* ((<tb on rubygems.org|URL:http://rubygems.org/gems/tb>))
|
182
|
+
|
32
183
|
== Author
|
33
184
|
|
34
185
|
Tanaka Akira <akr@fsij.org>
|
data/bin/tb
CHANGED
@@ -24,1114 +24,6 @@
|
|
24
24
|
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
25
25
|
# OF SUCH DAMAGE.
|
26
26
|
|
27
|
-
require 'tb'
|
28
|
-
require 'optparse'
|
29
|
-
require 'enumerator'
|
27
|
+
require 'tb/cmdtop'
|
30
28
|
|
31
|
-
|
32
|
-
subcommand = argv.shift
|
33
|
-
case subcommand
|
34
|
-
when 'help', '-h' then main_help(argv)
|
35
|
-
when 'csv' then main_csv(argv)
|
36
|
-
when 'tsv' then main_tsv(argv)
|
37
|
-
when 'json' then main_json(argv)
|
38
|
-
when 'yaml' then main_yaml(argv)
|
39
|
-
when 'pp' then main_pp(argv)
|
40
|
-
when 'grep' then main_grep(argv)
|
41
|
-
when 'gsub' then main_gsub(argv)
|
42
|
-
when 'sort' then main_sort(argv)
|
43
|
-
when 'select' then main_select(argv)
|
44
|
-
when 'rename' then main_rename(argv)
|
45
|
-
when 'newfield' then main_newfield(argv)
|
46
|
-
when 'cat' then main_cat(argv)
|
47
|
-
when 'join' then main_join(argv)
|
48
|
-
when 'group' then main_group(argv)
|
49
|
-
when 'cross' then main_cross(argv)
|
50
|
-
when 'shape' then main_shape(argv)
|
51
|
-
when 'mheader' then main_mheader(argv)
|
52
|
-
when 'crop' then main_crop(argv)
|
53
|
-
when nil
|
54
|
-
err "Usage: tb subcommand args..."
|
55
|
-
else
|
56
|
-
err "unexpected subcommand: #{subcommand.inspect}"
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def usage(status)
|
61
|
-
print <<'End'
|
62
|
-
Usage:
|
63
|
-
tb csv [OPTS] [TABLE]
|
64
|
-
tb tsv [OPTS] [TABLE]
|
65
|
-
tb json [OPTS] [TABLE]
|
66
|
-
tb yaml [OPTS] [TABLE]
|
67
|
-
tb pp [OPTS] [TABLE]
|
68
|
-
tb grep [OPTS] REGEXP [TABLE]
|
69
|
-
tb gsub [OPTS] REGEXP STRING [TABLE]
|
70
|
-
tb sort [OPTS] [TABLE]
|
71
|
-
tb select [OPTS] FIELD,... [TABLE]
|
72
|
-
tb rename [OPTS] SRC,DST,... [TABLE]
|
73
|
-
tb newfield [OPTS] FIELD RUBY-EXP [TABLE]
|
74
|
-
tb cat [OPTS] [TABLE ...]
|
75
|
-
tb join [OPTS] [TABLE ...]
|
76
|
-
tb group [OPTS] [TABLE]
|
77
|
-
tb cross [OPTS] [TABLE]
|
78
|
-
tb shape [OPTS] [TABLE ...]
|
79
|
-
tb mheader [OPTS] [TABLE]
|
80
|
-
tb crop [OPTS] [TABLE]
|
81
|
-
End
|
82
|
-
exit status
|
83
|
-
end
|
84
|
-
|
85
|
-
def main_help(argv)
|
86
|
-
subcommand = argv.shift
|
87
|
-
case subcommand
|
88
|
-
when 'csv' then puts op_csv
|
89
|
-
when 'tsv' then puts op_tsv
|
90
|
-
when 'json' then puts op_json
|
91
|
-
when 'yaml' then puts op_yaml
|
92
|
-
when 'pp' then puts op_pp
|
93
|
-
when 'grep' then puts op_grep
|
94
|
-
when 'gsub' then puts op_gsub
|
95
|
-
when 'sort' then puts op_sort
|
96
|
-
when 'select' then puts op_select
|
97
|
-
when 'rename' then puts op_rename
|
98
|
-
when 'newfield' then puts op_newfield
|
99
|
-
when 'cat' then puts op_cat
|
100
|
-
when 'join' then puts op_join
|
101
|
-
when 'group' then puts op_group
|
102
|
-
when 'cross' then puts op_cross
|
103
|
-
when 'shape' then puts op_shape
|
104
|
-
when 'mheader' then puts op_mheader
|
105
|
-
when 'crop' then puts op_crop
|
106
|
-
when nil
|
107
|
-
usage(true)
|
108
|
-
else
|
109
|
-
err "unexpected subcommand: #{subcommand.inspect}"
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
$opt_N = nil
|
114
|
-
$opt_debug = 0
|
115
|
-
$opt_no_pager = nil
|
116
|
-
|
117
|
-
def op_csv
|
118
|
-
op = OptionParser.new
|
119
|
-
op.banner = 'Usage: tb csv [OPTS] [TABLE]'
|
120
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
121
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
122
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
123
|
-
op
|
124
|
-
end
|
125
|
-
|
126
|
-
def op_tsv
|
127
|
-
op = OptionParser.new
|
128
|
-
op.banner = 'Usage: tb tsv [OPTS] [TABLE]'
|
129
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
130
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
131
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
132
|
-
op
|
133
|
-
end
|
134
|
-
|
135
|
-
def op_json
|
136
|
-
op = OptionParser.new
|
137
|
-
op.banner = 'Usage: tb json [OPTS] [TABLE]'
|
138
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
139
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
140
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
141
|
-
op
|
142
|
-
end
|
143
|
-
|
144
|
-
def op_yaml
|
145
|
-
op = OptionParser.new
|
146
|
-
op.banner = 'Usage: tb yaml [OPTS] [TABLE]'
|
147
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
148
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
149
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
150
|
-
op
|
151
|
-
end
|
152
|
-
|
153
|
-
def op_pp
|
154
|
-
op = OptionParser.new
|
155
|
-
op.banner = 'Usage: tb pp [OPTS] [TABLE]'
|
156
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
157
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
158
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
159
|
-
op
|
160
|
-
end
|
161
|
-
|
162
|
-
$opt_grep_e = nil
|
163
|
-
$opt_grep_ruby = nil
|
164
|
-
$opt_grep_f = nil
|
165
|
-
$opt_grep_v = nil
|
166
|
-
def op_grep
|
167
|
-
op = OptionParser.new
|
168
|
-
op.banner = 'Usage: tb grep [OPTS] REGEXP [TABLE]'
|
169
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
170
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
171
|
-
op.def_option('-f FIELD', 'search field') {|field| $opt_grep_f = field }
|
172
|
-
op.def_option('-e REGEXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|pattern| $opt_grep_e = pattern }
|
173
|
-
op.def_option('--ruby RUBY-EXP', 'specify a regexp. no usual regexp argument.') {|ruby_exp| $opt_grep_ruby = ruby_exp }
|
174
|
-
op.def_option('-v', 'ouput the records which doesn\'t match') { $opt_grep_v = true }
|
175
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
176
|
-
op
|
177
|
-
end
|
178
|
-
|
179
|
-
$opt_gsub_e = nil
|
180
|
-
$opt_gsub_f = nil
|
181
|
-
def op_gsub
|
182
|
-
op = OptionParser.new
|
183
|
-
op.banner = 'Usage: tb gsub [OPTS] REGEXP STRING [TABLE]'
|
184
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
185
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
186
|
-
op.def_option('-f FIELD', 'search field') {|field| $opt_gsub_f = field }
|
187
|
-
op.def_option('-e REGEXP', 'predicate written in ruby. A hash is given as _. no usual regexp argument.') {|pattern| $opt_gsub_e = pattern }
|
188
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
189
|
-
op
|
190
|
-
end
|
191
|
-
|
192
|
-
$opt_sort_f = nil
|
193
|
-
def op_sort
|
194
|
-
op = OptionParser.new
|
195
|
-
op.banner = 'Usage: tb sort [OPTS] [TABLE]'
|
196
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
197
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
198
|
-
op.def_option('-f FIELD,...', 'specify sort keys') {|fs| $opt_sort_f = fs }
|
199
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
200
|
-
op
|
201
|
-
end
|
202
|
-
|
203
|
-
$opt_select_v = nil
|
204
|
-
def op_select
|
205
|
-
op = OptionParser.new
|
206
|
-
op.banner = 'Usage: tb select [OPTS] FIELD,... [TABLE]'
|
207
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
208
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
209
|
-
op.def_option('-v', 'invert match') { $opt_select_v = true }
|
210
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
211
|
-
op
|
212
|
-
end
|
213
|
-
|
214
|
-
def op_rename
|
215
|
-
op = OptionParser.new
|
216
|
-
op.banner = 'Usage: tb rename [OPTS] SRC,DST,... [TABLE]'
|
217
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
218
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
219
|
-
op
|
220
|
-
end
|
221
|
-
|
222
|
-
def op_newfield
|
223
|
-
op = OptionParser.new
|
224
|
-
op.banner = 'Usage: tb newfield [OPTS] FIELD RUBY-EXP [TABLE]'
|
225
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
226
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
227
|
-
op
|
228
|
-
end
|
229
|
-
|
230
|
-
def op_cat
|
231
|
-
op = OptionParser.new
|
232
|
-
op.banner = 'Usage: tb cat [OPTS] [TABLE ...]'
|
233
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
234
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
235
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
236
|
-
op
|
237
|
-
end
|
238
|
-
|
239
|
-
$opt_join_outer = nil
|
240
|
-
$opt_join_outer_missing = nil
|
241
|
-
def op_join
|
242
|
-
op = OptionParser.new
|
243
|
-
op.banner = 'Usage: tb join [OPTS] [TABLE ...]'
|
244
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
245
|
-
op.def_option('-d', '--debug', 'show debug message') { $opt_debug += 1 }
|
246
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
247
|
-
op.def_option('--outer', 'outer join') { $opt_join_outer = :full }
|
248
|
-
op.def_option('--left', 'left outer join') { $opt_join_outer = :left }
|
249
|
-
op.def_option('--right', 'right outer join') { $opt_join_outer = :right }
|
250
|
-
op.def_option('--outer-missing=DEFAULT', 'missing value for outer join') {|missing|
|
251
|
-
$opt_join_outer ||= :full
|
252
|
-
$opt_join_outer_missing = missing
|
253
|
-
}
|
254
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
255
|
-
op
|
256
|
-
end
|
257
|
-
|
258
|
-
$opt_group_fields = []
|
259
|
-
def op_group
|
260
|
-
op = OptionParser.new
|
261
|
-
op.banner = 'Usage: tb group [OPTS] KEY-FIELD1,... [TABLE]'
|
262
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
263
|
-
op.def_option('-a AGGREGATION-SPEC[,NEW-FIELD]',
|
264
|
-
'--aggregate AGGREGATION-SPEC[,NEW-FIELD]') {|arg| $opt_group_fields << arg }
|
265
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
266
|
-
op
|
267
|
-
end
|
268
|
-
|
269
|
-
$opt_cross_fields = []
|
270
|
-
$opt_cross_compact = false
|
271
|
-
def op_cross
|
272
|
-
op = OptionParser.new
|
273
|
-
op.banner = 'Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE]'
|
274
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
275
|
-
op.def_option('-a AGGREGATION-SPEC[,NEW-FIELD]',
|
276
|
-
'--aggregate AGGREGATION-SPEC[,NEW-FIELD]') {|arg| $opt_cross_fields << arg }
|
277
|
-
op.def_option('-c', '--compact', 'compact format') { $opt_cross_compact = true }
|
278
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
279
|
-
op
|
280
|
-
end
|
281
|
-
|
282
|
-
def op_shape
|
283
|
-
op = OptionParser.new
|
284
|
-
op.banner = 'Usage: tb shape [OPTS] [TABLE ...]'
|
285
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
286
|
-
op.def_option('-N', 'use numeric field name') { $opt_N = true }
|
287
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
288
|
-
op
|
289
|
-
end
|
290
|
-
|
291
|
-
$opt_mheader_count = nil
|
292
|
-
def op_mheader
|
293
|
-
op = OptionParser.new
|
294
|
-
op.banner = 'Usage: tb mheader [OPTS] [TABLE]'
|
295
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
296
|
-
op.def_option('-c N', 'number of header records') {|arg| $opt_mheader_count = arg.to_i }
|
297
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
298
|
-
op
|
299
|
-
end
|
300
|
-
|
301
|
-
$opt_crop_range = nil
|
302
|
-
def op_crop
|
303
|
-
op = OptionParser.new
|
304
|
-
op.banner = 'Usage: tb crop [OPTS] [TABLE]'
|
305
|
-
op.def_option('-h', 'show help message') { puts op; exit 0 }
|
306
|
-
op.def_option('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') {|arg| $opt_crop_range = arg }
|
307
|
-
op.def_option('--no-pager', 'don\'t use pager') { $opt_no_pager = true }
|
308
|
-
op
|
309
|
-
end
|
310
|
-
|
311
|
-
def err(msg)
|
312
|
-
STDERR.puts msg
|
313
|
-
exit 1
|
314
|
-
end
|
315
|
-
|
316
|
-
def comparison_value(v)
|
317
|
-
case v
|
318
|
-
when nil
|
319
|
-
[]
|
320
|
-
when Numeric
|
321
|
-
[0, v]
|
322
|
-
when String
|
323
|
-
case v
|
324
|
-
when /\A\s*-?\d+\s*\z/
|
325
|
-
[0, Integer(v)]
|
326
|
-
when /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
|
327
|
-
[0, Float(v)]
|
328
|
-
else
|
329
|
-
a = []
|
330
|
-
v.scan(/(\d+)|\D+/) {
|
331
|
-
if $1
|
332
|
-
a << 0 << $1.to_i
|
333
|
-
else
|
334
|
-
a << 1 << $&
|
335
|
-
end
|
336
|
-
}
|
337
|
-
a
|
338
|
-
end
|
339
|
-
else
|
340
|
-
raise ArgumentError, "unexpected: #{v.inspect}"
|
341
|
-
end
|
342
|
-
end
|
343
|
-
|
344
|
-
def conv_to_numeric(v)
|
345
|
-
v = v.strip
|
346
|
-
if /\A-?\d+\z/ =~ v
|
347
|
-
v = v.to_i
|
348
|
-
elsif /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/ =~ v
|
349
|
-
v = v.to_f
|
350
|
-
else
|
351
|
-
raise "numeric value expected: #{v.inspect}"
|
352
|
-
end
|
353
|
-
v
|
354
|
-
end
|
355
|
-
|
356
|
-
class CountAggregator
|
357
|
-
def initialize() @result = 0 end
|
358
|
-
def update(v) @result += 1 end
|
359
|
-
def finish() @result end
|
360
|
-
end
|
361
|
-
|
362
|
-
class SumAggregator
|
363
|
-
def initialize() @result = 0 end
|
364
|
-
def update(v) @result += conv_to_numeric(v) if !(v.nil? || v == '') end
|
365
|
-
def finish() @result end
|
366
|
-
end
|
367
|
-
|
368
|
-
class AvgAggregator
|
369
|
-
def initialize() @sum = 0; @count = 0 end
|
370
|
-
def update(v) @count += 1; @sum += conv_to_numeric(v) if !(v.nil? || v == '') end
|
371
|
-
def finish() @sum / @count.to_f end
|
372
|
-
end
|
373
|
-
|
374
|
-
class MaxAggregator
|
375
|
-
def initialize() @v = nil; @cmp = nil end
|
376
|
-
def update(v)
|
377
|
-
cmp = comparison_value(v)
|
378
|
-
if @cmp == nil
|
379
|
-
@v, @cmp = v, cmp
|
380
|
-
else
|
381
|
-
@v, @cmp = v, cmp if (@cmp <=> cmp) < 0
|
382
|
-
end
|
383
|
-
end
|
384
|
-
def finish() @v end
|
385
|
-
end
|
386
|
-
|
387
|
-
class MinAggregator
|
388
|
-
def initialize() @v = @cmp = nil end
|
389
|
-
def update(v)
|
390
|
-
cmp = comparison_value(v)
|
391
|
-
if @cmp == nil
|
392
|
-
@v, @cmp = v, cmp
|
393
|
-
else
|
394
|
-
@v, @cmp = v, cmp if (@cmp <=> cmp) > 0
|
395
|
-
end
|
396
|
-
end
|
397
|
-
def finish() @v end
|
398
|
-
end
|
399
|
-
|
400
|
-
class ValuesAggregator
|
401
|
-
def initialize() @result = [] end
|
402
|
-
def update(v) @result << v if v end
|
403
|
-
def finish() @result.join(",") end
|
404
|
-
end
|
405
|
-
|
406
|
-
class UniqueValuesAggregator
|
407
|
-
def initialize() @result = [] end
|
408
|
-
def update(v) @result << v if v end
|
409
|
-
def finish() @result.uniq.join(",") end
|
410
|
-
end
|
411
|
-
|
412
|
-
class Selector
|
413
|
-
def initialize(i, aggregator) @i = i; @agg = aggregator end
|
414
|
-
def update(ary) @agg.update(ary[@i]) end
|
415
|
-
def finish() @agg.finish end
|
416
|
-
end
|
417
|
-
|
418
|
-
def make_aggregator(spec, fs)
|
419
|
-
case spec
|
420
|
-
when 'count'
|
421
|
-
CountAggregator.new
|
422
|
-
when /\Asum\((.*)\)\z/
|
423
|
-
field = $1
|
424
|
-
i = fs.index(field)
|
425
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
426
|
-
Selector.new(i, SumAggregator.new)
|
427
|
-
when /\Aavg\((.*)\)\z/
|
428
|
-
field = $1
|
429
|
-
i = fs.index(field)
|
430
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
431
|
-
Selector.new(i, AvgAggregator.new)
|
432
|
-
when /\Amax\((.*)\)\z/
|
433
|
-
field = $1
|
434
|
-
i = fs.index(field)
|
435
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
436
|
-
Selector.new(i, MaxAggregator.new)
|
437
|
-
when /\Amin\((.*)\)\z/
|
438
|
-
field = $1
|
439
|
-
i = fs.index(field)
|
440
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
441
|
-
Selector.new(i, MinAggregator.new)
|
442
|
-
when /\Avalues\((.*)\)\z/
|
443
|
-
field = $1
|
444
|
-
i = fs.index(field)
|
445
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
446
|
-
Selector.new(i, ValuesAggregator.new)
|
447
|
-
when /\Auniquevalues\((.*)\)\z/
|
448
|
-
field = $1
|
449
|
-
i = fs.index(field)
|
450
|
-
raise ArgumentError, "field not found: #{field.inspect}" if !i
|
451
|
-
Selector.new(i, UniqueValuesAggregator.new)
|
452
|
-
else
|
453
|
-
raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
def aggregate(spec, table)
|
458
|
-
update, finish = make_aggregator(spec, table.list_fields)
|
459
|
-
table.each {|rec|
|
460
|
-
update.call(rec.values_at(*fs))
|
461
|
-
}
|
462
|
-
finish.call
|
463
|
-
end
|
464
|
-
|
465
|
-
def main_csv(argv)
|
466
|
-
op_csv.parse!(argv)
|
467
|
-
each_table_file(argv) {|tbl|
|
468
|
-
with_output {|out|
|
469
|
-
tbl_generate_csv(tbl, out)
|
470
|
-
}
|
471
|
-
}
|
472
|
-
end
|
473
|
-
|
474
|
-
def main_tsv(argv)
|
475
|
-
op_tsv.parse!(argv)
|
476
|
-
each_table_file(argv) {|tbl|
|
477
|
-
with_output {|out|
|
478
|
-
tbl_generate_tsv(tbl, out)
|
479
|
-
}
|
480
|
-
}
|
481
|
-
end
|
482
|
-
|
483
|
-
def main_json(argv)
|
484
|
-
require 'json'
|
485
|
-
op_json.parse!(argv)
|
486
|
-
argv = ['-'] if argv.empty?
|
487
|
-
with_output {|out|
|
488
|
-
out.print "["
|
489
|
-
sep = nil
|
490
|
-
argv.each {|filename|
|
491
|
-
sep = ",\n\n" if sep
|
492
|
-
tablereader_open(filename) {|tblreader|
|
493
|
-
tblreader.each {|ary|
|
494
|
-
out.print sep if sep
|
495
|
-
header = tblreader.header
|
496
|
-
h = {}
|
497
|
-
ary.each_with_index {|e, i|
|
498
|
-
h[header[i]] = e if !e.nil?
|
499
|
-
}
|
500
|
-
out.print JSON.pretty_generate(h)
|
501
|
-
sep = ",\n"
|
502
|
-
}
|
503
|
-
}
|
504
|
-
}
|
505
|
-
out.puts "]"
|
506
|
-
}
|
507
|
-
end
|
508
|
-
|
509
|
-
def main_yaml(argv)
|
510
|
-
require 'yaml'
|
511
|
-
op_yaml.parse!(argv)
|
512
|
-
each_table_file(argv) {|tbl|
|
513
|
-
ary = tbl.map {|rec| rec.to_h }
|
514
|
-
with_output {|out|
|
515
|
-
YAML.dump(ary, out)
|
516
|
-
out.puts
|
517
|
-
}
|
518
|
-
}
|
519
|
-
end
|
520
|
-
|
521
|
-
def main_pp(argv)
|
522
|
-
op_pp.parse!(argv)
|
523
|
-
argv.unshift '-' if argv.empty?
|
524
|
-
with_output {|out|
|
525
|
-
argv.each {|filename|
|
526
|
-
tablereader_open(filename) {|tblreader|
|
527
|
-
tblreader.each {|ary|
|
528
|
-
h = {}
|
529
|
-
ary.each_with_index {|v, i|
|
530
|
-
next if v.nil?
|
531
|
-
h[tblreader.field_from_index_ex(i)] = v
|
532
|
-
}
|
533
|
-
PP.pp h, out
|
534
|
-
}
|
535
|
-
}
|
536
|
-
}
|
537
|
-
}
|
538
|
-
end
|
539
|
-
|
540
|
-
def main_grep(argv)
|
541
|
-
op_grep.parse!(argv)
|
542
|
-
if $opt_grep_ruby
|
543
|
-
pred = eval("lambda {|_| #{$opt_grep_ruby} }")
|
544
|
-
elsif $opt_grep_e
|
545
|
-
re = Regexp.new($opt_grep_e)
|
546
|
-
pred = $opt_grep_f ? lambda {|_| re =~ _[$opt_grep_f] } :
|
547
|
-
lambda {|_| _.any? {|k, v| re =~ v.to_s } }
|
548
|
-
else
|
549
|
-
re = Regexp.new(argv.shift)
|
550
|
-
pred = $opt_grep_f ? lambda {|_| re =~ _[$opt_grep_f] } :
|
551
|
-
lambda {|_| _.any? {|k, v| re =~ v.to_s } }
|
552
|
-
end
|
553
|
-
opt_v = $opt_grep_v ? true : false
|
554
|
-
argv.unshift '-' if argv.empty?
|
555
|
-
argv.each {|filename|
|
556
|
-
tablereader_open(filename) {|tblreader|
|
557
|
-
with_table_stream_output {|gen|
|
558
|
-
gen.output_header tblreader.header
|
559
|
-
tblreader.each {|ary|
|
560
|
-
h = {}
|
561
|
-
ary.each_with_index {|str, i|
|
562
|
-
f = tblreader.field_from_index_ex(i)
|
563
|
-
h[f] = str
|
564
|
-
}
|
565
|
-
found = pred.call(h)
|
566
|
-
found = opt_v ^ !!(found)
|
567
|
-
gen << ary if found
|
568
|
-
}
|
569
|
-
}
|
570
|
-
}
|
571
|
-
}
|
572
|
-
end
|
573
|
-
|
574
|
-
def main_gsub(argv)
|
575
|
-
op_gsub.parse!(argv)
|
576
|
-
if $opt_gsub_e
|
577
|
-
re = Regexp.new($opt_gsub_e)
|
578
|
-
else
|
579
|
-
re = Regexp.new(argv.shift)
|
580
|
-
end
|
581
|
-
repl = argv.shift
|
582
|
-
filename = argv.empty? ? '-' : argv.shift
|
583
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
584
|
-
tablereader_open(filename) {|tblreader|
|
585
|
-
with_table_stream_output {|gen|
|
586
|
-
gen.output_header tblreader.header
|
587
|
-
tblreader.each {|ary|
|
588
|
-
if $opt_gsub_f
|
589
|
-
ary2 = []
|
590
|
-
ary.each_with_index {|str, i|
|
591
|
-
f = tblreader.field_from_index_ex(i)
|
592
|
-
if f == $opt_gsub_f
|
593
|
-
str ||= ''
|
594
|
-
ary2 << str.gsub(re, repl)
|
595
|
-
else
|
596
|
-
ary2 << str
|
597
|
-
end
|
598
|
-
}
|
599
|
-
else
|
600
|
-
ary2 = ary.map {|s|
|
601
|
-
s ||= ''
|
602
|
-
s.gsub(re, repl)
|
603
|
-
}
|
604
|
-
end
|
605
|
-
gen << ary2
|
606
|
-
}
|
607
|
-
}
|
608
|
-
}
|
609
|
-
end
|
610
|
-
|
611
|
-
def main_sort(argv)
|
612
|
-
op_sort.parse!(argv)
|
613
|
-
filename = argv.empty? ? '-' : argv.shift
|
614
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
615
|
-
if $opt_sort_f
|
616
|
-
fs = split_field_list_argument($opt_sort_f)
|
617
|
-
else
|
618
|
-
fs = nil
|
619
|
-
end
|
620
|
-
tbl = load_table(filename)
|
621
|
-
if fs
|
622
|
-
blk = lambda {|rec| fs.map {|f| comparison_value(rec[f]) } }
|
623
|
-
else
|
624
|
-
blk = lambda {|rec| rec.map {|k, v| comparison_value(v) } }
|
625
|
-
end
|
626
|
-
tbl2 = tbl.reorder_records_by(&blk)
|
627
|
-
with_output {|out|
|
628
|
-
tbl_generate_csv(tbl2, out)
|
629
|
-
}
|
630
|
-
end
|
631
|
-
|
632
|
-
def main_select(argv)
|
633
|
-
op_select.parse!(argv)
|
634
|
-
fs = split_field_list_argument(argv.shift)
|
635
|
-
filename = argv.shift || '-'
|
636
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
637
|
-
tablereader_open(filename) {|tblreader|
|
638
|
-
if $opt_select_v
|
639
|
-
h = {}
|
640
|
-
fs.each {|f| h[tblreader.index_from_field(f)] = true }
|
641
|
-
header = nil
|
642
|
-
if !$opt_N
|
643
|
-
header = []
|
644
|
-
tblreader.header.each_with_index {|f, i|
|
645
|
-
header << f if !h[i]
|
646
|
-
}
|
647
|
-
end
|
648
|
-
with_table_stream_output {|gen|
|
649
|
-
gen.output_header(header)
|
650
|
-
tblreader.each {|ary|
|
651
|
-
values = []
|
652
|
-
ary.each_with_index {|v, i|
|
653
|
-
values << v if !h[i]
|
654
|
-
}
|
655
|
-
gen << values
|
656
|
-
}
|
657
|
-
}
|
658
|
-
else
|
659
|
-
header = tblreader.header
|
660
|
-
is = []
|
661
|
-
is = fs.map {|f| tblreader.index_from_field(f) }
|
662
|
-
with_table_stream_output {|gen|
|
663
|
-
gen.output_header(is.map {|i| tblreader.field_from_index_ex(i) })
|
664
|
-
tblreader.each {|ary|
|
665
|
-
gen << ary.values_at(*is)
|
666
|
-
}
|
667
|
-
}
|
668
|
-
end
|
669
|
-
}
|
670
|
-
end
|
671
|
-
|
672
|
-
def main_rename(argv)
|
673
|
-
op_rename.parse!(argv)
|
674
|
-
fs = split_field_list_argument(argv.shift)
|
675
|
-
filename = argv.shift || '-'
|
676
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
677
|
-
h = {}
|
678
|
-
fs.each_slice(2) {|sf, df| h[sf] = df }
|
679
|
-
tablereader_open(filename) {|tblreader|
|
680
|
-
header = tblreader.header
|
681
|
-
h.each {|sf, df|
|
682
|
-
unless header.include? sf
|
683
|
-
raise "field not defined: #{sf.inspect}"
|
684
|
-
end
|
685
|
-
}
|
686
|
-
renamed_header = tblreader.header.map {|f| h.fetch(f, f) }
|
687
|
-
with_table_stream_output {|gen|
|
688
|
-
gen.output_header(renamed_header)
|
689
|
-
tblreader.each {|ary|
|
690
|
-
gen << ary
|
691
|
-
}
|
692
|
-
}
|
693
|
-
}
|
694
|
-
end
|
695
|
-
|
696
|
-
def main_newfield(argv)
|
697
|
-
op_rename.parse!(argv)
|
698
|
-
field = argv.shift
|
699
|
-
rubyexp = argv.shift
|
700
|
-
pr = eval("lambda {|_| #{rubyexp} }")
|
701
|
-
filename = argv.shift || '-'
|
702
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
703
|
-
tablereader_open(filename) {|tblreader|
|
704
|
-
renamed_header = [field] + tblreader.header
|
705
|
-
with_table_stream_output {|gen|
|
706
|
-
gen.output_header(renamed_header)
|
707
|
-
tblreader.each {|ary|
|
708
|
-
h = {}
|
709
|
-
ary.each_with_index {|str, i|
|
710
|
-
f = tblreader.field_from_index_ex(i)
|
711
|
-
h[f] = str
|
712
|
-
}
|
713
|
-
gen << [pr.call(h), *ary]
|
714
|
-
}
|
715
|
-
}
|
716
|
-
}
|
717
|
-
end
|
718
|
-
|
719
|
-
def main_cat(argv)
|
720
|
-
op_cat.parse!(argv)
|
721
|
-
argv = ['-'] if argv.empty?
|
722
|
-
if $opt_N
|
723
|
-
argv.each {|filename|
|
724
|
-
with_table_stream_output {|gen|
|
725
|
-
tablereader_open(filename) {|tblreader|
|
726
|
-
tblreader.each {|ary|
|
727
|
-
gen << ary
|
728
|
-
}
|
729
|
-
}
|
730
|
-
}
|
731
|
-
}
|
732
|
-
else
|
733
|
-
readers = []
|
734
|
-
h = {}
|
735
|
-
argv.each {|filename|
|
736
|
-
r = tablereader_open(filename)
|
737
|
-
readers << r
|
738
|
-
r.header.each {|f|
|
739
|
-
h[f] = h.size if !h[f]
|
740
|
-
}
|
741
|
-
}
|
742
|
-
with_table_stream_output {|gen|
|
743
|
-
gen.output_header h.keys.sort_by {|k| h[k] }
|
744
|
-
readers.each {|r|
|
745
|
-
header = r.header.dup
|
746
|
-
r.each {|ary|
|
747
|
-
while header.length < ary.length
|
748
|
-
f = r.field_from_index_ex(header.length)
|
749
|
-
header << f
|
750
|
-
h[f] = h.size if !h[f]
|
751
|
-
end
|
752
|
-
ary2 = []
|
753
|
-
ary.each_with_index {|v, i|
|
754
|
-
f = r.field_from_index(i)
|
755
|
-
j = h.fetch(f)
|
756
|
-
ary2[j] = v
|
757
|
-
}
|
758
|
-
gen << ary2
|
759
|
-
}
|
760
|
-
}
|
761
|
-
}
|
762
|
-
end
|
763
|
-
end
|
764
|
-
|
765
|
-
def main_join(argv)
|
766
|
-
op_join.parse!(argv)
|
767
|
-
result = Tb.new([], [])
|
768
|
-
retain_left = false
|
769
|
-
retain_right = false
|
770
|
-
case $opt_join_outer
|
771
|
-
when :full
|
772
|
-
retain_left = true
|
773
|
-
retain_right = true
|
774
|
-
when :left
|
775
|
-
retain_left = true
|
776
|
-
when :right
|
777
|
-
retain_right = true
|
778
|
-
when nil
|
779
|
-
else
|
780
|
-
raise "unexpected $opt_join_outer: #{$opt_join_outer.inspect}"
|
781
|
-
end
|
782
|
-
if $opt_join_outer
|
783
|
-
each_table_file(argv) {|tbl|
|
784
|
-
STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= $opt_debug
|
785
|
-
result = result.natjoin2_outer(tbl, $opt_join_outer_missing, retain_left, retain_right)
|
786
|
-
}
|
787
|
-
else
|
788
|
-
each_table_file(argv) {|tbl|
|
789
|
-
STDERR.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= $opt_debug
|
790
|
-
result = result.natjoin2(tbl)
|
791
|
-
}
|
792
|
-
end
|
793
|
-
with_output {|out|
|
794
|
-
tbl_generate_csv(result, out)
|
795
|
-
}
|
796
|
-
end
|
797
|
-
|
798
|
-
def main_group(argv)
|
799
|
-
op_group.parse!(argv)
|
800
|
-
kfs = split_field_list_argument(argv.shift)
|
801
|
-
opt_group_fields = $opt_group_fields.map {|arg|
|
802
|
-
aggregation_spec, new_field = split_field_list_argument(arg)
|
803
|
-
new_field ||= aggregation_spec
|
804
|
-
[new_field, lambda {|fields| make_aggregator(aggregation_spec, fields) } ]
|
805
|
-
}
|
806
|
-
filename = argv.shift || '-'
|
807
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
808
|
-
h = {}
|
809
|
-
tablereader_open(filename) {|tblreader|
|
810
|
-
kis = kfs.map {|f| tblreader.index_from_field(f) }
|
811
|
-
result_fields = kfs + opt_group_fields.map {|nf, maker| nf }
|
812
|
-
tblreader.each {|ary|
|
813
|
-
kvs = ary.values_at(*kis)
|
814
|
-
if !h.include?(kvs)
|
815
|
-
h[kvs] = opt_group_fields.map {|nf, maker| ag = maker.call(tblreader.header); ag.update(ary); ag }
|
816
|
-
else
|
817
|
-
h[kvs].each {|ag|
|
818
|
-
ag.update(ary)
|
819
|
-
}
|
820
|
-
end
|
821
|
-
}
|
822
|
-
result = Tb.new(result_fields)
|
823
|
-
h.keys.sort_by {|k| k.map {|v| comparison_value(v) } }.each {|k|
|
824
|
-
a = h[k]
|
825
|
-
result.insert_values result_fields, k + a.map {|ag| ag.finish }
|
826
|
-
}
|
827
|
-
with_output {|out|
|
828
|
-
tbl_generate_csv(result, out)
|
829
|
-
}
|
830
|
-
}
|
831
|
-
end
|
832
|
-
|
833
|
-
def main_cross(argv)
|
834
|
-
op_cross.parse!(argv)
|
835
|
-
hkfs = split_field_list_argument(argv.shift)
|
836
|
-
vkfs = split_field_list_argument(argv.shift)
|
837
|
-
if $opt_cross_fields.empty?
|
838
|
-
opt_cross_fields = [['count', 'count']]
|
839
|
-
else
|
840
|
-
opt_cross_fields = $opt_cross_fields.map {|arg|
|
841
|
-
agg_spec, new_field = split_field_list_argument(arg)
|
842
|
-
new_field ||= agg_spec
|
843
|
-
[agg_spec, new_field]
|
844
|
-
}
|
845
|
-
end
|
846
|
-
filename = argv.shift || '-'
|
847
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
848
|
-
tablereader_open(filename) {|tblreader|
|
849
|
-
vkis = vkfs.map {|f| tblreader.index_from_field(f) }
|
850
|
-
hkis = hkfs.map {|f| tblreader.index_from_field(f) }
|
851
|
-
vset = {}
|
852
|
-
hset = {}
|
853
|
-
set = {}
|
854
|
-
tblreader.each {|ary|
|
855
|
-
vkvs = ary.values_at(*vkis)
|
856
|
-
hkvs = ary.values_at(*hkis)
|
857
|
-
vset[vkvs] = true if !vset.include?(vkvs)
|
858
|
-
hset[hkvs] = true if !hset.include?(hkvs)
|
859
|
-
if !set.include?([vkvs, hkvs])
|
860
|
-
set[[vkvs, hkvs]] = opt_cross_fields.map {|agg_spec, nf|
|
861
|
-
ag = make_aggregator(agg_spec, tblreader.header)
|
862
|
-
ag.update(ary)
|
863
|
-
ag
|
864
|
-
}
|
865
|
-
else
|
866
|
-
set[[vkvs, hkvs]].each {|ag|
|
867
|
-
ag.update(ary)
|
868
|
-
}
|
869
|
-
end
|
870
|
-
}
|
871
|
-
vary = vset.keys.sort_by {|a| a.map {|v| comparison_value(v) } }
|
872
|
-
hary = hset.keys.sort_by {|a| a.map {|v| comparison_value(v) } }
|
873
|
-
with_output {|out|
|
874
|
-
Tb.csv_stream_output(out) {|gen|
|
875
|
-
hkfs.each_with_index {|hkf, i|
|
876
|
-
next if $opt_cross_compact && i == hkfs.length - 1
|
877
|
-
row = [nil] * (vkfs.length - 1) + [hkf]
|
878
|
-
hary.each {|hkvs| opt_cross_fields.length.times { row << hkvs[i] } }
|
879
|
-
gen << row
|
880
|
-
}
|
881
|
-
if $opt_cross_compact
|
882
|
-
r = vkfs.dup
|
883
|
-
hary.each {|hkvs| r.concat([hkvs[-1]] * opt_cross_fields.length) }
|
884
|
-
gen << r
|
885
|
-
else
|
886
|
-
r = vkfs.dup
|
887
|
-
hary.each {|hkvs| r.concat opt_cross_fields.map {|agg_spec, new_field| new_field } }
|
888
|
-
gen << r
|
889
|
-
end
|
890
|
-
vary.each {|vkvs|
|
891
|
-
row = vkvs.dup
|
892
|
-
hary.each {|hkvs|
|
893
|
-
ags = set[[vkvs, hkvs]]
|
894
|
-
if !ags
|
895
|
-
opt_cross_fields.length.times { row << nil }
|
896
|
-
else
|
897
|
-
ags.each {|ag| row << ag.finish }
|
898
|
-
end
|
899
|
-
}
|
900
|
-
gen << row
|
901
|
-
}
|
902
|
-
}
|
903
|
-
}
|
904
|
-
}
|
905
|
-
end
|
906
|
-
|
907
|
-
def main_shape(argv)
|
908
|
-
op_shape.parse!(argv)
|
909
|
-
filenames = argv.empty? ? ['-'] : argv
|
910
|
-
result = Tb.new(%w[header_fields min_fields max_fields records filename])
|
911
|
-
filenames.each {|filename|
|
912
|
-
tablereader_open(filename) {|tblreader|
|
913
|
-
num_header_fields = tblreader.header.length
|
914
|
-
min_num_fields = nil
|
915
|
-
max_num_fields = nil
|
916
|
-
num_records = 0
|
917
|
-
tblreader.each {|ary|
|
918
|
-
num_records += 1
|
919
|
-
n = ary.length
|
920
|
-
if min_num_fields.nil?
|
921
|
-
min_num_fields = max_num_fields = n
|
922
|
-
else
|
923
|
-
min_num_fields = n if n < min_num_fields
|
924
|
-
max_num_fields = n if max_num_fields < n
|
925
|
-
end
|
926
|
-
}
|
927
|
-
result.insert({'header_fields'=>num_header_fields,
|
928
|
-
'min_fields'=>min_num_fields,
|
929
|
-
'max_fields'=>max_num_fields,
|
930
|
-
'records'=>num_records,
|
931
|
-
'filename'=>filename})
|
932
|
-
}
|
933
|
-
}
|
934
|
-
with_output {|out|
|
935
|
-
# don't use tbl_generate_csv() because the header should always outputted.
|
936
|
-
result.generate_csv(out)
|
937
|
-
}
|
938
|
-
end
|
939
|
-
|
940
|
-
def main_mheader(argv)
|
941
|
-
op_mheader.parse!(argv)
|
942
|
-
filename = argv.shift || '-'
|
943
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
944
|
-
header = []
|
945
|
-
if $opt_mheader_count
|
946
|
-
c = $opt_mheader_count
|
947
|
-
header_end_p = lambda {
|
948
|
-
c -= 1
|
949
|
-
c == 0 ? header.map {|a| a.compact.join(' ').strip } : nil
|
950
|
-
}
|
951
|
-
else
|
952
|
-
header_end_p = lambda {
|
953
|
-
h2 = header.map {|a| a.compact.join(' ').strip }.uniq
|
954
|
-
header.length == h2.length ? h2 : nil
|
955
|
-
}
|
956
|
-
end
|
957
|
-
with_table_stream_output {|gen|
|
958
|
-
Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
|
959
|
-
tblreader.each {|ary|
|
960
|
-
if header
|
961
|
-
ary.each_with_index {|v,i|
|
962
|
-
header[i] ||= []
|
963
|
-
header[i] << v if header[i].empty? || header[i].last != v
|
964
|
-
}
|
965
|
-
h2 = header_end_p.call
|
966
|
-
if h2
|
967
|
-
gen << h2
|
968
|
-
header = nil
|
969
|
-
end
|
970
|
-
else
|
971
|
-
gen << ary
|
972
|
-
end
|
973
|
-
}
|
974
|
-
}
|
975
|
-
}
|
976
|
-
if header
|
977
|
-
warn "no header found."
|
978
|
-
end
|
979
|
-
end
|
980
|
-
|
981
|
-
def main_crop(argv)
|
982
|
-
op_crop.parse!(argv)
|
983
|
-
filename = argv.shift || '-'
|
984
|
-
warn "extra arguments: #{argv.join(" ")}" if !argv.empty?
|
985
|
-
stream = false
|
986
|
-
if $opt_crop_range
|
987
|
-
case $opt_crop_range
|
988
|
-
when /\A(\d+),(\d+)-(\d+),(\d+)\z/ # 1-based
|
989
|
-
stream = true
|
990
|
-
range_col1 = $1.to_i
|
991
|
-
range_row1 = $2.to_i
|
992
|
-
range_col2 = $3.to_i
|
993
|
-
range_row2 = $4.to_i
|
994
|
-
when /\A([A-Z]+)(\d+):([A-Z]+)(\d+)\z/ # 1-based
|
995
|
-
stream = true
|
996
|
-
range_col1 = decode_a1_addressing_col($1)
|
997
|
-
range_row1 = $2.to_i
|
998
|
-
range_col2 = decode_a1_addressing_col($3)
|
999
|
-
range_row2 = $4.to_i
|
1000
|
-
else
|
1001
|
-
raise ArgumentError, "unexpected range argument: #{$opt_crop_range.inspect}"
|
1002
|
-
end
|
1003
|
-
end
|
1004
|
-
if stream
|
1005
|
-
with_table_stream_output {|gen|
|
1006
|
-
Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
|
1007
|
-
rownum = 1
|
1008
|
-
tblreader.each {|ary|
|
1009
|
-
if range_row2 < rownum
|
1010
|
-
break
|
1011
|
-
end
|
1012
|
-
if range_row1 <= rownum
|
1013
|
-
if range_col2 < ary.length
|
1014
|
-
ary[range_col2..-1] = []
|
1015
|
-
end
|
1016
|
-
if 1 < range_col1
|
1017
|
-
ary[0...(range_col1-1)] = []
|
1018
|
-
end
|
1019
|
-
gen << ary
|
1020
|
-
end
|
1021
|
-
rownum += 1
|
1022
|
-
}
|
1023
|
-
}
|
1024
|
-
}
|
1025
|
-
else
|
1026
|
-
arys = []
|
1027
|
-
Tb::Reader.open(filename, {:numeric=>true}) {|tblreader|
|
1028
|
-
tblreader.each {|a|
|
1029
|
-
a.pop while !a.empty? && (a.last.nil? || a.last == '')
|
1030
|
-
arys << a
|
1031
|
-
}
|
1032
|
-
}
|
1033
|
-
arys.pop while !arys.empty? && arys.last.all? {|v| v.nil? || v == '' }
|
1034
|
-
arys.shift while !arys.empty? && arys.first.all? {|v| v.nil? || v == '' }
|
1035
|
-
if !arys.empty?
|
1036
|
-
while arys.all? {|a| a.empty? || (a.first.nil? || a.first == '') }
|
1037
|
-
arys.each {|a| a.shift }
|
1038
|
-
end
|
1039
|
-
end
|
1040
|
-
with_table_stream_output {|gen|
|
1041
|
-
arys.each {|a| gen << a }
|
1042
|
-
}
|
1043
|
-
end
|
1044
|
-
end
|
1045
|
-
|
1046
|
-
def decode_a1_addressing_col(str)
|
1047
|
-
(26**str.length-1)/25+str.tr("A-Z", "0-9A-P").to_i(26)
|
1048
|
-
end
|
1049
|
-
|
1050
|
-
def split_field_list_argument(arg)
|
1051
|
-
split_csv_argument(arg).map {|f| f || '' }
|
1052
|
-
end
|
1053
|
-
|
1054
|
-
def split_csv_argument(arg)
|
1055
|
-
Tb.csv_stream_input(arg) {|ary| return ary }
|
1056
|
-
return []
|
1057
|
-
end
|
1058
|
-
|
1059
|
-
def each_table_file(argv)
|
1060
|
-
if argv.empty?
|
1061
|
-
yield load_table('-')
|
1062
|
-
else
|
1063
|
-
argv.each {|filename|
|
1064
|
-
tbl = load_table(filename)
|
1065
|
-
yield tbl
|
1066
|
-
}
|
1067
|
-
end
|
1068
|
-
end
|
1069
|
-
|
1070
|
-
def load_table(filename)
|
1071
|
-
tablereader_open(filename) {|tblreader|
|
1072
|
-
arys = []
|
1073
|
-
tblreader.each {|ary|
|
1074
|
-
arys << ary
|
1075
|
-
}
|
1076
|
-
header = tblreader.header
|
1077
|
-
tbl = Tb.new(header)
|
1078
|
-
arys.each {|ary|
|
1079
|
-
ary << nil while ary.length < header.length
|
1080
|
-
tbl.insert_values header, ary
|
1081
|
-
}
|
1082
|
-
tbl
|
1083
|
-
}
|
1084
|
-
end
|
1085
|
-
|
1086
|
-
def tablereader_open(filename, &b)
|
1087
|
-
Tb::Reader.open(filename, {:numeric=>$opt_N}, &b)
|
1088
|
-
end
|
1089
|
-
|
1090
|
-
def with_table_stream_output
|
1091
|
-
with_output {|out|
|
1092
|
-
Tb.csv_stream_output(out) {|gen|
|
1093
|
-
def gen.output_header(header)
|
1094
|
-
self << header if !$opt_N
|
1095
|
-
end
|
1096
|
-
yield gen
|
1097
|
-
}
|
1098
|
-
}
|
1099
|
-
end
|
1100
|
-
|
1101
|
-
def tbl_generate_csv(tbl, out)
|
1102
|
-
if $opt_N
|
1103
|
-
header = tbl.list_fields
|
1104
|
-
Tb.csv_stream_output(out) {|gen|
|
1105
|
-
tbl.each {|rec|
|
1106
|
-
gen << rec.values_at(*header)
|
1107
|
-
}
|
1108
|
-
}
|
1109
|
-
else
|
1110
|
-
tbl.generate_csv(out)
|
1111
|
-
end
|
1112
|
-
end
|
1113
|
-
|
1114
|
-
def tbl_generate_tsv(tbl, out)
|
1115
|
-
if $opt_N
|
1116
|
-
header = tbl.list_fields
|
1117
|
-
Tb.tsv_stream_output(out) {|gen|
|
1118
|
-
tbl.each {|rec|
|
1119
|
-
gen << rec.values_at(*header)
|
1120
|
-
}
|
1121
|
-
}
|
1122
|
-
else
|
1123
|
-
tbl.generate_tsv(out)
|
1124
|
-
end
|
1125
|
-
end
|
1126
|
-
|
1127
|
-
def with_output
|
1128
|
-
if STDOUT.tty? && !$opt_no_pager
|
1129
|
-
IO.popen(ENV['PAGER'] || 'more', 'w') {|pager|
|
1130
|
-
yield pager
|
1131
|
-
}
|
1132
|
-
else
|
1133
|
-
yield STDOUT
|
1134
|
-
end
|
1135
|
-
end
|
1136
|
-
|
1137
|
-
main ARGV
|
29
|
+
Tb::Cmd.main(ARGV)
|