tb 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -1
- data/lib/tb.rb +7 -3
- data/lib/tb/basic.rb +1 -1
- data/lib/tb/cmd_cat.rb +1 -3
- data/lib/tb/cmd_consecutive.rb +4 -6
- data/lib/tb/cmd_crop.rb +5 -7
- data/lib/tb/cmd_cross.rb +51 -49
- data/lib/tb/cmd_cut.rb +2 -6
- data/lib/tb/cmd_git_log.rb +20 -11
- data/lib/tb/cmd_grep.rb +1 -3
- data/lib/tb/cmd_group.rb +18 -44
- data/lib/tb/cmd_gsub.rb +2 -4
- data/lib/tb/cmd_join.rb +1 -3
- data/lib/tb/cmd_ls.rb +8 -15
- data/lib/tb/cmd_mheader.rb +3 -4
- data/lib/tb/cmd_nest.rb +4 -9
- data/lib/tb/cmd_newfield.rb +1 -3
- data/lib/tb/cmd_rename.rb +2 -4
- data/lib/tb/cmd_shape.rb +2 -3
- data/lib/tb/cmd_sort.rb +3 -5
- data/lib/tb/cmd_svn_log.rb +3 -5
- data/lib/tb/cmd_tar_tvf.rb +2 -4
- data/lib/tb/cmd_to_csv.rb +1 -1
- data/lib/tb/cmd_unnest.rb +1 -3
- data/lib/tb/cmdutil.rb +57 -135
- data/lib/tb/csv.rb +11 -54
- data/lib/tb/customcmp.rb +41 -0
- data/lib/tb/customeq.rb +41 -0
- data/lib/tb/enumerable.rb +225 -435
- data/lib/tb/enumerator.rb +22 -14
- data/lib/tb/ex_enumerable.rb +659 -0
- data/lib/tb/ex_enumerator.rb +102 -0
- data/lib/tb/fileenumerator.rb +2 -2
- data/lib/tb/func.rb +141 -0
- data/lib/tb/json.rb +1 -1
- data/lib/tb/reader.rb +4 -4
- data/lib/tb/search.rb +2 -4
- data/lib/tb/zipper.rb +60 -0
- data/test/test_cmd_cat.rb +40 -0
- data/test/test_cmd_git_log.rb +116 -0
- data/test/test_cmd_ls.rb +90 -0
- data/test/test_cmd_svn_log.rb +87 -0
- data/test/test_cmd_to_csv.rb +14 -0
- data/test/test_cmdutil.rb +25 -10
- data/test/test_csv.rb +10 -0
- data/test/test_customcmp.rb +14 -0
- data/test/test_customeq.rb +20 -0
- data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
- data/test/test_search.rb +2 -10
- data/test/test_tbenum.rb +3 -3
- data/test/test_zipper.rb +22 -0
- metadata +20 -8
- data/lib/tb/enum.rb +0 -294
- data/lib/tb/pairs.rb +0 -227
- data/test/test_pairs.rb +0 -122
data/README
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
= tb - manipulation tool for table: CSV, TSV, etc.
|
1
|
+
= tb - manipulation tool for table: CSV, TSV, JSON, etc.
|
2
2
|
|
3
3
|
tb provides a command and a library for manipulating tables:
|
4
4
|
Unix filter like operations (grep, sort, cat, cut, ls, etc.),
|
5
5
|
SQL like operations (join, group, etc.),
|
6
|
+
information extractions (git-log, svn-log, tar-tvf),
|
6
7
|
and more.
|
7
8
|
|
8
9
|
== Example
|
data/lib/tb.rb
CHANGED
@@ -34,9 +34,10 @@ class Tb
|
|
34
34
|
end
|
35
35
|
|
36
36
|
require 'pp'
|
37
|
-
require 'tb/
|
37
|
+
require 'tb/enumerable'
|
38
38
|
require 'tb/enumerator'
|
39
|
-
require 'tb/
|
39
|
+
require 'tb/func'
|
40
|
+
require 'tb/zipper'
|
40
41
|
require 'tb/basic'
|
41
42
|
require 'tb/record'
|
42
43
|
require 'tb/csv'
|
@@ -48,6 +49,9 @@ require 'tb/ropen'
|
|
48
49
|
require 'tb/catreader'
|
49
50
|
require 'tb/fieldset'
|
50
51
|
require 'tb/search'
|
51
|
-
require 'tb/
|
52
|
+
require 'tb/ex_enumerable'
|
53
|
+
require 'tb/ex_enumerator'
|
52
54
|
require 'tb/fileenumerator'
|
53
55
|
require 'tb/revcmp'
|
56
|
+
require 'tb/customcmp'
|
57
|
+
require 'tb/customeq'
|
data/lib/tb/basic.rb
CHANGED
data/lib/tb/cmd_cat.rb
CHANGED
@@ -63,7 +63,5 @@ def (Tb::Cmd).main_cat(argv)
|
|
63
63
|
exit_if_help('cat')
|
64
64
|
argv = ['-'] if argv.empty?
|
65
65
|
creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N, Tb::Cmd.opt_cat_with_filename)
|
66
|
-
|
67
|
-
creader.write_to_csv(out, !Tb::Cmd.opt_N)
|
68
|
-
}
|
66
|
+
output_tbenum(creader)
|
69
67
|
end
|
data/lib/tb/cmd_consecutive.rb
CHANGED
@@ -68,22 +68,20 @@ def (Tb::Cmd).main_consecutive(argv)
|
|
68
68
|
}.each {|pairs, header|
|
69
69
|
buf << pairs
|
70
70
|
if buf.length == Tb::Cmd.opt_consecutive_n
|
71
|
-
pairs2 =
|
71
|
+
pairs2 = {}
|
72
72
|
header.each {|f|
|
73
73
|
Tb::Cmd.opt_consecutive_n.times {|i|
|
74
74
|
ps = buf[i]
|
75
75
|
next if !ps.has_key?(f)
|
76
76
|
v = ps[f]
|
77
|
-
pairs2
|
77
|
+
pairs2["#{f}_#{i+1}"] = v
|
78
78
|
}
|
79
79
|
}
|
80
80
|
empty = false
|
81
|
-
y.yield
|
81
|
+
y.yield pairs2
|
82
82
|
buf.shift
|
83
83
|
end
|
84
84
|
}
|
85
85
|
}
|
86
|
-
|
87
|
-
er.write_to_csv(out, !Tb::Cmd.opt_N)
|
88
|
-
}
|
86
|
+
output_tbenum(er)
|
89
87
|
end
|
data/lib/tb/cmd_crop.rb
CHANGED
@@ -84,9 +84,8 @@ def (Tb::Cmd).main_crop(argv)
|
|
84
84
|
rownum += 1
|
85
85
|
}
|
86
86
|
}
|
87
|
-
|
88
|
-
|
89
|
-
}
|
87
|
+
Tb::Cmd.opt_N = true
|
88
|
+
output_tbenum(er)
|
90
89
|
else
|
91
90
|
creader = Tb::CatReader.open(argv, true)
|
92
91
|
last_nonempty_row = nil
|
@@ -127,13 +126,12 @@ def (Tb::Cmd).main_crop(argv)
|
|
127
126
|
break
|
128
127
|
end
|
129
128
|
ary.slice!(0, lmargin_min)
|
130
|
-
pairs =
|
129
|
+
pairs = Hash[ary.map.with_index {|v, i| ["#{i+1}", v]}]
|
131
130
|
y.yield pairs
|
132
131
|
}
|
133
132
|
}
|
134
|
-
|
135
|
-
|
136
|
-
}
|
133
|
+
Tb::Cmd.opt_N = true
|
134
|
+
output_tbenum(er)
|
137
135
|
end
|
138
136
|
end
|
139
137
|
|
data/lib/tb/cmd_cross.rb
CHANGED
@@ -49,36 +49,51 @@ def (Tb::Cmd).main_cross(argv)
|
|
49
49
|
vkfs = split_field_list_argument(argv.shift)
|
50
50
|
err('no hkey-fields given.') if argv.empty?
|
51
51
|
hkfs = split_field_list_argument(argv.shift)
|
52
|
+
vhkfs = vkfs + hkfs
|
52
53
|
if Tb::Cmd.opt_cross_fields.empty?
|
53
|
-
|
54
|
+
num_aggregate_fields = 1
|
55
|
+
opt_cross_fields = (vkfs + hkfs).map {|f| [f, Tb::Func::First, f] } +
|
56
|
+
[['count', Tb::Func::Count, nil]]
|
54
57
|
else
|
55
|
-
|
58
|
+
num_aggregate_fields = Tb::Cmd.opt_cross_fields.length
|
59
|
+
opt_cross_fields = (vkfs + hkfs).map {|f| [f, Tb::Func::First, f] } +
|
60
|
+
Tb::Cmd.opt_cross_fields.map {|arg|
|
56
61
|
agg_spec, new_field = split_field_list_argument(arg)
|
57
62
|
new_field ||= agg_spec
|
58
|
-
|
63
|
+
begin
|
64
|
+
func_srcf = parse_aggregator_spec2(agg_spec)
|
65
|
+
rescue ArgumentError
|
66
|
+
err($!.message)
|
67
|
+
end
|
68
|
+
[new_field, *func_srcf]
|
59
69
|
}
|
60
70
|
end
|
61
71
|
argv = ['-'] if argv.empty?
|
62
72
|
creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
|
63
73
|
er = Tb::Enumerator.new {|y|
|
64
|
-
header = nil
|
65
74
|
hvs_hash = {}
|
66
75
|
hvs_list = nil
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
vcv
|
72
|
-
}
|
73
|
-
sorted2 = sorted.with_header {|header0|
|
74
|
-
header = header0
|
75
|
-
(vkfs + hkfs).each {|f|
|
76
|
+
aggs_hash = nil
|
77
|
+
op = Tb::Zipper.new(opt_cross_fields.map {|dstf, func, srcf| func })
|
78
|
+
er = creader.with_header {|header0|
|
79
|
+
vhkfs.each {|f|
|
76
80
|
if !header0.include?(f)
|
77
81
|
err("field not found: #{f}")
|
78
82
|
end
|
79
83
|
}
|
80
|
-
|
81
|
-
|
84
|
+
}.extsort_reduce(op) {|pairs|
|
85
|
+
vvs = vkfs.map {|f| pairs[f] }
|
86
|
+
hvs = hkfs.map {|f| pairs[f] }
|
87
|
+
vvsc = vvs.map {|v| Tb::Func.smart_cmp_value(v) }
|
88
|
+
hvsc = hvs.map {|v| Tb::Func.smart_cmp_value(v) }
|
89
|
+
hvs_hash[hvs] = hvsc
|
90
|
+
aggs = opt_cross_fields.map {|dstf, func, srcf| func.start(srcf ? pairs[srcf] : true) }
|
91
|
+
[[vvsc, hvsc], aggs]
|
92
|
+
}
|
93
|
+
all_representative = lambda {|_| 1 }
|
94
|
+
all_before = lambda {|_|
|
95
|
+
hvs_list = hvs_hash.keys.sort_by {|hvs| hvs_hash[hvs] }
|
96
|
+
n = vkfs.length + hvs_list.length * num_aggregate_fields
|
82
97
|
header1 = (1..n).map {|i| i.to_s }
|
83
98
|
y.set_header header1
|
84
99
|
hkfs.each_with_index {|hkf, i|
|
@@ -87,7 +102,7 @@ def (Tb::Cmd).main_cross(argv)
|
|
87
102
|
j = vkfs.length
|
88
103
|
h1[j.to_s] = hkf
|
89
104
|
hvs_list.each {|hkvs|
|
90
|
-
|
105
|
+
num_aggregate_fields.times {
|
91
106
|
j += 1
|
92
107
|
h1[j.to_s] = hkvs[i]
|
93
108
|
}
|
@@ -101,49 +116,35 @@ def (Tb::Cmd).main_cross(argv)
|
|
101
116
|
h2[j.to_s] = vkf
|
102
117
|
}
|
103
118
|
hvs_list.each {|hkvs|
|
104
|
-
opt_cross_fields.each {|
|
119
|
+
opt_cross_fields.last(num_aggregate_fields).each {|dstf, func, srcf|
|
105
120
|
j += 1
|
106
121
|
if Tb::Cmd.opt_cross_compact
|
107
122
|
h2[j.to_s] = hkvs[-1]
|
108
123
|
else
|
109
|
-
h2[j.to_s] =
|
124
|
+
h2[j.to_s] = dstf
|
110
125
|
end
|
111
126
|
}
|
112
127
|
}
|
113
128
|
y.yield h2
|
114
129
|
}
|
115
|
-
|
116
|
-
|
117
|
-
vcv2 = vkfs.map {|f| smart_cmp_value(pairs2[f]) }
|
118
|
-
vcv1 != vcv2
|
130
|
+
v_representative = lambda {|((vvsc, _), _)|
|
131
|
+
vvsc
|
119
132
|
}
|
120
|
-
|
121
|
-
|
122
|
-
aggs = {}
|
133
|
+
v_before = lambda {|_|
|
134
|
+
aggs_hash = {}
|
123
135
|
}
|
124
|
-
body = lambda {|
|
125
|
-
hvs = hkfs.
|
126
|
-
|
127
|
-
aggs[hvs] = opt_cross_fields.map {|agg_spec, nf|
|
128
|
-
begin
|
129
|
-
make_aggregator(agg_spec, header)
|
130
|
-
rescue ArgumentError
|
131
|
-
err($!.message)
|
132
|
-
end
|
133
|
-
}
|
134
|
-
end
|
135
|
-
ary = header.map {|f| pairs[f] }
|
136
|
-
aggs[hvs].each {|agg|
|
137
|
-
agg.update(ary)
|
138
|
-
}
|
136
|
+
body = lambda {|(_, aggs)|
|
137
|
+
hvs = aggs[vkfs.length, hkfs.length]
|
138
|
+
aggs_hash[hvs] = op.aggregate(aggs)
|
139
139
|
}
|
140
|
-
|
141
|
-
|
140
|
+
v_after = lambda {|(_, aggs)|
|
141
|
+
vvs = aggs[0, vkfs.length]
|
142
|
+
ary = vvs
|
142
143
|
hvs_list.each {|hvs|
|
143
|
-
if
|
144
|
-
ary.concat(
|
144
|
+
if aggs_hash.has_key? hvs
|
145
|
+
ary.concat(aggs_hash[hvs].last(num_aggregate_fields))
|
145
146
|
else
|
146
|
-
ary.concat([nil] *
|
147
|
+
ary.concat([nil] * num_aggregate_fields)
|
147
148
|
end
|
148
149
|
}
|
149
150
|
pairs = {}
|
@@ -152,10 +153,11 @@ def (Tb::Cmd).main_cross(argv)
|
|
152
153
|
}
|
153
154
|
y.yield pairs
|
154
155
|
}
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
er.write_to_csv(out, false)
|
156
|
+
er.detect_nested_group_by(
|
157
|
+
[[all_representative, all_before],
|
158
|
+
[v_representative, v_before, v_after]]).each(&body)
|
159
159
|
}
|
160
|
+
Tb::Cmd.opt_N = true
|
161
|
+
output_tbenum(er)
|
160
162
|
end
|
161
163
|
|
data/lib/tb/cmd_cut.rb
CHANGED
@@ -56,9 +56,7 @@ def (Tb::Cmd).main_cut(argv)
|
|
56
56
|
y.yield pairs.reject {|k, v| fs.include? k }
|
57
57
|
}
|
58
58
|
}
|
59
|
-
|
60
|
-
er.write_to_csv(out, !Tb::Cmd.opt_N)
|
61
|
-
}
|
59
|
+
output_tbenum(er)
|
62
60
|
else
|
63
61
|
er = Tb::Enumerator.new {|y|
|
64
62
|
tblreader.with_header {|header0|
|
@@ -73,9 +71,7 @@ def (Tb::Cmd).main_cut(argv)
|
|
73
71
|
y.yield pairs.reject {|k, v| !fs.include?(k) }
|
74
72
|
}
|
75
73
|
}
|
76
|
-
|
77
|
-
er.write_to_csv(out, !Tb::Cmd.opt_N)
|
78
|
-
}
|
74
|
+
output_tbenum(er)
|
79
75
|
end
|
80
76
|
}
|
81
77
|
end
|
data/lib/tb/cmd_git_log.rb
CHANGED
@@ -29,15 +29,17 @@
|
|
29
29
|
Tb::Cmd.subcommands << 'git-log'
|
30
30
|
|
31
31
|
Tb::Cmd.default_option[:opt_git_log_git_command] = nil
|
32
|
-
Tb::Cmd.default_option[:
|
32
|
+
Tb::Cmd.default_option[:opt_git_log_debug_input] = nil
|
33
|
+
Tb::Cmd.default_option[:opt_git_log_debug_output] = nil
|
33
34
|
|
34
35
|
def (Tb::Cmd).op_git_log
|
35
36
|
op = OptionParser.new
|
36
37
|
op.banner = "Usage: tb git-log [OPTS] [GIT-DIR ...]\n" +
|
37
38
|
"Show the GIT log as a table."
|
38
|
-
define_common_option(op, "
|
39
|
+
define_common_option(op, "hNod", "--no-pager", '--debug')
|
39
40
|
op.def_option('--git-command COMMAND', 'specify the git command (default: git)') {|command| Tb::Cmd.opt_git_log_git_command = command }
|
40
|
-
op.def_option('--debug-git-log-
|
41
|
+
op.def_option('--debug-git-log-output FILE', 'store the raw output of git-log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_output = filename }
|
42
|
+
op.def_option('--debug-git-log-input FILE', 'use the file as output of git-log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_input = filename }
|
41
43
|
op
|
42
44
|
end
|
43
45
|
|
@@ -68,8 +70,8 @@ Tb::Cmd::GIT_LOG_PRETTY_FORMAT = 'format:%x01commit-separator%x01%n' +
|
|
68
70
|
Tb::Cmd::GIT_LOG_HEADER = Tb::Cmd::GIT_LOG_FORMAT_SPEC.map {|k, v| k } + ['files']
|
69
71
|
|
70
72
|
def (Tb::Cmd).git_log_with_git_log(dir)
|
71
|
-
if Tb::Cmd.
|
72
|
-
File.open(Tb::Cmd.
|
73
|
+
if Tb::Cmd.opt_git_log_debug_input
|
74
|
+
File.open(Tb::Cmd.opt_git_log_debug_input) {|f|
|
73
75
|
yield f
|
74
76
|
}
|
75
77
|
else
|
@@ -85,9 +87,18 @@ def (Tb::Cmd).git_log_with_git_log(dir)
|
|
85
87
|
'.',
|
86
88
|
{:chdir=>dir}
|
87
89
|
]
|
88
|
-
|
89
|
-
|
90
|
-
|
90
|
+
$stderr.puts "git command line: #{command.inspect}" if 1 <= Tb::Cmd.opt_debug
|
91
|
+
if Tb::Cmd.opt_git_log_debug_output
|
92
|
+
command.last[:out] = Tb::Cmd.opt_git_log_debug_output
|
93
|
+
system(*command)
|
94
|
+
File.open(Tb::Cmd.opt_git_log_debug_output) {|f|
|
95
|
+
yield f
|
96
|
+
}
|
97
|
+
else
|
98
|
+
IO.popen(command) {|f|
|
99
|
+
yield f
|
100
|
+
}
|
101
|
+
end
|
91
102
|
end
|
92
103
|
end
|
93
104
|
|
@@ -186,8 +197,6 @@ def (Tb::Cmd).main_git_log(argv)
|
|
186
197
|
}
|
187
198
|
}
|
188
199
|
}
|
189
|
-
|
190
|
-
er.write_to_csv(out, !Tb::Cmd.opt_N)
|
191
|
-
}
|
200
|
+
output_tbenum(er)
|
192
201
|
end
|
193
202
|
|
data/lib/tb/cmd_grep.rb
CHANGED
data/lib/tb/cmd_group.rb
CHANGED
@@ -46,61 +46,35 @@ def (Tb::Cmd).main_group(argv)
|
|
46
46
|
exit_if_help('group')
|
47
47
|
err("no key fields given.") if argv.empty?
|
48
48
|
kfs = split_field_list_argument(argv.shift)
|
49
|
-
opt_group_fields =
|
49
|
+
opt_group_fields = kfs.map {|f| [f, Tb::Func::First, f] } +
|
50
|
+
Tb::Cmd.opt_group_fields.map {|arg|
|
50
51
|
aggregation_spec, new_field = split_field_list_argument(arg)
|
51
52
|
new_field ||= aggregation_spec
|
52
53
|
[new_field,
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
end
|
59
|
-
}
|
54
|
+
*begin
|
55
|
+
parse_aggregator_spec2(aggregation_spec)
|
56
|
+
rescue ArgumentError
|
57
|
+
err($!.message)
|
58
|
+
end
|
60
59
|
]
|
61
60
|
}
|
62
61
|
argv = ['-'] if argv.empty?
|
63
62
|
creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
|
64
63
|
result = Tb::Enumerator.new {|y|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
row = nil
|
70
|
-
agg = nil
|
71
|
-
er2 = er.with_header {|header0|
|
72
|
-
header = header0
|
73
|
-
y.set_header(kfs + opt_group_fields.map {|f, maker| f })
|
74
|
-
}
|
75
|
-
boudary_p = lambda {|pairs1, pairs2|
|
76
|
-
kfs.any? {|f| pairs1[f] != pairs2[f] }
|
64
|
+
op = Tb::Zipper.new(opt_group_fields.map {|dstf, func, srcf| func })
|
65
|
+
er = creader.extsort_reduce(op) {|pairs|
|
66
|
+
[kfs.map {|f| Tb::Func.smart_cmp_value(pairs[f]) },
|
67
|
+
opt_group_fields.map {|dstf, func, srcf| func.start(srcf ? pairs[srcf] : true) } ]
|
77
68
|
}
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
agg = {}
|
84
|
-
opt_group_fields.each {|f, maker|
|
85
|
-
agg[f] = maker.call(header)
|
69
|
+
fields = opt_group_fields.map {|dstf, func, srcf| dstf }
|
70
|
+
y.set_header(fields)
|
71
|
+
er.each {|_, vals|
|
72
|
+
pairs = opt_group_fields.zip(vals).map {|(dstf, func, _), val|
|
73
|
+
[dstf, func.aggregate(val)]
|
86
74
|
}
|
75
|
+
y.yield Hash[pairs]
|
87
76
|
}
|
88
|
-
body = lambda {|pairs|
|
89
|
-
ary = header.map {|f| pairs[f] }
|
90
|
-
opt_group_fields.each {|f, maker|
|
91
|
-
agg[f].update(ary)
|
92
|
-
}
|
93
|
-
}
|
94
|
-
after = lambda {|last_pairs|
|
95
|
-
opt_group_fields.each {|f, maker|
|
96
|
-
row[f] = agg[f].finish
|
97
|
-
}
|
98
|
-
y.yield row
|
99
|
-
}
|
100
|
-
er2.each_group_element(boudary_p, before, body, after)
|
101
|
-
}
|
102
|
-
with_output {|out|
|
103
|
-
result.write_to_csv(out, !Tb::Cmd.opt_N)
|
104
77
|
}
|
78
|
+
output_tbenum(result)
|
105
79
|
end
|
106
80
|
|
data/lib/tb/cmd_gsub.rb
CHANGED