tb 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
@@ -71,8 +71,6 @@ def (Tb::Cmd).main_join(argv)
71
71
  $stderr.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= Tb::Cmd.opt_debug
72
72
  result = result.natjoin2_outer(tbl, Tb::Cmd.opt_join_outer_missing, retain_left, retain_right)
73
73
  }
74
- with_output {|out|
75
- result.write_to_csv(out, !Tb::Cmd.opt_N)
76
- }
74
+ output_tbenum(result)
77
75
  end
78
76
 
@@ -63,9 +63,7 @@ def (Tb::Cmd).main_ls(argv)
63
63
  ls.ls_run(Pathname(ls.real_pathname_string(arg)))
64
64
  }
65
65
  }
66
- with_output {|out|
67
- er.write_to_csv(out, !Tb::Cmd.opt_N)
68
- }
66
+ output_tbenum(er)
69
67
  if ls.fail
70
68
  exit false
71
69
  end
@@ -106,7 +104,7 @@ class Tb::Cmd::Ls
106
104
  return
107
105
  end
108
106
  entries.map! {|filename| real_pathname_string(filename) }
109
- entries = entries.sort_by {|filename| smart_cmp_value(filename) }
107
+ entries = entries.sort_by {|filename| Tb::Func.smart_cmp_value(filename) }
110
108
  if @opts[:a] || @opts[:A]
111
109
  entries1, entries2 = entries.partition {|filename| /\A\./ =~ filename }
112
110
  entries0, entries1 = entries1.partition {|filename| filename == '.' || filename == '..' }
@@ -155,7 +153,7 @@ class Tb::Cmd::Ls
155
153
  end
156
154
  @y.yield ls_long_info(path, st)
157
155
  else
158
- @y.yield Tb::Pairs.new([['filename', path.to_s]])
156
+ @y.yield({'filename' => path.to_s})
159
157
  end
160
158
  end
161
159
 
@@ -179,9 +177,9 @@ class Tb::Cmd::Ls
179
177
  end
180
178
 
181
179
  def ls_long_info(path, st)
182
- Tb::Pairs.new(ls_long_header.map {|info_type|
180
+ Hash[ls_long_header.map {|info_type|
183
181
  [info_type, self.send("ls_info_#{info_type}", path, st)]
184
- })
182
+ }]
185
183
  end
186
184
 
187
185
  def ls_info_dev(path, st) sprintf("0x%x", st.dev) end
@@ -234,7 +232,7 @@ class Tb::Cmd::Ls
234
232
  if pw
235
233
  pw.name
236
234
  else
237
- uid
235
+ uid.to_s
238
236
  end
239
237
  end
240
238
 
@@ -247,7 +245,7 @@ class Tb::Cmd::Ls
247
245
  if gr
248
246
  gr.name
249
247
  else
250
- gid
248
+ gid.to_s
251
249
  end
252
250
  end
253
251
 
@@ -291,11 +289,6 @@ class Tb::Cmd::Ls
291
289
  end
292
290
 
293
291
  def real_pathname_string(str)
294
- if str.respond_to? :force_encoding
295
- # pathname is a sequence of bytes on Unix.
296
- str.dup.force_encoding("ASCII-8BIT")
297
- else
298
- str
299
- end
292
+ str.dup.force_encoding("ASCII-8BIT")
300
293
  end
301
294
  end
@@ -68,7 +68,7 @@ def (Tb::Cmd).main_mheader(argv)
68
68
  }
69
69
  h2 = header_end_p.call
70
70
  if h2
71
- pairs2 = Tb::Pairs.new(h2.map.with_index {|v, i| ["#{i+1}", v] })
71
+ pairs2 = Hash[h2.map.with_index {|v, i| ["#{i+1}", v] }]
72
72
  y.yield pairs2
73
73
  header = nil
74
74
  end
@@ -77,9 +77,8 @@ def (Tb::Cmd).main_mheader(argv)
77
77
  end
78
78
  }
79
79
  }
80
- with_output {|out|
81
- er.write_to_csv(out, false)
82
- }
80
+ Tb::Cmd.opt_N = true
81
+ output_tbenum(er)
83
82
  if header
84
83
  warn "unique header fields not recognized."
85
84
  end
@@ -58,15 +58,12 @@ def (Tb::Cmd).main_nest(argv)
58
58
  cv = pairs.reject {|f, v|
59
59
  oldfields_hash[f]
60
60
  }.map {|f, v|
61
- [smart_cmp_value(f), smart_cmp_value(v)]
61
+ [Tb::Func.smart_cmp_value(f), Tb::Func.smart_cmp_value(v)]
62
62
  }.sort
63
63
  [cv, pairs]
64
64
  }
65
65
 
66
66
  nested = nil
67
- boundary_p = lambda {|(cv1, _), (cv2, _)|
68
- cv1 != cv2
69
- }
70
67
  before_group = lambda {|(_, _)|
71
68
  nested = []
72
69
  }
@@ -82,13 +79,11 @@ def (Tb::Cmd).main_nest(argv)
82
79
  }
83
80
  assoc = last_pairs.reject {|f, v| oldfields_hash[f] }.to_a
84
81
  assoc << [newfield, nested_csv]
85
- pairs = Tb::Pairs.new(assoc)
82
+ pairs = Hash[assoc]
86
83
  y.yield pairs
87
84
  }
88
- sorted.each_group_element(boundary_p, before_group, body, after_group)
89
- }
90
- with_output {|out|
91
- er.write_to_csv(out, !Tb::Cmd.opt_N)
85
+ sorted.detect_group_by(before_group, after_group) {|cv,| cv }.each(&body)
92
86
  }
87
+ output_tbenum(er)
93
88
  end
94
89
 
@@ -47,9 +47,7 @@ def (Tb::Cmd).main_newfield(argv)
47
47
  argv = ['-'] if argv.empty?
48
48
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
49
49
  er = creader.newfield(field) {|pairs| pr.call(pairs) }
50
- with_output {|out|
51
- er.write_to_csv(out, !Tb::Cmd.opt_N)
52
- }
50
+ output_tbenum(er)
53
51
  end
54
52
 
55
53
 
@@ -56,11 +56,9 @@ def (Tb::Cmd).main_rename(argv)
56
56
  }
57
57
  y.set_header header.map {|f| h.fetch(f, f) }
58
58
  }.each {|pairs|
59
- y.yield Tb::Pairs.new(pairs.map {|f, v| [h.fetch(f, f), v] })
59
+ y.yield Hash[pairs.map {|f, v| [h.fetch(f, f), v] }]
60
60
  }
61
61
  }
62
- with_output {|out|
63
- er.write_to_csv(out, !Tb::Cmd.opt_N)
64
- }
62
+ output_tbenum(er)
65
63
  end
66
64
 
@@ -67,8 +67,7 @@ def (Tb::Cmd).main_shape(argv)
67
67
  'filename'=>filename})
68
68
  }
69
69
  }
70
- with_output {|out|
71
- result.write_to_csv(out, true)
72
- }
70
+ Tb::Cmd.opt_N = false
71
+ output_tbenum(result)
73
72
  end
74
73
 
@@ -53,9 +53,9 @@ def (Tb::Cmd).main_sort(argv)
53
53
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
54
54
  header = []
55
55
  if fs
56
- blk = lambda {|pairs| fs.map {|f| smart_cmp_value(pairs[f]) } }
56
+ blk = lambda {|pairs| fs.map {|f| Tb::Func.smart_cmp_value(pairs[f]) } }
57
57
  else
58
- blk = lambda {|pairs| header.map {|f| smart_cmp_value(pairs[f]) } }
58
+ blk = lambda {|pairs| header.map {|f| Tb::Func.smart_cmp_value(pairs[f]) } }
59
59
  end
60
60
  if Tb::Cmd.opt_sort_r
61
61
  blk1 = blk
@@ -71,9 +71,7 @@ def (Tb::Cmd).main_sort(argv)
71
71
  y.yield pairs
72
72
  }
73
73
  }.extsort_by(&blk)
74
- with_output {|out|
75
- er.write_to_csv(out, !Tb::Cmd.opt_N)
76
- }
74
+ output_tbenum(er)
77
75
  end
78
76
 
79
77
 
@@ -101,11 +101,11 @@ class Tb::Cmd::SVNLOGListener
101
101
  @log['paths'].each {|h|
102
102
  assoc = @log.to_a.reject {|f, v| !%w[rev author date msg].include?(f) }
103
103
  assoc += h.to_a.reject {|f, v| !%w[kind action path].include?(f) }
104
- @y.yield Tb::Pairs.new(assoc)
104
+ @y.yield Hash[assoc]
105
105
  }
106
106
  else
107
107
  assoc = @log.to_a.reject {|f, v| !%w[rev author date msg].include?(f) }
108
- @y.yield Tb::Pairs.new(assoc)
108
+ @y.yield Hash[assoc]
109
109
  end
110
110
  @log = nil
111
111
  end
@@ -170,8 +170,6 @@ def (Tb::Cmd).main_svn_log(argv)
170
170
  REXML::Parsers::StreamParser.new(f, listener).parse
171
171
  }
172
172
  }
173
- with_output {|out|
174
- er.write_to_csv(out, !Tb::Cmd.opt_N)
175
- }
173
+ output_tbenum(er)
176
174
  end
177
175
 
@@ -436,12 +436,10 @@ def (Tb::Cmd).main_tar_tvf(argv)
436
436
  formatted["tar_typeflag"] = h[:typeflag]
437
437
  formatted["tar_magic"] = h[:magic]
438
438
  formatted["tar_version"] = h[:version]
439
- y.yield Tb::Pairs.new(header.map {|f2| [f2, formatted[f2]] })
439
+ y.yield Hash[header.map {|f2| [f2, formatted[f2]] }]
440
440
  }
441
441
  }
442
442
  }
443
443
  }
444
- with_output {|out|
445
- er.write_to_csv(out, !Tb::Cmd.opt_N)
446
- }
444
+ output_tbenum(er)
447
445
  end
@@ -51,7 +51,7 @@ def (Tb::Cmd).main_to_csv(argv)
51
51
  er = Tb::Enumerator.new {|y|
52
52
  y.set_header header
53
53
  ter.each {|pairs|
54
- y.yield Tb::Pairs.new(header.map {|f| [f, pairs[f]] })
54
+ y.yield Hash[header.map {|f| [f, pairs[f]] }]
55
55
  }
56
56
  }
57
57
  with_output {|out|
@@ -111,8 +111,6 @@ def (Tb::Cmd).main_unnest(argv)
111
111
  end
112
112
  }
113
113
  }
114
- with_output {|out|
115
- er.write_to_csv(out, !Tb::Cmd.opt_N)
116
- }
114
+ output_tbenum(er)
117
115
  end
118
116
 
@@ -96,150 +96,36 @@ def err(msg)
96
96
  raise SystemExit.new(1, msg)
97
97
  end
98
98
 
99
- def smart_cmp_value(v)
100
- case v
101
- when nil
102
- []
103
- when Numeric
104
- [0, v]
105
- when String
106
- if v.respond_to? :force_encoding
107
- v = v.dup.force_encoding("ASCII-8BIT")
108
- end
109
- case v
110
- when /\A\s*-?\d+\s*\z/
111
- [0, v.to_i(10)]
112
- when /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
113
- [0, Float(v)]
114
- else
115
- a = []
116
- v.scan(/(\d+)|\D+/) {
117
- if $1
118
- a << 0 << $1.to_i
119
- else
120
- a << 1 << $&
121
- end
122
- }
123
- a
124
- end
125
- else
126
- raise ArgumentError, "unexpected: #{v.inspect}"
127
- end
128
- end
129
-
130
- def conv_to_numeric(v)
131
- v = v.strip
132
- if /\A-?\d+\z/ =~ v
133
- v = v.to_i
134
- elsif /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/ =~ v
135
- v = v.to_f
136
- else
137
- raise ArgumentError, "number string expected: #{v.inspect}"
138
- end
139
- v
140
- end
141
-
142
- class CountAggregator
143
- def initialize() @result = 0 end
144
- def update(v) @result += 1 end
145
- def finish() @result end
146
- end
147
-
148
- class SumAggregator
149
- def initialize() @result = 0 end
150
- def update(v) @result += conv_to_numeric(v) if !(v.nil? || v == '') end
151
- def finish() @result end
152
- end
153
-
154
- class AvgAggregator
155
- def initialize() @sum = 0; @count = 0 end
156
- def update(v) @count += 1; @sum += conv_to_numeric(v) if !(v.nil? || v == '') end
157
- def finish() @sum / @count.to_f end
158
- end
159
-
160
- class MaxAggregator
161
- def initialize() @v = nil; @cmp = nil end
162
- def update(v)
163
- cmp = smart_cmp_value(v)
164
- if @cmp == nil
165
- @v, @cmp = v, cmp
166
- else
167
- @v, @cmp = v, cmp if (@cmp <=> cmp) < 0
168
- end
169
- end
170
- def finish() @v end
171
- end
172
-
173
- class MinAggregator
174
- def initialize() @v = @cmp = nil end
175
- def update(v)
176
- cmp = smart_cmp_value(v)
177
- if @cmp == nil
178
- @v, @cmp = v, cmp
179
- else
180
- @v, @cmp = v, cmp if (@cmp <=> cmp) > 0
181
- end
182
- end
183
- def finish() @v end
184
- end
185
-
186
- class ValuesAggregator
187
- def initialize() @result = [] end
188
- def update(v) @result << v if v end
189
- def finish() @result.join(",") end
190
- end
191
-
192
- class UniqueValuesAggregator
193
- def initialize() @result = [] end
194
- def update(v) @result << v if v end
195
- def finish() @result.uniq.join(",") end
196
- end
197
-
198
- class Selector
199
- def initialize(i, aggregator) @i = i; @agg = aggregator end
200
- def update(ary) @agg.update(ary[@i]) end
201
- def finish() @agg.finish end
202
- end
203
-
204
- def make_aggregator(spec, fs)
99
+ def parse_aggregator_spec(spec)
205
100
  case spec
206
101
  when 'count'
207
- CountAggregator.new
102
+ ['count', nil]
208
103
  when /\Asum\((.*)\)\z/
209
- field = $1
210
- i = fs.index(field)
211
- raise ArgumentError, "field not found: #{field.inspect}" if !i
212
- Selector.new(i, SumAggregator.new)
104
+ ['sum', $1]
213
105
  when /\Aavg\((.*)\)\z/
214
- field = $1
215
- i = fs.index(field)
216
- raise ArgumentError, "field not found: #{field.inspect}" if !i
217
- Selector.new(i, AvgAggregator.new)
106
+ ['avg', $1]
218
107
  when /\Amax\((.*)\)\z/
219
- field = $1
220
- i = fs.index(field)
221
- raise ArgumentError, "field not found: #{field.inspect}" if !i
222
- Selector.new(i, MaxAggregator.new)
108
+ ['max', $1]
223
109
  when /\Amin\((.*)\)\z/
224
- field = $1
225
- i = fs.index(field)
226
- raise ArgumentError, "field not found: #{field.inspect}" if !i
227
- Selector.new(i, MinAggregator.new)
110
+ ['min', $1]
228
111
  when /\Avalues\((.*)\)\z/
229
- field = $1
230
- i = fs.index(field)
231
- raise ArgumentError, "field not found: #{field.inspect}" if !i
232
- Selector.new(i, ValuesAggregator.new)
112
+ ['values', $1]
233
113
  when /\Auniquevalues\((.*)\)\z/
234
- field = $1
235
- i = fs.index(field)
236
- raise ArgumentError, "field not found: #{field.inspect}" if !i
237
- Selector.new(i, UniqueValuesAggregator.new)
114
+ ['uniquevalues', $1]
238
115
  else
239
116
  raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
240
117
  end
241
118
  end
242
119
 
120
+ def parse_aggregator_spec2(spec)
121
+ name, field = parse_aggregator_spec(spec)
122
+ func = Tb::Func::AggregationFunctions[name]
123
+ if !func
124
+ raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
125
+ end
126
+ [func, field]
127
+ end
128
+
243
129
  def split_field_list_argument(arg)
244
130
  split_csv_argument(arg).map {|f| f || '' }
245
131
  end
@@ -266,14 +152,18 @@ def tbl_generate_tsv(tbl, out)
266
152
  end
267
153
  end
268
154
 
269
- def with_output
270
- if Tb::Cmd.opt_output
271
- tmp = Tb::Cmd.opt_output + ".part"
155
+ def with_output(filename=Tb::Cmd.opt_output)
156
+ if filename && filename != '-'
157
+ tmp = filename + ".part"
272
158
  begin
273
159
  File.open(tmp, 'w') {|f|
274
160
  yield f
275
161
  }
276
- File.rename tmp, Tb::Cmd.opt_output
162
+ if File.exist?(filename) && FileUtils.compare_file(filename, tmp)
163
+ File.unlink tmp
164
+ else
165
+ File.rename tmp, filename
166
+ end
277
167
  ensure
278
168
  File.unlink tmp if File.exist? tmp
279
169
  end
@@ -285,3 +175,35 @@ def with_output
285
175
  yield $stdout
286
176
  end
287
177
  end
178
+
179
+ def output_tbenum(te)
180
+ filename = Tb::Cmd.opt_output
181
+ if /\A([a-z0-9]{2,}):/ =~ filename
182
+ fmt = $1
183
+ filename = $'
184
+ else
185
+ fmt = nil
186
+ end
187
+ if !fmt
188
+ case filename
189
+ when /\.csv\z/
190
+ fmt = 'csv'
191
+ when /\.json\z/
192
+ fmt = 'json'
193
+ end
194
+ end
195
+ if fmt
196
+ case fmt
197
+ when 'csv'
198
+ write_proc = lambda {|out| te.write_to_csv(out, !Tb::Cmd.opt_N) }
199
+ when 'json'
200
+ write_proc = lambda {|out| te.write_to_json(out) }
201
+ else
202
+ err("unexpected format: #{fmt.inspect}")
203
+ end
204
+ end
205
+ write_proc ||= lambda {|out| te.write_to_csv(out, !Tb::Cmd.opt_N) }
206
+ with_output(filename) {|out|
207
+ write_proc.call(out)
208
+ }
209
+ end