tb 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
@@ -71,8 +71,6 @@ def (Tb::Cmd).main_join(argv)
71
71
  $stderr.puts "shared keys: #{(result.list_fields & tbl.list_fields).inspect}" if 1 <= Tb::Cmd.opt_debug
72
72
  result = result.natjoin2_outer(tbl, Tb::Cmd.opt_join_outer_missing, retain_left, retain_right)
73
73
  }
74
- with_output {|out|
75
- result.write_to_csv(out, !Tb::Cmd.opt_N)
76
- }
74
+ output_tbenum(result)
77
75
  end
78
76
 
@@ -63,9 +63,7 @@ def (Tb::Cmd).main_ls(argv)
63
63
  ls.ls_run(Pathname(ls.real_pathname_string(arg)))
64
64
  }
65
65
  }
66
- with_output {|out|
67
- er.write_to_csv(out, !Tb::Cmd.opt_N)
68
- }
66
+ output_tbenum(er)
69
67
  if ls.fail
70
68
  exit false
71
69
  end
@@ -106,7 +104,7 @@ class Tb::Cmd::Ls
106
104
  return
107
105
  end
108
106
  entries.map! {|filename| real_pathname_string(filename) }
109
- entries = entries.sort_by {|filename| smart_cmp_value(filename) }
107
+ entries = entries.sort_by {|filename| Tb::Func.smart_cmp_value(filename) }
110
108
  if @opts[:a] || @opts[:A]
111
109
  entries1, entries2 = entries.partition {|filename| /\A\./ =~ filename }
112
110
  entries0, entries1 = entries1.partition {|filename| filename == '.' || filename == '..' }
@@ -155,7 +153,7 @@ class Tb::Cmd::Ls
155
153
  end
156
154
  @y.yield ls_long_info(path, st)
157
155
  else
158
- @y.yield Tb::Pairs.new([['filename', path.to_s]])
156
+ @y.yield({'filename' => path.to_s})
159
157
  end
160
158
  end
161
159
 
@@ -179,9 +177,9 @@ class Tb::Cmd::Ls
179
177
  end
180
178
 
181
179
  def ls_long_info(path, st)
182
- Tb::Pairs.new(ls_long_header.map {|info_type|
180
+ Hash[ls_long_header.map {|info_type|
183
181
  [info_type, self.send("ls_info_#{info_type}", path, st)]
184
- })
182
+ }]
185
183
  end
186
184
 
187
185
  def ls_info_dev(path, st) sprintf("0x%x", st.dev) end
@@ -234,7 +232,7 @@ class Tb::Cmd::Ls
234
232
  if pw
235
233
  pw.name
236
234
  else
237
- uid
235
+ uid.to_s
238
236
  end
239
237
  end
240
238
 
@@ -247,7 +245,7 @@ class Tb::Cmd::Ls
247
245
  if gr
248
246
  gr.name
249
247
  else
250
- gid
248
+ gid.to_s
251
249
  end
252
250
  end
253
251
 
@@ -291,11 +289,6 @@ class Tb::Cmd::Ls
291
289
  end
292
290
 
293
291
  def real_pathname_string(str)
294
- if str.respond_to? :force_encoding
295
- # pathname is a sequence of bytes on Unix.
296
- str.dup.force_encoding("ASCII-8BIT")
297
- else
298
- str
299
- end
292
+ str.dup.force_encoding("ASCII-8BIT")
300
293
  end
301
294
  end
@@ -68,7 +68,7 @@ def (Tb::Cmd).main_mheader(argv)
68
68
  }
69
69
  h2 = header_end_p.call
70
70
  if h2
71
- pairs2 = Tb::Pairs.new(h2.map.with_index {|v, i| ["#{i+1}", v] })
71
+ pairs2 = Hash[h2.map.with_index {|v, i| ["#{i+1}", v] }]
72
72
  y.yield pairs2
73
73
  header = nil
74
74
  end
@@ -77,9 +77,8 @@ def (Tb::Cmd).main_mheader(argv)
77
77
  end
78
78
  }
79
79
  }
80
- with_output {|out|
81
- er.write_to_csv(out, false)
82
- }
80
+ Tb::Cmd.opt_N = true
81
+ output_tbenum(er)
83
82
  if header
84
83
  warn "unique header fields not recognized."
85
84
  end
@@ -58,15 +58,12 @@ def (Tb::Cmd).main_nest(argv)
58
58
  cv = pairs.reject {|f, v|
59
59
  oldfields_hash[f]
60
60
  }.map {|f, v|
61
- [smart_cmp_value(f), smart_cmp_value(v)]
61
+ [Tb::Func.smart_cmp_value(f), Tb::Func.smart_cmp_value(v)]
62
62
  }.sort
63
63
  [cv, pairs]
64
64
  }
65
65
 
66
66
  nested = nil
67
- boundary_p = lambda {|(cv1, _), (cv2, _)|
68
- cv1 != cv2
69
- }
70
67
  before_group = lambda {|(_, _)|
71
68
  nested = []
72
69
  }
@@ -82,13 +79,11 @@ def (Tb::Cmd).main_nest(argv)
82
79
  }
83
80
  assoc = last_pairs.reject {|f, v| oldfields_hash[f] }.to_a
84
81
  assoc << [newfield, nested_csv]
85
- pairs = Tb::Pairs.new(assoc)
82
+ pairs = Hash[assoc]
86
83
  y.yield pairs
87
84
  }
88
- sorted.each_group_element(boundary_p, before_group, body, after_group)
89
- }
90
- with_output {|out|
91
- er.write_to_csv(out, !Tb::Cmd.opt_N)
85
+ sorted.detect_group_by(before_group, after_group) {|cv,| cv }.each(&body)
92
86
  }
87
+ output_tbenum(er)
93
88
  end
94
89
 
@@ -47,9 +47,7 @@ def (Tb::Cmd).main_newfield(argv)
47
47
  argv = ['-'] if argv.empty?
48
48
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
49
49
  er = creader.newfield(field) {|pairs| pr.call(pairs) }
50
- with_output {|out|
51
- er.write_to_csv(out, !Tb::Cmd.opt_N)
52
- }
50
+ output_tbenum(er)
53
51
  end
54
52
 
55
53
 
@@ -56,11 +56,9 @@ def (Tb::Cmd).main_rename(argv)
56
56
  }
57
57
  y.set_header header.map {|f| h.fetch(f, f) }
58
58
  }.each {|pairs|
59
- y.yield Tb::Pairs.new(pairs.map {|f, v| [h.fetch(f, f), v] })
59
+ y.yield Hash[pairs.map {|f, v| [h.fetch(f, f), v] }]
60
60
  }
61
61
  }
62
- with_output {|out|
63
- er.write_to_csv(out, !Tb::Cmd.opt_N)
64
- }
62
+ output_tbenum(er)
65
63
  end
66
64
 
@@ -67,8 +67,7 @@ def (Tb::Cmd).main_shape(argv)
67
67
  'filename'=>filename})
68
68
  }
69
69
  }
70
- with_output {|out|
71
- result.write_to_csv(out, true)
72
- }
70
+ Tb::Cmd.opt_N = false
71
+ output_tbenum(result)
73
72
  end
74
73
 
@@ -53,9 +53,9 @@ def (Tb::Cmd).main_sort(argv)
53
53
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
54
54
  header = []
55
55
  if fs
56
- blk = lambda {|pairs| fs.map {|f| smart_cmp_value(pairs[f]) } }
56
+ blk = lambda {|pairs| fs.map {|f| Tb::Func.smart_cmp_value(pairs[f]) } }
57
57
  else
58
- blk = lambda {|pairs| header.map {|f| smart_cmp_value(pairs[f]) } }
58
+ blk = lambda {|pairs| header.map {|f| Tb::Func.smart_cmp_value(pairs[f]) } }
59
59
  end
60
60
  if Tb::Cmd.opt_sort_r
61
61
  blk1 = blk
@@ -71,9 +71,7 @@ def (Tb::Cmd).main_sort(argv)
71
71
  y.yield pairs
72
72
  }
73
73
  }.extsort_by(&blk)
74
- with_output {|out|
75
- er.write_to_csv(out, !Tb::Cmd.opt_N)
76
- }
74
+ output_tbenum(er)
77
75
  end
78
76
 
79
77
 
@@ -101,11 +101,11 @@ class Tb::Cmd::SVNLOGListener
101
101
  @log['paths'].each {|h|
102
102
  assoc = @log.to_a.reject {|f, v| !%w[rev author date msg].include?(f) }
103
103
  assoc += h.to_a.reject {|f, v| !%w[kind action path].include?(f) }
104
- @y.yield Tb::Pairs.new(assoc)
104
+ @y.yield Hash[assoc]
105
105
  }
106
106
  else
107
107
  assoc = @log.to_a.reject {|f, v| !%w[rev author date msg].include?(f) }
108
- @y.yield Tb::Pairs.new(assoc)
108
+ @y.yield Hash[assoc]
109
109
  end
110
110
  @log = nil
111
111
  end
@@ -170,8 +170,6 @@ def (Tb::Cmd).main_svn_log(argv)
170
170
  REXML::Parsers::StreamParser.new(f, listener).parse
171
171
  }
172
172
  }
173
- with_output {|out|
174
- er.write_to_csv(out, !Tb::Cmd.opt_N)
175
- }
173
+ output_tbenum(er)
176
174
  end
177
175
 
@@ -436,12 +436,10 @@ def (Tb::Cmd).main_tar_tvf(argv)
436
436
  formatted["tar_typeflag"] = h[:typeflag]
437
437
  formatted["tar_magic"] = h[:magic]
438
438
  formatted["tar_version"] = h[:version]
439
- y.yield Tb::Pairs.new(header.map {|f2| [f2, formatted[f2]] })
439
+ y.yield Hash[header.map {|f2| [f2, formatted[f2]] }]
440
440
  }
441
441
  }
442
442
  }
443
443
  }
444
- with_output {|out|
445
- er.write_to_csv(out, !Tb::Cmd.opt_N)
446
- }
444
+ output_tbenum(er)
447
445
  end
@@ -51,7 +51,7 @@ def (Tb::Cmd).main_to_csv(argv)
51
51
  er = Tb::Enumerator.new {|y|
52
52
  y.set_header header
53
53
  ter.each {|pairs|
54
- y.yield Tb::Pairs.new(header.map {|f| [f, pairs[f]] })
54
+ y.yield Hash[header.map {|f| [f, pairs[f]] }]
55
55
  }
56
56
  }
57
57
  with_output {|out|
@@ -111,8 +111,6 @@ def (Tb::Cmd).main_unnest(argv)
111
111
  end
112
112
  }
113
113
  }
114
- with_output {|out|
115
- er.write_to_csv(out, !Tb::Cmd.opt_N)
116
- }
114
+ output_tbenum(er)
117
115
  end
118
116
 
@@ -96,150 +96,36 @@ def err(msg)
96
96
  raise SystemExit.new(1, msg)
97
97
  end
98
98
 
99
- def smart_cmp_value(v)
100
- case v
101
- when nil
102
- []
103
- when Numeric
104
- [0, v]
105
- when String
106
- if v.respond_to? :force_encoding
107
- v = v.dup.force_encoding("ASCII-8BIT")
108
- end
109
- case v
110
- when /\A\s*-?\d+\s*\z/
111
- [0, v.to_i(10)]
112
- when /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
113
- [0, Float(v)]
114
- else
115
- a = []
116
- v.scan(/(\d+)|\D+/) {
117
- if $1
118
- a << 0 << $1.to_i
119
- else
120
- a << 1 << $&
121
- end
122
- }
123
- a
124
- end
125
- else
126
- raise ArgumentError, "unexpected: #{v.inspect}"
127
- end
128
- end
129
-
130
- def conv_to_numeric(v)
131
- v = v.strip
132
- if /\A-?\d+\z/ =~ v
133
- v = v.to_i
134
- elsif /\A-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\z/ =~ v
135
- v = v.to_f
136
- else
137
- raise ArgumentError, "number string expected: #{v.inspect}"
138
- end
139
- v
140
- end
141
-
142
- class CountAggregator
143
- def initialize() @result = 0 end
144
- def update(v) @result += 1 end
145
- def finish() @result end
146
- end
147
-
148
- class SumAggregator
149
- def initialize() @result = 0 end
150
- def update(v) @result += conv_to_numeric(v) if !(v.nil? || v == '') end
151
- def finish() @result end
152
- end
153
-
154
- class AvgAggregator
155
- def initialize() @sum = 0; @count = 0 end
156
- def update(v) @count += 1; @sum += conv_to_numeric(v) if !(v.nil? || v == '') end
157
- def finish() @sum / @count.to_f end
158
- end
159
-
160
- class MaxAggregator
161
- def initialize() @v = nil; @cmp = nil end
162
- def update(v)
163
- cmp = smart_cmp_value(v)
164
- if @cmp == nil
165
- @v, @cmp = v, cmp
166
- else
167
- @v, @cmp = v, cmp if (@cmp <=> cmp) < 0
168
- end
169
- end
170
- def finish() @v end
171
- end
172
-
173
- class MinAggregator
174
- def initialize() @v = @cmp = nil end
175
- def update(v)
176
- cmp = smart_cmp_value(v)
177
- if @cmp == nil
178
- @v, @cmp = v, cmp
179
- else
180
- @v, @cmp = v, cmp if (@cmp <=> cmp) > 0
181
- end
182
- end
183
- def finish() @v end
184
- end
185
-
186
- class ValuesAggregator
187
- def initialize() @result = [] end
188
- def update(v) @result << v if v end
189
- def finish() @result.join(",") end
190
- end
191
-
192
- class UniqueValuesAggregator
193
- def initialize() @result = [] end
194
- def update(v) @result << v if v end
195
- def finish() @result.uniq.join(",") end
196
- end
197
-
198
- class Selector
199
- def initialize(i, aggregator) @i = i; @agg = aggregator end
200
- def update(ary) @agg.update(ary[@i]) end
201
- def finish() @agg.finish end
202
- end
203
-
204
- def make_aggregator(spec, fs)
99
+ def parse_aggregator_spec(spec)
205
100
  case spec
206
101
  when 'count'
207
- CountAggregator.new
102
+ ['count', nil]
208
103
  when /\Asum\((.*)\)\z/
209
- field = $1
210
- i = fs.index(field)
211
- raise ArgumentError, "field not found: #{field.inspect}" if !i
212
- Selector.new(i, SumAggregator.new)
104
+ ['sum', $1]
213
105
  when /\Aavg\((.*)\)\z/
214
- field = $1
215
- i = fs.index(field)
216
- raise ArgumentError, "field not found: #{field.inspect}" if !i
217
- Selector.new(i, AvgAggregator.new)
106
+ ['avg', $1]
218
107
  when /\Amax\((.*)\)\z/
219
- field = $1
220
- i = fs.index(field)
221
- raise ArgumentError, "field not found: #{field.inspect}" if !i
222
- Selector.new(i, MaxAggregator.new)
108
+ ['max', $1]
223
109
  when /\Amin\((.*)\)\z/
224
- field = $1
225
- i = fs.index(field)
226
- raise ArgumentError, "field not found: #{field.inspect}" if !i
227
- Selector.new(i, MinAggregator.new)
110
+ ['min', $1]
228
111
  when /\Avalues\((.*)\)\z/
229
- field = $1
230
- i = fs.index(field)
231
- raise ArgumentError, "field not found: #{field.inspect}" if !i
232
- Selector.new(i, ValuesAggregator.new)
112
+ ['values', $1]
233
113
  when /\Auniquevalues\((.*)\)\z/
234
- field = $1
235
- i = fs.index(field)
236
- raise ArgumentError, "field not found: #{field.inspect}" if !i
237
- Selector.new(i, UniqueValuesAggregator.new)
114
+ ['uniquevalues', $1]
238
115
  else
239
116
  raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
240
117
  end
241
118
  end
242
119
 
120
+ def parse_aggregator_spec2(spec)
121
+ name, field = parse_aggregator_spec(spec)
122
+ func = Tb::Func::AggregationFunctions[name]
123
+ if !func
124
+ raise ArgumentError, "unexpected aggregation spec: #{spec.inspect}"
125
+ end
126
+ [func, field]
127
+ end
128
+
243
129
  def split_field_list_argument(arg)
244
130
  split_csv_argument(arg).map {|f| f || '' }
245
131
  end
@@ -266,14 +152,18 @@ def tbl_generate_tsv(tbl, out)
266
152
  end
267
153
  end
268
154
 
269
- def with_output
270
- if Tb::Cmd.opt_output
271
- tmp = Tb::Cmd.opt_output + ".part"
155
+ def with_output(filename=Tb::Cmd.opt_output)
156
+ if filename && filename != '-'
157
+ tmp = filename + ".part"
272
158
  begin
273
159
  File.open(tmp, 'w') {|f|
274
160
  yield f
275
161
  }
276
- File.rename tmp, Tb::Cmd.opt_output
162
+ if File.exist?(filename) && FileUtils.compare_file(filename, tmp)
163
+ File.unlink tmp
164
+ else
165
+ File.rename tmp, filename
166
+ end
277
167
  ensure
278
168
  File.unlink tmp if File.exist? tmp
279
169
  end
@@ -285,3 +175,35 @@ def with_output
285
175
  yield $stdout
286
176
  end
287
177
  end
178
+
179
+ def output_tbenum(te)
180
+ filename = Tb::Cmd.opt_output
181
+ if /\A([a-z0-9]{2,}):/ =~ filename
182
+ fmt = $1
183
+ filename = $'
184
+ else
185
+ fmt = nil
186
+ end
187
+ if !fmt
188
+ case filename
189
+ when /\.csv\z/
190
+ fmt = 'csv'
191
+ when /\.json\z/
192
+ fmt = 'json'
193
+ end
194
+ end
195
+ if fmt
196
+ case fmt
197
+ when 'csv'
198
+ write_proc = lambda {|out| te.write_to_csv(out, !Tb::Cmd.opt_N) }
199
+ when 'json'
200
+ write_proc = lambda {|out| te.write_to_json(out) }
201
+ else
202
+ err("unexpected format: #{fmt.inspect}")
203
+ end
204
+ end
205
+ write_proc ||= lambda {|out| te.write_to_csv(out, !Tb::Cmd.opt_N) }
206
+ with_output(filename) {|out|
207
+ write_proc.call(out)
208
+ }
209
+ end