tb 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
data/README CHANGED
@@ -1,8 +1,9 @@
1
- = tb - manipulation tool for table: CSV, TSV, etc.
1
+ = tb - manipulation tool for table: CSV, TSV, JSON, etc.
2
2
 
3
3
  tb provides a command and a library for manipulating tables:
4
4
  Unix filter like operations (grep, sort, cat, cut, ls, etc.),
5
5
  SQL like operations (join, group, etc.),
6
+ information extractions (git-log, svn-log, tar-tvf),
6
7
  and more.
7
8
 
8
9
  == Example
data/lib/tb.rb CHANGED
@@ -34,9 +34,10 @@ class Tb
34
34
  end
35
35
 
36
36
  require 'pp'
37
- require 'tb/enum'
37
+ require 'tb/enumerable'
38
38
  require 'tb/enumerator'
39
- require 'tb/pairs'
39
+ require 'tb/func'
40
+ require 'tb/zipper'
40
41
  require 'tb/basic'
41
42
  require 'tb/record'
42
43
  require 'tb/csv'
@@ -48,6 +49,9 @@ require 'tb/ropen'
48
49
  require 'tb/catreader'
49
50
  require 'tb/fieldset'
50
51
  require 'tb/search'
51
- require 'tb/enumerable'
52
+ require 'tb/ex_enumerable'
53
+ require 'tb/ex_enumerator'
52
54
  require 'tb/fileenumerator'
53
55
  require 'tb/revcmp'
56
+ require 'tb/customcmp'
57
+ require 'tb/customeq'
data/lib/tb/basic.rb CHANGED
@@ -55,7 +55,7 @@
55
55
  # It is an error to access a record by recordid which is not allocated.
56
56
  #
57
57
  class Tb
58
- include Tb::Enum
58
+ include Tb::Enumerable
59
59
 
60
60
  # :call-seq:
61
61
  # Tb.new
data/lib/tb/cmd_cat.rb CHANGED
@@ -63,7 +63,5 @@ def (Tb::Cmd).main_cat(argv)
63
63
  exit_if_help('cat')
64
64
  argv = ['-'] if argv.empty?
65
65
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N, Tb::Cmd.opt_cat_with_filename)
66
- with_output {|out|
67
- creader.write_to_csv(out, !Tb::Cmd.opt_N)
68
- }
66
+ output_tbenum(creader)
69
67
  end
data/lib/tb/cmd_consecutive.rb CHANGED
@@ -68,22 +68,20 @@ def (Tb::Cmd).main_consecutive(argv)
68
68
  }.each {|pairs, header|
69
69
  buf << pairs
70
70
  if buf.length == Tb::Cmd.opt_consecutive_n
71
- pairs2 = []
71
+ pairs2 = {}
72
72
  header.each {|f|
73
73
  Tb::Cmd.opt_consecutive_n.times {|i|
74
74
  ps = buf[i]
75
75
  next if !ps.has_key?(f)
76
76
  v = ps[f]
77
- pairs2 << ["#{f}_#{i+1}", v]
77
+ pairs2["#{f}_#{i+1}"] = v
78
78
  }
79
79
  }
80
80
  empty = false
81
- y.yield Tb::Pairs.new(pairs2)
81
+ y.yield pairs2
82
82
  buf.shift
83
83
  end
84
84
  }
85
85
  }
86
- with_output {|out|
87
- er.write_to_csv(out, !Tb::Cmd.opt_N)
88
- }
86
+ output_tbenum(er)
89
87
  end
data/lib/tb/cmd_crop.rb CHANGED
@@ -84,9 +84,8 @@ def (Tb::Cmd).main_crop(argv)
84
84
  rownum += 1
85
85
  }
86
86
  }
87
- with_output {|out|
88
- er.write_to_csv(out, false)
89
- }
87
+ Tb::Cmd.opt_N = true
88
+ output_tbenum(er)
90
89
  else
91
90
  creader = Tb::CatReader.open(argv, true)
92
91
  last_nonempty_row = nil
@@ -127,13 +126,12 @@ def (Tb::Cmd).main_crop(argv)
127
126
  break
128
127
  end
129
128
  ary.slice!(0, lmargin_min)
130
- pairs = Tb::Pairs.new(ary.map.with_index {|v, i| ["#{i+1}", v]})
129
+ pairs = Hash[ary.map.with_index {|v, i| ["#{i+1}", v]}]
131
130
  y.yield pairs
132
131
  }
133
132
  }
134
- with_output {|out|
135
- er.write_to_csv(out, false)
136
- }
133
+ Tb::Cmd.opt_N = true
134
+ output_tbenum(er)
137
135
  end
138
136
  end
139
137
 
data/lib/tb/cmd_cross.rb CHANGED
@@ -49,36 +49,51 @@ def (Tb::Cmd).main_cross(argv)
49
49
  vkfs = split_field_list_argument(argv.shift)
50
50
  err('no hkey-fields given.') if argv.empty?
51
51
  hkfs = split_field_list_argument(argv.shift)
52
+ vhkfs = vkfs + hkfs
52
53
  if Tb::Cmd.opt_cross_fields.empty?
53
- opt_cross_fields = [['count', 'count']]
54
+ num_aggregate_fields = 1
55
+ opt_cross_fields = (vkfs + hkfs).map {|f| [f, Tb::Func::First, f] } +
56
+ [['count', Tb::Func::Count, nil]]
54
57
  else
55
- opt_cross_fields = Tb::Cmd.opt_cross_fields.map {|arg|
58
+ num_aggregate_fields = Tb::Cmd.opt_cross_fields.length
59
+ opt_cross_fields = (vkfs + hkfs).map {|f| [f, Tb::Func::First, f] } +
60
+ Tb::Cmd.opt_cross_fields.map {|arg|
56
61
  agg_spec, new_field = split_field_list_argument(arg)
57
62
  new_field ||= agg_spec
58
- [agg_spec, new_field]
63
+ begin
64
+ func_srcf = parse_aggregator_spec2(agg_spec)
65
+ rescue ArgumentError
66
+ err($!.message)
67
+ end
68
+ [new_field, *func_srcf]
59
69
  }
60
70
  end
61
71
  argv = ['-'] if argv.empty?
62
72
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
63
73
  er = Tb::Enumerator.new {|y|
64
- header = nil
65
74
  hvs_hash = {}
66
75
  hvs_list = nil
67
- sorted = creader.extsort_by {|pairs|
68
- hvs = hkfs.map {|f| pairs[f] }
69
- hvs_hash[hvs] = true
70
- vcv = vkfs.map {|f| smart_cmp_value(pairs[f]) }
71
- vcv
72
- }
73
- sorted2 = sorted.with_header {|header0|
74
- header = header0
75
- (vkfs + hkfs).each {|f|
76
+ aggs_hash = nil
77
+ op = Tb::Zipper.new(opt_cross_fields.map {|dstf, func, srcf| func })
78
+ er = creader.with_header {|header0|
79
+ vhkfs.each {|f|
76
80
  if !header0.include?(f)
77
81
  err("field not found: #{f}")
78
82
  end
79
83
  }
80
- hvs_list = hvs_hash.keys.sort_by {|hvs| hvs.map {|hv| smart_cmp_value(hv) } }
81
- n = vkfs.length + hvs_list.length * opt_cross_fields.length
84
+ }.extsort_reduce(op) {|pairs|
85
+ vvs = vkfs.map {|f| pairs[f] }
86
+ hvs = hkfs.map {|f| pairs[f] }
87
+ vvsc = vvs.map {|v| Tb::Func.smart_cmp_value(v) }
88
+ hvsc = hvs.map {|v| Tb::Func.smart_cmp_value(v) }
89
+ hvs_hash[hvs] = hvsc
90
+ aggs = opt_cross_fields.map {|dstf, func, srcf| func.start(srcf ? pairs[srcf] : true) }
91
+ [[vvsc, hvsc], aggs]
92
+ }
93
+ all_representative = lambda {|_| 1 }
94
+ all_before = lambda {|_|
95
+ hvs_list = hvs_hash.keys.sort_by {|hvs| hvs_hash[hvs] }
96
+ n = vkfs.length + hvs_list.length * num_aggregate_fields
82
97
  header1 = (1..n).map {|i| i.to_s }
83
98
  y.set_header header1
84
99
  hkfs.each_with_index {|hkf, i|
@@ -87,7 +102,7 @@ def (Tb::Cmd).main_cross(argv)
87
102
  j = vkfs.length
88
103
  h1[j.to_s] = hkf
89
104
  hvs_list.each {|hkvs|
90
- opt_cross_fields.length.times {
105
+ num_aggregate_fields.times {
91
106
  j += 1
92
107
  h1[j.to_s] = hkvs[i]
93
108
  }
@@ -101,49 +116,35 @@ def (Tb::Cmd).main_cross(argv)
101
116
  h2[j.to_s] = vkf
102
117
  }
103
118
  hvs_list.each {|hkvs|
104
- opt_cross_fields.each {|agg_spec, new_field|
119
+ opt_cross_fields.last(num_aggregate_fields).each {|dstf, func, srcf|
105
120
  j += 1
106
121
  if Tb::Cmd.opt_cross_compact
107
122
  h2[j.to_s] = hkvs[-1]
108
123
  else
109
- h2[j.to_s] = new_field
124
+ h2[j.to_s] = dstf
110
125
  end
111
126
  }
112
127
  }
113
128
  y.yield h2
114
129
  }
115
- boudary_p = lambda {|pairs1, pairs2|
116
- vcv1 = vkfs.map {|f| smart_cmp_value(pairs1[f]) }
117
- vcv2 = vkfs.map {|f| smart_cmp_value(pairs2[f]) }
118
- vcv1 != vcv2
130
+ v_representative = lambda {|((vvsc, _), _)|
131
+ vvsc
119
132
  }
120
- aggs = nil
121
- before = lambda {|_|
122
- aggs = {}
133
+ v_before = lambda {|_|
134
+ aggs_hash = {}
123
135
  }
124
- body = lambda {|pairs|
125
- hvs = hkfs.map {|f| pairs[f] }
126
- if !aggs.has_key?(hvs)
127
- aggs[hvs] = opt_cross_fields.map {|agg_spec, nf|
128
- begin
129
- make_aggregator(agg_spec, header)
130
- rescue ArgumentError
131
- err($!.message)
132
- end
133
- }
134
- end
135
- ary = header.map {|f| pairs[f] }
136
- aggs[hvs].each {|agg|
137
- agg.update(ary)
138
- }
136
+ body = lambda {|(_, aggs)|
137
+ hvs = aggs[vkfs.length, hkfs.length]
138
+ aggs_hash[hvs] = op.aggregate(aggs)
139
139
  }
140
- after = lambda {|last_pairs|
141
- ary = vkfs.map {|f| last_pairs[f] }
140
+ v_after = lambda {|(_, aggs)|
141
+ vvs = aggs[0, vkfs.length]
142
+ ary = vvs
142
143
  hvs_list.each {|hvs|
143
- if aggs.has_key? hvs
144
- ary.concat(aggs[hvs].map {|agg| agg.finish })
144
+ if aggs_hash.has_key? hvs
145
+ ary.concat(aggs_hash[hvs].last(num_aggregate_fields))
145
146
  else
146
- ary.concat([nil] * opt_cross_fields.length)
147
+ ary.concat([nil] * num_aggregate_fields)
147
148
  end
148
149
  }
149
150
  pairs = {}
@@ -152,10 +153,11 @@ def (Tb::Cmd).main_cross(argv)
152
153
  }
153
154
  y.yield pairs
154
155
  }
155
- sorted2.each_group_element(boudary_p, before, body, after)
156
- }
157
- with_output {|out|
158
- er.write_to_csv(out, false)
156
+ er.detect_nested_group_by(
157
+ [[all_representative, all_before],
158
+ [v_representative, v_before, v_after]]).each(&body)
159
159
  }
160
+ Tb::Cmd.opt_N = true
161
+ output_tbenum(er)
160
162
  end
161
163
 
data/lib/tb/cmd_cut.rb CHANGED
@@ -56,9 +56,7 @@ def (Tb::Cmd).main_cut(argv)
56
56
  y.yield pairs.reject {|k, v| fs.include? k }
57
57
  }
58
58
  }
59
- with_output {|out|
60
- er.write_to_csv(out, !Tb::Cmd.opt_N)
61
- }
59
+ output_tbenum(er)
62
60
  else
63
61
  er = Tb::Enumerator.new {|y|
64
62
  tblreader.with_header {|header0|
@@ -73,9 +71,7 @@ def (Tb::Cmd).main_cut(argv)
73
71
  y.yield pairs.reject {|k, v| !fs.include?(k) }
74
72
  }
75
73
  }
76
- with_output {|out|
77
- er.write_to_csv(out, !Tb::Cmd.opt_N)
78
- }
74
+ output_tbenum(er)
79
75
  end
80
76
  }
81
77
  end
data/lib/tb/cmd_git_log.rb CHANGED
@@ -29,15 +29,17 @@
29
29
  Tb::Cmd.subcommands << 'git-log'
30
30
 
31
31
  Tb::Cmd.default_option[:opt_git_log_git_command] = nil
32
- Tb::Cmd.default_option[:opt_git_log_debug_git_log_file] = nil
32
+ Tb::Cmd.default_option[:opt_git_log_debug_input] = nil
33
+ Tb::Cmd.default_option[:opt_git_log_debug_output] = nil
33
34
 
34
35
  def (Tb::Cmd).op_git_log
35
36
  op = OptionParser.new
36
37
  op.banner = "Usage: tb git-log [OPTS] [GIT-DIR ...]\n" +
37
38
  "Show the GIT log as a table."
38
- define_common_option(op, "hNo", "--no-pager")
39
+ define_common_option(op, "hNod", "--no-pager", '--debug')
39
40
  op.def_option('--git-command COMMAND', 'specify the git command (default: git)') {|command| Tb::Cmd.opt_git_log_git_command = command }
40
- op.def_option('--debug-git-log-file FILE', 'specify the result git log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_git_log_file = filename }
41
+ op.def_option('--debug-git-log-output FILE', 'store the raw output of git-log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_output = filename }
42
+ op.def_option('--debug-git-log-input FILE', 'use the file as output of git-log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_input = filename }
41
43
  op
42
44
  end
43
45
 
@@ -68,8 +70,8 @@ Tb::Cmd::GIT_LOG_PRETTY_FORMAT = 'format:%x01commit-separator%x01%n' +
68
70
  Tb::Cmd::GIT_LOG_HEADER = Tb::Cmd::GIT_LOG_FORMAT_SPEC.map {|k, v| k } + ['files']
69
71
 
70
72
  def (Tb::Cmd).git_log_with_git_log(dir)
71
- if Tb::Cmd.opt_git_log_debug_git_log_file
72
- File.open(Tb::Cmd.opt_git_log_debug_git_log_file) {|f|
73
+ if Tb::Cmd.opt_git_log_debug_input
74
+ File.open(Tb::Cmd.opt_git_log_debug_input) {|f|
73
75
  yield f
74
76
  }
75
77
  else
@@ -85,9 +87,18 @@ def (Tb::Cmd).git_log_with_git_log(dir)
85
87
  '.',
86
88
  {:chdir=>dir}
87
89
  ]
88
- IO.popen(command) {|f|
89
- yield f
90
- }
90
+ $stderr.puts "git command line: #{command.inspect}" if 1 <= Tb::Cmd.opt_debug
91
+ if Tb::Cmd.opt_git_log_debug_output
92
+ command.last[:out] = Tb::Cmd.opt_git_log_debug_output
93
+ system(*command)
94
+ File.open(Tb::Cmd.opt_git_log_debug_output) {|f|
95
+ yield f
96
+ }
97
+ else
98
+ IO.popen(command) {|f|
99
+ yield f
100
+ }
101
+ end
91
102
  end
92
103
  end
93
104
 
@@ -186,8 +197,6 @@ def (Tb::Cmd).main_git_log(argv)
186
197
  }
187
198
  }
188
199
  }
189
- with_output {|out|
190
- er.write_to_csv(out, !Tb::Cmd.opt_N)
191
- }
200
+ output_tbenum(er)
192
201
  end
193
202
 
data/lib/tb/cmd_grep.rb CHANGED
@@ -80,8 +80,6 @@ def (Tb::Cmd).main_grep(argv)
80
80
  end
81
81
  }
82
82
  }
83
- with_output {|out|
84
- er.write_to_csv(out, !Tb::Cmd.opt_N)
85
- }
83
+ output_tbenum(er)
86
84
  end
87
85
 
data/lib/tb/cmd_group.rb CHANGED
@@ -46,61 +46,35 @@ def (Tb::Cmd).main_group(argv)
46
46
  exit_if_help('group')
47
47
  err("no key fields given.") if argv.empty?
48
48
  kfs = split_field_list_argument(argv.shift)
49
- opt_group_fields = Tb::Cmd.opt_group_fields.map {|arg|
49
+ opt_group_fields = kfs.map {|f| [f, Tb::Func::First, f] } +
50
+ Tb::Cmd.opt_group_fields.map {|arg|
50
51
  aggregation_spec, new_field = split_field_list_argument(arg)
51
52
  new_field ||= aggregation_spec
52
53
  [new_field,
53
- lambda {|fields|
54
- begin
55
- make_aggregator(aggregation_spec, fields)
56
- rescue ArgumentError
57
- err($!.message)
58
- end
59
- }
54
+ *begin
55
+ parse_aggregator_spec2(aggregation_spec)
56
+ rescue ArgumentError
57
+ err($!.message)
58
+ end
60
59
  ]
61
60
  }
62
61
  argv = ['-'] if argv.empty?
63
62
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
64
63
  result = Tb::Enumerator.new {|y|
65
- er = creader.extsort_by {|pairs|
66
- kfs.map {|f| smart_cmp_value(pairs[f]) }
67
- }
68
- header = nil
69
- row = nil
70
- agg = nil
71
- er2 = er.with_header {|header0|
72
- header = header0
73
- y.set_header(kfs + opt_group_fields.map {|f, maker| f })
74
- }
75
- boudary_p = lambda {|pairs1, pairs2|
76
- kfs.any? {|f| pairs1[f] != pairs2[f] }
64
+ op = Tb::Zipper.new(opt_group_fields.map {|dstf, func, srcf| func })
65
+ er = creader.extsort_reduce(op) {|pairs|
66
+ [kfs.map {|f| Tb::Func.smart_cmp_value(pairs[f]) },
67
+ opt_group_fields.map {|dstf, func, srcf| func.start(srcf ? pairs[srcf] : true) } ]
77
68
  }
78
- before = lambda {|first_pairs|
79
- row = {}
80
- kfs.each {|f|
81
- row[f] = first_pairs[f]
82
- }
83
- agg = {}
84
- opt_group_fields.each {|f, maker|
85
- agg[f] = maker.call(header)
69
+ fields = opt_group_fields.map {|dstf, func, srcf| dstf }
70
+ y.set_header(fields)
71
+ er.each {|_, vals|
72
+ pairs = opt_group_fields.zip(vals).map {|(dstf, func, _), val|
73
+ [dstf, func.aggregate(val)]
86
74
  }
75
+ y.yield Hash[pairs]
87
76
  }
88
- body = lambda {|pairs|
89
- ary = header.map {|f| pairs[f] }
90
- opt_group_fields.each {|f, maker|
91
- agg[f].update(ary)
92
- }
93
- }
94
- after = lambda {|last_pairs|
95
- opt_group_fields.each {|f, maker|
96
- row[f] = agg[f].finish
97
- }
98
- y.yield row
99
- }
100
- er2.each_group_element(boudary_p, before, body, after)
101
- }
102
- with_output {|out|
103
- result.write_to_csv(out, !Tb::Cmd.opt_N)
104
77
  }
78
+ output_tbenum(result)
105
79
  end
106
80
 
data/lib/tb/cmd_gsub.rb CHANGED
@@ -77,11 +77,9 @@ def (Tb::Cmd).main_gsub(argv)
77
77
  [f, v.gsub(re, repl)]
78
78
  }
79
79
  end
80
- y.yield Tb::Pairs.new(pairs2)
80
+ y.yield Hash[pairs2]
81
81
  }
82
82
  }
83
- with_output {|out|
84
- er.write_to_csv(out, !Tb::Cmd.opt_N)
85
- }
83
+ output_tbenum(er)
86
84
  end
87
85