tb 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
data/README CHANGED
@@ -1,8 +1,9 @@
1
- = tb - manipulation tool for table: CSV, TSV, etc.
1
+ = tb - manipulation tool for table: CSV, TSV, JSON, etc.
2
2
 
3
3
  tb provides a command and a library for manipulating tables:
4
4
  Unix filter like operations (grep, sort, cat, cut, ls, etc.),
5
5
  SQL like operations (join, group, etc.),
6
+ information extractions (git-log, svn-log, tar-tvf),
6
7
  and more.
7
8
 
8
9
  == Example
data/lib/tb.rb CHANGED
@@ -34,9 +34,10 @@ class Tb
34
34
  end
35
35
 
36
36
  require 'pp'
37
- require 'tb/enum'
37
+ require 'tb/enumerable'
38
38
  require 'tb/enumerator'
39
- require 'tb/pairs'
39
+ require 'tb/func'
40
+ require 'tb/zipper'
40
41
  require 'tb/basic'
41
42
  require 'tb/record'
42
43
  require 'tb/csv'
@@ -48,6 +49,9 @@ require 'tb/ropen'
48
49
  require 'tb/catreader'
49
50
  require 'tb/fieldset'
50
51
  require 'tb/search'
51
- require 'tb/enumerable'
52
+ require 'tb/ex_enumerable'
53
+ require 'tb/ex_enumerator'
52
54
  require 'tb/fileenumerator'
53
55
  require 'tb/revcmp'
56
+ require 'tb/customcmp'
57
+ require 'tb/customeq'
@@ -55,7 +55,7 @@
55
55
  # It is an error to access a record by recordid which is not allocated.
56
56
  #
57
57
  class Tb
58
- include Tb::Enum
58
+ include Tb::Enumerable
59
59
 
60
60
  # :call-seq:
61
61
  # Tb.new
@@ -63,7 +63,5 @@ def (Tb::Cmd).main_cat(argv)
63
63
  exit_if_help('cat')
64
64
  argv = ['-'] if argv.empty?
65
65
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N, Tb::Cmd.opt_cat_with_filename)
66
- with_output {|out|
67
- creader.write_to_csv(out, !Tb::Cmd.opt_N)
68
- }
66
+ output_tbenum(creader)
69
67
  end
@@ -68,22 +68,20 @@ def (Tb::Cmd).main_consecutive(argv)
68
68
  }.each {|pairs, header|
69
69
  buf << pairs
70
70
  if buf.length == Tb::Cmd.opt_consecutive_n
71
- pairs2 = []
71
+ pairs2 = {}
72
72
  header.each {|f|
73
73
  Tb::Cmd.opt_consecutive_n.times {|i|
74
74
  ps = buf[i]
75
75
  next if !ps.has_key?(f)
76
76
  v = ps[f]
77
- pairs2 << ["#{f}_#{i+1}", v]
77
+ pairs2["#{f}_#{i+1}"] = v
78
78
  }
79
79
  }
80
80
  empty = false
81
- y.yield Tb::Pairs.new(pairs2)
81
+ y.yield pairs2
82
82
  buf.shift
83
83
  end
84
84
  }
85
85
  }
86
- with_output {|out|
87
- er.write_to_csv(out, !Tb::Cmd.opt_N)
88
- }
86
+ output_tbenum(er)
89
87
  end
@@ -84,9 +84,8 @@ def (Tb::Cmd).main_crop(argv)
84
84
  rownum += 1
85
85
  }
86
86
  }
87
- with_output {|out|
88
- er.write_to_csv(out, false)
89
- }
87
+ Tb::Cmd.opt_N = true
88
+ output_tbenum(er)
90
89
  else
91
90
  creader = Tb::CatReader.open(argv, true)
92
91
  last_nonempty_row = nil
@@ -127,13 +126,12 @@ def (Tb::Cmd).main_crop(argv)
127
126
  break
128
127
  end
129
128
  ary.slice!(0, lmargin_min)
130
- pairs = Tb::Pairs.new(ary.map.with_index {|v, i| ["#{i+1}", v]})
129
+ pairs = Hash[ary.map.with_index {|v, i| ["#{i+1}", v]}]
131
130
  y.yield pairs
132
131
  }
133
132
  }
134
- with_output {|out|
135
- er.write_to_csv(out, false)
136
- }
133
+ Tb::Cmd.opt_N = true
134
+ output_tbenum(er)
137
135
  end
138
136
  end
139
137
 
@@ -49,36 +49,51 @@ def (Tb::Cmd).main_cross(argv)
49
49
  vkfs = split_field_list_argument(argv.shift)
50
50
  err('no hkey-fields given.') if argv.empty?
51
51
  hkfs = split_field_list_argument(argv.shift)
52
+ vhkfs = vkfs + hkfs
52
53
  if Tb::Cmd.opt_cross_fields.empty?
53
- opt_cross_fields = [['count', 'count']]
54
+ num_aggregate_fields = 1
55
+ opt_cross_fields = (vkfs + hkfs).map {|f| [f, Tb::Func::First, f] } +
56
+ [['count', Tb::Func::Count, nil]]
54
57
  else
55
- opt_cross_fields = Tb::Cmd.opt_cross_fields.map {|arg|
58
+ num_aggregate_fields = Tb::Cmd.opt_cross_fields.length
59
+ opt_cross_fields = (vkfs + hkfs).map {|f| [f, Tb::Func::First, f] } +
60
+ Tb::Cmd.opt_cross_fields.map {|arg|
56
61
  agg_spec, new_field = split_field_list_argument(arg)
57
62
  new_field ||= agg_spec
58
- [agg_spec, new_field]
63
+ begin
64
+ func_srcf = parse_aggregator_spec2(agg_spec)
65
+ rescue ArgumentError
66
+ err($!.message)
67
+ end
68
+ [new_field, *func_srcf]
59
69
  }
60
70
  end
61
71
  argv = ['-'] if argv.empty?
62
72
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
63
73
  er = Tb::Enumerator.new {|y|
64
- header = nil
65
74
  hvs_hash = {}
66
75
  hvs_list = nil
67
- sorted = creader.extsort_by {|pairs|
68
- hvs = hkfs.map {|f| pairs[f] }
69
- hvs_hash[hvs] = true
70
- vcv = vkfs.map {|f| smart_cmp_value(pairs[f]) }
71
- vcv
72
- }
73
- sorted2 = sorted.with_header {|header0|
74
- header = header0
75
- (vkfs + hkfs).each {|f|
76
+ aggs_hash = nil
77
+ op = Tb::Zipper.new(opt_cross_fields.map {|dstf, func, srcf| func })
78
+ er = creader.with_header {|header0|
79
+ vhkfs.each {|f|
76
80
  if !header0.include?(f)
77
81
  err("field not found: #{f}")
78
82
  end
79
83
  }
80
- hvs_list = hvs_hash.keys.sort_by {|hvs| hvs.map {|hv| smart_cmp_value(hv) } }
81
- n = vkfs.length + hvs_list.length * opt_cross_fields.length
84
+ }.extsort_reduce(op) {|pairs|
85
+ vvs = vkfs.map {|f| pairs[f] }
86
+ hvs = hkfs.map {|f| pairs[f] }
87
+ vvsc = vvs.map {|v| Tb::Func.smart_cmp_value(v) }
88
+ hvsc = hvs.map {|v| Tb::Func.smart_cmp_value(v) }
89
+ hvs_hash[hvs] = hvsc
90
+ aggs = opt_cross_fields.map {|dstf, func, srcf| func.start(srcf ? pairs[srcf] : true) }
91
+ [[vvsc, hvsc], aggs]
92
+ }
93
+ all_representative = lambda {|_| 1 }
94
+ all_before = lambda {|_|
95
+ hvs_list = hvs_hash.keys.sort_by {|hvs| hvs_hash[hvs] }
96
+ n = vkfs.length + hvs_list.length * num_aggregate_fields
82
97
  header1 = (1..n).map {|i| i.to_s }
83
98
  y.set_header header1
84
99
  hkfs.each_with_index {|hkf, i|
@@ -87,7 +102,7 @@ def (Tb::Cmd).main_cross(argv)
87
102
  j = vkfs.length
88
103
  h1[j.to_s] = hkf
89
104
  hvs_list.each {|hkvs|
90
- opt_cross_fields.length.times {
105
+ num_aggregate_fields.times {
91
106
  j += 1
92
107
  h1[j.to_s] = hkvs[i]
93
108
  }
@@ -101,49 +116,35 @@ def (Tb::Cmd).main_cross(argv)
101
116
  h2[j.to_s] = vkf
102
117
  }
103
118
  hvs_list.each {|hkvs|
104
- opt_cross_fields.each {|agg_spec, new_field|
119
+ opt_cross_fields.last(num_aggregate_fields).each {|dstf, func, srcf|
105
120
  j += 1
106
121
  if Tb::Cmd.opt_cross_compact
107
122
  h2[j.to_s] = hkvs[-1]
108
123
  else
109
- h2[j.to_s] = new_field
124
+ h2[j.to_s] = dstf
110
125
  end
111
126
  }
112
127
  }
113
128
  y.yield h2
114
129
  }
115
- boudary_p = lambda {|pairs1, pairs2|
116
- vcv1 = vkfs.map {|f| smart_cmp_value(pairs1[f]) }
117
- vcv2 = vkfs.map {|f| smart_cmp_value(pairs2[f]) }
118
- vcv1 != vcv2
130
+ v_representative = lambda {|((vvsc, _), _)|
131
+ vvsc
119
132
  }
120
- aggs = nil
121
- before = lambda {|_|
122
- aggs = {}
133
+ v_before = lambda {|_|
134
+ aggs_hash = {}
123
135
  }
124
- body = lambda {|pairs|
125
- hvs = hkfs.map {|f| pairs[f] }
126
- if !aggs.has_key?(hvs)
127
- aggs[hvs] = opt_cross_fields.map {|agg_spec, nf|
128
- begin
129
- make_aggregator(agg_spec, header)
130
- rescue ArgumentError
131
- err($!.message)
132
- end
133
- }
134
- end
135
- ary = header.map {|f| pairs[f] }
136
- aggs[hvs].each {|agg|
137
- agg.update(ary)
138
- }
136
+ body = lambda {|(_, aggs)|
137
+ hvs = aggs[vkfs.length, hkfs.length]
138
+ aggs_hash[hvs] = op.aggregate(aggs)
139
139
  }
140
- after = lambda {|last_pairs|
141
- ary = vkfs.map {|f| last_pairs[f] }
140
+ v_after = lambda {|(_, aggs)|
141
+ vvs = aggs[0, vkfs.length]
142
+ ary = vvs
142
143
  hvs_list.each {|hvs|
143
- if aggs.has_key? hvs
144
- ary.concat(aggs[hvs].map {|agg| agg.finish })
144
+ if aggs_hash.has_key? hvs
145
+ ary.concat(aggs_hash[hvs].last(num_aggregate_fields))
145
146
  else
146
- ary.concat([nil] * opt_cross_fields.length)
147
+ ary.concat([nil] * num_aggregate_fields)
147
148
  end
148
149
  }
149
150
  pairs = {}
@@ -152,10 +153,11 @@ def (Tb::Cmd).main_cross(argv)
152
153
  }
153
154
  y.yield pairs
154
155
  }
155
- sorted2.each_group_element(boudary_p, before, body, after)
156
- }
157
- with_output {|out|
158
- er.write_to_csv(out, false)
156
+ er.detect_nested_group_by(
157
+ [[all_representative, all_before],
158
+ [v_representative, v_before, v_after]]).each(&body)
159
159
  }
160
+ Tb::Cmd.opt_N = true
161
+ output_tbenum(er)
160
162
  end
161
163
 
@@ -56,9 +56,7 @@ def (Tb::Cmd).main_cut(argv)
56
56
  y.yield pairs.reject {|k, v| fs.include? k }
57
57
  }
58
58
  }
59
- with_output {|out|
60
- er.write_to_csv(out, !Tb::Cmd.opt_N)
61
- }
59
+ output_tbenum(er)
62
60
  else
63
61
  er = Tb::Enumerator.new {|y|
64
62
  tblreader.with_header {|header0|
@@ -73,9 +71,7 @@ def (Tb::Cmd).main_cut(argv)
73
71
  y.yield pairs.reject {|k, v| !fs.include?(k) }
74
72
  }
75
73
  }
76
- with_output {|out|
77
- er.write_to_csv(out, !Tb::Cmd.opt_N)
78
- }
74
+ output_tbenum(er)
79
75
  end
80
76
  }
81
77
  end
@@ -29,15 +29,17 @@
29
29
  Tb::Cmd.subcommands << 'git-log'
30
30
 
31
31
  Tb::Cmd.default_option[:opt_git_log_git_command] = nil
32
- Tb::Cmd.default_option[:opt_git_log_debug_git_log_file] = nil
32
+ Tb::Cmd.default_option[:opt_git_log_debug_input] = nil
33
+ Tb::Cmd.default_option[:opt_git_log_debug_output] = nil
33
34
 
34
35
  def (Tb::Cmd).op_git_log
35
36
  op = OptionParser.new
36
37
  op.banner = "Usage: tb git-log [OPTS] [GIT-DIR ...]\n" +
37
38
  "Show the GIT log as a table."
38
- define_common_option(op, "hNo", "--no-pager")
39
+ define_common_option(op, "hNod", "--no-pager", '--debug')
39
40
  op.def_option('--git-command COMMAND', 'specify the git command (default: git)') {|command| Tb::Cmd.opt_git_log_git_command = command }
40
- op.def_option('--debug-git-log-file FILE', 'specify the result git log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_git_log_file = filename }
41
+ op.def_option('--debug-git-log-output FILE', 'store the raw output of git-log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_output = filename }
42
+ op.def_option('--debug-git-log-input FILE', 'use the file as output of git-log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_input = filename }
41
43
  op
42
44
  end
43
45
 
@@ -68,8 +70,8 @@ Tb::Cmd::GIT_LOG_PRETTY_FORMAT = 'format:%x01commit-separator%x01%n' +
68
70
  Tb::Cmd::GIT_LOG_HEADER = Tb::Cmd::GIT_LOG_FORMAT_SPEC.map {|k, v| k } + ['files']
69
71
 
70
72
  def (Tb::Cmd).git_log_with_git_log(dir)
71
- if Tb::Cmd.opt_git_log_debug_git_log_file
72
- File.open(Tb::Cmd.opt_git_log_debug_git_log_file) {|f|
73
+ if Tb::Cmd.opt_git_log_debug_input
74
+ File.open(Tb::Cmd.opt_git_log_debug_input) {|f|
73
75
  yield f
74
76
  }
75
77
  else
@@ -85,9 +87,18 @@ def (Tb::Cmd).git_log_with_git_log(dir)
85
87
  '.',
86
88
  {:chdir=>dir}
87
89
  ]
88
- IO.popen(command) {|f|
89
- yield f
90
- }
90
+ $stderr.puts "git command line: #{command.inspect}" if 1 <= Tb::Cmd.opt_debug
91
+ if Tb::Cmd.opt_git_log_debug_output
92
+ command.last[:out] = Tb::Cmd.opt_git_log_debug_output
93
+ system(*command)
94
+ File.open(Tb::Cmd.opt_git_log_debug_output) {|f|
95
+ yield f
96
+ }
97
+ else
98
+ IO.popen(command) {|f|
99
+ yield f
100
+ }
101
+ end
91
102
  end
92
103
  end
93
104
 
@@ -186,8 +197,6 @@ def (Tb::Cmd).main_git_log(argv)
186
197
  }
187
198
  }
188
199
  }
189
- with_output {|out|
190
- er.write_to_csv(out, !Tb::Cmd.opt_N)
191
- }
200
+ output_tbenum(er)
192
201
  end
193
202
 
@@ -80,8 +80,6 @@ def (Tb::Cmd).main_grep(argv)
80
80
  end
81
81
  }
82
82
  }
83
- with_output {|out|
84
- er.write_to_csv(out, !Tb::Cmd.opt_N)
85
- }
83
+ output_tbenum(er)
86
84
  end
87
85
 
@@ -46,61 +46,35 @@ def (Tb::Cmd).main_group(argv)
46
46
  exit_if_help('group')
47
47
  err("no key fields given.") if argv.empty?
48
48
  kfs = split_field_list_argument(argv.shift)
49
- opt_group_fields = Tb::Cmd.opt_group_fields.map {|arg|
49
+ opt_group_fields = kfs.map {|f| [f, Tb::Func::First, f] } +
50
+ Tb::Cmd.opt_group_fields.map {|arg|
50
51
  aggregation_spec, new_field = split_field_list_argument(arg)
51
52
  new_field ||= aggregation_spec
52
53
  [new_field,
53
- lambda {|fields|
54
- begin
55
- make_aggregator(aggregation_spec, fields)
56
- rescue ArgumentError
57
- err($!.message)
58
- end
59
- }
54
+ *begin
55
+ parse_aggregator_spec2(aggregation_spec)
56
+ rescue ArgumentError
57
+ err($!.message)
58
+ end
60
59
  ]
61
60
  }
62
61
  argv = ['-'] if argv.empty?
63
62
  creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
64
63
  result = Tb::Enumerator.new {|y|
65
- er = creader.extsort_by {|pairs|
66
- kfs.map {|f| smart_cmp_value(pairs[f]) }
67
- }
68
- header = nil
69
- row = nil
70
- agg = nil
71
- er2 = er.with_header {|header0|
72
- header = header0
73
- y.set_header(kfs + opt_group_fields.map {|f, maker| f })
74
- }
75
- boudary_p = lambda {|pairs1, pairs2|
76
- kfs.any? {|f| pairs1[f] != pairs2[f] }
64
+ op = Tb::Zipper.new(opt_group_fields.map {|dstf, func, srcf| func })
65
+ er = creader.extsort_reduce(op) {|pairs|
66
+ [kfs.map {|f| Tb::Func.smart_cmp_value(pairs[f]) },
67
+ opt_group_fields.map {|dstf, func, srcf| func.start(srcf ? pairs[srcf] : true) } ]
77
68
  }
78
- before = lambda {|first_pairs|
79
- row = {}
80
- kfs.each {|f|
81
- row[f] = first_pairs[f]
82
- }
83
- agg = {}
84
- opt_group_fields.each {|f, maker|
85
- agg[f] = maker.call(header)
69
+ fields = opt_group_fields.map {|dstf, func, srcf| dstf }
70
+ y.set_header(fields)
71
+ er.each {|_, vals|
72
+ pairs = opt_group_fields.zip(vals).map {|(dstf, func, _), val|
73
+ [dstf, func.aggregate(val)]
86
74
  }
75
+ y.yield Hash[pairs]
87
76
  }
88
- body = lambda {|pairs|
89
- ary = header.map {|f| pairs[f] }
90
- opt_group_fields.each {|f, maker|
91
- agg[f].update(ary)
92
- }
93
- }
94
- after = lambda {|last_pairs|
95
- opt_group_fields.each {|f, maker|
96
- row[f] = agg[f].finish
97
- }
98
- y.yield row
99
- }
100
- er2.each_group_element(boudary_p, before, body, after)
101
- }
102
- with_output {|out|
103
- result.write_to_csv(out, !Tb::Cmd.opt_N)
104
77
  }
78
+ output_tbenum(result)
105
79
  end
106
80
 
@@ -77,11 +77,9 @@ def (Tb::Cmd).main_gsub(argv)
77
77
  [f, v.gsub(re, repl)]
78
78
  }
79
79
  end
80
- y.yield Tb::Pairs.new(pairs2)
80
+ y.yield Hash[pairs2]
81
81
  }
82
82
  }
83
- with_output {|out|
84
- er.write_to_csv(out, !Tb::Cmd.opt_N)
85
- }
83
+ output_tbenum(er)
86
84
  end
87
85