tb 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
@@ -523,11 +523,7 @@ class TestTbPathFinder < Test::Unit::TestCase
523
523
  s = Tb::Search::EmptyState
524
524
  assert_equal("foo", s.fetch(:k) {|k| assert_equal(:k, k); "foo" })
525
525
  assert_equal("bar", s.fetch(:k, "bar"))
526
- if defined? KeyError
527
- assert_raise(KeyError) { s.fetch(:k) } # Ruby 1.9
528
- else
529
- assert_raise(IndexError) { s.fetch(:k) } # Ruby 1.8
530
- end
526
+ assert_raise(KeyError) { s.fetch(:k) }
531
527
  end
532
528
 
533
529
  def test_emptystate_values_at
@@ -555,11 +551,7 @@ class TestTbPathFinder < Test::Unit::TestCase
555
551
  s = Tb::Search::State.make(:k => 1)
556
552
  assert_equal(1, s.fetch(:k))
557
553
  assert_equal(:foo, s.fetch(:x) {|k| assert_equal(:x, k); :foo })
558
- if defined? KeyError
559
- assert_raise(KeyError) { s.fetch(:x) } # Ruby 1.9
560
- else
561
- assert_raise(IndexError) { s.fetch(:x) } # Ruby 1.8
562
- end
554
+ assert_raise(KeyError) { s.fetch(:x) }
563
555
  end
564
556
 
565
557
  def test_state_keys
@@ -49,7 +49,7 @@ class TestTbEnum < Test::Unit::TestCase
49
49
  header_proc.call(nil) if header_proc
50
50
  self.each(&block)
51
51
  end
52
- obj.extend Tb::Enum
52
+ obj.extend Tb::Enumerable
53
53
  Dir.mktmpdir {|d|
54
54
  open("#{d}/foo.csv", 'w') {|f|
55
55
  obj.write_to_csv(f)
@@ -73,7 +73,7 @@ class TestTbEnum < Test::Unit::TestCase
73
73
  header_proc.call(nil) if header_proc
74
74
  self.each(&block)
75
75
  end
76
- obj.extend Tb::Enum
76
+ obj.extend Tb::Enumerable
77
77
  Dir.mktmpdir {|d|
78
78
  open("#{d}/foo.csv", 'w') {|f|
79
79
  obj.write_to_csv(f)
@@ -97,7 +97,7 @@ class TestTbEnum < Test::Unit::TestCase
97
97
  header_proc.call(nil) if header_proc
98
98
  self.each(&block)
99
99
  end
100
- obj.extend Tb::Enum
100
+ obj.extend Tb::Enumerable
101
101
  Dir.mktmpdir {|d|
102
102
  open("#{d}/foo.csv", 'w') {|f|
103
103
  obj.write_to_csv(f, false)
@@ -0,0 +1,22 @@
1
+ require 'tb'
2
+ require 'test/unit'
3
+
4
+ class TestZipper < Test::Unit::TestCase
5
+ def test_basic
6
+ z = Tb::Zipper.new([Tb::Func::Sum, Tb::Func::Min])
7
+ assert_equal([5,2], z.aggregate(z.call(z.start([2,3]), z.start([3,2]))))
8
+ end
9
+
10
+ def test_argerr
11
+ z = Tb::Zipper.new([Tb::Func::Sum, Tb::Func::Min])
12
+ assert_raise(ArgumentError) { z.start([]) }
13
+ assert_raise(ArgumentError) { z.start([1]) }
14
+ assert_raise(ArgumentError) { z.start([1,2,3]) }
15
+ assert_raise(ArgumentError) { z.call([1], [3]) }
16
+ assert_raise(ArgumentError) { z.call([1], [3,4]) }
17
+ assert_raise(ArgumentError) { z.call([1,2], [3]) }
18
+ assert_raise(ArgumentError) { z.aggregate([]) }
19
+ assert_raise(ArgumentError) { z.aggregate([1]) }
20
+ assert_raise(ArgumentError) { z.aggregate([1,2,3]) }
21
+ end
22
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tb
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: '0.4'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-29 00:00:00.000000000 Z
12
+ date: 2012-02-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! 'tb is a manipulation tool for table: CSV, TSV, JSON, etc.
15
15
 
@@ -20,6 +20,8 @@ description: ! 'tb is a manipulation tool for table: CSV, TSV, JSON, etc.
20
20
 
21
21
  SQL like operations (join, group, etc.),
22
22
 
23
+ information extractions (git-log, svn-log, tar-tvf),
24
+
23
25
  and more.
24
26
 
25
27
  '
@@ -65,14 +67,17 @@ files:
65
67
  - lib/tb/cmdtop.rb
66
68
  - lib/tb/cmdutil.rb
67
69
  - lib/tb/csv.rb
68
- - lib/tb/enum.rb
70
+ - lib/tb/customcmp.rb
71
+ - lib/tb/customeq.rb
69
72
  - lib/tb/enumerable.rb
70
73
  - lib/tb/enumerator.rb
74
+ - lib/tb/ex_enumerable.rb
75
+ - lib/tb/ex_enumerator.rb
71
76
  - lib/tb/fieldset.rb
72
77
  - lib/tb/fileenumerator.rb
78
+ - lib/tb/func.rb
73
79
  - lib/tb/json.rb
74
80
  - lib/tb/pager.rb
75
- - lib/tb/pairs.rb
76
81
  - lib/tb/pnm.rb
77
82
  - lib/tb/reader.rb
78
83
  - lib/tb/record.rb
@@ -80,6 +85,7 @@ files:
80
85
  - lib/tb/ropen.rb
81
86
  - lib/tb/search.rb
82
87
  - lib/tb/tsv.rb
88
+ - lib/tb/zipper.rb
83
89
  - sample/colors.ppm
84
90
  - sample/excel2csv
85
91
  - sample/gradation.pgm
@@ -109,6 +115,7 @@ files:
109
115
  - test/test_cmd_rename.rb
110
116
  - test/test_cmd_shape.rb
111
117
  - test/test_cmd_sort.rb
118
+ - test/test_cmd_svn_log.rb
112
119
  - test/test_cmd_tar_tvf.rb
113
120
  - test/test_cmd_to_csv.rb
114
121
  - test/test_cmd_to_json.rb
@@ -120,12 +127,13 @@ files:
120
127
  - test/test_cmdtty.rb
121
128
  - test/test_cmdutil.rb
122
129
  - test/test_csv.rb
123
- - test/test_enumerable.rb
130
+ - test/test_customcmp.rb
131
+ - test/test_customeq.rb
132
+ - test/test_ex_enumerable.rb
124
133
  - test/test_fieldset.rb
125
134
  - test/test_fileenumerator.rb
126
135
  - test/test_json.rb
127
136
  - test/test_pager.rb
128
- - test/test_pairs.rb
129
137
  - test/test_pnm.rb
130
138
  - test/test_reader.rb
131
139
  - test/test_record.rb
@@ -133,6 +141,7 @@ files:
133
141
  - test/test_search.rb
134
142
  - test/test_tbenum.rb
135
143
  - test/test_tsv.rb
144
+ - test/test_zipper.rb
136
145
  homepage: https://github.com/akr/tb
137
146
  licenses: []
138
147
  post_install_message:
@@ -178,6 +187,7 @@ test_files:
178
187
  - test/test_cmd_rename.rb
179
188
  - test/test_cmd_shape.rb
180
189
  - test/test_cmd_sort.rb
190
+ - test/test_cmd_svn_log.rb
181
191
  - test/test_cmd_tar_tvf.rb
182
192
  - test/test_cmd_to_csv.rb
183
193
  - test/test_cmd_to_json.rb
@@ -189,12 +199,13 @@ test_files:
189
199
  - test/test_cmdtty.rb
190
200
  - test/test_cmdutil.rb
191
201
  - test/test_csv.rb
192
- - test/test_enumerable.rb
202
+ - test/test_customcmp.rb
203
+ - test/test_customeq.rb
204
+ - test/test_ex_enumerable.rb
193
205
  - test/test_fieldset.rb
194
206
  - test/test_fileenumerator.rb
195
207
  - test/test_json.rb
196
208
  - test/test_pager.rb
197
- - test/test_pairs.rb
198
209
  - test/test_pnm.rb
199
210
  - test/test_reader.rb
200
211
  - test/test_record.rb
@@ -202,3 +213,4 @@ test_files:
202
213
  - test/test_search.rb
203
214
  - test/test_tbenum.rb
204
215
  - test/test_tsv.rb
216
+ - test/test_zipper.rb
@@ -1,294 +0,0 @@
1
- # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
- #
3
- # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions
5
- # are met:
6
- #
7
- # 1. Redistributions of source code must retain the above copyright
8
- # notice, this list of conditions and the following disclaimer.
9
- # 2. Redistributions in binary form must reproduce the above
10
- # copyright notice, this list of conditions and the following
11
- # disclaimer in the documentation and/or other materials provided
12
- # with the distribution.
13
- # 3. The name of the author may not be used to endorse or promote
14
- # products derived from this software without specific prior
15
- # written permission.
16
- #
17
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
- # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
- # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
- # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
- # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
- # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
- # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
-
29
- module Tb::Enum
30
- include Enumerable
31
-
32
- def with_header(&header_proc)
33
- Enumerator.new {|y|
34
- header_and_each(header_proc) {|pairs|
35
- y.yield pairs
36
- }
37
- }
38
- end
39
-
40
- def with_cumulative_header(&header_proc)
41
- Enumerator.new {|y|
42
- hset = {}
43
- internal_header_proc = lambda {|header0|
44
- if header0
45
- header0.each {|f|
46
- hset[f] = true
47
- }
48
- end
49
- header_proc.call(header0) if header_proc
50
- }
51
- header_and_each(internal_header_proc) {|pairs|
52
- pairs.each {|f, v|
53
- if !hset[f]
54
- hset[f] = true
55
- end
56
- }
57
- y.yield [pairs, hset.keys.freeze]
58
- }
59
- }
60
- end
61
-
62
- def cat(*ers, &b)
63
- ers = [self, *ers]
64
- rec = lambda {|y, header|
65
- if ers.empty?
66
- if header
67
- y.set_header header
68
- end
69
- else
70
- last_e = ers.pop
71
- last_e.with_header {|last_e_header|
72
- if last_e_header && header
73
- header = last_e_header | header
74
- else
75
- header = nil
76
- end
77
- rec.call(y, header)
78
- }.each {|v|
79
- y.yield v
80
- }
81
- end
82
- }
83
- er = Tb::Enumerator.new {|y|
84
- rec.call(y, [])
85
- }
86
- if block_given?
87
- er.each(&b)
88
- else
89
- er
90
- end
91
- end
92
-
93
- # creates a new Tb::Enumerator object which have
94
- # new field named by _field_ with the value returned by the block.
95
- #
96
- # t1 = Tb.new %w[a b], [1, 2], [3, 4]
97
- # p t1.newfield("x") {|row| row["a"] + row["b"] + 100 }.to_a
98
- # #=> [#<Tb::Pairs: "x"=>103, "a"=>1, "b"=>2>,
99
- # # #<Tb::Pairs: "x"=>107, "a"=>3, "b"=>4>]
100
- #
101
- def newfield(field)
102
- Tb::Enumerator.new {|y|
103
- self.with_header {|header|
104
- if header
105
- y.set_header(Tb::FieldSet.normalize([field, *header]))
106
- end
107
- }.each {|row|
108
- keys = row.keys
109
- keys = Tb::FieldSet.normalize([field, *keys])
110
- vals = row.values
111
- vals = [yield(row), *vals]
112
- y << Tb::Pairs.new(keys.zip(vals))
113
- }
114
- }
115
- end
116
-
117
- # :call-seq:
118
- # table1.natjoin2(table2, missing_value=nil, retain_left=false, retain_right=false)
119
- def natjoin2(tbl2, missing_value=nil, retain_left=false, retain_right=false)
120
- Tb::Enumerator.new {|y|
121
- tbl1 = self
122
- header1 = header2 = nil
123
- sorted_tbl2 = nil
124
- common_header = nil
125
- total_header = nil
126
- sorted_tbl1 = tbl1.with_header {|h1|
127
- header1 = h1
128
- sorted_tbl2 = tbl2.with_header {|h2|
129
- header2 = h2
130
- common_header = header1 & header2
131
- total_header = header1 | header2
132
- y.set_header total_header
133
- }.lazy_map {|pairs|
134
- [common_header.map {|f| pairs[f] }, pairs]
135
- }.extsort_by {|cv, pairs| cv }.to_fileenumerator
136
- }.lazy_map {|pairs|
137
- [common_header.map {|f| pairs[f] }, pairs]
138
- }.extsort_by {|cv, pairs| cv }.to_fileenumerator
139
- sorted_tbl1.open_reader {|t1|
140
- sorted_tbl2.open_reader {|t2|
141
- t1_eof = t2_eof = false
142
- while true
143
- begin
144
- cv1, pairs1 = t1.peek
145
- rescue StopIteration
146
- t1_eof = true
147
- end
148
- begin
149
- cv2, pairs2 = t2.peek
150
- rescue StopIteration
151
- t2_eof = true
152
- end
153
- break if t1_eof || t2_eof
154
- cmp = cv1 <=> cv2
155
- if cmp < 0
156
- t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
157
- if retain_left
158
- h = {}
159
- total_header.each {|f|
160
- h[f] = missing_value if !_pairs1.has_key?(f)
161
- }
162
- y.yield _pairs1.merge(h)
163
- end
164
- }
165
- elsif 0 < cmp
166
- t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
167
- if retain_right
168
- h = {}
169
- total_header.each {|f|
170
- h[f] = missing_value if !_pairs2.has_key?(f)
171
- }
172
- y.yield _pairs2.merge(h)
173
- end
174
- }
175
- else
176
- t2_pos = t2.pos
177
- t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
178
- t2.pos = t2_pos
179
- t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
180
- pairs = {}
181
- _pairs1.each {|f, v| pairs[f] = v }
182
- _pairs2.each {|f, v| pairs[f] = v if !pairs.has_key?(f) }
183
- y.yield(pairs)
184
- }
185
- }
186
- end
187
- end
188
- begin
189
- cv1, pairs1 = t1.next
190
- if retain_left
191
- h = {}
192
- total_header.each {|f|
193
- h[f] = missing_value if !pairs1.has_key?(f)
194
- }
195
- y.yield pairs1.merge(h)
196
- end
197
- rescue StopIteration
198
- end
199
- begin
200
- cv2, pairs2 = t2.next
201
- if retain_right
202
- h = {}
203
- total_header.each {|f|
204
- h[f] = missing_value if !pairs2.has_key?(f)
205
- }
206
- y.yield pairs2.merge(h)
207
- end
208
- rescue StopIteration
209
- end
210
- }
211
- }
212
- }
213
- end
214
-
215
- # :call-seq:
216
- # table1.natjoin2_outer(table2, missing=nil, retain_left=true, retain_right=true)
217
- def natjoin2_outer(tbl2, missing_value=nil, retain_left=true, retain_right=true)
218
- natjoin2(tbl2, missing_value, retain_left, retain_right)
219
- end
220
-
221
- def to_tb
222
- tb = Tb.new
223
- self.each {|pairs|
224
- pairs.each {|k, v|
225
- unless tb.has_field? k
226
- tb.define_field(k)
227
- end
228
- }
229
- tb.insert pairs
230
- }
231
- tb
232
- end
233
-
234
- def write_to_csv(io, with_header=true)
235
- stream = nil
236
- header = []
237
- fgen = fnew = nil
238
- self.with_cumulative_header {|header0|
239
- if !with_header
240
- stream = true
241
- elsif header0
242
- stream = true
243
- io.puts Tb.csv_encode_row(header0)
244
- else
245
- stream = false
246
- fgen, fnew = Tb::FileEnumerator.gen_new
247
- end
248
- }.each {|pairs, header1|
249
- pairs = Tb::Pairs.new(pairs) unless pairs.respond_to? :has_key?
250
- header = header1
251
- if stream
252
- fs = header.dup
253
- while !fs.empty? && !pairs.has_key?(fs.last)
254
- fs.pop
255
- end
256
- ary = fs.map {|f| pairs[f] }
257
- io.puts Tb.csv_encode_row(ary)
258
- else
259
- fgen.call Tb::Pairs.new(pairs)
260
- end
261
- }
262
- if !stream
263
- if with_header
264
- io.puts Tb.csv_encode_row(header)
265
- end
266
- fnew.call.each {|pairs|
267
- fs = header.dup
268
- while !fs.empty? && !pairs.has_key?(fs.last)
269
- fs.pop
270
- end
271
- ary = fs.map {|f| pairs[f] }
272
- io.puts Tb.csv_encode_row(ary)
273
- }
274
- end
275
- end
276
-
277
- def extsort_by(opts={}, &cmpvalue_from)
278
- Tb::Enumerator.new {|ty|
279
- header = []
280
- er = Enumerator.new {|y|
281
- self.with_cumulative_header {|header0|
282
- header = header0 if header0
283
- }.each {|pairs, header1|
284
- header = header1
285
- y.yield pairs
286
- }
287
- ty.set_header header
288
- }
289
- er.extsort_by(opts, &cmpvalue_from).each {|pairs|
290
- ty.yield pairs
291
- }
292
- }
293
- end
294
- end