tb 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
@@ -523,11 +523,7 @@ class TestTbPathFinder < Test::Unit::TestCase
523
523
  s = Tb::Search::EmptyState
524
524
  assert_equal("foo", s.fetch(:k) {|k| assert_equal(:k, k); "foo" })
525
525
  assert_equal("bar", s.fetch(:k, "bar"))
526
- if defined? KeyError
527
- assert_raise(KeyError) { s.fetch(:k) } # Ruby 1.9
528
- else
529
- assert_raise(IndexError) { s.fetch(:k) } # Ruby 1.8
530
- end
526
+ assert_raise(KeyError) { s.fetch(:k) }
531
527
  end
532
528
 
533
529
  def test_emptystate_values_at
@@ -555,11 +551,7 @@ class TestTbPathFinder < Test::Unit::TestCase
555
551
  s = Tb::Search::State.make(:k => 1)
556
552
  assert_equal(1, s.fetch(:k))
557
553
  assert_equal(:foo, s.fetch(:x) {|k| assert_equal(:x, k); :foo })
558
- if defined? KeyError
559
- assert_raise(KeyError) { s.fetch(:x) } # Ruby 1.9
560
- else
561
- assert_raise(IndexError) { s.fetch(:x) } # Ruby 1.8
562
- end
554
+ assert_raise(KeyError) { s.fetch(:x) }
563
555
  end
564
556
 
565
557
  def test_state_keys
@@ -49,7 +49,7 @@ class TestTbEnum < Test::Unit::TestCase
49
49
  header_proc.call(nil) if header_proc
50
50
  self.each(&block)
51
51
  end
52
- obj.extend Tb::Enum
52
+ obj.extend Tb::Enumerable
53
53
  Dir.mktmpdir {|d|
54
54
  open("#{d}/foo.csv", 'w') {|f|
55
55
  obj.write_to_csv(f)
@@ -73,7 +73,7 @@ class TestTbEnum < Test::Unit::TestCase
73
73
  header_proc.call(nil) if header_proc
74
74
  self.each(&block)
75
75
  end
76
- obj.extend Tb::Enum
76
+ obj.extend Tb::Enumerable
77
77
  Dir.mktmpdir {|d|
78
78
  open("#{d}/foo.csv", 'w') {|f|
79
79
  obj.write_to_csv(f)
@@ -97,7 +97,7 @@ class TestTbEnum < Test::Unit::TestCase
97
97
  header_proc.call(nil) if header_proc
98
98
  self.each(&block)
99
99
  end
100
- obj.extend Tb::Enum
100
+ obj.extend Tb::Enumerable
101
101
  Dir.mktmpdir {|d|
102
102
  open("#{d}/foo.csv", 'w') {|f|
103
103
  obj.write_to_csv(f, false)
@@ -0,0 +1,22 @@
1
+ require 'tb'
2
+ require 'test/unit'
3
+
4
+ class TestZipper < Test::Unit::TestCase
5
+ def test_basic
6
+ z = Tb::Zipper.new([Tb::Func::Sum, Tb::Func::Min])
7
+ assert_equal([5,2], z.aggregate(z.call(z.start([2,3]), z.start([3,2]))))
8
+ end
9
+
10
+ def test_argerr
11
+ z = Tb::Zipper.new([Tb::Func::Sum, Tb::Func::Min])
12
+ assert_raise(ArgumentError) { z.start([]) }
13
+ assert_raise(ArgumentError) { z.start([1]) }
14
+ assert_raise(ArgumentError) { z.start([1,2,3]) }
15
+ assert_raise(ArgumentError) { z.call([1], [3]) }
16
+ assert_raise(ArgumentError) { z.call([1], [3,4]) }
17
+ assert_raise(ArgumentError) { z.call([1,2], [3]) }
18
+ assert_raise(ArgumentError) { z.aggregate([]) }
19
+ assert_raise(ArgumentError) { z.aggregate([1]) }
20
+ assert_raise(ArgumentError) { z.aggregate([1,2,3]) }
21
+ end
22
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tb
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: '0.4'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-29 00:00:00.000000000 Z
12
+ date: 2012-02-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! 'tb is a manipulation tool for table: CSV, TSV, JSON, etc.
15
15
 
@@ -20,6 +20,8 @@ description: ! 'tb is a manipulation tool for table: CSV, TSV, JSON, etc.
20
20
 
21
21
  SQL like operations (join, group, etc.),
22
22
 
23
+ information extractions (git-log, svn-log, tar-tvf),
24
+
23
25
  and more.
24
26
 
25
27
  '
@@ -65,14 +67,17 @@ files:
65
67
  - lib/tb/cmdtop.rb
66
68
  - lib/tb/cmdutil.rb
67
69
  - lib/tb/csv.rb
68
- - lib/tb/enum.rb
70
+ - lib/tb/customcmp.rb
71
+ - lib/tb/customeq.rb
69
72
  - lib/tb/enumerable.rb
70
73
  - lib/tb/enumerator.rb
74
+ - lib/tb/ex_enumerable.rb
75
+ - lib/tb/ex_enumerator.rb
71
76
  - lib/tb/fieldset.rb
72
77
  - lib/tb/fileenumerator.rb
78
+ - lib/tb/func.rb
73
79
  - lib/tb/json.rb
74
80
  - lib/tb/pager.rb
75
- - lib/tb/pairs.rb
76
81
  - lib/tb/pnm.rb
77
82
  - lib/tb/reader.rb
78
83
  - lib/tb/record.rb
@@ -80,6 +85,7 @@ files:
80
85
  - lib/tb/ropen.rb
81
86
  - lib/tb/search.rb
82
87
  - lib/tb/tsv.rb
88
+ - lib/tb/zipper.rb
83
89
  - sample/colors.ppm
84
90
  - sample/excel2csv
85
91
  - sample/gradation.pgm
@@ -109,6 +115,7 @@ files:
109
115
  - test/test_cmd_rename.rb
110
116
  - test/test_cmd_shape.rb
111
117
  - test/test_cmd_sort.rb
118
+ - test/test_cmd_svn_log.rb
112
119
  - test/test_cmd_tar_tvf.rb
113
120
  - test/test_cmd_to_csv.rb
114
121
  - test/test_cmd_to_json.rb
@@ -120,12 +127,13 @@ files:
120
127
  - test/test_cmdtty.rb
121
128
  - test/test_cmdutil.rb
122
129
  - test/test_csv.rb
123
- - test/test_enumerable.rb
130
+ - test/test_customcmp.rb
131
+ - test/test_customeq.rb
132
+ - test/test_ex_enumerable.rb
124
133
  - test/test_fieldset.rb
125
134
  - test/test_fileenumerator.rb
126
135
  - test/test_json.rb
127
136
  - test/test_pager.rb
128
- - test/test_pairs.rb
129
137
  - test/test_pnm.rb
130
138
  - test/test_reader.rb
131
139
  - test/test_record.rb
@@ -133,6 +141,7 @@ files:
133
141
  - test/test_search.rb
134
142
  - test/test_tbenum.rb
135
143
  - test/test_tsv.rb
144
+ - test/test_zipper.rb
136
145
  homepage: https://github.com/akr/tb
137
146
  licenses: []
138
147
  post_install_message:
@@ -178,6 +187,7 @@ test_files:
178
187
  - test/test_cmd_rename.rb
179
188
  - test/test_cmd_shape.rb
180
189
  - test/test_cmd_sort.rb
190
+ - test/test_cmd_svn_log.rb
181
191
  - test/test_cmd_tar_tvf.rb
182
192
  - test/test_cmd_to_csv.rb
183
193
  - test/test_cmd_to_json.rb
@@ -189,12 +199,13 @@ test_files:
189
199
  - test/test_cmdtty.rb
190
200
  - test/test_cmdutil.rb
191
201
  - test/test_csv.rb
192
- - test/test_enumerable.rb
202
+ - test/test_customcmp.rb
203
+ - test/test_customeq.rb
204
+ - test/test_ex_enumerable.rb
193
205
  - test/test_fieldset.rb
194
206
  - test/test_fileenumerator.rb
195
207
  - test/test_json.rb
196
208
  - test/test_pager.rb
197
- - test/test_pairs.rb
198
209
  - test/test_pnm.rb
199
210
  - test/test_reader.rb
200
211
  - test/test_record.rb
@@ -202,3 +213,4 @@ test_files:
202
213
  - test/test_search.rb
203
214
  - test/test_tbenum.rb
204
215
  - test/test_tsv.rb
216
+ - test/test_zipper.rb
@@ -1,294 +0,0 @@
1
- # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
- #
3
- # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions
5
- # are met:
6
- #
7
- # 1. Redistributions of source code must retain the above copyright
8
- # notice, this list of conditions and the following disclaimer.
9
- # 2. Redistributions in binary form must reproduce the above
10
- # copyright notice, this list of conditions and the following
11
- # disclaimer in the documentation and/or other materials provided
12
- # with the distribution.
13
- # 3. The name of the author may not be used to endorse or promote
14
- # products derived from this software without specific prior
15
- # written permission.
16
- #
17
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
- # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
- # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
- # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
- # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
- # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
- # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
- # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
-
29
- module Tb::Enum
30
- include Enumerable
31
-
32
- def with_header(&header_proc)
33
- Enumerator.new {|y|
34
- header_and_each(header_proc) {|pairs|
35
- y.yield pairs
36
- }
37
- }
38
- end
39
-
40
- def with_cumulative_header(&header_proc)
41
- Enumerator.new {|y|
42
- hset = {}
43
- internal_header_proc = lambda {|header0|
44
- if header0
45
- header0.each {|f|
46
- hset[f] = true
47
- }
48
- end
49
- header_proc.call(header0) if header_proc
50
- }
51
- header_and_each(internal_header_proc) {|pairs|
52
- pairs.each {|f, v|
53
- if !hset[f]
54
- hset[f] = true
55
- end
56
- }
57
- y.yield [pairs, hset.keys.freeze]
58
- }
59
- }
60
- end
61
-
62
- def cat(*ers, &b)
63
- ers = [self, *ers]
64
- rec = lambda {|y, header|
65
- if ers.empty?
66
- if header
67
- y.set_header header
68
- end
69
- else
70
- last_e = ers.pop
71
- last_e.with_header {|last_e_header|
72
- if last_e_header && header
73
- header = last_e_header | header
74
- else
75
- header = nil
76
- end
77
- rec.call(y, header)
78
- }.each {|v|
79
- y.yield v
80
- }
81
- end
82
- }
83
- er = Tb::Enumerator.new {|y|
84
- rec.call(y, [])
85
- }
86
- if block_given?
87
- er.each(&b)
88
- else
89
- er
90
- end
91
- end
92
-
93
- # creates a new Tb::Enumerator object which have
94
- # new field named by _field_ with the value returned by the block.
95
- #
96
- # t1 = Tb.new %w[a b], [1, 2], [3, 4]
97
- # p t1.newfield("x") {|row| row["a"] + row["b"] + 100 }.to_a
98
- # #=> [#<Tb::Pairs: "x"=>103, "a"=>1, "b"=>2>,
99
- # # #<Tb::Pairs: "x"=>107, "a"=>3, "b"=>4>]
100
- #
101
- def newfield(field)
102
- Tb::Enumerator.new {|y|
103
- self.with_header {|header|
104
- if header
105
- y.set_header(Tb::FieldSet.normalize([field, *header]))
106
- end
107
- }.each {|row|
108
- keys = row.keys
109
- keys = Tb::FieldSet.normalize([field, *keys])
110
- vals = row.values
111
- vals = [yield(row), *vals]
112
- y << Tb::Pairs.new(keys.zip(vals))
113
- }
114
- }
115
- end
116
-
117
- # :call-seq:
118
- # table1.natjoin2(table2, missing_value=nil, retain_left=false, retain_right=false)
119
- def natjoin2(tbl2, missing_value=nil, retain_left=false, retain_right=false)
120
- Tb::Enumerator.new {|y|
121
- tbl1 = self
122
- header1 = header2 = nil
123
- sorted_tbl2 = nil
124
- common_header = nil
125
- total_header = nil
126
- sorted_tbl1 = tbl1.with_header {|h1|
127
- header1 = h1
128
- sorted_tbl2 = tbl2.with_header {|h2|
129
- header2 = h2
130
- common_header = header1 & header2
131
- total_header = header1 | header2
132
- y.set_header total_header
133
- }.lazy_map {|pairs|
134
- [common_header.map {|f| pairs[f] }, pairs]
135
- }.extsort_by {|cv, pairs| cv }.to_fileenumerator
136
- }.lazy_map {|pairs|
137
- [common_header.map {|f| pairs[f] }, pairs]
138
- }.extsort_by {|cv, pairs| cv }.to_fileenumerator
139
- sorted_tbl1.open_reader {|t1|
140
- sorted_tbl2.open_reader {|t2|
141
- t1_eof = t2_eof = false
142
- while true
143
- begin
144
- cv1, pairs1 = t1.peek
145
- rescue StopIteration
146
- t1_eof = true
147
- end
148
- begin
149
- cv2, pairs2 = t2.peek
150
- rescue StopIteration
151
- t2_eof = true
152
- end
153
- break if t1_eof || t2_eof
154
- cmp = cv1 <=> cv2
155
- if cmp < 0
156
- t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
157
- if retain_left
158
- h = {}
159
- total_header.each {|f|
160
- h[f] = missing_value if !_pairs1.has_key?(f)
161
- }
162
- y.yield _pairs1.merge(h)
163
- end
164
- }
165
- elsif 0 < cmp
166
- t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
167
- if retain_right
168
- h = {}
169
- total_header.each {|f|
170
- h[f] = missing_value if !_pairs2.has_key?(f)
171
- }
172
- y.yield _pairs2.merge(h)
173
- end
174
- }
175
- else
176
- t2_pos = t2.pos
177
- t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
178
- t2.pos = t2_pos
179
- t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
180
- pairs = {}
181
- _pairs1.each {|f, v| pairs[f] = v }
182
- _pairs2.each {|f, v| pairs[f] = v if !pairs.has_key?(f) }
183
- y.yield(pairs)
184
- }
185
- }
186
- end
187
- end
188
- begin
189
- cv1, pairs1 = t1.next
190
- if retain_left
191
- h = {}
192
- total_header.each {|f|
193
- h[f] = missing_value if !pairs1.has_key?(f)
194
- }
195
- y.yield pairs1.merge(h)
196
- end
197
- rescue StopIteration
198
- end
199
- begin
200
- cv2, pairs2 = t2.next
201
- if retain_right
202
- h = {}
203
- total_header.each {|f|
204
- h[f] = missing_value if !pairs2.has_key?(f)
205
- }
206
- y.yield pairs2.merge(h)
207
- end
208
- rescue StopIteration
209
- end
210
- }
211
- }
212
- }
213
- end
214
-
215
- # :call-seq:
216
- # table1.natjoin2_outer(table2, missing=nil, retain_left=true, retain_right=true)
217
- def natjoin2_outer(tbl2, missing_value=nil, retain_left=true, retain_right=true)
218
- natjoin2(tbl2, missing_value, retain_left, retain_right)
219
- end
220
-
221
- def to_tb
222
- tb = Tb.new
223
- self.each {|pairs|
224
- pairs.each {|k, v|
225
- unless tb.has_field? k
226
- tb.define_field(k)
227
- end
228
- }
229
- tb.insert pairs
230
- }
231
- tb
232
- end
233
-
234
- def write_to_csv(io, with_header=true)
235
- stream = nil
236
- header = []
237
- fgen = fnew = nil
238
- self.with_cumulative_header {|header0|
239
- if !with_header
240
- stream = true
241
- elsif header0
242
- stream = true
243
- io.puts Tb.csv_encode_row(header0)
244
- else
245
- stream = false
246
- fgen, fnew = Tb::FileEnumerator.gen_new
247
- end
248
- }.each {|pairs, header1|
249
- pairs = Tb::Pairs.new(pairs) unless pairs.respond_to? :has_key?
250
- header = header1
251
- if stream
252
- fs = header.dup
253
- while !fs.empty? && !pairs.has_key?(fs.last)
254
- fs.pop
255
- end
256
- ary = fs.map {|f| pairs[f] }
257
- io.puts Tb.csv_encode_row(ary)
258
- else
259
- fgen.call Tb::Pairs.new(pairs)
260
- end
261
- }
262
- if !stream
263
- if with_header
264
- io.puts Tb.csv_encode_row(header)
265
- end
266
- fnew.call.each {|pairs|
267
- fs = header.dup
268
- while !fs.empty? && !pairs.has_key?(fs.last)
269
- fs.pop
270
- end
271
- ary = fs.map {|f| pairs[f] }
272
- io.puts Tb.csv_encode_row(ary)
273
- }
274
- end
275
- end
276
-
277
- def extsort_by(opts={}, &cmpvalue_from)
278
- Tb::Enumerator.new {|ty|
279
- header = []
280
- er = Enumerator.new {|y|
281
- self.with_cumulative_header {|header0|
282
- header = header0 if header0
283
- }.each {|pairs, header1|
284
- header = header1
285
- y.yield pairs
286
- }
287
- ty.set_header header
288
- }
289
- er.extsort_by(opts, &cmpvalue_from).each {|pairs|
290
- ty.yield pairs
291
- }
292
- }
293
- end
294
- end