tb 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
@@ -63,37 +63,12 @@ class Tb
63
63
  end
64
64
 
65
65
  class CSVReader
66
- if defined? CSV::Reader
67
- # Ruby 1.8
68
- def initialize(input)
69
- if input.respond_to? :to_str
70
- @csv = CSV::StringReader.new(input)
71
- else
72
- @csv = CSV::IOReader.new(input)
73
- end
74
- @eof = false
75
- end
76
-
77
- def shift
78
- return nil if @eof
79
- ary = @csv.shift
80
- if ary.empty?
81
- ary = nil
82
- @eof = true
83
- elsif ary == [nil]
84
- ary = []
85
- end
86
- ary
87
- end
88
- else
89
- # Ruby 1.9
90
- def initialize(input)
91
- @csv = CSV.new(input)
92
- end
66
+ def initialize(input)
67
+ @csv = CSV.new(input)
68
+ end
93
69
 
94
- def shift
95
- @csv.shift
96
- end
70
+ def shift
71
+ @csv.shift
97
72
  end
98
73
 
99
74
  def each
@@ -106,35 +81,17 @@ class Tb
106
81
 
107
82
  def Tb.csv_stream_output(out)
108
83
  require 'csv'
109
- if defined? CSV::Writer
110
- # Ruby 1.8
111
- CSV::Writer.generate(out) {|csvgen|
112
- yield csvgen
113
- }
114
- else
115
- # Ruby 1.9
116
- gen = Object.new
117
- gen.instance_variable_set(:@out, out)
118
- def gen.<<(ary)
119
- @out << ary.to_csv
120
- end
121
- yield gen
84
+ gen = Object.new
85
+ gen.instance_variable_set(:@out, out)
86
+ def gen.<<(ary)
87
+ @out << ary.to_csv
122
88
  end
89
+ yield gen
123
90
  end
124
91
 
125
92
  def Tb.csv_encode_row(ary)
126
93
  require 'csv'
127
- if defined? CSV::Writer
128
- # Ruby 1.8
129
- out = ''
130
- CSV::Writer.generate(out) {|csvgen|
131
- csvgen << ary
132
- }
133
- out
134
- else
135
- # Ruby 1.9
136
- ary.to_csv
137
- end
94
+ ary.to_csv
138
95
  end
139
96
 
140
97
  # :call-seq:
@@ -0,0 +1,41 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ class Tb::CustomCmp
30
+ include Comparable
31
+
32
+ def initialize(customcmp_object, &cmp)
33
+ @customcmp_object = customcmp_object
34
+ @cmp = cmp
35
+ end
36
+ attr_reader :customcmp_object, :cmp
37
+
38
+ def <=> other
39
+ @cmp.call(@customcmp_object, other.customcmp_object)
40
+ end
41
+ end
@@ -0,0 +1,41 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ class Tb::CustomEq
30
+ include Comparable
31
+
32
+ def initialize(customeq_object, &eq)
33
+ @customeq_object = customeq_object
34
+ @eq = eq
35
+ end
36
+ attr_reader :customeq_object, :eq
37
+
38
+ def ==(other)
39
+ @eq.call(@customeq_object, other.customeq_object)
40
+ end
41
+ end
@@ -1,474 +1,264 @@
1
- # lib/tb/enumerable.rb - extensions for Enumerable
2
- #
3
- # Copyright (C) 2010-2012 Tanaka Akira <akr@fsij.org>
4
- #
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
5
3
  # Redistribution and use in source and binary forms, with or without
6
- # modification, are permitted provided that the following conditions are met:
7
- #
8
- # 1. Redistributions of source code must retain the above copyright notice, this
9
- # list of conditions and the following disclaimer.
10
- # 2. Redistributions in binary form must reproduce the above copyright notice,
11
- # this list of conditions and the following disclaimer in the documentation
12
- # and/or other materials provided with the distribution.
13
- # 3. The name of the author may not be used to endorse or promote products
14
- # derived from this software without specific prior written permission.
15
- #
16
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
- # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
- # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
- # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
- # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
- # OF SUCH DAMAGE.
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
28
 
27
- module Enumerable
28
- # :call-seq:
29
- # enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts])
30
- # enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts]) {|ks, vs| ... }
31
- #
32
- # categorizes the elements in _enum_ and returns a hash.
33
- # This method assumes multiple elements for a category.
34
- #
35
- # +tb_categorize+ takes one or more key selectors,
36
- # one value selector and
37
- # an optional option hash.
38
- # It also takes an optional block.
39
- #
40
- # The selectors specify how to extract a value from an element in _enum_.
41
- #
42
- # The key selectors, _kselN_, are used to extract hash keys from an element.
43
- # If two or more key selectors are specified, the result hash will be nested.
44
- #
45
- # The value selector, _vsel_, is used for the values of innermost hashes.
46
- # By default, all values extracted by _vsel_ from the elements which
47
- # key selectors extracts same value are composed as an array.
48
- # The array is set to the values of the innermost hashes.
49
- # This behavior can be customized by the options: :seed, :op and :update.
50
- #
51
- # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
52
- # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
53
- # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
54
- # p a.tb_categorize(:color, :fruit)
55
- # #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
56
- # p a.tb_categorize(:taste, :fruit)
57
- # #=> {"sweet"=>["banana", "melon"], "tart"=>["grapefruit"]}
58
- # p a.tb_categorize(:taste, :color, :fruit)
59
- # #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]}, "tart"=>{"yellow"=>["grapefruit"]}}
60
- # p a.tb_categorize(:taste, :color)
61
- # #=> {"sweet"=>["yellow", "green"], "tart"=>["yellow"]}
62
- #
63
- # In the above example, :fruit, :color and :taste is specified as selectors.
64
- # There are several types of selectors as follows:
65
- #
66
- # - object with +call+ method (procedure, etc.): extracts a value from the element by calling the procedure with the element as an argument.
67
- # - array of selectors: make an array which contains the values extracted by the selectors.
68
- # - other object: extracts a value from the element using +[]+ method as +element[selector]+.
69
- #
70
- # So the selector :fruit extracts the value from the element
71
- # {:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100}
72
- # as {...}[:fruit].
73
- #
74
- # p a.tb_categorize(lambda {|elt| elt[:fruit][4] }, :fruit)
75
- # #=> {"n"=>["banana", "melon"], "e"=>["grapefruit"]}
76
- #
77
- # When the key selectors returns same key for two or or more elements,
78
- # corresponding values extracted by the value selector are combined.
79
- # By default, all values are collected as an array.
80
- # :seed, :op and :update option in the option hash customizes this behavior.
81
- # :seed option and :op option is similar to Enumerable#inject.
82
- # :seed option specifies an initial value.
83
- # (If :seed option is not given, the first value for each category is treated as an initial value.)
84
- # :op option specifies a procedure to combine a seed and an element into a next seed.
85
- # :update option is same as :op option except it takes three arguments instead of two:
86
- # keys, seed and element.
87
- # +to_proc+ method is used to convert :op and :update option to a procedure.
88
- # So a symbol can be used for them.
89
- #
90
- # # count categorized elements.
91
- # p a.tb_categorize(:color, lambda {|e| 1 }, :op=>:+)
92
- # #=> {"yellow"=>2, "green"=>1}
93
- #
94
- # p a.tb_categorize(:color, :fruit, :seed=>"", :op=>:+)
95
- # #=> {"yellow"=>"bananagrapefruit", "green"=>"melon"}
96
- #
97
- # The default behavior, collecting all values as an array, is implemented as follows.
98
- # :seed => nil
99
- # :update => {|ks, s, v| !s ? [v] : (s << v) }
100
- #
101
- # :op and :update option are disjoint.
102
- # ArgumentError is raised if both are specified.
103
- #
104
- # The block for +tb_categorize+ method converts combined values to final innermost hash values.
105
- #
106
- # p a.tb_categorize(:color, :fruit) {|ks, vs| vs.join(",") }
107
- # #=> {"yellow"=>"banana,grapefruit", "green"=>"melon"}
108
- #
109
- # # calculates the average price for fruits of each color.
110
- # p a.tb_categorize(:color, :price) {|ks, vs| vs.inject(0.0, &:+) / vs.length }
111
- # #=> {"yellow"=>150.0, "green"=>300.0}
112
- #
113
- def tb_categorize(*args, &reduce_proc)
114
- opts = args.last.kind_of?(Hash) ? args.pop : {}
115
- if args.length < 2
116
- raise ArgumentError, "needs 2 or more arguments without option hash (but #{args.length})"
117
- end
118
- value_selector = tb_cat_selector_proc(args.pop)
119
- key_selectors = args.map {|a| tb_cat_selector_proc(a) }
120
- has_seed = opts.has_key? :seed
121
- seed_value = opts[:seed]
122
- if opts.has_key?(:update) && opts.has_key?(:op)
123
- raise ArgumentError, "both :op and :update option specified"
124
- elsif opts.has_key? :update
125
- update_proc = opts[:update].to_proc
126
- elsif opts.has_key? :op
127
- op_proc = opts[:op].to_proc
128
- update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
129
- else
130
- has_seed = true
131
- seed_value = nil
132
- update_proc = lambda {|ks, s, v| !s ? [v] : (s << v) }
133
- end
134
- result = {}
135
- each {|*elts|
136
- elt = elts.length <= 1 ? elts[0] : elts
137
- ks = key_selectors.map {|ksel| ksel.call(elt) }
138
- v = value_selector.call(elt)
139
- h = result
140
- 0.upto(ks.length-2) {|i|
141
- k = ks[i]
142
- h[k] = {} if !h.has_key?(k)
143
- h = h[k]
29
+ module Tb::Enumerable
30
+ include Enumerable
31
+
32
+ def with_header(&header_proc)
33
+ Enumerator.new {|y|
34
+ header_and_each(header_proc) {|pairs|
35
+ y.yield pairs
144
36
  }
145
- lastk = ks.last
146
- if !h.has_key?(lastk)
147
- if has_seed
148
- h[lastk] = update_proc.call(ks, seed_value, v)
149
- else
150
- h[lastk] = v
151
- end
152
- else
153
- h[lastk] = update_proc.call(ks, h[lastk], v)
154
- end
155
37
  }
156
- if reduce_proc
157
- tb_cat_reduce(result, [], key_selectors.length-1, reduce_proc)
158
- end
159
- result
160
38
  end
161
39
 
162
- def tb_cat_selector_proc(selector)
163
- if selector.respond_to?(:call)
164
- selector
165
- elsif selector.respond_to? :to_ary
166
- selector_procs = selector.to_ary.map {|sel| tb_cat_selector_proc(sel) }
167
- lambda {|elt| selector_procs.map {|selproc| selproc.call(elt) } }
168
- else
169
- lambda {|elt| elt[selector] }
170
- end
40
+ def with_cumulative_header(&header_proc)
41
+ Enumerator.new {|y|
42
+ hset = {}
43
+ internal_header_proc = lambda {|header0|
44
+ if header0
45
+ header0.each {|f|
46
+ hset[f] = true
47
+ }
48
+ end
49
+ header_proc.call(header0) if header_proc
50
+ }
51
+ header_and_each(internal_header_proc) {|pairs|
52
+ pairs.each {|f, v|
53
+ if !hset[f]
54
+ hset[f] = true
55
+ end
56
+ }
57
+ y.yield [pairs, hset.keys.freeze]
58
+ }
59
+ }
171
60
  end
172
- private :tb_cat_selector_proc
173
61
 
174
- def tb_cat_reduce(hash, ks, nestlevel, reduce_proc)
175
- if nestlevel.zero?
176
- hash.each {|k, v|
177
- ks << k
178
- begin
179
- hash[k] = reduce_proc.call(ks.dup, v)
180
- ensure
181
- ks.pop
62
+ def cat(*ers, &b)
63
+ ers = [self, *ers]
64
+ rec = lambda {|y, header|
65
+ if ers.empty?
66
+ if header
67
+ y.set_header header
182
68
  end
183
- }
69
+ else
70
+ last_e = ers.pop
71
+ last_e.with_header {|last_e_header|
72
+ if last_e_header && header
73
+ header = last_e_header | header
74
+ else
75
+ header = nil
76
+ end
77
+ rec.call(y, header)
78
+ }.each {|v|
79
+ y.yield v
80
+ }
81
+ end
82
+ }
83
+ er = Tb::Enumerator.new {|y|
84
+ rec.call(y, [])
85
+ }
86
+ if block_given?
87
+ er.each(&b)
184
88
  else
185
- hash.each {|k, h|
186
- ks << k
187
- begin
188
- tb_cat_reduce(h, ks, nestlevel-1, reduce_proc)
189
- ensure
190
- ks.pop
191
- end
192
- }
89
+ er
193
90
  end
194
91
  end
195
- private :tb_cat_reduce
196
92
 
197
- # :call-seq:
198
- # enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) -> hash
199
- # enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) {|s, v| ... } -> hash
200
- #
201
- # categorizes the elements in _enum_ and returns a hash.
202
- # This method assumes one element for a category by default.
203
- #
204
- # +tb_unique_categorize+ takes one or more key selectors,
205
- # one value selector and
206
- # an optional option hash.
207
- # It also takes an optional block.
208
- #
209
- # The selectors specify how to extract a value from an element in _enum_.
210
- # See Enumerable#tb_categorize for details of selectors.
211
- #
212
- # The key selectors, _kselN_, are used to extract hash keys from an element.
213
- # If two or more key selectors are specified, the result hash will be nested.
214
- #
215
- # The value selector, _vsel_, is used for the values of innermost hashes.
216
- # By default, this method assumes the key selectors categorizes elements in enum uniquely.
217
- # If the key selectors generates same keys for two or more elements, ArgumentError is raised.
218
- # This behavior can be customized by :seed option and the block.
219
- #
220
- # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
221
- # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
222
- # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
223
- # p a.tb_unique_categorize(:fruit, :price)
224
- # #=> {"banana"=>100, "melon"=>300, "grapefruit"=>200}
225
- #
226
- # p a.tb_unique_categorize(:color, :price)
227
- # # ArgumentError
228
- #
229
- # If the block is given, it is used for combining values in a category.
230
- # The arguments for the block is a seed and the value extracted by _vsel_.
231
- # The return value of the block is used as the next seed.
232
- # :seed option specifies the initial seed.
233
- # If :seed is not given, the first value for each category is used for the seed.
234
- #
235
- # p a.tb_unique_categorize(:taste, :price) {|s, v| s + v }
236
- # #=> {"sweet"=>400, "tart"=>200}
237
- #
238
- # p a.tb_unique_categorize(:color, :price) {|s, v| s + v }
239
- # #=> {"yellow"=>300, "green"=>300}
240
- #
241
- def tb_unique_categorize(*args, &update_proc)
242
- opts = args.last.kind_of?(Hash) ? args.pop.dup : {}
243
- if update_proc
244
- opts[:update] = lambda {|ks, s, v| update_proc.call(s, v) }
245
- else
246
- seed = Object.new
247
- opts[:seed] = seed
248
- opts[:update] = lambda {|ks, s, v|
249
- if s.equal? seed
250
- v
251
- else
252
- raise ArgumentError, "ambiguous key: #{ks.map {|k| k.inspect }.join(',')}"
93
+ # creates a new Tb::Enumerator object which has
94
+ # a new field named _field_ whose value is returned by the block.
95
+ #
96
+ # t1 = Tb.new %w[a b], [1, 2], [3, 4]
97
+ # p t1.newfield("x") {|row| row["a"] + row["b"] + 100 }.to_a
98
+ # #=> [{"x"=>103, "a"=>1, "b"=>2},
99
+ # # {"x"=>107, "a"=>3, "b"=>4}]
100
+ #
101
+ def newfield(field)
102
+ Tb::Enumerator.new {|y|
103
+ self.with_header {|header|
104
+ if header
105
+ y.set_header(Tb::FieldSet.normalize([field, *header]))
253
106
  end
107
+ }.each {|row|
108
+ keys = row.keys
109
+ keys = Tb::FieldSet.normalize([field, *keys])
110
+ vals = row.values
111
+ vals = [yield(row), *vals]
112
+ y << Hash[keys.zip(vals)]
254
113
  }
255
- end
256
- tb_categorize(*(args + [opts]))
114
+ }
257
115
  end
258
116
 
259
117
  # :call-seq:
260
- # enum.tb_category_count(ksel1, ksel2, ...)
261
- #
262
- # counts elements in _enum_ for each category defined by the key selectors.
263
- #
264
- # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
265
- # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
266
- # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
267
- #
268
- # p a.tb_category_count(:color)
269
- # #=> {"yellow"=>2, "green"=>1}
270
- #
271
- # p a.tb_category_count(:taste)
272
- # #=> {"sweet"=>2, "tart"=>1}
273
- #
274
- # p a.tb_category_count(:taste, :color)
275
- # #=> {"sweet"=>{"yellow"=>1, "green"=>1}, "tart"=>{"yellow"=>1}}
276
- #
277
- # The selectors specify how to extract a value from an element in _enum_.
278
- # See Enumerable#tb_categorize for details of selectors.
279
- #
280
- def tb_category_count(*args)
281
- tb_categorize(*(args + [lambda {|e| 1 }, {:update => lambda {|ks, s, v| s + v }}]))
282
- end
283
-
284
- def dump_objsfile(title, tempfile)
285
- tempfile.flush
286
- path = tempfile
287
- a = []
288
- open(path) {|f|
289
- until f.eof?
290
- pair = Marshal.load(f)
291
- a << (pair ? pair.last : :sep)
292
- end
118
+ # table1.natjoin2(table2, missing_value=nil, retain_left=false, retain_right=false)
119
+ def natjoin2(tbl2, missing_value=nil, retain_left=false, retain_right=false)
120
+ Tb::Enumerator.new {|y|
121
+ tbl1 = self
122
+ header1 = header2 = nil
123
+ sorted_tbl2 = nil
124
+ common_header = nil
125
+ total_header = nil
126
+ sorted_tbl1 = tbl1.with_header {|h1|
127
+ header1 = h1
128
+ sorted_tbl2 = tbl2.with_header {|h2|
129
+ header2 = h2
130
+ common_header = header1 & header2
131
+ total_header = header1 | header2
132
+ y.set_header total_header
133
+ }.lazy_map {|pairs|
134
+ [common_header.map {|f| pairs[f] }, pairs]
135
+ }.extsort_by {|cv, pairs| cv }.to_fileenumerator
136
+ }.lazy_map {|pairs|
137
+ [common_header.map {|f| pairs[f] }, pairs]
138
+ }.extsort_by {|cv, pairs| cv }.to_fileenumerator
139
+ sorted_tbl1.open_reader {|t1|
140
+ sorted_tbl2.open_reader {|t2|
141
+ missing_hash = {}
142
+ total_header.each {|f|
143
+ missing_hash[f] = missing_value
144
+ }
145
+ Tb::ExEnumerator.merge_sorted(t1, t2) {|cv, t1_or_nil, t2_or_nil|
146
+ if !t2_or_nil
147
+ t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
148
+ if retain_left
149
+ y.yield missing_hash.merge(_pairs1.to_hash)
150
+ end
151
+ }
152
+ elsif !t1_or_nil
153
+ t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
154
+ if retain_right
155
+ y.yield missing_hash.merge(_pairs2.to_hash)
156
+ end
157
+ }
158
+ else # t1_or_nil && t2_or_nil
159
+ t2_pos = t2.pos
160
+ t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
161
+ t2.pos = t2_pos
162
+ t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
163
+ y.yield(_pairs2.to_hash.merge(_pairs1.to_hash))
164
+ }
165
+ }
166
+ end
167
+ }
168
+ }
169
+ }
293
170
  }
294
- puts "#{title}: #{a.inspect}"
295
171
  end
296
- private :dump_objsfile
297
172
 
298
- def extsort_by(opts={}, &cmpvalue_from)
299
- memsize = opts[:memsize] || 10000000
300
- Enumerator.new {|y|
301
- extsort_by_internal(memsize, cmpvalue_from, y)
302
- }
173
+ # :call-seq:
174
+ # table1.natjoin2_outer(table2, missing_value=nil, retain_left=true, retain_right=true)
175
+ def natjoin2_outer(tbl2, missing_value=nil, retain_left=true, retain_right=true)
176
+ natjoin2(tbl2, missing_value, retain_left, retain_right)
303
177
  end
304
178
 
305
- def extsort_by_internal(memsize, cmpvalue_from, y)
306
- tmp1 = Tempfile.new("tbsortA")
307
- tmp2 = Tempfile.new("tbsortB")
308
- extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
309
- if tmp1.size == 0 && tmp2.size == 0
310
- return Enumerator.new {|_| }
311
- end
312
- tmp3 = Tempfile.new("tbsortC")
313
- tmp4 = Tempfile.new("tbsortD")
314
- while tmp2.size != 0
315
- #dump_objsfile(:tmp1, tmp1)
316
- #dump_objsfile(:tmp2, tmp2)
317
- #dump_objsfile(:tmp3, tmp3)
318
- #dump_objsfile(:tmp4, tmp4)
319
- extsort_by_merge(tmp1, tmp2, tmp3, tmp4)
320
- tmp1.rewind
321
- tmp1.truncate(0)
322
- tmp2.rewind
323
- tmp2.truncate(0)
324
- tmp1, tmp2, tmp3, tmp4 = tmp3, tmp4, tmp1, tmp2
325
- end
326
- #dump_objsfile(:tmp1, tmp1)
327
- #dump_objsfile(:tmp2, tmp2)
328
- #dump_objsfile(:tmp3, tmp3)
329
- #dump_objsfile(:tmp4, tmp4)
330
- extsort_by_strip_cv(tmp1, y)
331
- ensure
332
- tmp1.close(true) if tmp1
333
- tmp2.close(true) if tmp2
334
- tmp3.close(true) if tmp3
335
- tmp4.close(true) if tmp4
179
+ def to_tb
180
+ tb = Tb.new
181
+ self.each {|pairs|
182
+ pairs.each {|k, v|
183
+ unless tb.has_field? k
184
+ tb.define_field(k)
185
+ end
186
+ }
187
+ tb.insert pairs
188
+ }
189
+ tb
336
190
  end
337
- private :extsort_by_internal
338
191
 
339
- def extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
340
- prevobj_cv = nil
341
- tmp_current, tmp_another = tmp1, tmp2
342
- buf = []
343
- buf_size = 0
344
- buf_mode = true
345
- self.each_with_index {|obj, i|
346
- obj_cv = cmpvalue_from.call(obj)
347
- #p [obj, obj_cv]
348
- #p [prevobj_cv, buf_mode, obj, obj_cv]
349
- if buf_mode
350
- dumped = Marshal.dump([obj_cv, obj])
351
- buf << [obj_cv, i, dumped]
352
- buf_size += dumped.size
353
- if memsize < buf_size
354
- buf.sort!
355
- buf.each {|_, _, d|
356
- tmp_current.write d
357
- }
358
- prevobj_cv, = buf.last
359
- buf.clear
360
- buf_mode = false
192
+ def write_to_csv(io, with_header=true)
193
+ stream = nil
194
+ header = []
195
+ fgen = fnew = nil
196
+ self.with_cumulative_header {|header0|
197
+ if !with_header
198
+ stream = true
199
+ elsif header0
200
+ stream = true
201
+ io.puts Tb.csv_encode_row(header0)
202
+ else
203
+ stream = false
204
+ fgen, fnew = Tb::FileEnumerator.gen_new
205
+ end
206
+ }.each {|pairs, header1|
207
+ pairs = Hash[pairs] unless pairs.respond_to? :has_key?
208
+ header = header1
209
+ if stream
210
+ fs = header.dup
211
+ while !fs.empty? && !pairs.has_key?(fs.last)
212
+ fs.pop
361
213
  end
362
- elsif prevobj_cv <= obj_cv
363
- Marshal.dump([obj_cv, obj], tmp_current)
364
- prevobj_cv = obj_cv
214
+ ary = fs.map {|f| pairs[f] }
215
+ io.puts Tb.csv_encode_row(ary)
365
216
  else
366
- dumped = Marshal.dump([obj_cv, obj])
367
- Marshal.dump(nil, tmp_current)
368
- buf = [[obj_cv, i, dumped]]
369
- buf_size = dumped.size
370
- buf_mode = true
371
- tmp_current, tmp_another = tmp_another, tmp_current
217
+ fgen.call Hash[pairs]
372
218
  end
373
219
  }
374
- if buf_mode
375
- buf.sort!
376
- buf.each {|_, _, d|
377
- tmp_current.write d
378
- }
379
- end
380
- if !buf_mode || !buf.empty?
381
- Marshal.dump(nil, tmp_current)
382
- end
383
- end
384
- private :extsort_by_first_split
385
-
386
- def extsort_by_merge(src1, src2, dst1, dst2)
387
- src1.rewind
388
- src2.rewind
389
- obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
390
- obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
391
- prefer1 = true
392
- while true
393
- cmp = obj1_cv <=> obj2_cv
394
- if prefer1 ? cmp > 0 : cmp >= 0
395
- obj1_pair, obj1_cv, obj1, src1, obj2_pair, obj2_cv, obj2, src2 = obj2_pair, obj2_cv, obj2, src2, obj1_pair, obj1_cv, obj1, src1
396
- prefer1 = !prefer1
397
- end
398
- Marshal.dump([obj1_cv, obj1], dst1)
399
- obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
400
- if !obj1_pair
401
- begin
402
- Marshal.dump(obj2_pair, dst1)
403
- obj2_pair = Marshal.load(src2)
404
- end until !obj2_pair
405
- Marshal.dump(nil, dst1)
406
- dst1, dst2 = dst2, dst1
407
- break if src1.eof?
408
- break if src2.eof?
409
- obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
410
- obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
220
+ if !stream
221
+ if with_header
222
+ io.puts Tb.csv_encode_row(header)
411
223
  end
412
- end
413
- if !src1.eof?
414
- restsrc = src1
415
- elsif !src2.eof?
416
- restsrc = src2
417
- else
418
- return
419
- end
420
- until restsrc.eof?
421
- restobj_pair = Marshal.load(restsrc)
422
- Marshal.dump(restobj_pair, dst1)
423
- end
424
- end
425
- private :extsort_by_merge
426
-
427
- def extsort_by_strip_cv(tmp1, y)
428
- tmp1.rewind
429
- while true
430
- pair = Marshal.load(tmp1)
431
- break if !pair
432
- _, obj = pair
433
- y.yield obj
224
+ fnew.call.each {|pairs|
225
+ fs = header.dup
226
+ while !fs.empty? && !pairs.has_key?(fs.last)
227
+ fs.pop
228
+ end
229
+ ary = fs.map {|f| pairs[f] }
230
+ io.puts Tb.csv_encode_row(ary)
231
+ }
434
232
  end
435
233
  end
436
- private :extsort_by_strip_cv
437
234
 
438
- # splits self by _boundary_p_ which is called with adjacent two elements.
439
- #
440
- # _before_group_ is called before each group with the first element.
441
- # _after_group_ is called after each group with the last element.
442
- # _body_ is called for each element.
443
- #
444
- def each_group_element(boundary_p, before_group, body, after_group)
445
- prev = nil
446
- first = true
447
- self.each {|curr|
448
- if first
449
- before_group.call(curr)
450
- body.call(curr)
451
- prev = curr
452
- first = false
453
- elsif boundary_p.call(prev, curr)
454
- after_group.call(prev)
455
- before_group.call(curr)
456
- body.call(curr)
457
- prev = curr
458
- else
459
- body.call(curr)
460
- prev = curr
461
- end
235
+ def write_to_json(out)
236
+ require 'json'
237
+ out.print "["
238
+ sep = nil
239
+ self.each {|pairs|
240
+ out.print sep if sep
241
+ out.print JSON.pretty_generate(Hash[pairs.to_a])
242
+ sep = ",\n"
462
243
  }
463
- if !first
464
- after_group.call(prev)
465
- end
244
+ out.puts "]"
245
+ nil
466
246
  end
467
247
 
468
- def lazy_map
469
- Enumerator.new {|y|
470
- self.each {|*vs|
471
- y.yield(yield(*vs))
248
+ def extsort_by(opts={}, &cmpvalue_from)
249
+ Tb::Enumerator.new {|ty|
250
+ header = []
251
+ er = Enumerator.new {|y|
252
+ self.with_cumulative_header {|header0|
253
+ header = header0 if header0
254
+ }.each {|pairs, header1|
255
+ header = header1
256
+ y.yield pairs
257
+ }
258
+ ty.set_header header
259
+ }
260
+ er.extsort_by(opts, &cmpvalue_from).each {|pairs|
261
+ ty.yield pairs
472
262
  }
473
263
  }
474
264
  end