tb 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/README +2 -1
  2. data/lib/tb.rb +7 -3
  3. data/lib/tb/basic.rb +1 -1
  4. data/lib/tb/cmd_cat.rb +1 -3
  5. data/lib/tb/cmd_consecutive.rb +4 -6
  6. data/lib/tb/cmd_crop.rb +5 -7
  7. data/lib/tb/cmd_cross.rb +51 -49
  8. data/lib/tb/cmd_cut.rb +2 -6
  9. data/lib/tb/cmd_git_log.rb +20 -11
  10. data/lib/tb/cmd_grep.rb +1 -3
  11. data/lib/tb/cmd_group.rb +18 -44
  12. data/lib/tb/cmd_gsub.rb +2 -4
  13. data/lib/tb/cmd_join.rb +1 -3
  14. data/lib/tb/cmd_ls.rb +8 -15
  15. data/lib/tb/cmd_mheader.rb +3 -4
  16. data/lib/tb/cmd_nest.rb +4 -9
  17. data/lib/tb/cmd_newfield.rb +1 -3
  18. data/lib/tb/cmd_rename.rb +2 -4
  19. data/lib/tb/cmd_shape.rb +2 -3
  20. data/lib/tb/cmd_sort.rb +3 -5
  21. data/lib/tb/cmd_svn_log.rb +3 -5
  22. data/lib/tb/cmd_tar_tvf.rb +2 -4
  23. data/lib/tb/cmd_to_csv.rb +1 -1
  24. data/lib/tb/cmd_unnest.rb +1 -3
  25. data/lib/tb/cmdutil.rb +57 -135
  26. data/lib/tb/csv.rb +11 -54
  27. data/lib/tb/customcmp.rb +41 -0
  28. data/lib/tb/customeq.rb +41 -0
  29. data/lib/tb/enumerable.rb +225 -435
  30. data/lib/tb/enumerator.rb +22 -14
  31. data/lib/tb/ex_enumerable.rb +659 -0
  32. data/lib/tb/ex_enumerator.rb +102 -0
  33. data/lib/tb/fileenumerator.rb +2 -2
  34. data/lib/tb/func.rb +141 -0
  35. data/lib/tb/json.rb +1 -1
  36. data/lib/tb/reader.rb +4 -4
  37. data/lib/tb/search.rb +2 -4
  38. data/lib/tb/zipper.rb +60 -0
  39. data/test/test_cmd_cat.rb +40 -0
  40. data/test/test_cmd_git_log.rb +116 -0
  41. data/test/test_cmd_ls.rb +90 -0
  42. data/test/test_cmd_svn_log.rb +87 -0
  43. data/test/test_cmd_to_csv.rb +14 -0
  44. data/test/test_cmdutil.rb +25 -10
  45. data/test/test_csv.rb +10 -0
  46. data/test/test_customcmp.rb +14 -0
  47. data/test/test_customeq.rb +20 -0
  48. data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
  49. data/test/test_search.rb +2 -10
  50. data/test/test_tbenum.rb +3 -3
  51. data/test/test_zipper.rb +22 -0
  52. metadata +20 -8
  53. data/lib/tb/enum.rb +0 -294
  54. data/lib/tb/pairs.rb +0 -227
  55. data/test/test_pairs.rb +0 -122
@@ -63,37 +63,12 @@ class Tb
63
63
  end
64
64
 
65
65
  class CSVReader
66
- if defined? CSV::Reader
67
- # Ruby 1.8
68
- def initialize(input)
69
- if input.respond_to? :to_str
70
- @csv = CSV::StringReader.new(input)
71
- else
72
- @csv = CSV::IOReader.new(input)
73
- end
74
- @eof = false
75
- end
76
-
77
- def shift
78
- return nil if @eof
79
- ary = @csv.shift
80
- if ary.empty?
81
- ary = nil
82
- @eof = true
83
- elsif ary == [nil]
84
- ary = []
85
- end
86
- ary
87
- end
88
- else
89
- # Ruby 1.9
90
- def initialize(input)
91
- @csv = CSV.new(input)
92
- end
66
+ def initialize(input)
67
+ @csv = CSV.new(input)
68
+ end
93
69
 
94
- def shift
95
- @csv.shift
96
- end
70
+ def shift
71
+ @csv.shift
97
72
  end
98
73
 
99
74
  def each
@@ -106,35 +81,17 @@ class Tb
106
81
 
107
82
  def Tb.csv_stream_output(out)
108
83
  require 'csv'
109
- if defined? CSV::Writer
110
- # Ruby 1.8
111
- CSV::Writer.generate(out) {|csvgen|
112
- yield csvgen
113
- }
114
- else
115
- # Ruby 1.9
116
- gen = Object.new
117
- gen.instance_variable_set(:@out, out)
118
- def gen.<<(ary)
119
- @out << ary.to_csv
120
- end
121
- yield gen
84
+ gen = Object.new
85
+ gen.instance_variable_set(:@out, out)
86
+ def gen.<<(ary)
87
+ @out << ary.to_csv
122
88
  end
89
+ yield gen
123
90
  end
124
91
 
125
92
  def Tb.csv_encode_row(ary)
126
93
  require 'csv'
127
- if defined? CSV::Writer
128
- # Ruby 1.8
129
- out = ''
130
- CSV::Writer.generate(out) {|csvgen|
131
- csvgen << ary
132
- }
133
- out
134
- else
135
- # Ruby 1.9
136
- ary.to_csv
137
- end
94
+ ary.to_csv
138
95
  end
139
96
 
140
97
  # :call-seq:
@@ -0,0 +1,41 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ class Tb::CustomCmp
30
+ include Comparable
31
+
32
+ def initialize(customcmp_object, &cmp)
33
+ @customcmp_object = customcmp_object
34
+ @cmp = cmp
35
+ end
36
+ attr_reader :customcmp_object, :cmp
37
+
38
+ def <=> other
39
+ @cmp.call(@customcmp_object, other.customcmp_object)
40
+ end
41
+ end
@@ -0,0 +1,41 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ class Tb::CustomEq
30
+ include Comparable
31
+
32
+ def initialize(customeq_object, &eq)
33
+ @customeq_object = customeq_object
34
+ @eq = eq
35
+ end
36
+ attr_reader :customeq_object, :eq
37
+
38
+ def ==(other)
39
+ @eq.call(@customeq_object, other.customeq_object)
40
+ end
41
+ end
@@ -1,474 +1,264 @@
1
- # lib/tb/enumerable.rb - extensions for Enumerable
2
- #
3
- # Copyright (C) 2010-2012 Tanaka Akira <akr@fsij.org>
4
- #
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
5
3
  # Redistribution and use in source and binary forms, with or without
6
- # modification, are permitted provided that the following conditions are met:
7
- #
8
- # 1. Redistributions of source code must retain the above copyright notice, this
9
- # list of conditions and the following disclaimer.
10
- # 2. Redistributions in binary form must reproduce the above copyright notice,
11
- # this list of conditions and the following disclaimer in the documentation
12
- # and/or other materials provided with the distribution.
13
- # 3. The name of the author may not be used to endorse or promote products
14
- # derived from this software without specific prior written permission.
15
- #
16
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
- # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
- # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
- # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
- # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
- # OF SUCH DAMAGE.
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
28
 
27
- module Enumerable
28
- # :call-seq:
29
- # enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts])
30
- # enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts]) {|ks, vs| ... }
31
- #
32
- # categorizes the elements in _enum_ and returns a hash.
33
- # This method assumes multiple elements for a category.
34
- #
35
- # +tb_categorize+ takes one or more key selectors,
36
- # one value selector and
37
- # an optional option hash.
38
- # It also takes an optional block.
39
- #
40
- # The selectors specify how to extract a value from an element in _enum_.
41
- #
42
- # The key selectors, _kselN_, are used to extract hash keys from an element.
43
- # If two or more key selectors are specified, the result hash will be nested.
44
- #
45
- # The value selector, _vsel_, is used for the values of innermost hashes.
46
- # By default, all values extracted by _vsel_ from the elements which
47
- # key selectors extracts same value are composed as an array.
48
- # The array is set to the values of the innermost hashes.
49
- # This behavior can be customized by the options: :seed, :op and :update.
50
- #
51
- # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
52
- # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
53
- # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
54
- # p a.tb_categorize(:color, :fruit)
55
- # #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
56
- # p a.tb_categorize(:taste, :fruit)
57
- # #=> {"sweet"=>["banana", "melon"], "tart"=>["grapefruit"]}
58
- # p a.tb_categorize(:taste, :color, :fruit)
59
- # #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]}, "tart"=>{"yellow"=>["grapefruit"]}}
60
- # p a.tb_categorize(:taste, :color)
61
- # #=> {"sweet"=>["yellow", "green"], "tart"=>["yellow"]}
62
- #
63
- # In the above example, :fruit, :color and :taste is specified as selectors.
64
- # There are several types of selectors as follows:
65
- #
66
- # - object with +call+ method (procedure, etc.): extracts a value from the element by calling the procedure with the element as an argument.
67
- # - array of selectors: make an array which contains the values extracted by the selectors.
68
- # - other object: extracts a value from the element using +[]+ method as +element[selector]+.
69
- #
70
- # So the selector :fruit extracts the value from the element
71
- # {:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100}
72
- # as {...}[:fruit].
73
- #
74
- # p a.tb_categorize(lambda {|elt| elt[:fruit][4] }, :fruit)
75
- # #=> {"n"=>["banana", "melon"], "e"=>["grapefruit"]}
76
- #
77
- # When the key selectors returns same key for two or or more elements,
78
- # corresponding values extracted by the value selector are combined.
79
- # By default, all values are collected as an array.
80
- # :seed, :op and :update option in the option hash customizes this behavior.
81
- # :seed option and :op option is similar to Enumerable#inject.
82
- # :seed option specifies an initial value.
83
- # (If :seed option is not given, the first value for each category is treated as an initial value.)
84
- # :op option specifies a procedure to combine a seed and an element into a next seed.
85
- # :update option is same as :op option except it takes three arguments instead of two:
86
- # keys, seed and element.
87
- # +to_proc+ method is used to convert :op and :update option to a procedure.
88
- # So a symbol can be used for them.
89
- #
90
- # # count categorized elements.
91
- # p a.tb_categorize(:color, lambda {|e| 1 }, :op=>:+)
92
- # #=> {"yellow"=>2, "green"=>1}
93
- #
94
- # p a.tb_categorize(:color, :fruit, :seed=>"", :op=>:+)
95
- # #=> {"yellow"=>"bananagrapefruit", "green"=>"melon"}
96
- #
97
- # The default behavior, collecting all values as an array, is implemented as follows.
98
- # :seed => nil
99
- # :update => {|ks, s, v| !s ? [v] : (s << v) }
100
- #
101
- # :op and :update option are disjoint.
102
- # ArgumentError is raised if both are specified.
103
- #
104
- # The block for +tb_categorize+ method converts combined values to final innermost hash values.
105
- #
106
- # p a.tb_categorize(:color, :fruit) {|ks, vs| vs.join(",") }
107
- # #=> {"yellow"=>"banana,grapefruit", "green"=>"melon"}
108
- #
109
- # # calculates the average price for fruits of each color.
110
- # p a.tb_categorize(:color, :price) {|ks, vs| vs.inject(0.0, &:+) / vs.length }
111
- # #=> {"yellow"=>150.0, "green"=>300.0}
112
- #
113
- def tb_categorize(*args, &reduce_proc)
114
- opts = args.last.kind_of?(Hash) ? args.pop : {}
115
- if args.length < 2
116
- raise ArgumentError, "needs 2 or more arguments without option hash (but #{args.length})"
117
- end
118
- value_selector = tb_cat_selector_proc(args.pop)
119
- key_selectors = args.map {|a| tb_cat_selector_proc(a) }
120
- has_seed = opts.has_key? :seed
121
- seed_value = opts[:seed]
122
- if opts.has_key?(:update) && opts.has_key?(:op)
123
- raise ArgumentError, "both :op and :update option specified"
124
- elsif opts.has_key? :update
125
- update_proc = opts[:update].to_proc
126
- elsif opts.has_key? :op
127
- op_proc = opts[:op].to_proc
128
- update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
129
- else
130
- has_seed = true
131
- seed_value = nil
132
- update_proc = lambda {|ks, s, v| !s ? [v] : (s << v) }
133
- end
134
- result = {}
135
- each {|*elts|
136
- elt = elts.length <= 1 ? elts[0] : elts
137
- ks = key_selectors.map {|ksel| ksel.call(elt) }
138
- v = value_selector.call(elt)
139
- h = result
140
- 0.upto(ks.length-2) {|i|
141
- k = ks[i]
142
- h[k] = {} if !h.has_key?(k)
143
- h = h[k]
29
+ module Tb::Enumerable
30
+ include Enumerable
31
+
32
+ def with_header(&header_proc)
33
+ Enumerator.new {|y|
34
+ header_and_each(header_proc) {|pairs|
35
+ y.yield pairs
144
36
  }
145
- lastk = ks.last
146
- if !h.has_key?(lastk)
147
- if has_seed
148
- h[lastk] = update_proc.call(ks, seed_value, v)
149
- else
150
- h[lastk] = v
151
- end
152
- else
153
- h[lastk] = update_proc.call(ks, h[lastk], v)
154
- end
155
37
  }
156
- if reduce_proc
157
- tb_cat_reduce(result, [], key_selectors.length-1, reduce_proc)
158
- end
159
- result
160
38
  end
161
39
 
162
- def tb_cat_selector_proc(selector)
163
- if selector.respond_to?(:call)
164
- selector
165
- elsif selector.respond_to? :to_ary
166
- selector_procs = selector.to_ary.map {|sel| tb_cat_selector_proc(sel) }
167
- lambda {|elt| selector_procs.map {|selproc| selproc.call(elt) } }
168
- else
169
- lambda {|elt| elt[selector] }
170
- end
40
+ def with_cumulative_header(&header_proc)
41
+ Enumerator.new {|y|
42
+ hset = {}
43
+ internal_header_proc = lambda {|header0|
44
+ if header0
45
+ header0.each {|f|
46
+ hset[f] = true
47
+ }
48
+ end
49
+ header_proc.call(header0) if header_proc
50
+ }
51
+ header_and_each(internal_header_proc) {|pairs|
52
+ pairs.each {|f, v|
53
+ if !hset[f]
54
+ hset[f] = true
55
+ end
56
+ }
57
+ y.yield [pairs, hset.keys.freeze]
58
+ }
59
+ }
171
60
  end
172
- private :tb_cat_selector_proc
173
61
 
174
- def tb_cat_reduce(hash, ks, nestlevel, reduce_proc)
175
- if nestlevel.zero?
176
- hash.each {|k, v|
177
- ks << k
178
- begin
179
- hash[k] = reduce_proc.call(ks.dup, v)
180
- ensure
181
- ks.pop
62
+ def cat(*ers, &b)
63
+ ers = [self, *ers]
64
+ rec = lambda {|y, header|
65
+ if ers.empty?
66
+ if header
67
+ y.set_header header
182
68
  end
183
- }
69
+ else
70
+ last_e = ers.pop
71
+ last_e.with_header {|last_e_header|
72
+ if last_e_header && header
73
+ header = last_e_header | header
74
+ else
75
+ header = nil
76
+ end
77
+ rec.call(y, header)
78
+ }.each {|v|
79
+ y.yield v
80
+ }
81
+ end
82
+ }
83
+ er = Tb::Enumerator.new {|y|
84
+ rec.call(y, [])
85
+ }
86
+ if block_given?
87
+ er.each(&b)
184
88
  else
185
- hash.each {|k, h|
186
- ks << k
187
- begin
188
- tb_cat_reduce(h, ks, nestlevel-1, reduce_proc)
189
- ensure
190
- ks.pop
191
- end
192
- }
89
+ er
193
90
  end
194
91
  end
195
- private :tb_cat_reduce
196
92
 
197
- # :call-seq:
198
- # enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) -> hash
199
- # enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) {|s, v| ... } -> hash
200
- #
201
- # categorizes the elements in _enum_ and returns a hash.
202
- # This method assumes one element for a category by default.
203
- #
204
- # +tb_unique_categorize+ takes one or more key selectors,
205
- # one value selector and
206
- # an optional option hash.
207
- # It also takes an optional block.
208
- #
209
- # The selectors specify how to extract a value from an element in _enum_.
210
- # See Enumerable#tb_categorize for details of selectors.
211
- #
212
- # The key selectors, _kselN_, are used to extract hash keys from an element.
213
- # If two or more key selectors are specified, the result hash will be nested.
214
- #
215
- # The value selector, _vsel_, is used for the values of innermost hashes.
216
- # By default, this method assumes the key selectors categorizes elements in enum uniquely.
217
- # If the key selectors generates same keys for two or more elements, ArgumentError is raised.
218
- # This behavior can be customized by :seed option and the block.
219
- #
220
- # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
221
- # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
222
- # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
223
- # p a.tb_unique_categorize(:fruit, :price)
224
- # #=> {"banana"=>100, "melon"=>300, "grapefruit"=>200}
225
- #
226
- # p a.tb_unique_categorize(:color, :price)
227
- # # ArgumentError
228
- #
229
- # If the block is given, it is used for combining values in a category.
230
- # The arguments for the block is a seed and the value extracted by _vsel_.
231
- # The return value of the block is used as the next seed.
232
- # :seed option specifies the initial seed.
233
- # If :seed is not given, the first value for each category is used for the seed.
234
- #
235
- # p a.tb_unique_categorize(:taste, :price) {|s, v| s + v }
236
- # #=> {"sweet"=>400, "tart"=>200}
237
- #
238
- # p a.tb_unique_categorize(:color, :price) {|s, v| s + v }
239
- # #=> {"yellow"=>300, "green"=>300}
240
- #
241
- def tb_unique_categorize(*args, &update_proc)
242
- opts = args.last.kind_of?(Hash) ? args.pop.dup : {}
243
- if update_proc
244
- opts[:update] = lambda {|ks, s, v| update_proc.call(s, v) }
245
- else
246
- seed = Object.new
247
- opts[:seed] = seed
248
- opts[:update] = lambda {|ks, s, v|
249
- if s.equal? seed
250
- v
251
- else
252
- raise ArgumentError, "ambiguous key: #{ks.map {|k| k.inspect }.join(',')}"
93
+ # creates a new Tb::Enumerator object which has a
94
+ # new field named by _field_ with the value returned by the block.
95
+ #
96
+ # t1 = Tb.new %w[a b], [1, 2], [3, 4]
97
+ # p t1.newfield("x") {|row| row["a"] + row["b"] + 100 }.to_a
98
+ # #=> [{"x"=>103, "a"=>1, "b"=>2},
99
+ # # {"x"=>107, "a"=>3, "b"=>4}]
100
+ #
101
+ def newfield(field)
102
+ Tb::Enumerator.new {|y|
103
+ self.with_header {|header|
104
+ if header
105
+ y.set_header(Tb::FieldSet.normalize([field, *header]))
253
106
  end
107
+ }.each {|row|
108
+ keys = row.keys
109
+ keys = Tb::FieldSet.normalize([field, *keys])
110
+ vals = row.values
111
+ vals = [yield(row), *vals]
112
+ y << Hash[keys.zip(vals)]
254
113
  }
255
- end
256
- tb_categorize(*(args + [opts]))
114
+ }
257
115
  end
258
116
 
259
117
  # :call-seq:
260
- # enum.tb_category_count(ksel1, ksel2, ...)
261
- #
262
- # counts elements in _enum_ for each category defined by the key selectors.
263
- #
264
- # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
265
- # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
266
- # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
267
- #
268
- # p a.tb_category_count(:color)
269
- # #=> {"yellow"=>2, "green"=>1}
270
- #
271
- # p a.tb_category_count(:taste)
272
- # #=> {"sweet"=>2, "tart"=>1}
273
- #
274
- # p a.tb_category_count(:taste, :color)
275
- # #=> {"sweet"=>{"yellow"=>1, "green"=>1}, "tart"=>{"yellow"=>1}}
276
- #
277
- # The selectors specify how to extract a value from an element in _enum_.
278
- # See Enumerable#tb_categorize for details of selectors.
279
- #
280
- def tb_category_count(*args)
281
- tb_categorize(*(args + [lambda {|e| 1 }, {:update => lambda {|ks, s, v| s + v }}]))
282
- end
283
-
284
- def dump_objsfile(title, tempfile)
285
- tempfile.flush
286
- path = tempfile
287
- a = []
288
- open(path) {|f|
289
- until f.eof?
290
- pair = Marshal.load(f)
291
- a << (pair ? pair.last : :sep)
292
- end
118
+ # table1.natjoin2(table2, missing_value=nil, retain_left=false, retain_right=false)
119
+ def natjoin2(tbl2, missing_value=nil, retain_left=false, retain_right=false)
120
+ Tb::Enumerator.new {|y|
121
+ tbl1 = self
122
+ header1 = header2 = nil
123
+ sorted_tbl2 = nil
124
+ common_header = nil
125
+ total_header = nil
126
+ sorted_tbl1 = tbl1.with_header {|h1|
127
+ header1 = h1
128
+ sorted_tbl2 = tbl2.with_header {|h2|
129
+ header2 = h2
130
+ common_header = header1 & header2
131
+ total_header = header1 | header2
132
+ y.set_header total_header
133
+ }.lazy_map {|pairs|
134
+ [common_header.map {|f| pairs[f] }, pairs]
135
+ }.extsort_by {|cv, pairs| cv }.to_fileenumerator
136
+ }.lazy_map {|pairs|
137
+ [common_header.map {|f| pairs[f] }, pairs]
138
+ }.extsort_by {|cv, pairs| cv }.to_fileenumerator
139
+ sorted_tbl1.open_reader {|t1|
140
+ sorted_tbl2.open_reader {|t2|
141
+ missing_hash = {}
142
+ total_header.each {|f|
143
+ missing_hash[f] = missing_value
144
+ }
145
+ Tb::ExEnumerator.merge_sorted(t1, t2) {|cv, t1_or_nil, t2_or_nil|
146
+ if !t2_or_nil
147
+ t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
148
+ if retain_left
149
+ y.yield missing_hash.merge(_pairs1.to_hash)
150
+ end
151
+ }
152
+ elsif !t1_or_nil
153
+ t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
154
+ if retain_right
155
+ y.yield missing_hash.merge(_pairs2.to_hash)
156
+ end
157
+ }
158
+ else # t1_or_nil && t2_or_nil
159
+ t2_pos = t2.pos
160
+ t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
161
+ t2.pos = t2_pos
162
+ t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
163
+ y.yield(_pairs2.to_hash.merge(_pairs1.to_hash))
164
+ }
165
+ }
166
+ end
167
+ }
168
+ }
169
+ }
293
170
  }
294
- puts "#{title}: #{a.inspect}"
295
171
  end
296
- private :dump_objsfile
297
172
 
298
- def extsort_by(opts={}, &cmpvalue_from)
299
- memsize = opts[:memsize] || 10000000
300
- Enumerator.new {|y|
301
- extsort_by_internal(memsize, cmpvalue_from, y)
302
- }
173
+ # :call-seq:
174
+ # table1.natjoin2_outer(table2, missing_value=nil, retain_left=true, retain_right=true)
175
+ def natjoin2_outer(tbl2, missing_value=nil, retain_left=true, retain_right=true)
176
+ natjoin2(tbl2, missing_value, retain_left, retain_right)
303
177
  end
304
178
 
305
- def extsort_by_internal(memsize, cmpvalue_from, y)
306
- tmp1 = Tempfile.new("tbsortA")
307
- tmp2 = Tempfile.new("tbsortB")
308
- extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
309
- if tmp1.size == 0 && tmp2.size == 0
310
- return Enumerator.new {|_| }
311
- end
312
- tmp3 = Tempfile.new("tbsortC")
313
- tmp4 = Tempfile.new("tbsortD")
314
- while tmp2.size != 0
315
- #dump_objsfile(:tmp1, tmp1)
316
- #dump_objsfile(:tmp2, tmp2)
317
- #dump_objsfile(:tmp3, tmp3)
318
- #dump_objsfile(:tmp4, tmp4)
319
- extsort_by_merge(tmp1, tmp2, tmp3, tmp4)
320
- tmp1.rewind
321
- tmp1.truncate(0)
322
- tmp2.rewind
323
- tmp2.truncate(0)
324
- tmp1, tmp2, tmp3, tmp4 = tmp3, tmp4, tmp1, tmp2
325
- end
326
- #dump_objsfile(:tmp1, tmp1)
327
- #dump_objsfile(:tmp2, tmp2)
328
- #dump_objsfile(:tmp3, tmp3)
329
- #dump_objsfile(:tmp4, tmp4)
330
- extsort_by_strip_cv(tmp1, y)
331
- ensure
332
- tmp1.close(true) if tmp1
333
- tmp2.close(true) if tmp2
334
- tmp3.close(true) if tmp3
335
- tmp4.close(true) if tmp4
179
+ def to_tb
180
+ tb = Tb.new
181
+ self.each {|pairs|
182
+ pairs.each {|k, v|
183
+ unless tb.has_field? k
184
+ tb.define_field(k)
185
+ end
186
+ }
187
+ tb.insert pairs
188
+ }
189
+ tb
336
190
  end
337
- private :extsort_by_internal
338
191
 
339
- def extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
340
- prevobj_cv = nil
341
- tmp_current, tmp_another = tmp1, tmp2
342
- buf = []
343
- buf_size = 0
344
- buf_mode = true
345
- self.each_with_index {|obj, i|
346
- obj_cv = cmpvalue_from.call(obj)
347
- #p [obj, obj_cv]
348
- #p [prevobj_cv, buf_mode, obj, obj_cv]
349
- if buf_mode
350
- dumped = Marshal.dump([obj_cv, obj])
351
- buf << [obj_cv, i, dumped]
352
- buf_size += dumped.size
353
- if memsize < buf_size
354
- buf.sort!
355
- buf.each {|_, _, d|
356
- tmp_current.write d
357
- }
358
- prevobj_cv, = buf.last
359
- buf.clear
360
- buf_mode = false
192
+ def write_to_csv(io, with_header=true)
193
+ stream = nil
194
+ header = []
195
+ fgen = fnew = nil
196
+ self.with_cumulative_header {|header0|
197
+ if !with_header
198
+ stream = true
199
+ elsif header0
200
+ stream = true
201
+ io.puts Tb.csv_encode_row(header0)
202
+ else
203
+ stream = false
204
+ fgen, fnew = Tb::FileEnumerator.gen_new
205
+ end
206
+ }.each {|pairs, header1|
207
+ pairs = Hash[pairs] unless pairs.respond_to? :has_key?
208
+ header = header1
209
+ if stream
210
+ fs = header.dup
211
+ while !fs.empty? && !pairs.has_key?(fs.last)
212
+ fs.pop
361
213
  end
362
- elsif prevobj_cv <= obj_cv
363
- Marshal.dump([obj_cv, obj], tmp_current)
364
- prevobj_cv = obj_cv
214
+ ary = fs.map {|f| pairs[f] }
215
+ io.puts Tb.csv_encode_row(ary)
365
216
  else
366
- dumped = Marshal.dump([obj_cv, obj])
367
- Marshal.dump(nil, tmp_current)
368
- buf = [[obj_cv, i, dumped]]
369
- buf_size = dumped.size
370
- buf_mode = true
371
- tmp_current, tmp_another = tmp_another, tmp_current
217
+ fgen.call Hash[pairs]
372
218
  end
373
219
  }
374
- if buf_mode
375
- buf.sort!
376
- buf.each {|_, _, d|
377
- tmp_current.write d
378
- }
379
- end
380
- if !buf_mode || !buf.empty?
381
- Marshal.dump(nil, tmp_current)
382
- end
383
- end
384
- private :extsort_by_first_split
385
-
386
- def extsort_by_merge(src1, src2, dst1, dst2)
387
- src1.rewind
388
- src2.rewind
389
- obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
390
- obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
391
- prefer1 = true
392
- while true
393
- cmp = obj1_cv <=> obj2_cv
394
- if prefer1 ? cmp > 0 : cmp >= 0
395
- obj1_pair, obj1_cv, obj1, src1, obj2_pair, obj2_cv, obj2, src2 = obj2_pair, obj2_cv, obj2, src2, obj1_pair, obj1_cv, obj1, src1
396
- prefer1 = !prefer1
397
- end
398
- Marshal.dump([obj1_cv, obj1], dst1)
399
- obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
400
- if !obj1_pair
401
- begin
402
- Marshal.dump(obj2_pair, dst1)
403
- obj2_pair = Marshal.load(src2)
404
- end until !obj2_pair
405
- Marshal.dump(nil, dst1)
406
- dst1, dst2 = dst2, dst1
407
- break if src1.eof?
408
- break if src2.eof?
409
- obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
410
- obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
220
+ if !stream
221
+ if with_header
222
+ io.puts Tb.csv_encode_row(header)
411
223
  end
412
- end
413
- if !src1.eof?
414
- restsrc = src1
415
- elsif !src2.eof?
416
- restsrc = src2
417
- else
418
- return
419
- end
420
- until restsrc.eof?
421
- restobj_pair = Marshal.load(restsrc)
422
- Marshal.dump(restobj_pair, dst1)
423
- end
424
- end
425
- private :extsort_by_merge
426
-
427
- def extsort_by_strip_cv(tmp1, y)
428
- tmp1.rewind
429
- while true
430
- pair = Marshal.load(tmp1)
431
- break if !pair
432
- _, obj = pair
433
- y.yield obj
224
+ fnew.call.each {|pairs|
225
+ fs = header.dup
226
+ while !fs.empty? && !pairs.has_key?(fs.last)
227
+ fs.pop
228
+ end
229
+ ary = fs.map {|f| pairs[f] }
230
+ io.puts Tb.csv_encode_row(ary)
231
+ }
434
232
  end
435
233
  end
436
- private :extsort_by_strip_cv
437
234
 
438
- # splits self by _boundary_p_ which is called with adjacent two elements.
439
- #
440
- # _before_group_ is called before each group with the first element.
441
- # _after_group_ is called after each group with the last element.
442
- # _body_ is called for each element.
443
- #
444
- def each_group_element(boundary_p, before_group, body, after_group)
445
- prev = nil
446
- first = true
447
- self.each {|curr|
448
- if first
449
- before_group.call(curr)
450
- body.call(curr)
451
- prev = curr
452
- first = false
453
- elsif boundary_p.call(prev, curr)
454
- after_group.call(prev)
455
- before_group.call(curr)
456
- body.call(curr)
457
- prev = curr
458
- else
459
- body.call(curr)
460
- prev = curr
461
- end
235
+ def write_to_json(out)
236
+ require 'json'
237
+ out.print "["
238
+ sep = nil
239
+ self.each {|pairs|
240
+ out.print sep if sep
241
+ out.print JSON.pretty_generate(Hash[pairs.to_a])
242
+ sep = ",\n"
462
243
  }
463
- if !first
464
- after_group.call(prev)
465
- end
244
+ out.puts "]"
245
+ nil
466
246
  end
467
247
 
468
- def lazy_map
469
- Enumerator.new {|y|
470
- self.each {|*vs|
471
- y.yield(yield(*vs))
248
+ def extsort_by(opts={}, &cmpvalue_from)
249
+ Tb::Enumerator.new {|ty|
250
+ header = []
251
+ er = Enumerator.new {|y|
252
+ self.with_cumulative_header {|header0|
253
+ header = header0 if header0
254
+ }.each {|pairs, header1|
255
+ header = header1
256
+ y.yield pairs
257
+ }
258
+ ty.set_header header
259
+ }
260
+ er.extsort_by(opts, &cmpvalue_from).each {|pairs|
261
+ ty.yield pairs
472
262
  }
473
263
  }
474
264
  end