tb 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ # lib/tb/csv.rb - CSV related features for table library
2
+ #
3
+ # Copyright (C) 2010-2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ require 'csv'
28
+
29
+ class Tb
30
+ def Tb.csv_stream_input(csv, &b)
31
+ csvreader = CSVReader.new(csv)
32
+ begin
33
+ csvreader.each(&b)
34
+ ensure
35
+ csvreader.close
36
+ end
37
+ nil
38
+ end
39
+
40
+ class CSVReader
41
+ if defined? CSV::Reader
42
+ # Ruby 1.8
43
+ def initialize(input)
44
+ if input.respond_to? :to_str
45
+ @csv = CSV::StringReader.new(input)
46
+ else
47
+ @csv = CSV::IOReader.new(input)
48
+ end
49
+ @eof = false
50
+ end
51
+
52
+ def shift
53
+ return nil if @eof
54
+ ary = @csv.shift
55
+ if ary.empty?
56
+ ary = nil
57
+ @eof = true
58
+ elsif ary == [nil]
59
+ ary = []
60
+ end
61
+ ary
62
+ end
63
+ else
64
+ # Ruby 1.9
65
+ def initialize(input)
66
+ @csv = CSV.new(input)
67
+ end
68
+
69
+ def shift
70
+ @csv.shift
71
+ end
72
+ end
73
+
74
+ def each
75
+ while ary = self.shift
76
+ yield ary
77
+ end
78
+ nil
79
+ end
80
+
81
+ def close
82
+ @csv.close
83
+ end
84
+ end
85
+
86
+ def Tb.csv_stream_output(out)
87
+ require 'csv'
88
+ if defined? CSV::Writer
89
+ # Ruby 1.8
90
+ CSV::Writer.generate(out) {|csvgen|
91
+ yield csvgen
92
+ }
93
+ else
94
+ # Ruby 1.9
95
+ gen = Object.new
96
+ gen.instance_variable_set(:@out, out)
97
+ def gen.<<(ary)
98
+ @out << ary.to_csv
99
+ end
100
+ yield gen
101
+ end
102
+ end
103
+
104
+ # :call-seq:
105
+ # generate_csv(out='', fields=nil) {|recordids| modified_recordids }
106
+ # generate_csv(out='', fields=nil)
107
+ #
108
+ def generate_csv(out='', fields=nil, &block)
109
+ if fields.nil?
110
+ fields = list_fields
111
+ end
112
+ require 'csv'
113
+ recordids = list_recordids
114
+ if block_given?
115
+ recordids = yield(recordids)
116
+ end
117
+ Tb.csv_stream_output(out) {|gen|
118
+ gen << fields
119
+ recordids.each {|recordid|
120
+ gen << get_values(recordid, *fields)
121
+ }
122
+ }
123
+ out
124
+ end
125
+ end
@@ -0,0 +1,284 @@
1
+ # lib/tb/enumerable.rb - extensions for Enumerable
2
+ #
3
+ # Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
module Enumerable
  # :call-seq:
  #   enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts])
  #   enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts]) {|ks, vs| ... }
  #
  # Groups the elements of _enum_ into a (possibly nested) hash.
  # Several elements are expected to fall into the same category.
  #
  # The arguments are one or more key selectors, followed by exactly one
  # value selector, optionally followed by an option hash.
  # A block may be given as well.
  #
  # A selector describes how to extract a value from an element of _enum_.
  #
  # The key selectors, _kselN_, produce the hash keys.
  # With two or more key selectors the result is a nested hash, one level
  # per key selector.
  #
  # The value selector, _vsel_, produces the values stored in the innermost
  # hashes.
  # By default, the values of all elements sharing the same keys are
  # collected into an array, and that array becomes the innermost hash value.
  # The options :seed, :op and :update change how values are combined.
  #
  #   a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
  #        {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
  #        {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
  #   p a.tb_categorize(:color, :fruit)
  #   #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
  #   p a.tb_categorize(:taste, :fruit)
  #   #=> {"sweet"=>["banana", "melon"], "tart"=>["grapefruit"]}
  #   p a.tb_categorize(:taste, :color, :fruit)
  #   #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]}, "tart"=>{"yellow"=>["grapefruit"]}}
  #   p a.tb_categorize(:taste, :color)
  #   #=> {"sweet"=>["yellow", "green"], "tart"=>["yellow"]}
  #
  # The examples above use :fruit, :color and :taste as selectors.
  # A selector is interpreted according to its kind:
  #
  # - an object with a +call+ method (a procedure, etc.): it is called with the element and its result is the extracted value.
  # - an array of selectors: each selector is applied and the results are gathered into an array.
  # - any other object: the value is extracted with the +[]+ method, as +element[selector]+.
  #
  # Hence the selector :fruit applied to the element
  # {:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100}
  # evaluates {...}[:fruit].
  #
  #   p a.tb_categorize(lambda {|elt| elt[:fruit][4] }, :fruit)
  #   #=> {"n"=>["banana", "melon"], "e"=>["grapefruit"]}
  #
  # When the key selectors return the same keys for two or more elements,
  # the values extracted by the value selector are combined.
  # By default they are collected into an array.
  # The :seed, :op and :update options customize the combination.
  # :seed and :op work like the arguments of Enumerable#inject.
  # :seed gives the initial value.
  # (Without :seed, the first value of each category is the initial value.)
  # :op gives a procedure which combines a seed and a value into the next seed.
  # :update is like :op but receives three arguments instead of two:
  # keys, seed and value.
  # :op and :update are converted to a procedure with +to_proc+,
  # so a symbol can be given.
  #
  #   # count categorized elements.
  #   p a.tb_categorize(:color, lambda {|e| 1 }, :op=>:+)
  #   #=> {"yellow"=>2, "green"=>1}
  #
  #   p a.tb_categorize(:color, :fruit, :seed=>"", :op=>:+)
  #   #=> {"yellow"=>"bananagrapefruit", "green"=>"melon"}
  #
  # The default behavior, collecting values into an array, corresponds to:
  #   :seed => nil
  #   :update => {|ks, s, v| !s ? [v] : (s << v) }
  # (When neither :op nor :update is given, a :seed option is not used.)
  #
  # :op and :update are mutually exclusive.
  # ArgumentError is raised if both are given.
  # ArgumentError is also raised when fewer than two selectors are given.
  #
  # If a block is given, it converts each combined value into the final
  # innermost hash value.  It receives the array of keys and the combined value.
  #
  #   p a.tb_categorize(:color, :fruit) {|ks, vs| vs.join(",") }
  #   #=> {"yellow"=>"banana,grapefruit", "green"=>"melon"}
  #
  #   # calculates the average price for fruits of each color.
  #   p a.tb_categorize(:color, :price) {|ks, vs| vs.inject(0.0, &:+) / vs.length }
  #   #=> {"yellow"=>150.0, "green"=>300.0}
  #
  def tb_categorize(*args, &reduce_proc)
    opts = args.last.kind_of?(Hash) ? args.pop : {}
    unless args.length >= 2
      raise ArgumentError, "needs 2 or more arguments without option hash (but #{args.length})"
    end
    # The last remaining argument is the value selector; the rest are key selectors.
    value_selector = tb_cat_selector_proc(args.pop)
    key_selectors = args.map {|sel| tb_cat_selector_proc(sel) }

    has_seed = opts.include?(:seed)
    seed_value = opts[:seed]
    with_update = opts.include?(:update)
    with_op = opts.include?(:op)
    if with_update && with_op
      raise ArgumentError, "both :op and :update option specified"
    end
    if with_update
      update_proc = opts[:update].to_proc
    elsif with_op
      op_proc = opts[:op].to_proc
      update_proc = lambda {|_ks, acc, val| op_proc.call(acc, val) }
    else
      # Default: gather the values of a category into an array.
      has_seed = true
      seed_value = nil
      update_proc = lambda {|_ks, acc, val| acc ? (acc << val) : [val] }
    end

    result = {}
    each do |*elts|
      # An iteration yielding several values is treated as one array element.
      elt = elts.length > 1 ? elts : elts[0]
      keys = key_selectors.map {|ksel| ksel.call(elt) }
      val = value_selector.call(elt)
      # Walk (creating as needed) the nested hashes for all keys but the last.
      leaf = keys[0...-1].inject(result) {|h, k|
        h[k] = {} unless h.include?(k)
        h[k]
      }
      last_key = keys.last
      if leaf.include?(last_key)
        leaf[last_key] = update_proc.call(keys, leaf[last_key], val)
      elsif has_seed
        leaf[last_key] = update_proc.call(keys, seed_value, val)
      else
        # No seed: the first value of the category is stored as is.
        leaf[last_key] = val
      end
    end

    tb_cat_reduce(result, [], key_selectors.length - 1, reduce_proc) if reduce_proc
    result
  end

  # Converts a selector into an object responding to +call+ which extracts
  # a value from an element.
  #
  # A callable selector is returned unchanged; an array-like selector
  # (responding to +to_ary+) yields an array of the values extracted by its
  # members; anything else is used as the argument of the element's +[]+.
  def tb_cat_selector_proc(selector)
    return selector if selector.respond_to?(:call)
    if selector.respond_to?(:to_ary)
      parts = selector.to_ary.map {|sel| tb_cat_selector_proc(sel) }
      return lambda {|elt| parts.map {|part| part.call(elt) } }
    end
    lambda {|elt| elt[selector] }
  end
  private :tb_cat_selector_proc

  # Replaces, in place, every innermost value of the nested _hash_ with
  # +reduce_proc.call(keys, value)+.
  #
  # _ks_ holds the keys of the enclosing levels and is restored before
  # returning; _nestlevel_ is the number of hash levels below _hash_
  # (zero means the values of _hash_ are the innermost values).
  # The procedure receives a copy of the key array.
  def tb_cat_reduce(hash, ks, nestlevel, reduce_proc)
    innermost = nestlevel.zero?
    hash.each do |key, entry|
      ks.push(key)
      begin
        if innermost
          hash[key] = reduce_proc.call(ks.dup, entry)
        else
          tb_cat_reduce(entry, ks, nestlevel - 1, reduce_proc)
        end
      ensure
        ks.pop
      end
    end
  end
  private :tb_cat_reduce

  # :call-seq:
  #   enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) -> hash
  #   enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) {|s, v| ... } -> hash
  #
  # Groups the elements of _enum_ into a (possibly nested) hash.
  # By default each category is expected to contain exactly one element.
  #
  # The arguments are one or more key selectors, followed by exactly one
  # value selector, optionally followed by an option hash.
  # A block may be given as well.
  #
  # A selector describes how to extract a value from an element of _enum_.
  # See Enumerable#tb_categorize for the kinds of selectors.
  #
  # The key selectors, _kselN_, produce the hash keys.
  # With two or more key selectors the result is a nested hash.
  #
  # The value selector, _vsel_, produces the values of the innermost hashes.
  # Without a block, the key selectors must identify each element uniquely:
  # ArgumentError is raised if two or more elements produce the same keys.
  # The block, together with the :seed option, changes this behavior.
  #
  #   a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
  #        {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
  #        {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
  #   p a.tb_unique_categorize(:fruit, :price)
  #   #=> {"banana"=>100, "melon"=>300, "grapefruit"=>200}
  #
  #   p a.tb_unique_categorize(:color, :price)
  #   # ArgumentError
  #
  # If a block is given, it combines the values of a category.
  # It receives a seed and the value extracted by _vsel_,
  # and its result becomes the next seed.
  # The :seed option gives the initial seed.
  # Without :seed, the first value of each category is the seed.
  #
  #   p a.tb_unique_categorize(:taste, :price) {|s, v| s + v }
  #   #=> {"sweet"=>400, "tart"=>200}
  #
  #   p a.tb_unique_categorize(:color, :price) {|s, v| s + v }
  #   #=> {"yellow"=>300, "green"=>300}
  #
  def tb_unique_categorize(*args, &update_proc)
    # The option hash is copied so the caller's hash is left untouched.
    opts = args.last.kind_of?(Hash) ? args.pop.dup : {}
    if update_proc
      opts[:update] = lambda {|_ks, acc, val| update_proc.call(acc, val) }
    else
      # A private marker object is used as the seed: seeing anything else
      # as the seed means the category already holds a value.
      marker = Object.new
      opts[:seed] = marker
      opts[:update] = lambda {|ks, acc, val|
        unless acc.equal?(marker)
          raise ArgumentError, "ambiguous key: #{ks.map {|k| k.inspect }.join(',')}"
        end
        val
      }
    end
    args << opts
    tb_categorize(*args)
  end

  # :call-seq:
  #   enum.tb_category_count(ksel1, ksel2, ...)
  #
  # Counts the elements of _enum_ in each category defined by the key selectors.
  #
  #   a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
  #        {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
  #        {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
  #
  #   p a.tb_category_count(:color)
  #   #=> {"yellow"=>2, "green"=>1}
  #
  #   p a.tb_category_count(:taste)
  #   #=> {"sweet"=>2, "tart"=>1}
  #
  #   p a.tb_category_count(:taste, :color)
  #   #=> {"sweet"=>{"yellow"=>1, "green"=>1}, "tart"=>{"yellow"=>1}}
  #
  # A selector describes how to extract a value from an element of _enum_.
  # See Enumerable#tb_categorize for the kinds of selectors.
  #
  def tb_category_count(*args)
    # Every element contributes 1; the contributions of a category are summed.
    one = lambda {|_elt| 1 }
    adder = lambda {|_ks, total, inc| total + inc }
    tb_categorize(*(args + [one, {:update => adder}]))
  end

end
@@ -0,0 +1,96 @@
1
+ # lib/tb/fieldset.rb - Tb::FieldSet class
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
# Tb::FieldSet is written as a class nested in +class Tb+ (instead of
# +class Tb::FieldSet+) so that this definition does not raise NameError
# when Tb has not been defined yet.  The constant is still Tb::FieldSet.
class Tb
  # An ordered list of distinct field names (the "header").
  #
  # Names added through #add_fields are made distinct by appending a
  # numbered suffix such as "(2)" to names that are already taken.
  class FieldSet
    # Creates a field set containing the given field names, in order.
    # The names go through #add_fields, so duplicates are renamed.
    def initialize(*fs)
      @header = []
      add_fields(*fs) if !fs.empty?
    end

    # The array of field names, in order.
    attr_reader :header

    # Returns the position of the field named _f_ in the header.
    # Raises ArgumentError if there is no such field.
    def index_from_field(f)
      i = self.header.index(f)
      if i.nil?
        raise ArgumentError, "unexpected field name: #{f.inspect}"
      end
      i
    end

    # Returns the field name at position _i_, first extending the header
    # with generated names (see #extend_length) when _i_ is beyond its end.
    # Raises ArgumentError for a negative _i_.
    def field_from_index_ex(i)
      if self.length <= i
        # The last name just added is the one at position i.
        extend_length(i+1).last
      else
        field_from_index(i)
      end
    end

    # Returns the field name at position _i_.
    # Raises ArgumentError if _i_ is negative or beyond the end of the header.
    def field_from_index(i)
      raise ArgumentError, "negative index: #{i}" if i < 0
      f = self.header[i]
      if f.nil?
        raise ArgumentError, "index too big: #{i}"
      end
      f
    end

    # Number of fields in the header.
    def length
      @header.length
    end

    # Grows the header to _len_ fields by adding empty field names
    # (which #add_fields renames as needed to keep them distinct).
    # Returns the array of names actually added.
    def extend_length(len)
      fs = [""] * (len - self.length)
      add_fields(*fs)
    end

    # Appends the given field names to the header and returns the array of
    # names actually added.
    #
    # A nil name is treated as the empty string.
    # A name which is already used, either in the header or by an earlier
    # argument of this call, is renamed to "name(N)".  N starts just above
    # the largest number already used in the header for that name (2 when
    # there is none) and is increased until the result is unused, so the
    # returned names never collide with each other or with the header.
    def add_fields(*fs)
      used = {}
      max = {}
      # Record the existing names and, for each "prefix(N)" name, the
      # largest N seen for that prefix.
      @header.each {|f|
        used[f] = true
        if /\((\d+)\)\z/ =~ f
          prefix = $`
          n = $1.to_i
          max[prefix] = n if !max[prefix] || max[prefix] < n
        end
      }
      added = []
      fs.each {|f|
        f ||= ''
        if !used[f]
          f2 = f
        else
          n = (max[f] || 1) + 1
          # max only reflects the header as it was before this call; a
          # numbered name added earlier in this call (e.g. "a(2)" before a
          # duplicated "a") may already occupy the candidate, so skip ahead.
          n += 1 while used["#{f}(#{n})"]
          max[f] = n
          f2 = "#{f}(#{n})"
        end
        added << f2
        used[f2] = true
      }
      @header.concat added
      added
    end
  end
end