tb 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,125 @@
1
+ # lib/tb/csv.rb - CSV related features for table library
2
+ #
3
+ # Copyright (C) 2010-2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ require 'csv'
28
+
29
+ class Tb
30
+ def Tb.csv_stream_input(csv, &b)
31
+ csvreader = CSVReader.new(csv)
32
+ begin
33
+ csvreader.each(&b)
34
+ ensure
35
+ csvreader.close
36
+ end
37
+ nil
38
+ end
39
+
40
+ class CSVReader
41
+ if defined? CSV::Reader
42
+ # Ruby 1.8
43
+ def initialize(input)
44
+ if input.respond_to? :to_str
45
+ @csv = CSV::StringReader.new(input)
46
+ else
47
+ @csv = CSV::IOReader.new(input)
48
+ end
49
+ @eof = false
50
+ end
51
+
52
+ def shift
53
+ return nil if @eof
54
+ ary = @csv.shift
55
+ if ary.empty?
56
+ ary = nil
57
+ @eof = true
58
+ elsif ary == [nil]
59
+ ary = []
60
+ end
61
+ ary
62
+ end
63
+ else
64
+ # Ruby 1.9
65
+ def initialize(input)
66
+ @csv = CSV.new(input)
67
+ end
68
+
69
+ def shift
70
+ @csv.shift
71
+ end
72
+ end
73
+
74
+ def each
75
+ while ary = self.shift
76
+ yield ary
77
+ end
78
+ nil
79
+ end
80
+
81
+ def close
82
+ @csv.close
83
+ end
84
+ end
85
+
86
+ def Tb.csv_stream_output(out)
87
+ require 'csv'
88
+ if defined? CSV::Writer
89
+ # Ruby 1.8
90
+ CSV::Writer.generate(out) {|csvgen|
91
+ yield csvgen
92
+ }
93
+ else
94
+ # Ruby 1.9
95
+ gen = Object.new
96
+ gen.instance_variable_set(:@out, out)
97
+ def gen.<<(ary)
98
+ @out << ary.to_csv
99
+ end
100
+ yield gen
101
+ end
102
+ end
103
+
104
+ # :call-seq:
105
+ # generate_csv(out='', fields=nil) {|recordids| modified_recordids }
106
+ # generate_csv(out='', fields=nil)
107
+ #
108
+ def generate_csv(out='', fields=nil, &block)
109
+ if fields.nil?
110
+ fields = list_fields
111
+ end
112
+ require 'csv'
113
+ recordids = list_recordids
114
+ if block_given?
115
+ recordids = yield(recordids)
116
+ end
117
+ Tb.csv_stream_output(out) {|gen|
118
+ gen << fields
119
+ recordids.each {|recordid|
120
+ gen << get_values(recordid, *fields)
121
+ }
122
+ }
123
+ out
124
+ end
125
+ end
@@ -0,0 +1,284 @@
1
+ # lib/tb/enumerable.rb - extensions for Enumerable
2
+ #
3
+ # Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ module Enumerable
28
+ # :call-seq:
29
+ # enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts])
30
+ # enum.tb_categorize(ksel1, ksel2, ..., vsel, [opts]) {|ks, vs| ... }
31
+ #
32
+ # categorizes the elements in _enum_ and returns a hash.
33
+ # This method assumes multiple elements for a category.
34
+ #
35
+ # +tb_categorize+ takes one or more key selectors,
36
+ # one value selector and
37
+ # an optional option hash.
38
+ # It also takes an optional block.
39
+ #
40
+ # The selectors specify how to extract a value from an element in _enum_.
41
+ #
42
+ # The key selectors, _kselN_, are used to extract hash keys from an element.
43
+ # If two or more key selectors are specified, the result hash will be nested.
44
+ #
45
+ # The value selector, _vsel_, is used for the values of innermost hashes.
46
+ # By default, all values extracted by _vsel_ from the elements which
47
+ # key selectors extracts same value are composed as an array.
48
+ # The array is set to the values of the innermost hashes.
49
+ # This behavior can be customized by the options: :seed, :op and :update.
50
+ #
51
+ # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
52
+ # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
53
+ # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
54
+ # p a.tb_categorize(:color, :fruit)
55
+ # #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
56
+ # p a.tb_categorize(:taste, :fruit)
57
+ # #=> {"sweet"=>["banana", "melon"], "tart"=>["grapefruit"]}
58
+ # p a.tb_categorize(:taste, :color, :fruit)
59
+ # #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]}, "tart"=>{"yellow"=>["grapefruit"]}}
60
+ # p a.tb_categorize(:taste, :color)
61
+ # #=> {"sweet"=>["yellow", "green"], "tart"=>["yellow"]}
62
+ #
63
+ # In the above example, :fruit, :color and :taste is specified as selectors.
64
+ # There are several types of selectors as follows:
65
+ #
66
+ # - object with +call+ method (procedure, etc.): extracts a value from the element by calling the procedure with the element as an argument.
67
+ # - array of selectors: make an array which contains the values extracted by the selectors.
68
+ # - other object: extracts a value from the element using +[]+ method as +element[selector]+.
69
+ #
70
+ # So the selector :fruit extracts the value from the element
71
+ # {:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100}
72
+ # as {...}[:fruit].
73
+ #
74
+ # p a.tb_categorize(lambda {|elt| elt[:fruit][4] }, :fruit)
75
+ # #=> {"n"=>["banana", "melon"], "e"=>["grapefruit"]}
76
+ #
77
+ # When the key selectors returns same key for two or or more elements,
78
+ # corresponding values extracted by the value selector are combined.
79
+ # By default, all values are collected as an array.
80
+ # :seed, :op and :update option in the option hash customizes this behavior.
81
+ # :seed option and :op option is similar to Enumerable#inject.
82
+ # :seed option specifies an initial value.
83
+ # (If :seed option is not given, the first value for each category is treated as an initial value.)
84
+ # :op option specifies a procedure to combine a seed and an element into a next seed.
85
+ # :update option is same as :op option except it takes three arguments instead of two:
86
+ # keys, seed and element.
87
+ # +to_proc+ method is used to convert :op and :update option to a procedure.
88
+ # So a symbol can be used for them.
89
+ #
90
+ # # count categorized elements.
91
+ # p a.tb_categorize(:color, lambda {|e| 1 }, :op=>:+)
92
+ # #=> {"yellow"=>2, "green"=>1}
93
+ #
94
+ # p a.tb_categorize(:color, :fruit, :seed=>"", :op=>:+)
95
+ # #=> {"yellow"=>"bananagrapefruit", "green"=>"melon"}
96
+ #
97
+ # The default behavior, collecting all values as an array, is implemented as follows.
98
+ # :seed => nil
99
+ # :update => {|ks, s, v| !s ? [v] : (s << v) }
100
+ #
101
+ # :op and :update option are disjoint.
102
+ # ArgumentError is raised if both are specified.
103
+ #
104
+ # The block for +tb_categorize+ method converts combined values to final innermost hash values.
105
+ #
106
+ # p a.tb_categorize(:color, :fruit) {|ks, vs| vs.join(",") }
107
+ # #=> {"yellow"=>"banana,grapefruit", "green"=>"melon"}
108
+ #
109
+ # # calculates the average price for fruits of each color.
110
+ # p a.tb_categorize(:color, :price) {|ks, vs| vs.inject(0.0, &:+) / vs.length }
111
+ # #=> {"yellow"=>150.0, "green"=>300.0}
112
+ #
113
+ def tb_categorize(*args, &reduce_proc)
114
+ opts = args.last.kind_of?(Hash) ? args.pop : {}
115
+ if args.length < 2
116
+ raise ArgumentError, "needs 2 or more arguments without option hash (but #{args.length})"
117
+ end
118
+ value_selector = tb_cat_selector_proc(args.pop)
119
+ key_selectors = args.map {|a| tb_cat_selector_proc(a) }
120
+ has_seed = opts.include? :seed
121
+ seed_value = opts[:seed]
122
+ if opts.include?(:update) && opts.include?(:op)
123
+ raise ArgumentError, "both :op and :update option specified"
124
+ elsif opts.include? :update
125
+ update_proc = opts[:update].to_proc
126
+ elsif opts.include? :op
127
+ op_proc = opts[:op].to_proc
128
+ update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
129
+ else
130
+ has_seed = true
131
+ seed_value = nil
132
+ update_proc = lambda {|ks, s, v| !s ? [v] : (s << v) }
133
+ end
134
+ result = {}
135
+ each {|*elts|
136
+ elt = elts.length <= 1 ? elts[0] : elts
137
+ ks = key_selectors.map {|ksel| ksel.call(elt) }
138
+ v = value_selector.call(elt)
139
+ h = result
140
+ 0.upto(ks.length-2) {|i|
141
+ k = ks[i]
142
+ h[k] = {} if !h.include?(k)
143
+ h = h[k]
144
+ }
145
+ lastk = ks.last
146
+ if !h.include?(lastk)
147
+ if has_seed
148
+ h[lastk] = update_proc.call(ks, seed_value, v)
149
+ else
150
+ h[lastk] = v
151
+ end
152
+ else
153
+ h[lastk] = update_proc.call(ks, h[lastk], v)
154
+ end
155
+ }
156
+ if reduce_proc
157
+ tb_cat_reduce(result, [], key_selectors.length-1, reduce_proc)
158
+ end
159
+ result
160
+ end
161
+
162
+ def tb_cat_selector_proc(selector)
163
+ if selector.respond_to?(:call)
164
+ selector
165
+ elsif selector.respond_to? :to_ary
166
+ selector_procs = selector.to_ary.map {|sel| tb_cat_selector_proc(sel) }
167
+ lambda {|elt| selector_procs.map {|selproc| selproc.call(elt) } }
168
+ else
169
+ lambda {|elt| elt[selector] }
170
+ end
171
+ end
172
+ private :tb_cat_selector_proc
173
+
174
+ def tb_cat_reduce(hash, ks, nestlevel, reduce_proc)
175
+ if nestlevel.zero?
176
+ hash.each {|k, v|
177
+ ks << k
178
+ begin
179
+ hash[k] = reduce_proc.call(ks.dup, v)
180
+ ensure
181
+ ks.pop
182
+ end
183
+ }
184
+ else
185
+ hash.each {|k, h|
186
+ ks << k
187
+ begin
188
+ tb_cat_reduce(h, ks, nestlevel-1, reduce_proc)
189
+ ensure
190
+ ks.pop
191
+ end
192
+ }
193
+ end
194
+ end
195
+ private :tb_cat_reduce
196
+
197
+ # :call-seq:
198
+ # enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) -> hash
199
+ # enum.tb_unique_categorize(ksel1, ksel2, ..., vsel, [opts]) {|s, v| ... } -> hash
200
+ #
201
+ # categorizes the elements in _enum_ and returns a hash.
202
+ # This method assumes one element for a category by default.
203
+ #
204
+ # +tb_unique_categorize+ takes one or more key selectors,
205
+ # one value selector and
206
+ # an optional option hash.
207
+ # It also takes an optional block.
208
+ #
209
+ # The selectors specify how to extract a value from an element in _enum_.
210
+ # See Enumerable#tb_categorize for details of selectors.
211
+ #
212
+ # The key selectors, _kselN_, are used to extract hash keys from an element.
213
+ # If two or more key selectors are specified, the result hash will be nested.
214
+ #
215
+ # The value selector, _vsel_, is used for the values of innermost hashes.
216
+ # By default, this method assumes the key selectors categorizes elements in enum uniquely.
217
+ # If the key selectors generates same keys for two or more elements, ArgumentError is raised.
218
+ # This behavior can be customized by :seed option and the block.
219
+ #
220
+ # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
221
+ # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
222
+ # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
223
+ # p a.tb_unique_categorize(:fruit, :price)
224
+ # #=> {"banana"=>100, "melon"=>300, "grapefruit"=>200}
225
+ #
226
+ # p a.tb_unique_categorize(:color, :price)
227
+ # # ArgumentError
228
+ #
229
+ # If the block is given, it is used for combining values in a category.
230
+ # The arguments for the block is a seed and the value extracted by _vsel_.
231
+ # The return value of the block is used as the next seed.
232
+ # :seed option specifies the initial seed.
233
+ # If :seed is not given, the first value for each category is used for the seed.
234
+ #
235
+ # p a.tb_unique_categorize(:taste, :price) {|s, v| s + v }
236
+ # #=> {"sweet"=>400, "tart"=>200}
237
+ #
238
+ # p a.tb_unique_categorize(:color, :price) {|s, v| s + v }
239
+ # #=> {"yellow"=>300, "green"=>300}
240
+ #
241
+ def tb_unique_categorize(*args, &update_proc)
242
+ opts = args.last.kind_of?(Hash) ? args.pop.dup : {}
243
+ if update_proc
244
+ opts[:update] = lambda {|ks, s, v| update_proc.call(s, v) }
245
+ else
246
+ seed = Object.new
247
+ opts[:seed] = seed
248
+ opts[:update] = lambda {|ks, s, v|
249
+ if s.equal? seed
250
+ v
251
+ else
252
+ raise ArgumentError, "ambiguous key: #{ks.map {|k| k.inspect }.join(',')}"
253
+ end
254
+ }
255
+ end
256
+ tb_categorize(*(args + [opts]))
257
+ end
258
+
259
+ # :call-seq:
260
+ # enum.tb_category_count(ksel1, ksel2, ...)
261
+ #
262
+ # counts elements in _enum_ for each category defined by the key selectors.
263
+ #
264
+ # a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
265
+ # {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
266
+ # {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
267
+ #
268
+ # p a.tb_category_count(:color)
269
+ # #=> {"yellow"=>2, "green"=>1}
270
+ #
271
+ # p a.tb_category_count(:taste)
272
+ # #=> {"sweet"=>2, "tart"=>1}
273
+ #
274
+ # p a.tb_category_count(:taste, :color)
275
+ # #=> {"sweet"=>{"yellow"=>1, "green"=>1}, "tart"=>{"yellow"=>1}}
276
+ #
277
+ # The selectors specify how to extract a value from an element in _enum_.
278
+ # See Enumerable#tb_categorize for details of selectors.
279
+ #
280
+ def tb_category_count(*args)
281
+ tb_categorize(*(args + [lambda {|e| 1 }, {:update => lambda {|ks, s, v| s + v }}]))
282
+ end
283
+
284
+ end
@@ -0,0 +1,96 @@
1
+ # lib/tb/fieldset.rb - Tb::FieldSet class
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ class Tb::FieldSet
28
+ def initialize(*fs)
29
+ @header = []
30
+ add_fields(*fs) if !fs.empty?
31
+ end
32
+ attr_reader :header
33
+
34
+ def index_from_field(f)
35
+ i = self.header.index(f)
36
+ if i.nil?
37
+ raise ArgumentError, "unexpected field name: #{f.inspect}"
38
+ end
39
+ i
40
+ end
41
+
42
+ def field_from_index_ex(i)
43
+ if self.length <= i
44
+ fs2 = extend_length(i+1)
45
+ fs2.last
46
+ else
47
+ field_from_index(i)
48
+ end
49
+ end
50
+
51
+ def field_from_index(i)
52
+ raise ArgumentError, "negative index: #{i}" if i < 0
53
+ f = self.header[i]
54
+ if f.nil?
55
+ raise ArgumentError, "index too big: #{i}"
56
+ end
57
+ f
58
+ end
59
+
60
+ def length
61
+ @header.length
62
+ end
63
+
64
+ def extend_length(len)
65
+ fs = [""] * (len - self.length)
66
+ add_fields(*fs)
67
+ end
68
+
69
+ def add_fields(*fs)
70
+ h = {}
71
+ max = {}
72
+ @header.each {|f|
73
+ h[f] = true
74
+ if /\((\d+)\)\z/ =~ f
75
+ prefix = $`
76
+ n = $1.to_i
77
+ max[prefix] = n if !max[prefix] || max[prefix] < n
78
+ end
79
+ }
80
+ fs2 = []
81
+ fs.each {|f|
82
+ f ||= ''
83
+ if !h[f]
84
+ f2 = f
85
+ else
86
+ max[f] = 1 if !max[f]
87
+ max[f] += 1
88
+ f2 = "#{f}(#{max[f]})"
89
+ end
90
+ fs2 << f2
91
+ h[f2] = true
92
+ }
93
+ @header.concat fs2
94
+ fs2
95
+ end
96
+ end