tb 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -1
- data/lib/tb.rb +7 -3
- data/lib/tb/basic.rb +1 -1
- data/lib/tb/cmd_cat.rb +1 -3
- data/lib/tb/cmd_consecutive.rb +4 -6
- data/lib/tb/cmd_crop.rb +5 -7
- data/lib/tb/cmd_cross.rb +51 -49
- data/lib/tb/cmd_cut.rb +2 -6
- data/lib/tb/cmd_git_log.rb +20 -11
- data/lib/tb/cmd_grep.rb +1 -3
- data/lib/tb/cmd_group.rb +18 -44
- data/lib/tb/cmd_gsub.rb +2 -4
- data/lib/tb/cmd_join.rb +1 -3
- data/lib/tb/cmd_ls.rb +8 -15
- data/lib/tb/cmd_mheader.rb +3 -4
- data/lib/tb/cmd_nest.rb +4 -9
- data/lib/tb/cmd_newfield.rb +1 -3
- data/lib/tb/cmd_rename.rb +2 -4
- data/lib/tb/cmd_shape.rb +2 -3
- data/lib/tb/cmd_sort.rb +3 -5
- data/lib/tb/cmd_svn_log.rb +3 -5
- data/lib/tb/cmd_tar_tvf.rb +2 -4
- data/lib/tb/cmd_to_csv.rb +1 -1
- data/lib/tb/cmd_unnest.rb +1 -3
- data/lib/tb/cmdutil.rb +57 -135
- data/lib/tb/csv.rb +11 -54
- data/lib/tb/customcmp.rb +41 -0
- data/lib/tb/customeq.rb +41 -0
- data/lib/tb/enumerable.rb +225 -435
- data/lib/tb/enumerator.rb +22 -14
- data/lib/tb/ex_enumerable.rb +659 -0
- data/lib/tb/ex_enumerator.rb +102 -0
- data/lib/tb/fileenumerator.rb +2 -2
- data/lib/tb/func.rb +141 -0
- data/lib/tb/json.rb +1 -1
- data/lib/tb/reader.rb +4 -4
- data/lib/tb/search.rb +2 -4
- data/lib/tb/zipper.rb +60 -0
- data/test/test_cmd_cat.rb +40 -0
- data/test/test_cmd_git_log.rb +116 -0
- data/test/test_cmd_ls.rb +90 -0
- data/test/test_cmd_svn_log.rb +87 -0
- data/test/test_cmd_to_csv.rb +14 -0
- data/test/test_cmdutil.rb +25 -10
- data/test/test_csv.rb +10 -0
- data/test/test_customcmp.rb +14 -0
- data/test/test_customeq.rb +20 -0
- data/test/{test_enumerable.rb → test_ex_enumerable.rb} +181 -3
- data/test/test_search.rb +2 -10
- data/test/test_tbenum.rb +3 -3
- data/test/test_zipper.rb +22 -0
- metadata +20 -8
- data/lib/tb/enum.rb +0 -294
- data/lib/tb/pairs.rb +0 -227
- data/test/test_pairs.rb +0 -122
data/lib/tb/csv.rb
CHANGED
@@ -63,37 +63,12 @@ class Tb
|
|
63
63
|
end
|
64
64
|
|
65
65
|
class CSVReader
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
if input.respond_to? :to_str
|
70
|
-
@csv = CSV::StringReader.new(input)
|
71
|
-
else
|
72
|
-
@csv = CSV::IOReader.new(input)
|
73
|
-
end
|
74
|
-
@eof = false
|
75
|
-
end
|
76
|
-
|
77
|
-
def shift
|
78
|
-
return nil if @eof
|
79
|
-
ary = @csv.shift
|
80
|
-
if ary.empty?
|
81
|
-
ary = nil
|
82
|
-
@eof = true
|
83
|
-
elsif ary == [nil]
|
84
|
-
ary = []
|
85
|
-
end
|
86
|
-
ary
|
87
|
-
end
|
88
|
-
else
|
89
|
-
# Ruby 1.9
|
90
|
-
def initialize(input)
|
91
|
-
@csv = CSV.new(input)
|
92
|
-
end
|
66
|
+
def initialize(input)
|
67
|
+
@csv = CSV.new(input)
|
68
|
+
end
|
93
69
|
|
94
|
-
|
95
|
-
|
96
|
-
end
|
70
|
+
def shift
|
71
|
+
@csv.shift
|
97
72
|
end
|
98
73
|
|
99
74
|
def each
|
@@ -106,35 +81,17 @@ class Tb
|
|
106
81
|
|
107
82
|
def Tb.csv_stream_output(out)
|
108
83
|
require 'csv'
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
}
|
114
|
-
else
|
115
|
-
# Ruby 1.9
|
116
|
-
gen = Object.new
|
117
|
-
gen.instance_variable_set(:@out, out)
|
118
|
-
def gen.<<(ary)
|
119
|
-
@out << ary.to_csv
|
120
|
-
end
|
121
|
-
yield gen
|
84
|
+
gen = Object.new
|
85
|
+
gen.instance_variable_set(:@out, out)
|
86
|
+
def gen.<<(ary)
|
87
|
+
@out << ary.to_csv
|
122
88
|
end
|
89
|
+
yield gen
|
123
90
|
end
|
124
91
|
|
125
92
|
def Tb.csv_encode_row(ary)
|
126
93
|
require 'csv'
|
127
|
-
|
128
|
-
# Ruby 1.8
|
129
|
-
out = ''
|
130
|
-
CSV::Writer.generate(out) {|csvgen|
|
131
|
-
csvgen << ary
|
132
|
-
}
|
133
|
-
out
|
134
|
-
else
|
135
|
-
# Ruby 1.9
|
136
|
-
ary.to_csv
|
137
|
-
end
|
94
|
+
ary.to_csv
|
138
95
|
end
|
139
96
|
|
140
97
|
# :call-seq:
|
data/lib/tb/customcmp.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or without
|
4
|
+
# modification, are permitted provided that the following conditions
|
5
|
+
# are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright
|
8
|
+
# notice, this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above
|
10
|
+
# copyright notice, this list of conditions and the following
|
11
|
+
# disclaimer in the documentation and/or other materials provided
|
12
|
+
# with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote
|
14
|
+
# products derived from this software without specific prior
|
15
|
+
# written permission.
|
16
|
+
#
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
18
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
21
|
+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
25
|
+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
26
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
27
|
+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
class Tb::CustomCmp
|
30
|
+
include Comparable
|
31
|
+
|
32
|
+
def initialize(customcmp_object, &cmp)
|
33
|
+
@customcmp_object = customcmp_object
|
34
|
+
@cmp = cmp
|
35
|
+
end
|
36
|
+
attr_reader :customcmp_object, :cmp
|
37
|
+
|
38
|
+
def <=> other
|
39
|
+
@cmp.call(@customcmp_object, other.customcmp_object)
|
40
|
+
end
|
41
|
+
end
|
data/lib/tb/customeq.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or without
|
4
|
+
# modification, are permitted provided that the following conditions
|
5
|
+
# are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright
|
8
|
+
# notice, this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above
|
10
|
+
# copyright notice, this list of conditions and the following
|
11
|
+
# disclaimer in the documentation and/or other materials provided
|
12
|
+
# with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote
|
14
|
+
# products derived from this software without specific prior
|
15
|
+
# written permission.
|
16
|
+
#
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
18
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
21
|
+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
25
|
+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
26
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
27
|
+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
class Tb::CustomEq
|
30
|
+
include Comparable
|
31
|
+
|
32
|
+
def initialize(customeq_object, &eq)
|
33
|
+
@customeq_object = customeq_object
|
34
|
+
@eq = eq
|
35
|
+
end
|
36
|
+
attr_reader :customeq_object, :eq
|
37
|
+
|
38
|
+
def ==(other)
|
39
|
+
@eq.call(@customeq_object, other.customeq_object)
|
40
|
+
end
|
41
|
+
end
|
data/lib/tb/enumerable.rb
CHANGED
@@ -1,474 +1,264 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
# Copyright (C) 2010-2012 Tanaka Akira <akr@fsij.org>
|
4
|
-
#
|
1
|
+
# Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
|
2
|
+
#
|
5
3
|
# Redistribution and use in source and binary forms, with or without
|
6
|
-
# modification, are permitted provided that the following conditions
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
# OF
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
# OF
|
4
|
+
# modification, are permitted provided that the following conditions
|
5
|
+
# are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright
|
8
|
+
# notice, this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above
|
10
|
+
# copyright notice, this list of conditions and the following
|
11
|
+
# disclaimer in the documentation and/or other materials provided
|
12
|
+
# with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote
|
14
|
+
# products derived from this software without specific prior
|
15
|
+
# written permission.
|
16
|
+
#
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
18
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
21
|
+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
25
|
+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
26
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
27
|
+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
26
28
|
|
27
|
-
module Enumerable
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
#
|
35
|
-
# +tb_categorize+ takes one or more key selectors,
|
36
|
-
# one value selector and
|
37
|
-
# an optional option hash.
|
38
|
-
# It also takes an optional block.
|
39
|
-
#
|
40
|
-
# The selectors specify how to extract a value from an element in _enum_.
|
41
|
-
#
|
42
|
-
# The key selectors, _kselN_, are used to extract hash keys from an element.
|
43
|
-
# If two or more key selectors are specified, the result hash will be nested.
|
44
|
-
#
|
45
|
-
# The value selector, _vsel_, is used for the values of innermost hashes.
|
46
|
-
# By default, all values extracted by _vsel_ from the elements which
|
47
|
-
# key selectors extracts same value are composed as an array.
|
48
|
-
# The array is set to the values of the innermost hashes.
|
49
|
-
# This behavior can be customized by the options: :seed, :op and :update.
|
50
|
-
#
|
51
|
-
# a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
|
52
|
-
# {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
|
53
|
-
# {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
|
54
|
-
# p a.tb_categorize(:color, :fruit)
|
55
|
-
# #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
|
56
|
-
# p a.tb_categorize(:taste, :fruit)
|
57
|
-
# #=> {"sweet"=>["banana", "melon"], "tart"=>["grapefruit"]}
|
58
|
-
# p a.tb_categorize(:taste, :color, :fruit)
|
59
|
-
# #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]}, "tart"=>{"yellow"=>["grapefruit"]}}
|
60
|
-
# p a.tb_categorize(:taste, :color)
|
61
|
-
# #=> {"sweet"=>["yellow", "green"], "tart"=>["yellow"]}
|
62
|
-
#
|
63
|
-
# In the above example, :fruit, :color and :taste are specified as selectors.
|
64
|
-
# There are several types of selectors as follows:
|
65
|
-
#
|
66
|
-
# - object with +call+ method (procedure, etc.): extracts a value from the element by calling the procedure with the element as an argument.
|
67
|
-
# - array of selectors: make an array which contains the values extracted by the selectors.
|
68
|
-
# - other object: extracts a value from the element using +[]+ method as +element[selector]+.
|
69
|
-
#
|
70
|
-
# So the selector :fruit extracts the value from the element
|
71
|
-
# {:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100}
|
72
|
-
# as {...}[:fruit].
|
73
|
-
#
|
74
|
-
# p a.tb_categorize(lambda {|elt| elt[:fruit][4] }, :fruit)
|
75
|
-
# #=> {"n"=>["banana", "melon"], "e"=>["grapefruit"]}
|
76
|
-
#
|
77
|
-
# When the key selectors return the same key for two or more elements,
|
78
|
-
# corresponding values extracted by the value selector are combined.
|
79
|
-
# By default, all values are collected as an array.
|
80
|
-
# :seed, :op and :update option in the option hash customizes this behavior.
|
81
|
-
# :seed option and :op option is similar to Enumerable#inject.
|
82
|
-
# :seed option specifies an initial value.
|
83
|
-
# (If :seed option is not given, the first value for each category is treated as an initial value.)
|
84
|
-
# :op option specifies a procedure to combine a seed and an element into a next seed.
|
85
|
-
# :update option is same as :op option except it takes three arguments instead of two:
|
86
|
-
# keys, seed and element.
|
87
|
-
# +to_proc+ method is used to convert :op and :update option to a procedure.
|
88
|
-
# So a symbol can be used for them.
|
89
|
-
#
|
90
|
-
# # count categorized elements.
|
91
|
-
# p a.tb_categorize(:color, lambda {|e| 1 }, :op=>:+)
|
92
|
-
# #=> {"yellow"=>2, "green"=>1}
|
93
|
-
#
|
94
|
-
# p a.tb_categorize(:color, :fruit, :seed=>"", :op=>:+)
|
95
|
-
# #=> {"yellow"=>"bananagrapefruit", "green"=>"melon"}
|
96
|
-
#
|
97
|
-
# The default behavior, collecting all values as an array, is implemented as follows.
|
98
|
-
# :seed => nil
|
99
|
-
# :update => {|ks, s, v| !s ? [v] : (s << v) }
|
100
|
-
#
|
101
|
-
# :op and :update option are disjoint.
|
102
|
-
# ArgumentError is raised if both are specified.
|
103
|
-
#
|
104
|
-
# The block for +tb_categorize+ method converts combined values to final innermost hash values.
|
105
|
-
#
|
106
|
-
# p a.tb_categorize(:color, :fruit) {|ks, vs| vs.join(",") }
|
107
|
-
# #=> {"yellow"=>"banana,grapefruit", "green"=>"melon"}
|
108
|
-
#
|
109
|
-
# # calculates the average price for fruits of each color.
|
110
|
-
# p a.tb_categorize(:color, :price) {|ks, vs| vs.inject(0.0, &:+) / vs.length }
|
111
|
-
# #=> {"yellow"=>150.0, "green"=>300.0}
|
112
|
-
#
|
113
|
-
def tb_categorize(*args, &reduce_proc)
|
114
|
-
opts = args.last.kind_of?(Hash) ? args.pop : {}
|
115
|
-
if args.length < 2
|
116
|
-
raise ArgumentError, "needs 2 or more arguments without option hash (but #{args.length})"
|
117
|
-
end
|
118
|
-
value_selector = tb_cat_selector_proc(args.pop)
|
119
|
-
key_selectors = args.map {|a| tb_cat_selector_proc(a) }
|
120
|
-
has_seed = opts.has_key? :seed
|
121
|
-
seed_value = opts[:seed]
|
122
|
-
if opts.has_key?(:update) && opts.has_key?(:op)
|
123
|
-
raise ArgumentError, "both :op and :update option specified"
|
124
|
-
elsif opts.has_key? :update
|
125
|
-
update_proc = opts[:update].to_proc
|
126
|
-
elsif opts.has_key? :op
|
127
|
-
op_proc = opts[:op].to_proc
|
128
|
-
update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
|
129
|
-
else
|
130
|
-
has_seed = true
|
131
|
-
seed_value = nil
|
132
|
-
update_proc = lambda {|ks, s, v| !s ? [v] : (s << v) }
|
133
|
-
end
|
134
|
-
result = {}
|
135
|
-
each {|*elts|
|
136
|
-
elt = elts.length <= 1 ? elts[0] : elts
|
137
|
-
ks = key_selectors.map {|ksel| ksel.call(elt) }
|
138
|
-
v = value_selector.call(elt)
|
139
|
-
h = result
|
140
|
-
0.upto(ks.length-2) {|i|
|
141
|
-
k = ks[i]
|
142
|
-
h[k] = {} if !h.has_key?(k)
|
143
|
-
h = h[k]
|
29
|
+
module Tb::Enumerable
|
30
|
+
include Enumerable
|
31
|
+
|
32
|
+
def with_header(&header_proc)
|
33
|
+
Enumerator.new {|y|
|
34
|
+
header_and_each(header_proc) {|pairs|
|
35
|
+
y.yield pairs
|
144
36
|
}
|
145
|
-
lastk = ks.last
|
146
|
-
if !h.has_key?(lastk)
|
147
|
-
if has_seed
|
148
|
-
h[lastk] = update_proc.call(ks, seed_value, v)
|
149
|
-
else
|
150
|
-
h[lastk] = v
|
151
|
-
end
|
152
|
-
else
|
153
|
-
h[lastk] = update_proc.call(ks, h[lastk], v)
|
154
|
-
end
|
155
37
|
}
|
156
|
-
if reduce_proc
|
157
|
-
tb_cat_reduce(result, [], key_selectors.length-1, reduce_proc)
|
158
|
-
end
|
159
|
-
result
|
160
38
|
end
|
161
39
|
|
162
|
-
def
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
40
|
+
def with_cumulative_header(&header_proc)
|
41
|
+
Enumerator.new {|y|
|
42
|
+
hset = {}
|
43
|
+
internal_header_proc = lambda {|header0|
|
44
|
+
if header0
|
45
|
+
header0.each {|f|
|
46
|
+
hset[f] = true
|
47
|
+
}
|
48
|
+
end
|
49
|
+
header_proc.call(header0) if header_proc
|
50
|
+
}
|
51
|
+
header_and_each(internal_header_proc) {|pairs|
|
52
|
+
pairs.each {|f, v|
|
53
|
+
if !hset[f]
|
54
|
+
hset[f] = true
|
55
|
+
end
|
56
|
+
}
|
57
|
+
y.yield [pairs, hset.keys.freeze]
|
58
|
+
}
|
59
|
+
}
|
171
60
|
end
|
172
|
-
private :tb_cat_selector_proc
|
173
61
|
|
174
|
-
def
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
ensure
|
181
|
-
ks.pop
|
62
|
+
def cat(*ers, &b)
|
63
|
+
ers = [self, *ers]
|
64
|
+
rec = lambda {|y, header|
|
65
|
+
if ers.empty?
|
66
|
+
if header
|
67
|
+
y.set_header header
|
182
68
|
end
|
183
|
-
|
69
|
+
else
|
70
|
+
last_e = ers.pop
|
71
|
+
last_e.with_header {|last_e_header|
|
72
|
+
if last_e_header && header
|
73
|
+
header = last_e_header | header
|
74
|
+
else
|
75
|
+
header = nil
|
76
|
+
end
|
77
|
+
rec.call(y, header)
|
78
|
+
}.each {|v|
|
79
|
+
y.yield v
|
80
|
+
}
|
81
|
+
end
|
82
|
+
}
|
83
|
+
er = Tb::Enumerator.new {|y|
|
84
|
+
rec.call(y, [])
|
85
|
+
}
|
86
|
+
if block_given?
|
87
|
+
er.each(&b)
|
184
88
|
else
|
185
|
-
|
186
|
-
ks << k
|
187
|
-
begin
|
188
|
-
tb_cat_reduce(h, ks, nestlevel-1, reduce_proc)
|
189
|
-
ensure
|
190
|
-
ks.pop
|
191
|
-
end
|
192
|
-
}
|
89
|
+
er
|
193
90
|
end
|
194
91
|
end
|
195
|
-
private :tb_cat_reduce
|
196
92
|
|
197
|
-
#
|
198
|
-
#
|
199
|
-
#
|
200
|
-
#
|
201
|
-
#
|
202
|
-
#
|
203
|
-
#
|
204
|
-
#
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
# See Enumerable#tb_categorize for details of selectors.
|
211
|
-
#
|
212
|
-
# The key selectors, _kselN_, are used to extract hash keys from an element.
|
213
|
-
# If two or more key selectors are specified, the result hash will be nested.
|
214
|
-
#
|
215
|
-
# The value selector, _vsel_, is used for the values of innermost hashes.
|
216
|
-
# By default, this method assumes the key selectors categorizes elements in enum uniquely.
|
217
|
-
# If the key selectors generates same keys for two or more elements, ArgumentError is raised.
|
218
|
-
# This behavior can be customized by :seed option and the block.
|
219
|
-
#
|
220
|
-
# a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
|
221
|
-
# {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
|
222
|
-
# {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
|
223
|
-
# p a.tb_unique_categorize(:fruit, :price)
|
224
|
-
# #=> {"banana"=>100, "melon"=>300, "grapefruit"=>200}
|
225
|
-
#
|
226
|
-
# p a.tb_unique_categorize(:color, :price)
|
227
|
-
# # ArgumentError
|
228
|
-
#
|
229
|
-
# If the block is given, it is used for combining values in a category.
|
230
|
-
# The arguments for the block is a seed and the value extracted by _vsel_.
|
231
|
-
# The return value of the block is used as the next seed.
|
232
|
-
# :seed option specifies the initial seed.
|
233
|
-
# If :seed is not given, the first value for each category is used for the seed.
|
234
|
-
#
|
235
|
-
# p a.tb_unique_categorize(:taste, :price) {|s, v| s + v }
|
236
|
-
# #=> {"sweet"=>400, "tart"=>200}
|
237
|
-
#
|
238
|
-
# p a.tb_unique_categorize(:color, :price) {|s, v| s + v }
|
239
|
-
# #=> {"yellow"=>300, "green"=>300}
|
240
|
-
#
|
241
|
-
def tb_unique_categorize(*args, &update_proc)
|
242
|
-
opts = args.last.kind_of?(Hash) ? args.pop.dup : {}
|
243
|
-
if update_proc
|
244
|
-
opts[:update] = lambda {|ks, s, v| update_proc.call(s, v) }
|
245
|
-
else
|
246
|
-
seed = Object.new
|
247
|
-
opts[:seed] = seed
|
248
|
-
opts[:update] = lambda {|ks, s, v|
|
249
|
-
if s.equal? seed
|
250
|
-
v
|
251
|
-
else
|
252
|
-
raise ArgumentError, "ambiguous key: #{ks.map {|k| k.inspect }.join(',')}"
|
93
|
+
# creates a new Tb::Enumerator object which has
|
94
|
+
# a new field named _field_ with the value returned by the block.
|
95
|
+
#
|
96
|
+
# t1 = Tb.new %w[a b], [1, 2], [3, 4]
|
97
|
+
# p t1.newfield("x") {|row| row["a"] + row["b"] + 100 }.to_a
|
98
|
+
# #=> [{"x"=>103, "a"=>1, "b"=>2},
|
99
|
+
# # {"x"=>107, "a"=>3, "b"=>4}]
|
100
|
+
#
|
101
|
+
def newfield(field)
|
102
|
+
Tb::Enumerator.new {|y|
|
103
|
+
self.with_header {|header|
|
104
|
+
if header
|
105
|
+
y.set_header(Tb::FieldSet.normalize([field, *header]))
|
253
106
|
end
|
107
|
+
}.each {|row|
|
108
|
+
keys = row.keys
|
109
|
+
keys = Tb::FieldSet.normalize([field, *keys])
|
110
|
+
vals = row.values
|
111
|
+
vals = [yield(row), *vals]
|
112
|
+
y << Hash[keys.zip(vals)]
|
254
113
|
}
|
255
|
-
|
256
|
-
tb_categorize(*(args + [opts]))
|
114
|
+
}
|
257
115
|
end
|
258
116
|
|
259
117
|
# :call-seq:
|
260
|
-
#
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
118
|
+
# table1.natjoin2(table2, missing_value=nil, retain_left=false, retain_right=false)
|
119
|
+
def natjoin2(tbl2, missing_value=nil, retain_left=false, retain_right=false)
|
120
|
+
Tb::Enumerator.new {|y|
|
121
|
+
tbl1 = self
|
122
|
+
header1 = header2 = nil
|
123
|
+
sorted_tbl2 = nil
|
124
|
+
common_header = nil
|
125
|
+
total_header = nil
|
126
|
+
sorted_tbl1 = tbl1.with_header {|h1|
|
127
|
+
header1 = h1
|
128
|
+
sorted_tbl2 = tbl2.with_header {|h2|
|
129
|
+
header2 = h2
|
130
|
+
common_header = header1 & header2
|
131
|
+
total_header = header1 | header2
|
132
|
+
y.set_header total_header
|
133
|
+
}.lazy_map {|pairs|
|
134
|
+
[common_header.map {|f| pairs[f] }, pairs]
|
135
|
+
}.extsort_by {|cv, pairs| cv }.to_fileenumerator
|
136
|
+
}.lazy_map {|pairs|
|
137
|
+
[common_header.map {|f| pairs[f] }, pairs]
|
138
|
+
}.extsort_by {|cv, pairs| cv }.to_fileenumerator
|
139
|
+
sorted_tbl1.open_reader {|t1|
|
140
|
+
sorted_tbl2.open_reader {|t2|
|
141
|
+
missing_hash = {}
|
142
|
+
total_header.each {|f|
|
143
|
+
missing_hash[f] = missing_value
|
144
|
+
}
|
145
|
+
Tb::ExEnumerator.merge_sorted(t1, t2) {|cv, t1_or_nil, t2_or_nil|
|
146
|
+
if !t2_or_nil
|
147
|
+
t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
|
148
|
+
if retain_left
|
149
|
+
y.yield missing_hash.merge(_pairs1.to_hash)
|
150
|
+
end
|
151
|
+
}
|
152
|
+
elsif !t1_or_nil
|
153
|
+
t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
|
154
|
+
if retain_right
|
155
|
+
y.yield missing_hash.merge(_pairs2.to_hash)
|
156
|
+
end
|
157
|
+
}
|
158
|
+
else # t1_or_nil && t2_or_nil
|
159
|
+
t2_pos = t2.pos
|
160
|
+
t1.subeach_by {|_cv1, _| _cv1 }.each {|_, _pairs1|
|
161
|
+
t2.pos = t2_pos
|
162
|
+
t2.subeach_by {|_cv2, _| _cv2 }.each {|_, _pairs2|
|
163
|
+
y.yield(_pairs2.to_hash.merge(_pairs1.to_hash))
|
164
|
+
}
|
165
|
+
}
|
166
|
+
end
|
167
|
+
}
|
168
|
+
}
|
169
|
+
}
|
293
170
|
}
|
294
|
-
puts "#{title}: #{a.inspect}"
|
295
171
|
end
|
296
|
-
private :dump_objsfile
|
297
172
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
}
|
173
|
+
# :call-seq:
|
174
|
+
# table1.natjoin2_outer(table2, missing_value=nil, retain_left=true, retain_right=true)
|
175
|
+
def natjoin2_outer(tbl2, missing_value=nil, retain_left=true, retain_right=true)
|
176
|
+
natjoin2(tbl2, missing_value, retain_left, retain_right)
|
303
177
|
end
|
304
178
|
|
305
|
-
def
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
#dump_objsfile(:tmp2, tmp2)
|
317
|
-
#dump_objsfile(:tmp3, tmp3)
|
318
|
-
#dump_objsfile(:tmp4, tmp4)
|
319
|
-
extsort_by_merge(tmp1, tmp2, tmp3, tmp4)
|
320
|
-
tmp1.rewind
|
321
|
-
tmp1.truncate(0)
|
322
|
-
tmp2.rewind
|
323
|
-
tmp2.truncate(0)
|
324
|
-
tmp1, tmp2, tmp3, tmp4 = tmp3, tmp4, tmp1, tmp2
|
325
|
-
end
|
326
|
-
#dump_objsfile(:tmp1, tmp1)
|
327
|
-
#dump_objsfile(:tmp2, tmp2)
|
328
|
-
#dump_objsfile(:tmp3, tmp3)
|
329
|
-
#dump_objsfile(:tmp4, tmp4)
|
330
|
-
extsort_by_strip_cv(tmp1, y)
|
331
|
-
ensure
|
332
|
-
tmp1.close(true) if tmp1
|
333
|
-
tmp2.close(true) if tmp2
|
334
|
-
tmp3.close(true) if tmp3
|
335
|
-
tmp4.close(true) if tmp4
|
179
|
+
def to_tb
|
180
|
+
tb = Tb.new
|
181
|
+
self.each {|pairs|
|
182
|
+
pairs.each {|k, v|
|
183
|
+
unless tb.has_field? k
|
184
|
+
tb.define_field(k)
|
185
|
+
end
|
186
|
+
}
|
187
|
+
tb.insert pairs
|
188
|
+
}
|
189
|
+
tb
|
336
190
|
end
|
337
|
-
private :extsort_by_internal
|
338
191
|
|
339
|
-
def
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
buf_mode = false
|
192
|
+
def write_to_csv(io, with_header=true)
|
193
|
+
stream = nil
|
194
|
+
header = []
|
195
|
+
fgen = fnew = nil
|
196
|
+
self.with_cumulative_header {|header0|
|
197
|
+
if !with_header
|
198
|
+
stream = true
|
199
|
+
elsif header0
|
200
|
+
stream = true
|
201
|
+
io.puts Tb.csv_encode_row(header0)
|
202
|
+
else
|
203
|
+
stream = false
|
204
|
+
fgen, fnew = Tb::FileEnumerator.gen_new
|
205
|
+
end
|
206
|
+
}.each {|pairs, header1|
|
207
|
+
pairs = Hash[pairs] unless pairs.respond_to? :has_key?
|
208
|
+
header = header1
|
209
|
+
if stream
|
210
|
+
fs = header.dup
|
211
|
+
while !fs.empty? && !pairs.has_key?(fs.last)
|
212
|
+
fs.pop
|
361
213
|
end
|
362
|
-
|
363
|
-
|
364
|
-
prevobj_cv = obj_cv
|
214
|
+
ary = fs.map {|f| pairs[f] }
|
215
|
+
io.puts Tb.csv_encode_row(ary)
|
365
216
|
else
|
366
|
-
|
367
|
-
Marshal.dump(nil, tmp_current)
|
368
|
-
buf = [[obj_cv, i, dumped]]
|
369
|
-
buf_size = dumped.size
|
370
|
-
buf_mode = true
|
371
|
-
tmp_current, tmp_another = tmp_another, tmp_current
|
217
|
+
fgen.call Hash[pairs]
|
372
218
|
end
|
373
219
|
}
|
374
|
-
if
|
375
|
-
|
376
|
-
|
377
|
-
tmp_current.write d
|
378
|
-
}
|
379
|
-
end
|
380
|
-
if !buf_mode || !buf.empty?
|
381
|
-
Marshal.dump(nil, tmp_current)
|
382
|
-
end
|
383
|
-
end
|
384
|
-
private :extsort_by_first_split
|
385
|
-
|
386
|
-
def extsort_by_merge(src1, src2, dst1, dst2)
|
387
|
-
src1.rewind
|
388
|
-
src2.rewind
|
389
|
-
obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
|
390
|
-
obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
|
391
|
-
prefer1 = true
|
392
|
-
while true
|
393
|
-
cmp = obj1_cv <=> obj2_cv
|
394
|
-
if prefer1 ? cmp > 0 : cmp >= 0
|
395
|
-
obj1_pair, obj1_cv, obj1, src1, obj2_pair, obj2_cv, obj2, src2 = obj2_pair, obj2_cv, obj2, src2, obj1_pair, obj1_cv, obj1, src1
|
396
|
-
prefer1 = !prefer1
|
397
|
-
end
|
398
|
-
Marshal.dump([obj1_cv, obj1], dst1)
|
399
|
-
obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
|
400
|
-
if !obj1_pair
|
401
|
-
begin
|
402
|
-
Marshal.dump(obj2_pair, dst1)
|
403
|
-
obj2_pair = Marshal.load(src2)
|
404
|
-
end until !obj2_pair
|
405
|
-
Marshal.dump(nil, dst1)
|
406
|
-
dst1, dst2 = dst2, dst1
|
407
|
-
break if src1.eof?
|
408
|
-
break if src2.eof?
|
409
|
-
obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
|
410
|
-
obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
|
220
|
+
if !stream
|
221
|
+
if with_header
|
222
|
+
io.puts Tb.csv_encode_row(header)
|
411
223
|
end
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
until restsrc.eof?
|
421
|
-
restobj_pair = Marshal.load(restsrc)
|
422
|
-
Marshal.dump(restobj_pair, dst1)
|
423
|
-
end
|
424
|
-
end
|
425
|
-
private :extsort_by_merge
|
426
|
-
|
427
|
-
def extsort_by_strip_cv(tmp1, y)
|
428
|
-
tmp1.rewind
|
429
|
-
while true
|
430
|
-
pair = Marshal.load(tmp1)
|
431
|
-
break if !pair
|
432
|
-
_, obj = pair
|
433
|
-
y.yield obj
|
224
|
+
fnew.call.each {|pairs|
|
225
|
+
fs = header.dup
|
226
|
+
while !fs.empty? && !pairs.has_key?(fs.last)
|
227
|
+
fs.pop
|
228
|
+
end
|
229
|
+
ary = fs.map {|f| pairs[f] }
|
230
|
+
io.puts Tb.csv_encode_row(ary)
|
231
|
+
}
|
434
232
|
end
|
435
233
|
end
|
436
|
-
private :extsort_by_strip_cv
|
437
234
|
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
first = true
|
447
|
-
self.each {|curr|
|
448
|
-
if first
|
449
|
-
before_group.call(curr)
|
450
|
-
body.call(curr)
|
451
|
-
prev = curr
|
452
|
-
first = false
|
453
|
-
elsif boundary_p.call(prev, curr)
|
454
|
-
after_group.call(prev)
|
455
|
-
before_group.call(curr)
|
456
|
-
body.call(curr)
|
457
|
-
prev = curr
|
458
|
-
else
|
459
|
-
body.call(curr)
|
460
|
-
prev = curr
|
461
|
-
end
|
235
|
+
def write_to_json(out)
|
236
|
+
require 'json'
|
237
|
+
out.print "["
|
238
|
+
sep = nil
|
239
|
+
self.each {|pairs|
|
240
|
+
out.print sep if sep
|
241
|
+
out.print JSON.pretty_generate(Hash[pairs.to_a])
|
242
|
+
sep = ",\n"
|
462
243
|
}
|
463
|
-
|
464
|
-
|
465
|
-
end
|
244
|
+
out.puts "]"
|
245
|
+
nil
|
466
246
|
end
|
467
247
|
|
468
|
-
def
|
469
|
-
Enumerator.new {|
|
470
|
-
|
471
|
-
|
248
|
+
def extsort_by(opts={}, &cmpvalue_from)
|
249
|
+
Tb::Enumerator.new {|ty|
|
250
|
+
header = []
|
251
|
+
er = Enumerator.new {|y|
|
252
|
+
self.with_cumulative_header {|header0|
|
253
|
+
header = header0 if header0
|
254
|
+
}.each {|pairs, header1|
|
255
|
+
header = header1
|
256
|
+
y.yield pairs
|
257
|
+
}
|
258
|
+
ty.set_header header
|
259
|
+
}
|
260
|
+
er.extsort_by(opts, &cmpvalue_from).each {|pairs|
|
261
|
+
ty.yield pairs
|
472
262
|
}
|
473
263
|
}
|
474
264
|
end
|