bmg 0.17.8 → 0.18.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -3
- data/README.md +236 -57
- data/lib/bmg.rb +6 -0
- data/lib/bmg/algebra.rb +1 -0
- data/lib/bmg/algebra/shortcuts.rb +14 -0
- data/lib/bmg/operator/allbut.rb +27 -0
- data/lib/bmg/operator/autosummarize.rb +27 -4
- data/lib/bmg/operator/autowrap.rb +27 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +41 -2
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +2 -7
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +12 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +39 -1
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/reader.rb +1 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/reader/text_file.rb +56 -0
- data/lib/bmg/relation.rb +28 -0
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +6 -1
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -3
- data/lib/bmg/summarizer.rb +29 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/support.rb +1 -0
- data/lib/bmg/support/ordering.rb +20 -0
- data/lib/bmg/support/tuple_algebra.rb +6 -0
- data/lib/bmg/support/tuple_transformer.rb +14 -6
- data/lib/bmg/version.rb +2 -2
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +0 -11
- data/lib/bmg/writer/xlsx.rb +68 -0
- data/tasks/test.rake +9 -2
- metadata +36 -15
@@ -24,7 +24,24 @@ module Bmg
|
|
24
24
|
|
25
25
|
public
|
26
26
|
|
27
|
+
def self.same(*args)
|
28
|
+
Same.new(*args)
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.group(*args)
|
32
|
+
Group.new(*args)
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.y_by_x(*args)
|
36
|
+
YByX.new(*args)
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.ys_by_x(*args)
|
40
|
+
YsByX.new(*args)
|
41
|
+
end
|
42
|
+
|
27
43
|
def each(&bl)
|
44
|
+
return to_enum unless block_given?
|
28
45
|
h = {}
|
29
46
|
@operand.each do |tuple|
|
30
47
|
key = key(tuple)
|
@@ -41,6 +58,12 @@ module Bmg
|
|
41
58
|
[:autosummarize, operand.to_ast, by.dup, sums.dup]
|
42
59
|
end
|
43
60
|
|
61
|
+
public ### for internal reasons
|
62
|
+
|
63
|
+
def _count
|
64
|
+
operand._count
|
65
|
+
end
|
66
|
+
|
44
67
|
protected
|
45
68
|
|
46
69
|
def _restrict(type, predicate)
|
@@ -175,11 +198,11 @@ module Bmg
|
|
175
198
|
end
|
176
199
|
|
177
200
|
def init(v)
|
178
|
-
[v]
|
201
|
+
v.nil? ? [] : [v]
|
179
202
|
end
|
180
203
|
|
181
204
|
def sum(v1, v2)
|
182
|
-
v1 << v2
|
205
|
+
v2.nil? ? v1 : (v1 << v2)
|
183
206
|
end
|
184
207
|
|
185
208
|
def term(v)
|
@@ -211,11 +234,11 @@ module Bmg
|
|
211
234
|
end
|
212
235
|
|
213
236
|
def init(v)
|
214
|
-
[v]
|
237
|
+
v.nil? ? [] : [v]
|
215
238
|
end
|
216
239
|
|
217
240
|
def sum(v1, v2)
|
218
|
-
v1 << v2
|
241
|
+
v2.nil? ? v1 : (v1 << v2)
|
219
242
|
end
|
220
243
|
|
221
244
|
def term(v)
|
@@ -43,6 +43,7 @@ module Bmg
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def each
|
46
|
+
return to_enum unless block_given?
|
46
47
|
@operand.each do |tuple|
|
47
48
|
yield autowrap_tuple(tuple)
|
48
49
|
end
|
@@ -52,6 +53,12 @@ module Bmg
|
|
52
53
|
[ :autowrap, operand.to_ast, @original_options.dup ]
|
53
54
|
end
|
54
55
|
|
56
|
+
public ### for internal reasons
|
57
|
+
|
58
|
+
def _count
|
59
|
+
operand._count
|
60
|
+
end
|
61
|
+
|
55
62
|
protected ### optimization
|
56
63
|
|
57
64
|
def _autowrap(type, opts)
|
@@ -86,6 +93,16 @@ module Bmg
|
|
86
93
|
false
|
87
94
|
end
|
88
95
|
|
96
|
+
def _matching(type, right, on)
|
97
|
+
if (wrapped_roots! & on).empty?
|
98
|
+
operand.matching(right, on).autowrap(options)
|
99
|
+
else
|
100
|
+
super
|
101
|
+
end
|
102
|
+
rescue UnknownAttributesError
|
103
|
+
super
|
104
|
+
end
|
105
|
+
|
89
106
|
def _page(type, ordering, page_index, opts)
|
90
107
|
attrs = ordering.map{|(a,d)| a }
|
91
108
|
if (wrapped_roots! & attrs).empty?
|
@@ -97,6 +114,16 @@ module Bmg
|
|
97
114
|
super
|
98
115
|
end
|
99
116
|
|
117
|
+
def _project(type, attrlist)
|
118
|
+
if (wrapped_roots! & attrlist).empty?
|
119
|
+
operand.project(attrlist).autowrap(options)
|
120
|
+
else
|
121
|
+
super
|
122
|
+
end
|
123
|
+
rescue UnknownAttributesError
|
124
|
+
super
|
125
|
+
end
|
126
|
+
|
100
127
|
def _rename(type, renaming)
|
101
128
|
# 1. Can't optimize if renaming applies to a wrapped one
|
102
129
|
return super unless (wrapped_roots! & renaming.keys).empty?
|
@@ -23,6 +23,7 @@ module Bmg
|
|
23
23
|
public
|
24
24
|
|
25
25
|
def each
|
26
|
+
return to_enum unless block_given?
|
26
27
|
@operand.each do |tuple|
|
27
28
|
yield extend_it(tuple)
|
28
29
|
end
|
@@ -54,6 +55,12 @@ module Bmg
|
|
54
55
|
[ :constants, operand.to_ast, constants.dup ]
|
55
56
|
end
|
56
57
|
|
58
|
+
public ### for internal reasons
|
59
|
+
|
60
|
+
def _count
|
61
|
+
operand._count
|
62
|
+
end
|
63
|
+
|
57
64
|
protected ### optimization
|
58
65
|
|
59
66
|
def _page(type, ordering, page_index, options)
|
data/lib/bmg/operator/extend.rb
CHANGED
@@ -26,6 +26,7 @@ module Bmg
|
|
26
26
|
public
|
27
27
|
|
28
28
|
def each
|
29
|
+
return to_enum unless block_given?
|
29
30
|
@operand.each do |tuple|
|
30
31
|
yield extend_it(tuple)
|
31
32
|
end
|
@@ -53,6 +54,12 @@ module Bmg
|
|
53
54
|
[ :extend, operand.to_ast, extension.dup ]
|
54
55
|
end
|
55
56
|
|
57
|
+
public ### for internal reasons
|
58
|
+
|
59
|
+
def _count
|
60
|
+
operand._count
|
61
|
+
end
|
62
|
+
|
56
63
|
protected ### optimization
|
57
64
|
|
58
65
|
def _allbut(type, butlist)
|
data/lib/bmg/operator/group.rb
CHANGED
data/lib/bmg/operator/image.rb
CHANGED
@@ -46,6 +46,7 @@ module Bmg
|
|
46
46
|
public
|
47
47
|
|
48
48
|
def each(*args, &bl)
|
49
|
+
return to_enum unless block_given?
|
49
50
|
(options[:jit_optimized] ? self : jit_optimize)._each(*args, &bl)
|
50
51
|
end
|
51
52
|
|
@@ -99,9 +100,10 @@ module Bmg
|
|
99
100
|
key = tuple_project(t, on)
|
100
101
|
index[key].operand << tuple_image(t, on)
|
101
102
|
end
|
102
|
-
if options[:array]
|
103
|
+
if opt = options[:array]
|
104
|
+
sorter = to_sorter(opt)
|
103
105
|
index = index.each_with_object({}) do |(k,v),ix|
|
104
|
-
ix[k] = v.to_a
|
106
|
+
ix[k] = sorter ? v.to_a.sort(&sorter) : v.to_a
|
105
107
|
end
|
106
108
|
end
|
107
109
|
index
|
@@ -154,8 +156,32 @@ module Bmg
|
|
154
156
|
end
|
155
157
|
end
|
156
158
|
|
159
|
+
public ### for internal reasons
|
160
|
+
|
161
|
+
def _count
|
162
|
+
left._count
|
163
|
+
end
|
164
|
+
|
157
165
|
protected ### optimization
|
158
166
|
|
167
|
+
def _allbut(type, butlist)
|
168
|
+
if butlist.include?(as)
|
169
|
+
left.allbut(butlist - [as])
|
170
|
+
elsif (butlist & on).empty?
|
171
|
+
left.allbut(butlist).image(right, as, on, options)
|
172
|
+
else
|
173
|
+
super
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def _matching(type, m_right, m_on)
|
178
|
+
if m_on.include?(as)
|
179
|
+
super
|
180
|
+
else
|
181
|
+
left.matching(m_right, m_on).image(right, as, on, options)
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
159
185
|
def _page(type, ordering, page_index, opts)
|
160
186
|
if ordering.map{|(k,v)| k}.include?(as)
|
161
187
|
super
|
@@ -166,6 +192,14 @@ module Bmg
|
|
166
192
|
end
|
167
193
|
end
|
168
194
|
|
195
|
+
def _project(type, attrlist)
|
196
|
+
if attrlist.include?(as)
|
197
|
+
super
|
198
|
+
else
|
199
|
+
left.project(attrlist)
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
169
203
|
def _restrict(type, predicate)
|
170
204
|
on_as, rest = predicate.and_split([as])
|
171
205
|
if rest.tautology?
|
@@ -227,6 +261,11 @@ module Bmg
|
|
227
261
|
Relation::InMemory.new(image_type, Set.new)
|
228
262
|
end
|
229
263
|
|
264
|
+
def to_sorter(opt)
|
265
|
+
return nil unless opt.is_a?(Array)
|
266
|
+
Ordering.new(opt).comparator
|
267
|
+
end
|
268
|
+
|
230
269
|
public
|
231
270
|
|
232
271
|
def to_s
|
data/lib/bmg/operator/join.rb
CHANGED
data/lib/bmg/operator/page.rb
CHANGED
@@ -30,6 +30,7 @@ module Bmg
|
|
30
30
|
public
|
31
31
|
|
32
32
|
def each(&bl)
|
33
|
+
return to_enum unless block_given?
|
33
34
|
page_size = options[:page_size]
|
34
35
|
@operand.to_a
|
35
36
|
.sort(&comparator)
|
@@ -45,13 +46,7 @@ module Bmg
|
|
45
46
|
protected ### inspect
|
46
47
|
|
47
48
|
def comparator
|
48
|
-
|
49
|
-
ordering.each do |(attr,direction)|
|
50
|
-
c = t1[attr] <=> t2[attr]
|
51
|
-
return (direction == :desc ? -c : c) unless c==0
|
52
|
-
end
|
53
|
-
0
|
54
|
-
}
|
49
|
+
Ordering.new(@ordering).comparator
|
55
50
|
end
|
56
51
|
|
57
52
|
def args
|
data/lib/bmg/operator/project.rb
CHANGED
@@ -29,9 +29,10 @@ module Bmg
|
|
29
29
|
public
|
30
30
|
|
31
31
|
def each
|
32
|
+
return to_enum unless block_given?
|
32
33
|
seen = {}
|
33
34
|
@operand.each do |tuple|
|
34
|
-
projected =
|
35
|
+
projected = tuple_project(tuple)
|
35
36
|
unless seen.has_key?(projected)
|
36
37
|
yield(projected)
|
37
38
|
seen[projected] = true
|
@@ -74,7 +75,7 @@ module Bmg
|
|
74
75
|
|
75
76
|
private
|
76
77
|
|
77
|
-
def
|
78
|
+
def tuple_project(tuple)
|
78
79
|
tuple.dup.delete_if{|k,_| !@attrlist.include?(k) }
|
79
80
|
end
|
80
81
|
|
data/lib/bmg/operator/rename.rb
CHANGED
@@ -29,16 +29,17 @@ module Bmg
|
|
29
29
|
public
|
30
30
|
|
31
31
|
def each
|
32
|
+
return to_enum unless block_given?
|
32
33
|
@operand.each do |tuple|
|
33
|
-
yield
|
34
|
+
yield rename_tuple(tuple, renaming)
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
37
38
|
def insert(arg)
|
38
39
|
case arg
|
39
|
-
when Hash then operand.insert(
|
40
|
+
when Hash then operand.insert(rename_tuple(arg, reverse_renaming))
|
40
41
|
when Relation then operand.insert(arg.rename(reverse_renaming))
|
41
|
-
when Enumerable then operand.insert(arg.map{|t|
|
42
|
+
when Enumerable then operand.insert(arg.map{|t| rename_tuple(t, reverse_renaming) })
|
42
43
|
else
|
43
44
|
super
|
44
45
|
end
|
@@ -46,7 +47,7 @@ module Bmg
|
|
46
47
|
|
47
48
|
def update(arg)
|
48
49
|
case arg
|
49
|
-
when Hash then operand.update(
|
50
|
+
when Hash then operand.update(rename_tuple(arg, reverse_renaming))
|
50
51
|
else
|
51
52
|
super
|
52
53
|
end
|
@@ -60,6 +61,12 @@ module Bmg
|
|
60
61
|
[ :rename, operand.to_ast, renaming.dup ]
|
61
62
|
end
|
62
63
|
|
64
|
+
public ### for internal reasons
|
65
|
+
|
66
|
+
def _count
|
67
|
+
operand._count
|
68
|
+
end
|
69
|
+
|
63
70
|
protected ### optimization
|
64
71
|
|
65
72
|
def _page(type, ordering, page_index, options)
|
@@ -82,7 +89,7 @@ module Bmg
|
|
82
89
|
|
83
90
|
private
|
84
91
|
|
85
|
-
def
|
92
|
+
def rename_tuple(tuple, renaming)
|
86
93
|
tuple.each_with_object({}){|(k,v),h|
|
87
94
|
h[renaming[k] || k] = v
|
88
95
|
h
|
data/lib/bmg/operator/rxmatch.rb
CHANGED
@@ -13,7 +13,7 @@ module Bmg
|
|
13
13
|
@type = type
|
14
14
|
@operand = operand
|
15
15
|
@by = by
|
16
|
-
@summarization =
|
16
|
+
@summarization = Summarizer.summarization(summarization)
|
17
17
|
end
|
18
18
|
|
19
19
|
protected
|
@@ -23,6 +23,7 @@ module Bmg
|
|
23
23
|
public
|
24
24
|
|
25
25
|
def each
|
26
|
+
return to_enum unless block_given?
|
26
27
|
# summary key => summarization memo, starting with least
|
27
28
|
result = Hash.new{|h,k|
|
28
29
|
h[k] = Hash[@summarization.map{|k,v|
|
@@ -56,22 +57,6 @@ module Bmg
|
|
56
57
|
[ by, summarization ]
|
57
58
|
end
|
58
59
|
|
59
|
-
private
|
60
|
-
|
61
|
-
# Compile a summarization hash so that every value is a Summarizer
|
62
|
-
# instance
|
63
|
-
def self.compile(summarization)
|
64
|
-
Hash[summarization.map{|k,v|
|
65
|
-
summarizer = case v
|
66
|
-
when Summarizer then v
|
67
|
-
when Symbol then Summarizer.send(v, k)
|
68
|
-
else
|
69
|
-
raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
|
70
|
-
end
|
71
|
-
[ k, summarizer ]
|
72
|
-
}]
|
73
|
-
end
|
74
|
-
|
75
60
|
end # class Summarize
|
76
61
|
end # module Operator
|
77
62
|
end # module Bmg
|
@@ -23,11 +23,12 @@ module Bmg
|
|
23
23
|
|
24
24
|
protected
|
25
25
|
|
26
|
-
attr_reader :transformation
|
26
|
+
attr_reader :transformation, :options
|
27
27
|
|
28
28
|
public
|
29
29
|
|
30
30
|
def each
|
31
|
+
return to_enum unless block_given?
|
31
32
|
t = transformer
|
32
33
|
@operand.each do |tuple|
|
33
34
|
yield t.call(tuple)
|
@@ -40,6 +41,43 @@ module Bmg
|
|
40
41
|
|
41
42
|
protected ### optimization
|
42
43
|
|
44
|
+
def _allbut(type, butlist)
|
45
|
+
# `allbut` can always be pushed down the tree. unlike
|
46
|
+
# `extend` the Proc that might be used cannot use attributes
|
47
|
+
# in butlist, so it's safe to strip them away.
|
48
|
+
if transformer.knows_attrlist?
|
49
|
+
# We just need to clean the transformation
|
50
|
+
attrlist = transformer.to_attrlist
|
51
|
+
thrown = attrlist & butlist
|
52
|
+
t = transformation.dup.reject{|k,v| thrown.include?(k) }
|
53
|
+
operand.allbut(butlist).transform(t, options)
|
54
|
+
else
|
55
|
+
operand.allbut(butlist).transform(transformation, options)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def _project(type, attrlist)
|
60
|
+
if transformer.knows_attrlist?
|
61
|
+
t = transformation.dup.select{|k,v| attrlist.include?(k) }
|
62
|
+
operand.project(attrlist).transform(t, options)
|
63
|
+
else
|
64
|
+
operand.project(attrlist).transform(transformation, options)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def _restrict(type, predicate)
|
69
|
+
return super unless transformer.knows_attrlist?
|
70
|
+
top, bottom = predicate.and_split(transformer.to_attrlist)
|
71
|
+
if top == predicate
|
72
|
+
super
|
73
|
+
else
|
74
|
+
operand
|
75
|
+
.restrict(bottom)
|
76
|
+
.transform(transformation, options)
|
77
|
+
.restrict(top)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
43
81
|
protected ### inspect
|
44
82
|
|
45
83
|
def args
|