bmg 0.17.7 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -3
- data/README.md +236 -57
- data/lib/bmg.rb +6 -0
- data/lib/bmg/algebra.rb +1 -0
- data/lib/bmg/algebra/shortcuts.rb +14 -0
- data/lib/bmg/operator/allbut.rb +27 -0
- data/lib/bmg/operator/autosummarize.rb +27 -4
- data/lib/bmg/operator/autowrap.rb +27 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +41 -2
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +2 -7
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +7 -0
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +39 -1
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/reader.rb +1 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +22 -3
- data/lib/bmg/reader/text_file.rb +56 -0
- data/lib/bmg/relation.rb +31 -2
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +6 -1
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -3
- data/lib/bmg/summarizer.rb +29 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +47 -0
- data/lib/bmg/support.rb +2 -0
- data/lib/bmg/support/ordering.rb +20 -0
- data/lib/bmg/support/output_preferences.rb +44 -0
- data/lib/bmg/support/tuple_algebra.rb +6 -0
- data/lib/bmg/support/tuple_transformer.rb +4 -5
- data/lib/bmg/version.rb +2 -2
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +7 -7
- data/lib/bmg/writer/xlsx.rb +68 -0
- data/tasks/test.rake +9 -2
- metadata +37 -15
data/lib/bmg/operator/autosummarize.rb
CHANGED
@@ -24,7 +24,24 @@ module Bmg
|
|
24
24
|
|
25
25
|
public
|
26
26
|
|
27
|
+
def self.same(*args)
|
28
|
+
Same.new(*args)
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.group(*args)
|
32
|
+
Group.new(*args)
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.y_by_x(*args)
|
36
|
+
YByX.new(*args)
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.ys_by_x(*args)
|
40
|
+
YsByX.new(*args)
|
41
|
+
end
|
42
|
+
|
27
43
|
def each(&bl)
|
44
|
+
return to_enum unless block_given?
|
28
45
|
h = {}
|
29
46
|
@operand.each do |tuple|
|
30
47
|
key = key(tuple)
|
@@ -41,6 +58,12 @@ module Bmg
|
|
41
58
|
[:autosummarize, operand.to_ast, by.dup, sums.dup]
|
42
59
|
end
|
43
60
|
|
61
|
+
public ### for internal reasons
|
62
|
+
|
63
|
+
def _count
|
64
|
+
operand._count
|
65
|
+
end
|
66
|
+
|
44
67
|
protected
|
45
68
|
|
46
69
|
def _restrict(type, predicate)
|
@@ -175,11 +198,11 @@ module Bmg
|
|
175
198
|
end
|
176
199
|
|
177
200
|
def init(v)
|
178
|
-
[v]
|
201
|
+
v.nil? ? [] : [v]
|
179
202
|
end
|
180
203
|
|
181
204
|
def sum(v1, v2)
|
182
|
-
v1 << v2
|
205
|
+
v2.nil? ? v1 : (v1 << v2)
|
183
206
|
end
|
184
207
|
|
185
208
|
def term(v)
|
@@ -211,11 +234,11 @@ module Bmg
|
|
211
234
|
end
|
212
235
|
|
213
236
|
def init(v)
|
214
|
-
[v]
|
237
|
+
v.nil? ? [] : [v]
|
215
238
|
end
|
216
239
|
|
217
240
|
def sum(v1, v2)
|
218
|
-
v1 << v2
|
241
|
+
v2.nil? ? v1 : (v1 << v2)
|
219
242
|
end
|
220
243
|
|
221
244
|
def term(v)
|
data/lib/bmg/operator/autowrap.rb
CHANGED
@@ -43,6 +43,7 @@ module Bmg
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def each
|
46
|
+
return to_enum unless block_given?
|
46
47
|
@operand.each do |tuple|
|
47
48
|
yield autowrap_tuple(tuple)
|
48
49
|
end
|
@@ -52,6 +53,12 @@ module Bmg
|
|
52
53
|
[ :autowrap, operand.to_ast, @original_options.dup ]
|
53
54
|
end
|
54
55
|
|
56
|
+
public ### for internal reasons
|
57
|
+
|
58
|
+
def _count
|
59
|
+
operand._count
|
60
|
+
end
|
61
|
+
|
55
62
|
protected ### optimization
|
56
63
|
|
57
64
|
def _autowrap(type, opts)
|
@@ -86,6 +93,16 @@ module Bmg
|
|
86
93
|
false
|
87
94
|
end
|
88
95
|
|
96
|
+
def _matching(type, right, on)
|
97
|
+
if (wrapped_roots! & on).empty?
|
98
|
+
operand.matching(right, on).autowrap(options)
|
99
|
+
else
|
100
|
+
super
|
101
|
+
end
|
102
|
+
rescue UnknownAttributesError
|
103
|
+
super
|
104
|
+
end
|
105
|
+
|
89
106
|
def _page(type, ordering, page_index, opts)
|
90
107
|
attrs = ordering.map{|(a,d)| a }
|
91
108
|
if (wrapped_roots! & attrs).empty?
|
@@ -97,6 +114,16 @@ module Bmg
|
|
97
114
|
super
|
98
115
|
end
|
99
116
|
|
117
|
+
def _project(type, attrlist)
|
118
|
+
if (wrapped_roots! & attrlist).empty?
|
119
|
+
operand.project(attrlist).autowrap(options)
|
120
|
+
else
|
121
|
+
super
|
122
|
+
end
|
123
|
+
rescue UnknownAttributesError
|
124
|
+
super
|
125
|
+
end
|
126
|
+
|
100
127
|
def _rename(type, renaming)
|
101
128
|
# 1. Can't optimize if renaming applies to a wrapped one
|
102
129
|
return super unless (wrapped_roots! & renaming.keys).empty?
|
data/lib/bmg/operator/constants.rb
CHANGED
@@ -23,6 +23,7 @@ module Bmg
|
|
23
23
|
public
|
24
24
|
|
25
25
|
def each
|
26
|
+
return to_enum unless block_given?
|
26
27
|
@operand.each do |tuple|
|
27
28
|
yield extend_it(tuple)
|
28
29
|
end
|
@@ -54,6 +55,12 @@ module Bmg
|
|
54
55
|
[ :constants, operand.to_ast, constants.dup ]
|
55
56
|
end
|
56
57
|
|
58
|
+
public ### for internal reasons
|
59
|
+
|
60
|
+
def _count
|
61
|
+
operand._count
|
62
|
+
end
|
63
|
+
|
57
64
|
protected ### optimization
|
58
65
|
|
59
66
|
def _page(type, ordering, page_index, options)
|
data/lib/bmg/operator/extend.rb
CHANGED
@@ -26,6 +26,7 @@ module Bmg
|
|
26
26
|
public
|
27
27
|
|
28
28
|
def each
|
29
|
+
return to_enum unless block_given?
|
29
30
|
@operand.each do |tuple|
|
30
31
|
yield extend_it(tuple)
|
31
32
|
end
|
@@ -53,6 +54,12 @@ module Bmg
|
|
53
54
|
[ :extend, operand.to_ast, extension.dup ]
|
54
55
|
end
|
55
56
|
|
57
|
+
public ### for internal reasons
|
58
|
+
|
59
|
+
def _count
|
60
|
+
operand._count
|
61
|
+
end
|
62
|
+
|
56
63
|
protected ### optimization
|
57
64
|
|
58
65
|
def _allbut(type, butlist)
|
data/lib/bmg/operator/group.rb
CHANGED
data/lib/bmg/operator/image.rb
CHANGED
@@ -46,6 +46,7 @@ module Bmg
|
|
46
46
|
public
|
47
47
|
|
48
48
|
def each(*args, &bl)
|
49
|
+
return to_enum unless block_given?
|
49
50
|
(options[:jit_optimized] ? self : jit_optimize)._each(*args, &bl)
|
50
51
|
end
|
51
52
|
|
@@ -99,9 +100,10 @@ module Bmg
|
|
99
100
|
key = tuple_project(t, on)
|
100
101
|
index[key].operand << tuple_image(t, on)
|
101
102
|
end
|
102
|
-
if options[:array]
|
103
|
+
if opt = options[:array]
|
104
|
+
sorter = to_sorter(opt)
|
103
105
|
index = index.each_with_object({}) do |(k,v),ix|
|
104
|
-
ix[k] = v.to_a
|
106
|
+
ix[k] = sorter ? v.to_a.sort(&sorter) : v.to_a
|
105
107
|
end
|
106
108
|
end
|
107
109
|
index
|
@@ -154,8 +156,32 @@ module Bmg
|
|
154
156
|
end
|
155
157
|
end
|
156
158
|
|
159
|
+
public ### for internal reasons
|
160
|
+
|
161
|
+
def _count
|
162
|
+
left._count
|
163
|
+
end
|
164
|
+
|
157
165
|
protected ### optimization
|
158
166
|
|
167
|
+
def _allbut(type, butlist)
|
168
|
+
if butlist.include?(as)
|
169
|
+
left.allbut(butlist - [as])
|
170
|
+
elsif (butlist & on).empty?
|
171
|
+
left.allbut(butlist).image(right, as, on, options)
|
172
|
+
else
|
173
|
+
super
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def _matching(type, m_right, m_on)
|
178
|
+
if m_on.include?(as)
|
179
|
+
super
|
180
|
+
else
|
181
|
+
left.matching(m_right, m_on).image(right, as, on, options)
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
159
185
|
def _page(type, ordering, page_index, opts)
|
160
186
|
if ordering.map{|(k,v)| k}.include?(as)
|
161
187
|
super
|
@@ -166,6 +192,14 @@ module Bmg
|
|
166
192
|
end
|
167
193
|
end
|
168
194
|
|
195
|
+
def _project(type, attrlist)
|
196
|
+
if attrlist.include?(as)
|
197
|
+
super
|
198
|
+
else
|
199
|
+
left.project(attrlist)
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
169
203
|
def _restrict(type, predicate)
|
170
204
|
on_as, rest = predicate.and_split([as])
|
171
205
|
if rest.tautology?
|
@@ -227,6 +261,11 @@ module Bmg
|
|
227
261
|
Relation::InMemory.new(image_type, Set.new)
|
228
262
|
end
|
229
263
|
|
264
|
+
def to_sorter(opt)
|
265
|
+
return nil unless opt.is_a?(Array)
|
266
|
+
Ordering.new(opt).comparator
|
267
|
+
end
|
268
|
+
|
230
269
|
public
|
231
270
|
|
232
271
|
def to_s
|
data/lib/bmg/operator/join.rb
CHANGED
data/lib/bmg/operator/page.rb
CHANGED
@@ -30,6 +30,7 @@ module Bmg
|
|
30
30
|
public
|
31
31
|
|
32
32
|
def each(&bl)
|
33
|
+
return to_enum unless block_given?
|
33
34
|
page_size = options[:page_size]
|
34
35
|
@operand.to_a
|
35
36
|
.sort(&comparator)
|
@@ -45,13 +46,7 @@ module Bmg
|
|
45
46
|
protected ### inspect
|
46
47
|
|
47
48
|
def comparator
|
48
|
-
|
49
|
-
ordering.each do |(attr,direction)|
|
50
|
-
c = t1[attr] <=> t2[attr]
|
51
|
-
return (direction == :desc ? -c : c) unless c==0
|
52
|
-
end
|
53
|
-
0
|
54
|
-
}
|
49
|
+
Ordering.new(@ordering).comparator
|
55
50
|
end
|
56
51
|
|
57
52
|
def args
|
data/lib/bmg/operator/project.rb
CHANGED
@@ -29,9 +29,10 @@ module Bmg
|
|
29
29
|
public
|
30
30
|
|
31
31
|
def each
|
32
|
+
return to_enum unless block_given?
|
32
33
|
seen = {}
|
33
34
|
@operand.each do |tuple|
|
34
|
-
projected =
|
35
|
+
projected = tuple_project(tuple)
|
35
36
|
unless seen.has_key?(projected)
|
36
37
|
yield(projected)
|
37
38
|
seen[projected] = true
|
@@ -74,7 +75,7 @@ module Bmg
|
|
74
75
|
|
75
76
|
private
|
76
77
|
|
77
|
-
def
|
78
|
+
def tuple_project(tuple)
|
78
79
|
tuple.dup.delete_if{|k,_| !@attrlist.include?(k) }
|
79
80
|
end
|
80
81
|
|
data/lib/bmg/operator/rename.rb
CHANGED
@@ -29,6 +29,7 @@ module Bmg
|
|
29
29
|
public
|
30
30
|
|
31
31
|
def each
|
32
|
+
return to_enum unless block_given?
|
32
33
|
@operand.each do |tuple|
|
33
34
|
yield rename(tuple, renaming)
|
34
35
|
end
|
@@ -60,6 +61,12 @@ module Bmg
|
|
60
61
|
[ :rename, operand.to_ast, renaming.dup ]
|
61
62
|
end
|
62
63
|
|
64
|
+
public ### for internal reasons
|
65
|
+
|
66
|
+
def _count
|
67
|
+
operand._count
|
68
|
+
end
|
69
|
+
|
63
70
|
protected ### optimization
|
64
71
|
|
65
72
|
def _page(type, ordering, page_index, options)
|
data/lib/bmg/operator/rxmatch.rb
CHANGED
data/lib/bmg/operator/summarize.rb
CHANGED
@@ -13,7 +13,7 @@ module Bmg
|
|
13
13
|
@type = type
|
14
14
|
@operand = operand
|
15
15
|
@by = by
|
16
|
-
@summarization =
|
16
|
+
@summarization = Summarizer.summarization(summarization)
|
17
17
|
end
|
18
18
|
|
19
19
|
protected
|
@@ -23,6 +23,7 @@ module Bmg
|
|
23
23
|
public
|
24
24
|
|
25
25
|
def each
|
26
|
+
return to_enum unless block_given?
|
26
27
|
# summary key => summarization memo, starting with least
|
27
28
|
result = Hash.new{|h,k|
|
28
29
|
h[k] = Hash[@summarization.map{|k,v|
|
@@ -56,22 +57,6 @@ module Bmg
|
|
56
57
|
[ by, summarization ]
|
57
58
|
end
|
58
59
|
|
59
|
-
private
|
60
|
-
|
61
|
-
# Compile a summarization hash so that every value is a Summarizer
|
62
|
-
# instance
|
63
|
-
def self.compile(summarization)
|
64
|
-
Hash[summarization.map{|k,v|
|
65
|
-
summarizer = case v
|
66
|
-
when Summarizer then v
|
67
|
-
when Symbol then Summarizer.send(v, k)
|
68
|
-
else
|
69
|
-
raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
|
70
|
-
end
|
71
|
-
[ k, summarizer ]
|
72
|
-
}]
|
73
|
-
end
|
74
|
-
|
75
60
|
end # class Summarize
|
76
61
|
end # module Operator
|
77
62
|
end # module Bmg
|
data/lib/bmg/operator/transform.rb
CHANGED
@@ -23,11 +23,12 @@ module Bmg
|
|
23
23
|
|
24
24
|
protected
|
25
25
|
|
26
|
-
attr_reader :transformation
|
26
|
+
attr_reader :transformation, :options
|
27
27
|
|
28
28
|
public
|
29
29
|
|
30
30
|
def each
|
31
|
+
return to_enum unless block_given?
|
31
32
|
t = transformer
|
32
33
|
@operand.each do |tuple|
|
33
34
|
yield t.call(tuple)
|
@@ -40,6 +41,43 @@ module Bmg
|
|
40
41
|
|
41
42
|
protected ### optimization
|
42
43
|
|
44
|
+
def _allbut(type, butlist)
|
45
|
+
# `allbut` can always be pushed down the tree. unlike
|
46
|
+
# `extend` the Proc that might be used cannot use attributes
|
47
|
+
# in butlist, so it's safe to strip them away.
|
48
|
+
if transformer.knows_attrlist?
|
49
|
+
# We just need to clean the transformation
|
50
|
+
attrlist = transformer.to_attrlist
|
51
|
+
thrown = attrlist & butlist
|
52
|
+
t = transformation.dup.reject{|k,v| thrown.include?(k) }
|
53
|
+
operand.allbut(butlist).transform(t, options)
|
54
|
+
else
|
55
|
+
operand.allbut(butlist).transform(transformation, options)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def _project(type, attrlist)
|
60
|
+
if transformer.knows_attrlist?
|
61
|
+
t = transformation.dup.select{|k,v| attrlist.include?(k) }
|
62
|
+
operand.project(attrlist).transform(t, options)
|
63
|
+
else
|
64
|
+
operand.project(attrlist).transform(transformation, options)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def _restrict(type, predicate)
|
69
|
+
return super unless transformer.knows_attrlist?
|
70
|
+
top, bottom = predicate.and_split(transformer.to_attrlist)
|
71
|
+
if top == predicate
|
72
|
+
super
|
73
|
+
else
|
74
|
+
operand
|
75
|
+
.restrict(bottom)
|
76
|
+
.transform(transformation, options)
|
77
|
+
.restrict(top)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
43
81
|
protected ### inspect
|
44
82
|
|
45
83
|
def args
|