bmg 0.18.3 → 0.18.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/bmg/algebra.rb +18 -0
- data/lib/bmg/algebra/shortcuts.rb +8 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator.rb +2 -0
- data/lib/bmg/operator/image.rb +9 -4
- data/lib/bmg/operator/rename.rb +5 -5
- data/lib/bmg/operator/ungroup.rb +61 -0
- data/lib/bmg/operator/unwrap.rb +47 -0
- data/lib/bmg/reader/csv.rb +1 -1
- data/lib/bmg/reader/excel.rb +1 -1
- data/lib/bmg/sequel/translator.rb +18 -5
- data/lib/bmg/sql.rb +4 -1
- data/lib/bmg/sql/processor.rb +1 -0
- data/lib/bmg/sql/processor/summarize.rb +2 -2
- data/lib/bmg/sql/processor/transform.rb +105 -0
- data/lib/bmg/sql/relation.rb +19 -5
- data/lib/bmg/summarizer.rb +22 -11
- data/lib/bmg/summarizer/by_proc.rb +1 -1
- data/lib/bmg/summarizer/distinct_count.rb +36 -0
- data/lib/bmg/summarizer/percentile.rb +41 -9
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support/keys.rb +5 -0
- data/lib/bmg/support/tuple_transformer.rb +23 -1
- data/lib/bmg/type.rb +19 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer/csv.rb +2 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9489f2f59b1f3fc644c4a29ee53ba08662f9671ebda9cc6b0340a98c6daf20d9
|
4
|
+
data.tar.gz: f0506ba0dbea31d5d3a1c3f4e3edbdf0ea9ef88e40b386445494f6cd61f759c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3de98440bf031b653d2ef9f7cc6f89ab44969304ca6fdd04cadc514e617a17d5b8b5bfeb791bc03de576acac5431a1d85a52a413f59932aae77ee3f41175ff3
|
7
|
+
data.tar.gz: b583e4ade1f71740afaf2a43c5a0dad8eb27557de533b4cbd61c7f632e79d612a85340897e01ed571a91572acd4f2ab65c00d2b9e00d4d8a8cc8b97919c46102
|
data/README.md
CHANGED
@@ -234,7 +234,11 @@ t.transform(:to_s) # all-attrs transformation
|
|
234
234
|
t.transform(&:to_s) # similar, but Proc-driven
|
235
235
|
t.transform(:foo => :upcase, ...) # specific-attrs transformation
|
236
236
|
t.transform([:to_s, :upcase]) # chain-transformation
|
237
|
+
r.ungroup([:a, :b, ...]) # ungroup relation-valued attributes within parent tuple
|
238
|
+
r.ungroup(:a) # shortcut over ungroup([:a])
|
237
239
|
r.union(right) # relational union
|
240
|
+
r.unwrap([:a, :b, ...]) # merge tuple-valued attributes within parent tuple
|
241
|
+
r.unwrap(:a) # shortcut over unwrap([:a])
|
238
242
|
r.where(predicate) # alias for restrict(predicate)
|
239
243
|
```
|
240
244
|
|
data/lib/bmg/algebra.rb
CHANGED
@@ -183,6 +183,15 @@ module Bmg
|
|
183
183
|
end
|
184
184
|
protected :_transform
|
185
185
|
|
186
|
+
def ungroup(attrs)
|
187
|
+
_ungroup self.type.ungroup(attrs), attrs
|
188
|
+
end
|
189
|
+
|
190
|
+
def _ungroup(type, attrs)
|
191
|
+
Operator::Ungroup.new(type, self, attrs)
|
192
|
+
end
|
193
|
+
protected :_ungroup
|
194
|
+
|
186
195
|
def union(other, options = {})
|
187
196
|
return self if other.is_a?(Relation::Empty)
|
188
197
|
_union self.type.union(other.type), other, options
|
@@ -193,6 +202,15 @@ module Bmg
|
|
193
202
|
end
|
194
203
|
protected :_union
|
195
204
|
|
205
|
+
def unwrap(attrs)
|
206
|
+
_unwrap self.type.unwrap(attrs), attrs
|
207
|
+
end
|
208
|
+
|
209
|
+
def _unwrap(type, attrs)
|
210
|
+
Operator::Unwrap.new(type, self, attrs)
|
211
|
+
end
|
212
|
+
protected :_unwrap
|
213
|
+
|
196
214
|
def spied(spy)
|
197
215
|
return self if spy.nil?
|
198
216
|
Relation::Spied.new(self, spy)
|
@@ -69,6 +69,14 @@ module Bmg
|
|
69
69
|
self.not_matching(right.rename(renaming), on.keys)
|
70
70
|
end
|
71
71
|
|
72
|
+
def ungroup(attr)
|
73
|
+
super(attr.is_a?(Symbol) ? [attr] : attr)
|
74
|
+
end
|
75
|
+
|
76
|
+
def unwrap(attr)
|
77
|
+
super(attr.is_a?(Symbol) ? [attr] : attr)
|
78
|
+
end
|
79
|
+
|
72
80
|
end # module Shortcuts
|
73
81
|
end # module Algebra
|
74
82
|
end # module Bmg
|
data/lib/bmg/error.rb
CHANGED
data/lib/bmg/operator.rb
CHANGED
@@ -47,4 +47,6 @@ require_relative 'operator/restrict'
|
|
47
47
|
require_relative 'operator/rxmatch'
|
48
48
|
require_relative 'operator/summarize'
|
49
49
|
require_relative 'operator/transform'
|
50
|
+
require_relative 'operator/ungroup'
|
50
51
|
require_relative 'operator/union'
|
52
|
+
require_relative 'operator/unwrap'
|
data/lib/bmg/operator/image.rb
CHANGED
@@ -26,7 +26,11 @@ module Bmg
|
|
26
26
|
# resulting operands. This option only applies when (optimized) `on`
|
27
27
|
# contains one attribute only; it falls back on :index_right
|
28
28
|
# otherwise.
|
29
|
-
strategy: :refilter_right
|
29
|
+
strategy: :refilter_right,
|
30
|
+
|
31
|
+
# Whether the attributes on which the join is made should be kept
|
32
|
+
# in the result or not
|
33
|
+
preserve: false
|
30
34
|
|
31
35
|
}
|
32
36
|
|
@@ -96,9 +100,10 @@ module Bmg
|
|
96
100
|
|
97
101
|
def build_right_index(right)
|
98
102
|
index = Hash.new{|h,k| h[k] = empty_image }
|
103
|
+
butlist = options[:preserve] ? [] : on
|
99
104
|
right.each_with_object(index) do |t, index|
|
100
105
|
key = tuple_project(t, on)
|
101
|
-
index[key].operand <<
|
106
|
+
index[key].operand << tuple_allbut(t, butlist)
|
102
107
|
end
|
103
108
|
if opt = options[:array]
|
104
109
|
sorter = to_sorter(opt)
|
@@ -249,8 +254,8 @@ module Bmg
|
|
249
254
|
TupleAlgebra.project(tuple, on)
|
250
255
|
end
|
251
256
|
|
252
|
-
def
|
253
|
-
TupleAlgebra.allbut(tuple,
|
257
|
+
def tuple_allbut(tuple, butlist)
|
258
|
+
TupleAlgebra.allbut(tuple, butlist)
|
254
259
|
end
|
255
260
|
|
256
261
|
def image_type
|
data/lib/bmg/operator/rename.rb
CHANGED
@@ -31,15 +31,15 @@ module Bmg
|
|
31
31
|
def each
|
32
32
|
return to_enum unless block_given?
|
33
33
|
@operand.each do |tuple|
|
34
|
-
yield
|
34
|
+
yield rename_tuple(tuple, renaming)
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
38
|
def insert(arg)
|
39
39
|
case arg
|
40
|
-
when Hash then operand.insert(
|
40
|
+
when Hash then operand.insert(rename_tuple(arg, reverse_renaming))
|
41
41
|
when Relation then operand.insert(arg.rename(reverse_renaming))
|
42
|
-
when Enumerable then operand.insert(arg.map{|t|
|
42
|
+
when Enumerable then operand.insert(arg.map{|t| rename_tuple(t, reverse_renaming) })
|
43
43
|
else
|
44
44
|
super
|
45
45
|
end
|
@@ -47,7 +47,7 @@ module Bmg
|
|
47
47
|
|
48
48
|
def update(arg)
|
49
49
|
case arg
|
50
|
-
when Hash then operand.update(
|
50
|
+
when Hash then operand.update(rename_tuple(arg, reverse_renaming))
|
51
51
|
else
|
52
52
|
super
|
53
53
|
end
|
@@ -89,7 +89,7 @@ module Bmg
|
|
89
89
|
|
90
90
|
private
|
91
91
|
|
92
|
-
def
|
92
|
+
def rename_tuple(tuple, renaming)
|
93
93
|
tuple.each_with_object({}){|(k,v),h|
|
94
94
|
h[renaming[k] || k] = v
|
95
95
|
h
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Operator
|
3
|
+
class Ungroup
|
4
|
+
include Operator::Unary
|
5
|
+
|
6
|
+
def initialize(type, operand, attrs)
|
7
|
+
@type = type
|
8
|
+
@operand = operand
|
9
|
+
@attrs = attrs
|
10
|
+
end
|
11
|
+
|
12
|
+
protected
|
13
|
+
|
14
|
+
attr_reader :attrs
|
15
|
+
|
16
|
+
public
|
17
|
+
|
18
|
+
def each(&bl)
|
19
|
+
return to_enum unless block_given?
|
20
|
+
if type.knows_keys? && type.keys.any?{|k| (k & attrs).empty? }
|
21
|
+
operand.each do |tuple|
|
22
|
+
_each(tuple, attrs[0], attrs[1..-1], &bl)
|
23
|
+
end
|
24
|
+
else
|
25
|
+
with_dups = []
|
26
|
+
operand.each do |tuple|
|
27
|
+
_each(tuple, attrs[0], attrs[1..-1]){|t|
|
28
|
+
with_dups << t
|
29
|
+
}
|
30
|
+
end
|
31
|
+
with_dups.uniq.each(&bl)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def _each(tuple, attr, attrs, &bl)
|
36
|
+
rva = tuple[attr] || []
|
37
|
+
rva.each do |rvt|
|
38
|
+
t = tuple.merge(rvt).tap{|t| t.delete(attr) }
|
39
|
+
if attrs.empty?
|
40
|
+
yield(t)
|
41
|
+
else
|
42
|
+
_each(t, attrs[0], attrs[1..-1], &bl)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def to_ast
|
48
|
+
[ :ungroup, operand.to_ast, attrs ]
|
49
|
+
end
|
50
|
+
|
51
|
+
protected
|
52
|
+
|
53
|
+
protected ### inspect
|
54
|
+
|
55
|
+
def args
|
56
|
+
[ attrs ]
|
57
|
+
end
|
58
|
+
|
59
|
+
end # class Ungroup
|
60
|
+
end # module Operator
|
61
|
+
end # module Bmg
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Operator
|
3
|
+
class Unwrap
|
4
|
+
include Operator::Unary
|
5
|
+
|
6
|
+
def initialize(type, operand, attrs)
|
7
|
+
@type = type
|
8
|
+
@operand = operand
|
9
|
+
@attrs = attrs
|
10
|
+
end
|
11
|
+
|
12
|
+
protected
|
13
|
+
|
14
|
+
attr_reader :attrs
|
15
|
+
|
16
|
+
public
|
17
|
+
|
18
|
+
def each(&bl)
|
19
|
+
return to_enum unless block_given?
|
20
|
+
operand.each do |tuple|
|
21
|
+
yield tuple_unwrap(tuple)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def to_ast
|
26
|
+
[ :unwrap, operand.to_ast, attrs ]
|
27
|
+
end
|
28
|
+
|
29
|
+
protected
|
30
|
+
|
31
|
+
def tuple_unwrap(tuple)
|
32
|
+
attrs.inject(tuple.dup){|t,attr|
|
33
|
+
t.merge(tuple[attr]).tap{|t2|
|
34
|
+
t2.delete(attr)
|
35
|
+
}
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
protected ### inspect
|
40
|
+
|
41
|
+
def args
|
42
|
+
[ attrs ]
|
43
|
+
end
|
44
|
+
|
45
|
+
end # class Unwrap
|
46
|
+
end # module Operator
|
47
|
+
end # module Bmg
|
data/lib/bmg/reader/csv.rb
CHANGED
data/lib/bmg/reader/excel.rb
CHANGED
@@ -78,16 +78,29 @@ module Bmg
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def on_func_call(sexpr)
|
81
|
-
|
82
|
-
|
81
|
+
case sexpr.func_name
|
82
|
+
when :cast
|
83
|
+
to_cast = apply(sexpr.func_args.first)
|
84
|
+
type = sexpr.func_args.last.last
|
85
|
+
to_cast.cast(type)
|
86
|
+
else
|
87
|
+
args = sexpr.func_args.map{|fa| apply(fa) }
|
88
|
+
::Sequel.function(sexpr.func_name, *args)
|
89
|
+
end
|
83
90
|
end
|
84
91
|
|
85
92
|
def on_summarizer(sexpr)
|
86
|
-
if sexpr.
|
87
|
-
|
93
|
+
func, distinct = if sexpr.summary_func == :distinct_count
|
94
|
+
[:count, true]
|
95
|
+
else
|
96
|
+
[sexpr.summary_func, false]
|
97
|
+
end
|
98
|
+
f = if sexpr.summary_expr
|
99
|
+
::Sequel.function(func, apply(sexpr.summary_expr))
|
88
100
|
else
|
89
|
-
::Sequel.function(
|
101
|
+
::Sequel.function(func).*
|
90
102
|
end
|
103
|
+
distinct ? f.distinct : f
|
91
104
|
end
|
92
105
|
|
93
106
|
def on_qualified_name(sexpr)
|
data/lib/bmg/sql.rb
CHANGED
data/lib/bmg/sql/processor.rb
CHANGED
@@ -36,10 +36,10 @@ module Bmg
|
|
36
36
|
[:select_item,
|
37
37
|
[ :summarizer,
|
38
38
|
summarizer.to_summarizer_name,
|
39
|
-
sexpr.desaliaser[
|
39
|
+
sexpr.desaliaser[summarizer.functor] ],
|
40
40
|
[:column_name, attr.to_s] ]
|
41
41
|
}
|
42
|
-
[:select_list] + by_list + group_list
|
42
|
+
([:select_list] + by_list + group_list)
|
43
43
|
end
|
44
44
|
|
45
45
|
end # class Summarize
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Sql
|
3
|
+
class Processor
|
4
|
+
class Transform < Processor
|
5
|
+
|
6
|
+
module SplitSupported
|
7
|
+
extend(self)
|
8
|
+
|
9
|
+
def split_supported(tr, &bl)
|
10
|
+
case tr
|
11
|
+
when Array
|
12
|
+
i = tr.find_index{|x| !bl.call(x) } || tr.size
|
13
|
+
[tr[0...i], tr[i..-1]].map{|a|
|
14
|
+
case a.size
|
15
|
+
when 0 then nil
|
16
|
+
when 1 then a.first
|
17
|
+
else a
|
18
|
+
end
|
19
|
+
}
|
20
|
+
when Hash
|
21
|
+
tr.inject([{}, {}]){|(sup,unsup),(k,v)|
|
22
|
+
mine, hers = _split_supported(v, &bl)
|
23
|
+
[
|
24
|
+
sup.merge(k => mine),
|
25
|
+
unsup.merge(k => hers)
|
26
|
+
].map(&:compact)
|
27
|
+
}.map{|h| h.empty? ? nil : h }
|
28
|
+
else
|
29
|
+
_split_supported(tr, &bl)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def _split_supported(tr, &bl)
|
34
|
+
if tr.is_a?(Array)
|
35
|
+
split_supported(tr, &bl)
|
36
|
+
else
|
37
|
+
bl.call(tr) ? [tr, nil] : [nil, tr]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end # module SplitSupported
|
41
|
+
|
42
|
+
def initialize(transformation, options, builder)
|
43
|
+
raise NotSupportedError unless options.empty?
|
44
|
+
super(builder)
|
45
|
+
@transformation = transformation
|
46
|
+
end
|
47
|
+
attr_reader :transformation
|
48
|
+
|
49
|
+
def self.split_supported(*args, &bl)
|
50
|
+
SplitSupported.split_supported(*args, &bl)
|
51
|
+
end
|
52
|
+
|
53
|
+
def on_select_list(sexpr)
|
54
|
+
sexpr.each_with_index.map{|child,index|
|
55
|
+
index == 0 ? child : apply(child)
|
56
|
+
}
|
57
|
+
end
|
58
|
+
|
59
|
+
def on_select_item(sexpr)
|
60
|
+
as = sexpr.as_name.to_sym
|
61
|
+
case t = transformation_for(as)
|
62
|
+
when NilClass
|
63
|
+
sexpr
|
64
|
+
when Class, Array
|
65
|
+
sexpr([:select_item,
|
66
|
+
func_call_node(sexpr, Array(t).reverse),
|
67
|
+
sexpr[2]
|
68
|
+
])
|
69
|
+
else
|
70
|
+
raise NotSupportedError
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def func_call_node(sexpr, ts)
|
77
|
+
_func_call_node(sexpr, ts.first, ts[1..-1])
|
78
|
+
end
|
79
|
+
|
80
|
+
def _func_call_node(sexpr, head, tail)
|
81
|
+
inside = if tail.empty?
|
82
|
+
sexpr[1]
|
83
|
+
else
|
84
|
+
_func_call_node(sexpr, tail.first, tail[1..-1])
|
85
|
+
end
|
86
|
+
[:func_call,
|
87
|
+
:cast,
|
88
|
+
inside,
|
89
|
+
[ :literal, head ] ]
|
90
|
+
end
|
91
|
+
|
92
|
+
def transformation_for(as)
|
93
|
+
case t = transformation
|
94
|
+
when Class then t
|
95
|
+
when Hash then t[as]
|
96
|
+
when Array then t
|
97
|
+
else
|
98
|
+
raise Sql::NotSupportedError, "Unable to use `#{as}` for `transform`"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
end # class Transform
|
103
|
+
end # class Processor
|
104
|
+
end # module Sql
|
105
|
+
end # module Bmg
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -123,13 +123,13 @@ module Bmg
|
|
123
123
|
|
124
124
|
def _rename(type, renaming)
|
125
125
|
expr = before_use(self.expr)
|
126
|
-
expr = Processor::Rename.new(renaming, builder).call(
|
126
|
+
expr = Processor::Rename.new(renaming, builder).call(expr)
|
127
127
|
_instance(type, builder, expr)
|
128
128
|
end
|
129
129
|
|
130
130
|
def _restrict(type, predicate)
|
131
131
|
expr = before_use(self.expr)
|
132
|
-
expr = Processor::Where.new(predicate, builder).call(
|
132
|
+
expr = Processor::Where.new(predicate, builder).call(expr)
|
133
133
|
_instance(type, builder, expr)
|
134
134
|
end
|
135
135
|
|
@@ -137,23 +137,37 @@ module Bmg
|
|
137
137
|
summarization = ::Bmg::Summarizer.summarization(defs)
|
138
138
|
if can_compile_summarization?(summarization)
|
139
139
|
expr = before_use(self.expr)
|
140
|
-
expr = Processor::Summarize.new(by, summarization, builder).call(
|
140
|
+
expr = Processor::Summarize.new(by, summarization, builder).call(expr)
|
141
141
|
_instance(type, builder, expr)
|
142
142
|
else
|
143
143
|
super
|
144
144
|
end
|
145
145
|
end
|
146
146
|
|
147
|
+
def _transform(type, transformation, options)
|
148
|
+
expr = before_use(self.expr)
|
149
|
+
sup, unsup = Processor::Transform.split_supported(transformation){|x|
|
150
|
+
[String, Integer, Float, Date, DateTime].include?(x)
|
151
|
+
}
|
152
|
+
return super if sup.nil?
|
153
|
+
expr = Processor::Transform.new(sup, options, builder).call(expr)
|
154
|
+
result = _instance(type, builder, expr)
|
155
|
+
result = result.transform(unsup, options) if unsup
|
156
|
+
result
|
157
|
+
rescue Sql::NotSupportedError
|
158
|
+
super
|
159
|
+
end
|
160
|
+
|
147
161
|
def can_compile_summarization?(summarization)
|
148
162
|
summarization.values.all?{|s|
|
149
|
-
[:avg, :count, :max, :min, :sum].include?(s.to_summarizer_name)
|
163
|
+
[:avg, :count, :max, :min, :sum, :distinct_count].include?(s.to_summarizer_name)
|
150
164
|
}
|
151
165
|
end
|
152
166
|
|
153
167
|
def _union(type, right, options)
|
154
168
|
if right_expr = extract_compatible_sexpr(right)
|
155
169
|
expr = before_use(self.expr)
|
156
|
-
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(
|
170
|
+
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(expr)
|
157
171
|
_instance(type, builder, expr)
|
158
172
|
else
|
159
173
|
super
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -95,16 +95,7 @@ module Bmg
|
|
95
95
|
# @param the current iterated tuple
|
96
96
|
# @return updated memo value
|
97
97
|
def happens(memo, tuple)
|
98
|
-
value =
|
99
|
-
when Proc
|
100
|
-
@functor.call(tuple)
|
101
|
-
when NilClass
|
102
|
-
tuple
|
103
|
-
when Symbol
|
104
|
-
tuple[@functor]
|
105
|
-
else
|
106
|
-
tuple[@functor]
|
107
|
-
end
|
98
|
+
value = extract_value(tuple)
|
108
99
|
_happens(memo, value)
|
109
100
|
end
|
110
101
|
|
@@ -140,7 +131,25 @@ module Bmg
|
|
140
131
|
|
141
132
|
# Returns the canonical summarizer name
|
142
133
|
def to_summarizer_name
|
143
|
-
self.class.name
|
134
|
+
self.class.name
|
135
|
+
.gsub(/[a-z][A-Z]/){|x| x.split('').join('_') }
|
136
|
+
.downcase[/::([a-z_]+)$/, 1]
|
137
|
+
.to_sym
|
138
|
+
end
|
139
|
+
|
140
|
+
protected
|
141
|
+
|
142
|
+
def extract_value(tuple)
|
143
|
+
value = case @functor
|
144
|
+
when Proc
|
145
|
+
@functor.call(tuple)
|
146
|
+
when NilClass
|
147
|
+
tuple
|
148
|
+
when Symbol
|
149
|
+
tuple[@functor]
|
150
|
+
else
|
151
|
+
tuple[@functor]
|
152
|
+
end
|
144
153
|
end
|
145
154
|
|
146
155
|
end # class Summarizer
|
@@ -155,6 +164,8 @@ require_relative 'summarizer/stddev'
|
|
155
164
|
require_relative 'summarizer/percentile'
|
156
165
|
require_relative 'summarizer/collect'
|
157
166
|
require_relative 'summarizer/distinct'
|
167
|
+
require_relative 'summarizer/distinct_count'
|
158
168
|
require_relative 'summarizer/concat'
|
159
169
|
require_relative 'summarizer/by_proc'
|
160
170
|
require_relative 'summarizer/multiple'
|
171
|
+
require_relative 'summarizer/value_by'
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Collect the count of distinct values.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.distinct_count(:qty).summarize(...)
|
10
|
+
#
|
11
|
+
class DistinctCount < Summarizer
|
12
|
+
|
13
|
+
# Returns {} as least value.
|
14
|
+
def least()
|
15
|
+
{}
|
16
|
+
end
|
17
|
+
|
18
|
+
# Records val as a key of the memo hash
|
19
|
+
def _happens(memo, val)
|
20
|
+
memo[val] = true
|
21
|
+
memo
|
22
|
+
end
|
23
|
+
|
24
|
+
def finalize(memo)
|
25
|
+
memo.keys.size
|
26
|
+
end
|
27
|
+
|
28
|
+
end # class DistinctCount
|
29
|
+
|
30
|
+
# Factors a distinct count summarizer
|
31
|
+
def self.distinct_count(*args, &bl)
|
32
|
+
DistinctCount.new(*args, &bl)
|
33
|
+
end
|
34
|
+
|
35
|
+
end # class Summarizer
|
36
|
+
end # module Bmg
|
@@ -10,10 +10,17 @@ module Bmg
|
|
10
10
|
#
|
11
11
|
class Percentile < Summarizer
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
DEFAULT_OPTIONS = {
|
14
|
+
:variant => :continuous
|
15
|
+
}
|
16
|
+
|
17
|
+
def initialize(*args, &bl)
|
18
|
+
@nth = args.find{|a| a.is_a?(Integer) } || 50
|
19
|
+
functor = args.find{|a| a.is_a?(Symbol) } || bl
|
20
|
+
options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
|
21
|
+
memo.merge(opts)
|
22
|
+
}.dup
|
23
|
+
super(functor, options)
|
17
24
|
end
|
18
25
|
|
19
26
|
# Returns [] as least value.
|
@@ -29,19 +36,44 @@ module Bmg
|
|
29
36
|
# Finalizes the computation.
|
30
37
|
def finalize(memo)
|
31
38
|
return nil if memo.empty?
|
32
|
-
index = memo.size * (@nth / 100.0)
|
33
|
-
|
34
|
-
|
39
|
+
index = memo.size.to_f * (@nth.to_f / 100.0)
|
40
|
+
floor, ceil = index.floor, index.ceil
|
41
|
+
ceil +=1 if floor == ceil
|
42
|
+
below = [floor - 1, 0].max
|
43
|
+
above = [[ceil - 1, memo.size - 1].min, 0].max
|
35
44
|
sorted = memo.sort
|
36
|
-
|
45
|
+
if options[:variant] == :continuous
|
46
|
+
(sorted[above] + sorted[below]) / 2.0
|
47
|
+
else
|
48
|
+
sorted[below]
|
49
|
+
end
|
37
50
|
end
|
38
51
|
|
39
52
|
end # class Percentile
|
40
53
|
|
41
|
-
# Factors an average summarizer
|
42
54
|
def self.percentile(*args, &bl)
|
43
55
|
Percentile.new(*args, &bl)
|
44
56
|
end
|
45
57
|
|
58
|
+
def self.percentile_cont(*args, &bl)
|
59
|
+
Percentile.new(*(args + [{:variant => :continuous}]), &bl)
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.percentile_disc(*args, &bl)
|
63
|
+
Percentile.new(*(args + [{:variant => :discrete}]), &bl)
|
64
|
+
end
|
65
|
+
|
66
|
+
def self.median(*args, &bl)
|
67
|
+
Percentile.new(*(args + [50]), &bl)
|
68
|
+
end
|
69
|
+
|
70
|
+
def self.median_cont(*args, &bl)
|
71
|
+
Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.median_disc(*args, &bl)
|
75
|
+
Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
|
76
|
+
end
|
77
|
+
|
46
78
|
end # class Summarizer
|
47
79
|
end # module Bmg
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# ValueBy summarizer.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
|
10
|
+
#
|
11
|
+
class ValueBy < Summarizer
|
12
|
+
|
13
|
+
DEFAULT_OPTIONS = {
|
14
|
+
:symbolize => false
|
15
|
+
}
|
16
|
+
|
17
|
+
# Returns {} as least value.
|
18
|
+
def least
|
19
|
+
{}
|
20
|
+
end
|
21
|
+
|
22
|
+
# Collects the value
|
23
|
+
def happens(memo, tuple)
|
24
|
+
by = tuple[options[:by]]
|
25
|
+
by = by.to_sym if by && options[:symbolize]
|
26
|
+
misuse!(tuple, memo) if memo.has_key?(by)
|
27
|
+
memo.tap{|m|
|
28
|
+
m[by] = extract_value(tuple)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
# Finalizes the computation.
|
33
|
+
def finalize(memo)
|
34
|
+
default_tuple.merge(memo)
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def default_tuple
|
40
|
+
(options[:series] || []).each_with_object({}){|s,ss|
|
41
|
+
s_def = options[:default]
|
42
|
+
s = s.to_sym if s && options[:symbolize]
|
43
|
+
ss[s] = s_def
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
def misuse!(tuple, memo)
|
48
|
+
msg = "Summarizer.value_by: summarization key + the serie must form be a candidate key"
|
49
|
+
msg += "\n"
|
50
|
+
msg += " Tuple: #{tuple.inspect}"
|
51
|
+
msg += " Memo: #{memo.inspect}"
|
52
|
+
raise MisuseError, msg
|
53
|
+
end
|
54
|
+
|
55
|
+
end # class ValueBy
|
56
|
+
|
57
|
+
def self.value_by(*args, &bl)
|
58
|
+
ValueBy.new(*args, &bl)
|
59
|
+
end
|
60
|
+
|
61
|
+
end # class Summarizer
|
62
|
+
end # module Bmg
|
data/lib/bmg/support/keys.rb
CHANGED
@@ -32,7 +32,16 @@ module Bmg
|
|
32
32
|
}
|
33
33
|
when Hash
|
34
34
|
with.each_with_object(tuple.dup){|(k,v),dup|
|
35
|
-
|
35
|
+
case k
|
36
|
+
when Symbol
|
37
|
+
dup[k] = transform_attr(dup[k], v)
|
38
|
+
when Class
|
39
|
+
dup.keys.each do |attrname|
|
40
|
+
dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
|
41
|
+
end
|
42
|
+
else
|
43
|
+
raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
|
44
|
+
end
|
36
45
|
}
|
37
46
|
when Array
|
38
47
|
with.inject(tuple){|dup,on|
|
@@ -50,6 +59,19 @@ module Bmg
|
|
50
59
|
when Regexp
|
51
60
|
m = with.match(value.to_s)
|
52
61
|
m.nil? ? m : m.to_s
|
62
|
+
when Class
|
63
|
+
return value if value.nil?
|
64
|
+
if with.respond_to?(:parse)
|
65
|
+
with.parse(value)
|
66
|
+
elsif with == Integer
|
67
|
+
Integer(value)
|
68
|
+
elsif with == Float
|
69
|
+
Float(value)
|
70
|
+
elsif with == String
|
71
|
+
value.to_s
|
72
|
+
else
|
73
|
+
raise ArgumentError, "#{with} should respond to `parse`"
|
74
|
+
end
|
53
75
|
when Proc
|
54
76
|
with.call(value)
|
55
77
|
when Hash
|
data/lib/bmg/type.rb
CHANGED
@@ -82,7 +82,7 @@ module Bmg
|
|
82
82
|
|
83
83
|
def with_keys(keys)
|
84
84
|
dup.tap{|x|
|
85
|
-
x.keys = Keys.new(keys)
|
85
|
+
x.keys = keys ? Keys.new(keys) : nil
|
86
86
|
}
|
87
87
|
end
|
88
88
|
|
@@ -266,6 +266,15 @@ module Bmg
|
|
266
266
|
}
|
267
267
|
end
|
268
268
|
|
269
|
+
def ungroup(attrlist)
|
270
|
+
known_attributes!(attrlist) if typechecked? && knows_attrlist?
|
271
|
+
dup.tap{|x|
|
272
|
+
x.attrlist = nil
|
273
|
+
x.predicate = Predicate.tautology
|
274
|
+
x.keys = nil
|
275
|
+
}
|
276
|
+
end
|
277
|
+
|
269
278
|
def union(other)
|
270
279
|
if typechecked? && knows_attrlist? && other.knows_attrlist?
|
271
280
|
missing = self.attrlist - other.attrlist
|
@@ -280,6 +289,15 @@ module Bmg
|
|
280
289
|
}
|
281
290
|
end
|
282
291
|
|
292
|
+
def unwrap(attrlist)
|
293
|
+
known_attributes!(attrlist) if typechecked? && knows_attrlist?
|
294
|
+
dup.tap{|x|
|
295
|
+
x.attrlist = nil
|
296
|
+
x.predicate = predicate.and_split(attrlist).last
|
297
|
+
x.keys = self._keys.unwrap(self, x, attrlist) if knows_keys?
|
298
|
+
}
|
299
|
+
end
|
300
|
+
|
283
301
|
private
|
284
302
|
|
285
303
|
def known_attributes!(attrs)
|
data/lib/bmg/version.rb
CHANGED
data/lib/bmg/writer/csv.rb
CHANGED
@@ -19,7 +19,8 @@ module Bmg
|
|
19
19
|
relation.each do |tuple|
|
20
20
|
if csv.nil?
|
21
21
|
headers = infer_headers(tuple) if headers.nil?
|
22
|
-
|
22
|
+
csv_opts = csv_options.merge(headers: headers)
|
23
|
+
csv = CSV.new(string_or_io, **csv_opts)
|
23
24
|
end
|
24
25
|
csv << headers.map{|h| tuple[h] }
|
25
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bmg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.
|
4
|
+
version: 0.18.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bernard Lambeau
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: predicate
|
@@ -164,7 +164,9 @@ files:
|
|
164
164
|
- lib/bmg/operator/shared/unary.rb
|
165
165
|
- lib/bmg/operator/summarize.rb
|
166
166
|
- lib/bmg/operator/transform.rb
|
167
|
+
- lib/bmg/operator/ungroup.rb
|
167
168
|
- lib/bmg/operator/union.rb
|
169
|
+
- lib/bmg/operator/unwrap.rb
|
168
170
|
- lib/bmg/reader.rb
|
169
171
|
- lib/bmg/reader/csv.rb
|
170
172
|
- lib/bmg/reader/excel.rb
|
@@ -259,6 +261,7 @@ files:
|
|
259
261
|
- lib/bmg/sql/processor/semi_join.rb
|
260
262
|
- lib/bmg/sql/processor/star.rb
|
261
263
|
- lib/bmg/sql/processor/summarize.rb
|
264
|
+
- lib/bmg/sql/processor/transform.rb
|
262
265
|
- lib/bmg/sql/processor/where.rb
|
263
266
|
- lib/bmg/sql/relation.rb
|
264
267
|
- lib/bmg/sql/support/from_clause_orderer.rb
|
@@ -270,12 +273,14 @@ files:
|
|
270
273
|
- lib/bmg/summarizer/concat.rb
|
271
274
|
- lib/bmg/summarizer/count.rb
|
272
275
|
- lib/bmg/summarizer/distinct.rb
|
276
|
+
- lib/bmg/summarizer/distinct_count.rb
|
273
277
|
- lib/bmg/summarizer/max.rb
|
274
278
|
- lib/bmg/summarizer/min.rb
|
275
279
|
- lib/bmg/summarizer/multiple.rb
|
276
280
|
- lib/bmg/summarizer/percentile.rb
|
277
281
|
- lib/bmg/summarizer/stddev.rb
|
278
282
|
- lib/bmg/summarizer/sum.rb
|
283
|
+
- lib/bmg/summarizer/value_by.rb
|
279
284
|
- lib/bmg/summarizer/variance.rb
|
280
285
|
- lib/bmg/support.rb
|
281
286
|
- lib/bmg/support/keys.rb
|