bmg 0.18.3 → 0.18.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/bmg/algebra.rb +18 -0
- data/lib/bmg/algebra/shortcuts.rb +8 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator.rb +2 -0
- data/lib/bmg/operator/image.rb +9 -4
- data/lib/bmg/operator/rename.rb +5 -5
- data/lib/bmg/operator/ungroup.rb +61 -0
- data/lib/bmg/operator/unwrap.rb +47 -0
- data/lib/bmg/reader/csv.rb +1 -1
- data/lib/bmg/reader/excel.rb +1 -1
- data/lib/bmg/sequel/translator.rb +18 -5
- data/lib/bmg/sql.rb +4 -1
- data/lib/bmg/sql/processor.rb +1 -0
- data/lib/bmg/sql/processor/summarize.rb +2 -2
- data/lib/bmg/sql/processor/transform.rb +105 -0
- data/lib/bmg/sql/relation.rb +19 -5
- data/lib/bmg/summarizer.rb +22 -11
- data/lib/bmg/summarizer/by_proc.rb +1 -1
- data/lib/bmg/summarizer/distinct_count.rb +36 -0
- data/lib/bmg/summarizer/percentile.rb +41 -9
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support/keys.rb +5 -0
- data/lib/bmg/support/tuple_transformer.rb +23 -1
- data/lib/bmg/type.rb +19 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer/csv.rb +2 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9489f2f59b1f3fc644c4a29ee53ba08662f9671ebda9cc6b0340a98c6daf20d9
|
4
|
+
data.tar.gz: f0506ba0dbea31d5d3a1c3f4e3edbdf0ea9ef88e40b386445494f6cd61f759c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3de98440bf031b653d2ef9f7cc6f89ab44969304ca6fdd04cadc514e617a17d5b8b5bfeb791bc03de576acac5431a1d85a52a413f59932aae77ee3f41175ff3
|
7
|
+
data.tar.gz: b583e4ade1f71740afaf2a43c5a0dad8eb27557de533b4cbd61c7f632e79d612a85340897e01ed571a91572acd4f2ab65c00d2b9e00d4d8a8cc8b97919c46102
|
data/README.md
CHANGED
@@ -234,7 +234,11 @@ t.transform(:to_s) # all-attrs transformation
|
|
234
234
|
t.transform(&:to_s) # similar, but Proc-driven
|
235
235
|
t.transform(:foo => :upcase, ...) # specific-attrs transformation
|
236
236
|
t.transform([:to_s, :upcase]) # chain-transformation
|
237
|
+
r.ungroup([:a, :b, ...]) # ungroup relation-valued attributes within parent tuple
|
238
|
+
r.ungroup(:a) # shortcut over ungroup([:a])
|
237
239
|
r.union(right) # relational union
|
240
|
+
r.unwrap([:a, :b, ...]) # merge tuple-valued attributes within parent tuple
|
241
|
+
r.unwrap(:a) # shortcut over unwrap([:a])
|
238
242
|
r.where(predicate) # alias for restrict(predicate)
|
239
243
|
```
|
240
244
|
|
data/lib/bmg/algebra.rb
CHANGED
@@ -183,6 +183,15 @@ module Bmg
|
|
183
183
|
end
|
184
184
|
protected :_transform
|
185
185
|
|
186
|
+
def ungroup(attrs)
|
187
|
+
_ungroup self.type.ungroup(attrs), attrs
|
188
|
+
end
|
189
|
+
|
190
|
+
def _ungroup(type, attrs)
|
191
|
+
Operator::Ungroup.new(type, self, attrs)
|
192
|
+
end
|
193
|
+
protected :_ungroup
|
194
|
+
|
186
195
|
def union(other, options = {})
|
187
196
|
return self if other.is_a?(Relation::Empty)
|
188
197
|
_union self.type.union(other.type), other, options
|
@@ -193,6 +202,15 @@ module Bmg
|
|
193
202
|
end
|
194
203
|
protected :_union
|
195
204
|
|
205
|
+
def unwrap(attrs)
|
206
|
+
_unwrap self.type.unwrap(attrs), attrs
|
207
|
+
end
|
208
|
+
|
209
|
+
def _unwrap(type, attrs)
|
210
|
+
Operator::Unwrap.new(type, self, attrs)
|
211
|
+
end
|
212
|
+
protected :_unwrap
|
213
|
+
|
196
214
|
def spied(spy)
|
197
215
|
return self if spy.nil?
|
198
216
|
Relation::Spied.new(self, spy)
|
@@ -69,6 +69,14 @@ module Bmg
|
|
69
69
|
self.not_matching(right.rename(renaming), on.keys)
|
70
70
|
end
|
71
71
|
|
72
|
+
def ungroup(attr)
|
73
|
+
super(attr.is_a?(Symbol) ? [attr] : attr)
|
74
|
+
end
|
75
|
+
|
76
|
+
def unwrap(attr)
|
77
|
+
super(attr.is_a?(Symbol) ? [attr] : attr)
|
78
|
+
end
|
79
|
+
|
72
80
|
end # module Shortcuts
|
73
81
|
end # module Algebra
|
74
82
|
end # module Bmg
|
data/lib/bmg/error.rb
CHANGED
data/lib/bmg/operator.rb
CHANGED
@@ -47,4 +47,6 @@ require_relative 'operator/restrict'
|
|
47
47
|
require_relative 'operator/rxmatch'
|
48
48
|
require_relative 'operator/summarize'
|
49
49
|
require_relative 'operator/transform'
|
50
|
+
require_relative 'operator/ungroup'
|
50
51
|
require_relative 'operator/union'
|
52
|
+
require_relative 'operator/unwrap'
|
data/lib/bmg/operator/image.rb
CHANGED
@@ -26,7 +26,11 @@ module Bmg
|
|
26
26
|
# resulting operands. This option only applies when (optimized) `on`
|
27
27
|
# contains one attribute only; it falls back on :index_right
|
28
28
|
# otherwise.
|
29
|
-
strategy: :refilter_right
|
29
|
+
strategy: :refilter_right,
|
30
|
+
|
31
|
+
# Whether the attributes on which the join is made should be kept
|
32
|
+
# in the result or not
|
33
|
+
preserve: false
|
30
34
|
|
31
35
|
}
|
32
36
|
|
@@ -96,9 +100,10 @@ module Bmg
|
|
96
100
|
|
97
101
|
def build_right_index(right)
|
98
102
|
index = Hash.new{|h,k| h[k] = empty_image }
|
103
|
+
butlist = options[:preserve] ? [] : on
|
99
104
|
right.each_with_object(index) do |t, index|
|
100
105
|
key = tuple_project(t, on)
|
101
|
-
index[key].operand <<
|
106
|
+
index[key].operand << tuple_allbut(t, butlist)
|
102
107
|
end
|
103
108
|
if opt = options[:array]
|
104
109
|
sorter = to_sorter(opt)
|
@@ -249,8 +254,8 @@ module Bmg
|
|
249
254
|
TupleAlgebra.project(tuple, on)
|
250
255
|
end
|
251
256
|
|
252
|
-
def
|
253
|
-
TupleAlgebra.allbut(tuple,
|
257
|
+
def tuple_allbut(tuple, butlist)
|
258
|
+
TupleAlgebra.allbut(tuple, butlist)
|
254
259
|
end
|
255
260
|
|
256
261
|
def image_type
|
data/lib/bmg/operator/rename.rb
CHANGED
@@ -31,15 +31,15 @@ module Bmg
|
|
31
31
|
def each
|
32
32
|
return to_enum unless block_given?
|
33
33
|
@operand.each do |tuple|
|
34
|
-
yield
|
34
|
+
yield rename_tuple(tuple, renaming)
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
38
|
def insert(arg)
|
39
39
|
case arg
|
40
|
-
when Hash then operand.insert(
|
40
|
+
when Hash then operand.insert(rename_tuple(arg, reverse_renaming))
|
41
41
|
when Relation then operand.insert(arg.rename(reverse_renaming))
|
42
|
-
when Enumerable then operand.insert(arg.map{|t|
|
42
|
+
when Enumerable then operand.insert(arg.map{|t| rename_tuple(t, reverse_renaming) })
|
43
43
|
else
|
44
44
|
super
|
45
45
|
end
|
@@ -47,7 +47,7 @@ module Bmg
|
|
47
47
|
|
48
48
|
def update(arg)
|
49
49
|
case arg
|
50
|
-
when Hash then operand.update(
|
50
|
+
when Hash then operand.update(rename_tuple(arg, reverse_renaming))
|
51
51
|
else
|
52
52
|
super
|
53
53
|
end
|
@@ -89,7 +89,7 @@ module Bmg
|
|
89
89
|
|
90
90
|
private
|
91
91
|
|
92
|
-
def
|
92
|
+
def rename_tuple(tuple, renaming)
|
93
93
|
tuple.each_with_object({}){|(k,v),h|
|
94
94
|
h[renaming[k] || k] = v
|
95
95
|
h
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Operator
|
3
|
+
class Ungroup
|
4
|
+
include Operator::Unary
|
5
|
+
|
6
|
+
def initialize(type, operand, attrs)
|
7
|
+
@type = type
|
8
|
+
@operand = operand
|
9
|
+
@attrs = attrs
|
10
|
+
end
|
11
|
+
|
12
|
+
protected
|
13
|
+
|
14
|
+
attr_reader :attrs
|
15
|
+
|
16
|
+
public
|
17
|
+
|
18
|
+
def each(&bl)
|
19
|
+
return to_enum unless block_given?
|
20
|
+
if type.knows_keys? && type.keys.any?{|k| (k & attrs).empty? }
|
21
|
+
operand.each do |tuple|
|
22
|
+
_each(tuple, attrs[0], attrs[1..-1], &bl)
|
23
|
+
end
|
24
|
+
else
|
25
|
+
with_dups = []
|
26
|
+
operand.each do |tuple|
|
27
|
+
_each(tuple, attrs[0], attrs[1..-1]){|t|
|
28
|
+
with_dups << t
|
29
|
+
}
|
30
|
+
end
|
31
|
+
with_dups.uniq.each(&bl)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def _each(tuple, attr, attrs, &bl)
|
36
|
+
rva = tuple[attr] || []
|
37
|
+
rva.each do |rvt|
|
38
|
+
t = tuple.merge(rvt).tap{|t| t.delete(attr) }
|
39
|
+
if attrs.empty?
|
40
|
+
yield(t)
|
41
|
+
else
|
42
|
+
_each(t, attrs[0], attrs[1..-1], &bl)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def to_ast
|
48
|
+
[ :ungroup, operand.to_ast, attrs ]
|
49
|
+
end
|
50
|
+
|
51
|
+
protected
|
52
|
+
|
53
|
+
protected ### inspect
|
54
|
+
|
55
|
+
def args
|
56
|
+
[ attrs ]
|
57
|
+
end
|
58
|
+
|
59
|
+
end # class Ungroup
|
60
|
+
end # module Operator
|
61
|
+
end # module Bmg
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Operator
|
3
|
+
class Unwrap
|
4
|
+
include Operator::Unary
|
5
|
+
|
6
|
+
def initialize(type, operand, attrs)
|
7
|
+
@type = type
|
8
|
+
@operand = operand
|
9
|
+
@attrs = attrs
|
10
|
+
end
|
11
|
+
|
12
|
+
protected
|
13
|
+
|
14
|
+
attr_reader :attrs
|
15
|
+
|
16
|
+
public
|
17
|
+
|
18
|
+
def each(&bl)
|
19
|
+
return to_enum unless block_given?
|
20
|
+
operand.each do |tuple|
|
21
|
+
yield tuple_unwrap(tuple)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def to_ast
|
26
|
+
[ :unwrap, operand.to_ast, attrs ]
|
27
|
+
end
|
28
|
+
|
29
|
+
protected
|
30
|
+
|
31
|
+
def tuple_unwrap(tuple)
|
32
|
+
attrs.inject(tuple.dup){|t,attr|
|
33
|
+
t.merge(tuple[attr]).tap{|t2|
|
34
|
+
t2.delete(attr)
|
35
|
+
}
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
protected ### inspect
|
40
|
+
|
41
|
+
def args
|
42
|
+
[ attrs ]
|
43
|
+
end
|
44
|
+
|
45
|
+
end # class Unwrap
|
46
|
+
end # module Operator
|
47
|
+
end # module Bmg
|
data/lib/bmg/reader/csv.rb
CHANGED
data/lib/bmg/reader/excel.rb
CHANGED
@@ -78,16 +78,29 @@ module Bmg
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def on_func_call(sexpr)
|
81
|
-
|
82
|
-
|
81
|
+
case sexpr.func_name
|
82
|
+
when :cast
|
83
|
+
to_cast = apply(sexpr.func_args.first)
|
84
|
+
type = sexpr.func_args.last.last
|
85
|
+
to_cast.cast(type)
|
86
|
+
else
|
87
|
+
args = sexpr.func_args.map{|fa| apply(fa) }
|
88
|
+
::Sequel.function(sexpr.func_name, *args)
|
89
|
+
end
|
83
90
|
end
|
84
91
|
|
85
92
|
def on_summarizer(sexpr)
|
86
|
-
if sexpr.
|
87
|
-
|
93
|
+
func, distinct = if sexpr.summary_func == :distinct_count
|
94
|
+
[:count, true]
|
95
|
+
else
|
96
|
+
[sexpr.summary_func, false]
|
97
|
+
end
|
98
|
+
f = if sexpr.summary_expr
|
99
|
+
::Sequel.function(func, apply(sexpr.summary_expr))
|
88
100
|
else
|
89
|
-
::Sequel.function(
|
101
|
+
::Sequel.function(func).*
|
90
102
|
end
|
103
|
+
distinct ? f.distinct : f
|
91
104
|
end
|
92
105
|
|
93
106
|
def on_qualified_name(sexpr)
|
data/lib/bmg/sql.rb
CHANGED
data/lib/bmg/sql/processor.rb
CHANGED
@@ -36,10 +36,10 @@ module Bmg
|
|
36
36
|
[:select_item,
|
37
37
|
[ :summarizer,
|
38
38
|
summarizer.to_summarizer_name,
|
39
|
-
sexpr.desaliaser[
|
39
|
+
sexpr.desaliaser[summarizer.functor] ],
|
40
40
|
[:column_name, attr.to_s] ]
|
41
41
|
}
|
42
|
-
[:select_list] + by_list + group_list
|
42
|
+
([:select_list] + by_list + group_list)
|
43
43
|
end
|
44
44
|
|
45
45
|
end # class Summarize
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Sql
|
3
|
+
class Processor
|
4
|
+
class Transform < Processor
|
5
|
+
|
6
|
+
module SplitSupported
|
7
|
+
extend(self)
|
8
|
+
|
9
|
+
def split_supported(tr, &bl)
|
10
|
+
case tr
|
11
|
+
when Array
|
12
|
+
i = tr.find_index{|x| !bl.call(x) } || tr.size
|
13
|
+
[tr[0...i], tr[i..-1]].map{|a|
|
14
|
+
case a.size
|
15
|
+
when 0 then nil
|
16
|
+
when 1 then a.first
|
17
|
+
else a
|
18
|
+
end
|
19
|
+
}
|
20
|
+
when Hash
|
21
|
+
tr.inject([{}, {}]){|(sup,unsup),(k,v)|
|
22
|
+
mine, hers = _split_supported(v, &bl)
|
23
|
+
[
|
24
|
+
sup.merge(k => mine),
|
25
|
+
unsup.merge(k => hers)
|
26
|
+
].map(&:compact)
|
27
|
+
}.map{|h| h.empty? ? nil : h }
|
28
|
+
else
|
29
|
+
_split_supported(tr, &bl)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def _split_supported(tr, &bl)
|
34
|
+
if tr.is_a?(Array)
|
35
|
+
split_supported(tr, &bl)
|
36
|
+
else
|
37
|
+
bl.call(tr) ? [tr, nil] : [nil, tr]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end # module SplitSupported
|
41
|
+
|
42
|
+
def initialize(transformation, options, builder)
|
43
|
+
raise NotSupportedError unless options.empty?
|
44
|
+
super(builder)
|
45
|
+
@transformation = transformation
|
46
|
+
end
|
47
|
+
attr_reader :transformation
|
48
|
+
|
49
|
+
def self.split_supported(*args, &bl)
|
50
|
+
SplitSupported.split_supported(*args, &bl)
|
51
|
+
end
|
52
|
+
|
53
|
+
def on_select_list(sexpr)
|
54
|
+
sexpr.each_with_index.map{|child,index|
|
55
|
+
index == 0 ? child : apply(child)
|
56
|
+
}
|
57
|
+
end
|
58
|
+
|
59
|
+
def on_select_item(sexpr)
|
60
|
+
as = sexpr.as_name.to_sym
|
61
|
+
case t = transformation_for(as)
|
62
|
+
when NilClass
|
63
|
+
sexpr
|
64
|
+
when Class, Array
|
65
|
+
sexpr([:select_item,
|
66
|
+
func_call_node(sexpr, Array(t).reverse),
|
67
|
+
sexpr[2]
|
68
|
+
])
|
69
|
+
else
|
70
|
+
raise NotSupportedError
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def func_call_node(sexpr, ts)
|
77
|
+
_func_call_node(sexpr, ts.first, ts[1..-1])
|
78
|
+
end
|
79
|
+
|
80
|
+
def _func_call_node(sexpr, head, tail)
|
81
|
+
inside = if tail.empty?
|
82
|
+
sexpr[1]
|
83
|
+
else
|
84
|
+
_func_call_node(sexpr, tail.first, tail[1..-1])
|
85
|
+
end
|
86
|
+
[:func_call,
|
87
|
+
:cast,
|
88
|
+
inside,
|
89
|
+
[ :literal, head ] ]
|
90
|
+
end
|
91
|
+
|
92
|
+
def transformation_for(as)
|
93
|
+
case t = transformation
|
94
|
+
when Class then t
|
95
|
+
when Hash then t[as]
|
96
|
+
when Array then t
|
97
|
+
else
|
98
|
+
raise Sql::NotSupportedError, "Unable to use `#{as}` for `transform`"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
end # class Transform
|
103
|
+
end # class Processor
|
104
|
+
end # module Sql
|
105
|
+
end # module Bmg
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -123,13 +123,13 @@ module Bmg
|
|
123
123
|
|
124
124
|
def _rename(type, renaming)
|
125
125
|
expr = before_use(self.expr)
|
126
|
-
expr = Processor::Rename.new(renaming, builder).call(
|
126
|
+
expr = Processor::Rename.new(renaming, builder).call(expr)
|
127
127
|
_instance(type, builder, expr)
|
128
128
|
end
|
129
129
|
|
130
130
|
def _restrict(type, predicate)
|
131
131
|
expr = before_use(self.expr)
|
132
|
-
expr = Processor::Where.new(predicate, builder).call(
|
132
|
+
expr = Processor::Where.new(predicate, builder).call(expr)
|
133
133
|
_instance(type, builder, expr)
|
134
134
|
end
|
135
135
|
|
@@ -137,23 +137,37 @@ module Bmg
|
|
137
137
|
summarization = ::Bmg::Summarizer.summarization(defs)
|
138
138
|
if can_compile_summarization?(summarization)
|
139
139
|
expr = before_use(self.expr)
|
140
|
-
expr = Processor::Summarize.new(by, summarization, builder).call(
|
140
|
+
expr = Processor::Summarize.new(by, summarization, builder).call(expr)
|
141
141
|
_instance(type, builder, expr)
|
142
142
|
else
|
143
143
|
super
|
144
144
|
end
|
145
145
|
end
|
146
146
|
|
147
|
+
def _transform(type, transformation, options)
|
148
|
+
expr = before_use(self.expr)
|
149
|
+
sup, unsup = Processor::Transform.split_supported(transformation){|x|
|
150
|
+
[String, Integer, Float, Date, DateTime].include?(x)
|
151
|
+
}
|
152
|
+
return super if sup.nil?
|
153
|
+
expr = Processor::Transform.new(sup, options, builder).call(expr)
|
154
|
+
result = _instance(type, builder, expr)
|
155
|
+
result = result.transform(unsup, options) if unsup
|
156
|
+
result
|
157
|
+
rescue Sql::NotSupportedError
|
158
|
+
super
|
159
|
+
end
|
160
|
+
|
147
161
|
def can_compile_summarization?(summarization)
|
148
162
|
summarization.values.all?{|s|
|
149
|
-
[:avg, :count, :max, :min, :sum].include?(s.to_summarizer_name)
|
163
|
+
[:avg, :count, :max, :min, :sum, :distinct_count].include?(s.to_summarizer_name)
|
150
164
|
}
|
151
165
|
end
|
152
166
|
|
153
167
|
def _union(type, right, options)
|
154
168
|
if right_expr = extract_compatible_sexpr(right)
|
155
169
|
expr = before_use(self.expr)
|
156
|
-
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(
|
170
|
+
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(expr)
|
157
171
|
_instance(type, builder, expr)
|
158
172
|
else
|
159
173
|
super
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -95,16 +95,7 @@ module Bmg
|
|
95
95
|
# @param the current iterated tuple
|
96
96
|
# @return updated memo value
|
97
97
|
def happens(memo, tuple)
|
98
|
-
value =
|
99
|
-
when Proc
|
100
|
-
@functor.call(tuple)
|
101
|
-
when NilClass
|
102
|
-
tuple
|
103
|
-
when Symbol
|
104
|
-
tuple[@functor]
|
105
|
-
else
|
106
|
-
tuple[@functor]
|
107
|
-
end
|
98
|
+
value = extract_value(tuple)
|
108
99
|
_happens(memo, value)
|
109
100
|
end
|
110
101
|
|
@@ -140,7 +131,25 @@ module Bmg
|
|
140
131
|
|
141
132
|
# Returns the canonical summarizer name
|
142
133
|
def to_summarizer_name
|
143
|
-
self.class.name
|
134
|
+
self.class.name
|
135
|
+
.gsub(/[a-z][A-Z]/){|x| x.split('').join('_') }
|
136
|
+
.downcase[/::([a-z_]+)$/, 1]
|
137
|
+
.to_sym
|
138
|
+
end
|
139
|
+
|
140
|
+
protected
|
141
|
+
|
142
|
+
def extract_value(tuple)
|
143
|
+
value = case @functor
|
144
|
+
when Proc
|
145
|
+
@functor.call(tuple)
|
146
|
+
when NilClass
|
147
|
+
tuple
|
148
|
+
when Symbol
|
149
|
+
tuple[@functor]
|
150
|
+
else
|
151
|
+
tuple[@functor]
|
152
|
+
end
|
144
153
|
end
|
145
154
|
|
146
155
|
end # class Summarizer
|
@@ -155,6 +164,8 @@ require_relative 'summarizer/stddev'
|
|
155
164
|
require_relative 'summarizer/percentile'
|
156
165
|
require_relative 'summarizer/collect'
|
157
166
|
require_relative 'summarizer/distinct'
|
167
|
+
require_relative 'summarizer/distinct_count'
|
158
168
|
require_relative 'summarizer/concat'
|
159
169
|
require_relative 'summarizer/by_proc'
|
160
170
|
require_relative 'summarizer/multiple'
|
171
|
+
require_relative 'summarizer/value_by'
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Collect the count of distinct values.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.distinct_count(:qty).summarize(...)
|
10
|
+
#
|
11
|
+
class DistinctCount < Summarizer
|
12
|
+
|
13
|
+
# Returns {} (an empty Hash) as least value.
|
14
|
+
def least()
|
15
|
+
{}
|
16
|
+
end
|
17
|
+
|
18
|
+
# Records val as a key of the memo hash
|
19
|
+
def _happens(memo, val)
|
20
|
+
memo[val] = true
|
21
|
+
memo
|
22
|
+
end
|
23
|
+
|
24
|
+
def finalize(memo)
|
25
|
+
memo.keys.size
|
26
|
+
end
|
27
|
+
|
28
|
+
end # class DistinctCount
|
29
|
+
|
30
|
+
# Factors a distinct count summarizer
|
31
|
+
def self.distinct_count(*args, &bl)
|
32
|
+
DistinctCount.new(*args, &bl)
|
33
|
+
end
|
34
|
+
|
35
|
+
end # class Summarizer
|
36
|
+
end # module Bmg
|
@@ -10,10 +10,17 @@ module Bmg
|
|
10
10
|
#
|
11
11
|
class Percentile < Summarizer
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
DEFAULT_OPTIONS = {
|
14
|
+
:variant => :continuous
|
15
|
+
}
|
16
|
+
|
17
|
+
def initialize(*args, &bl)
|
18
|
+
@nth = args.find{|a| a.is_a?(Integer) } || 50
|
19
|
+
functor = args.find{|a| a.is_a?(Symbol) } || bl
|
20
|
+
options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
|
21
|
+
memo.merge(opts)
|
22
|
+
}.dup
|
23
|
+
super(functor, options)
|
17
24
|
end
|
18
25
|
|
19
26
|
# Returns [] as least value.
|
@@ -29,19 +36,44 @@ module Bmg
|
|
29
36
|
# Finalizes the computation.
|
30
37
|
def finalize(memo)
|
31
38
|
return nil if memo.empty?
|
32
|
-
index = memo.size * (@nth / 100.0)
|
33
|
-
|
34
|
-
|
39
|
+
index = memo.size.to_f * (@nth.to_f / 100.0)
|
40
|
+
floor, ceil = index.floor, index.ceil
|
41
|
+
ceil +=1 if floor == ceil
|
42
|
+
below = [floor - 1, 0].max
|
43
|
+
above = [[ceil - 1, memo.size - 1].min, 0].max
|
35
44
|
sorted = memo.sort
|
36
|
-
|
45
|
+
if options[:variant] == :continuous
|
46
|
+
(sorted[above] + sorted[below]) / 2.0
|
47
|
+
else
|
48
|
+
sorted[below]
|
49
|
+
end
|
37
50
|
end
|
38
51
|
|
39
52
|
end # class Percentile
|
40
53
|
|
41
|
-
# Factors an average summarizer
|
42
54
|
def self.percentile(*args, &bl)
|
43
55
|
Percentile.new(*args, &bl)
|
44
56
|
end
|
45
57
|
|
58
|
+
def self.percentile_cont(*args, &bl)
|
59
|
+
Percentile.new(*(args + [{:variant => :continuous}]), &bl)
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.percentile_disc(*args, &bl)
|
63
|
+
Percentile.new(*(args + [{:variant => :discrete}]), &bl)
|
64
|
+
end
|
65
|
+
|
66
|
+
def self.median(*args, &bl)
|
67
|
+
Percentile.new(*(args + [50]), &bl)
|
68
|
+
end
|
69
|
+
|
70
|
+
def self.median_cont(*args, &bl)
|
71
|
+
Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.median_disc(*args, &bl)
|
75
|
+
Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
|
76
|
+
end
|
77
|
+
|
46
78
|
end # class Summarizer
|
47
79
|
end # module Bmg
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# ValueBy summarizer.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
|
10
|
+
#
|
11
|
+
class ValueBy < Summarizer
|
12
|
+
|
13
|
+
DEFAULT_OPTIONS = {
|
14
|
+
:symbolize => false
|
15
|
+
}
|
16
|
+
|
17
|
+
# Returns {} as least value.
|
18
|
+
def least
|
19
|
+
{}
|
20
|
+
end
|
21
|
+
|
22
|
+
# Collects the value
|
23
|
+
def happens(memo, tuple)
|
24
|
+
by = tuple[options[:by]]
|
25
|
+
by = by.to_sym if by && options[:symbolize]
|
26
|
+
misuse!(tuple, memo) if memo.has_key?(by)
|
27
|
+
memo.tap{|m|
|
28
|
+
m[by] = extract_value(tuple)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
# Finalizes the computation.
|
33
|
+
def finalize(memo)
|
34
|
+
default_tuple.merge(memo)
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def default_tuple
|
40
|
+
(options[:series] || []).each_with_object({}){|s,ss|
|
41
|
+
s_def = options[:default]
|
42
|
+
s = s.to_sym if s && options[:symbolize]
|
43
|
+
ss[s] = s_def
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
def misuse!(tuple, memo)
|
48
|
+
msg = "Summarizer.value_by: summarization key + the serie must form be a candidate key"
|
49
|
+
msg += "\n"
|
50
|
+
msg += " Tuple: #{tuple.inspect}"
|
51
|
+
msg += " Memo: #{memo.inspect}"
|
52
|
+
raise MisuseError, msg
|
53
|
+
end
|
54
|
+
|
55
|
+
end # class ValueBy
|
56
|
+
|
57
|
+
def self.value_by(*args, &bl)
|
58
|
+
ValueBy.new(*args, &bl)
|
59
|
+
end
|
60
|
+
|
61
|
+
end # class Summarizer
|
62
|
+
end # module Bmg
|
data/lib/bmg/support/keys.rb
CHANGED
@@ -32,7 +32,16 @@ module Bmg
|
|
32
32
|
}
|
33
33
|
when Hash
|
34
34
|
with.each_with_object(tuple.dup){|(k,v),dup|
|
35
|
-
|
35
|
+
case k
|
36
|
+
when Symbol
|
37
|
+
dup[k] = transform_attr(dup[k], v)
|
38
|
+
when Class
|
39
|
+
dup.keys.each do |attrname|
|
40
|
+
dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
|
41
|
+
end
|
42
|
+
else
|
43
|
+
raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
|
44
|
+
end
|
36
45
|
}
|
37
46
|
when Array
|
38
47
|
with.inject(tuple){|dup,on|
|
@@ -50,6 +59,19 @@ module Bmg
|
|
50
59
|
when Regexp
|
51
60
|
m = with.match(value.to_s)
|
52
61
|
m.nil? ? m : m.to_s
|
62
|
+
when Class
|
63
|
+
return value if value.nil?
|
64
|
+
if with.respond_to?(:parse)
|
65
|
+
with.parse(value)
|
66
|
+
elsif with == Integer
|
67
|
+
Integer(value)
|
68
|
+
elsif with == Float
|
69
|
+
Float(value)
|
70
|
+
elsif with == String
|
71
|
+
value.to_s
|
72
|
+
else
|
73
|
+
raise ArgumentError, "#{with} should respond to `parse`"
|
74
|
+
end
|
53
75
|
when Proc
|
54
76
|
with.call(value)
|
55
77
|
when Hash
|
data/lib/bmg/type.rb
CHANGED
@@ -82,7 +82,7 @@ module Bmg
|
|
82
82
|
|
83
83
|
def with_keys(keys)
|
84
84
|
dup.tap{|x|
|
85
|
-
x.keys = Keys.new(keys)
|
85
|
+
x.keys = keys ? Keys.new(keys) : nil
|
86
86
|
}
|
87
87
|
end
|
88
88
|
|
@@ -266,6 +266,15 @@ module Bmg
|
|
266
266
|
}
|
267
267
|
end
|
268
268
|
|
269
|
+
def ungroup(attrlist)
|
270
|
+
known_attributes!(attrlist) if typechecked? && knows_attrlist?
|
271
|
+
dup.tap{|x|
|
272
|
+
x.attrlist = nil
|
273
|
+
x.predicate = Predicate.tautology
|
274
|
+
x.keys = nil
|
275
|
+
}
|
276
|
+
end
|
277
|
+
|
269
278
|
def union(other)
|
270
279
|
if typechecked? && knows_attrlist? && other.knows_attrlist?
|
271
280
|
missing = self.attrlist - other.attrlist
|
@@ -280,6 +289,15 @@ module Bmg
|
|
280
289
|
}
|
281
290
|
end
|
282
291
|
|
292
|
+
def unwrap(attrlist)
|
293
|
+
known_attributes!(attrlist) if typechecked? && knows_attrlist?
|
294
|
+
dup.tap{|x|
|
295
|
+
x.attrlist = nil
|
296
|
+
x.predicate = predicate.and_split(attrlist).last
|
297
|
+
x.keys = self._keys.unwrap(self, x, attrlist) if knows_keys?
|
298
|
+
}
|
299
|
+
end
|
300
|
+
|
283
301
|
private
|
284
302
|
|
285
303
|
def known_attributes!(attrs)
|
data/lib/bmg/version.rb
CHANGED
data/lib/bmg/writer/csv.rb
CHANGED
@@ -19,7 +19,8 @@ module Bmg
|
|
19
19
|
relation.each do |tuple|
|
20
20
|
if csv.nil?
|
21
21
|
headers = infer_headers(tuple) if headers.nil?
|
22
|
-
|
22
|
+
csv_opts = csv_options.merge(headers: headers)
|
23
|
+
csv = CSV.new(string_or_io, **csv_opts)
|
23
24
|
end
|
24
25
|
csv << headers.map{|h| tuple[h] }
|
25
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bmg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.
|
4
|
+
version: 0.18.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bernard Lambeau
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: predicate
|
@@ -164,7 +164,9 @@ files:
|
|
164
164
|
- lib/bmg/operator/shared/unary.rb
|
165
165
|
- lib/bmg/operator/summarize.rb
|
166
166
|
- lib/bmg/operator/transform.rb
|
167
|
+
- lib/bmg/operator/ungroup.rb
|
167
168
|
- lib/bmg/operator/union.rb
|
169
|
+
- lib/bmg/operator/unwrap.rb
|
168
170
|
- lib/bmg/reader.rb
|
169
171
|
- lib/bmg/reader/csv.rb
|
170
172
|
- lib/bmg/reader/excel.rb
|
@@ -259,6 +261,7 @@ files:
|
|
259
261
|
- lib/bmg/sql/processor/semi_join.rb
|
260
262
|
- lib/bmg/sql/processor/star.rb
|
261
263
|
- lib/bmg/sql/processor/summarize.rb
|
264
|
+
- lib/bmg/sql/processor/transform.rb
|
262
265
|
- lib/bmg/sql/processor/where.rb
|
263
266
|
- lib/bmg/sql/relation.rb
|
264
267
|
- lib/bmg/sql/support/from_clause_orderer.rb
|
@@ -270,12 +273,14 @@ files:
|
|
270
273
|
- lib/bmg/summarizer/concat.rb
|
271
274
|
- lib/bmg/summarizer/count.rb
|
272
275
|
- lib/bmg/summarizer/distinct.rb
|
276
|
+
- lib/bmg/summarizer/distinct_count.rb
|
273
277
|
- lib/bmg/summarizer/max.rb
|
274
278
|
- lib/bmg/summarizer/min.rb
|
275
279
|
- lib/bmg/summarizer/multiple.rb
|
276
280
|
- lib/bmg/summarizer/percentile.rb
|
277
281
|
- lib/bmg/summarizer/stddev.rb
|
278
282
|
- lib/bmg/summarizer/sum.rb
|
283
|
+
- lib/bmg/summarizer/value_by.rb
|
279
284
|
- lib/bmg/summarizer/variance.rb
|
280
285
|
- lib/bmg/support.rb
|
281
286
|
- lib/bmg/support/keys.rb
|