bmg 0.16.0.pre.rc1 → 0.16.0.pre.rc2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bmg.rb +1 -0
- data/lib/bmg/algebra.rb +17 -1
- data/lib/bmg/algebra/shortcuts.rb +27 -11
- data/lib/bmg/error.rb +4 -0
- data/lib/bmg/operator.rb +6 -44
- data/lib/bmg/operator/autowrap.rb +80 -8
- data/lib/bmg/operator/join.rb +19 -0
- data/lib/bmg/operator/shared/binary.rb +22 -0
- data/lib/bmg/operator/shared/nary.rb +19 -0
- data/lib/bmg/operator/shared/unary.rb +21 -0
- data/lib/bmg/operator/summarize.rb +77 -0
- data/lib/bmg/sequel/translator.rb +16 -1
- data/lib/bmg/sql/builder.rb +8 -0
- data/lib/bmg/sql/grammar.rb +2 -0
- data/lib/bmg/sql/grammar.sexp.yml +11 -0
- data/lib/bmg/sql/nodes/group_by_clause.rb +20 -0
- data/lib/bmg/sql/nodes/select_exp.rb +4 -0
- data/lib/bmg/sql/nodes/summarizer.rb +23 -0
- data/lib/bmg/sql/processor.rb +1 -0
- data/lib/bmg/sql/processor/summarize.rb +48 -0
- data/lib/bmg/sql/processor/where.rb +10 -5
- data/lib/bmg/sql/relation.rb +17 -0
- data/lib/bmg/sql/support/from_clause_orderer.rb +129 -39
- data/lib/bmg/summarizer.rb +132 -0
- data/lib/bmg/summarizer/avg.rb +36 -0
- data/lib/bmg/summarizer/collect.rb +31 -0
- data/lib/bmg/summarizer/concat.rb +42 -0
- data/lib/bmg/summarizer/count.rb +31 -0
- data/lib/bmg/summarizer/max.rb +31 -0
- data/lib/bmg/summarizer/min.rb +31 -0
- data/lib/bmg/summarizer/stddev.rb +26 -0
- data/lib/bmg/summarizer/sum.rb +31 -0
- data/lib/bmg/summarizer/variance.rb +42 -0
- data/lib/bmg/type.rb +11 -0
- data/lib/bmg/version.rb +1 -1
- metadata +34 -3
@@ -0,0 +1,132 @@
|
|
1
|
+
module Bmg
  #
  # Summarizer.
  #
  # This class provides a basis for implementing aggregation operators.
  #
  # Aggregation operators are made available through factory methods on the
  # Summarizer class itself:
  #
  #     Summarizer.count
  #     Summarizer.sum(:qty)
  #     Summarizer.sum{|t| t[:qty] * t[:price] }
  #
  # Once built, summarizers can be used either in black-box or white-box modes.
  #
  #     relation = ...
  #     agg = Summarizer.sum(:qty)
  #
  #     # Black box mode:
  #     result = agg.summarize(relation)
  #
  #     # White box mode:
  #     memo = agg.least
  #     relation.each do |tuple|
  #       memo = agg.happens(memo, tuple)
  #     end
  #     result = agg.finalize(memo)
  #
  class Summarizer

    # @return [Hash] the aggregation options
    attr_reader :options

    # @return [Symbol, Proc, nil] the underlying functor, either a Symbol
    #   (attribute name looked up on each tuple) or a Proc (called with each
    #   tuple); nil when none was given
    attr_reader :functor

    # Creates a Summarizer instance.
    #
    # Private method, please use the factory methods.
    #
    # Arguments are dispatched on their class: a Symbol or a Proc becomes the
    # functor (the last one given wins, a block being considered last), a
    # Hash is merged into the default options.
    #
    # @param args [Array<Symbol, Proc, Hash>] functor and/or options
    # @param block [Proc] optional functor given as a block
    # @raise [ArgumentError] if an argument is neither a Symbol, Proc nor Hash
    def initialize(*args, &block)
      @options = default_options
      # Explicitly initialized so that functor-less summarizers expose nil.
      @functor = nil
      args.push(block) if block
      args.each do |arg|
        case arg
        when Symbol, Proc then @functor = arg
        when Hash         then @options = @options.merge(arg)
        else
          raise ArgumentError, "Unexpected `#{arg}`"
        end
      end
    end

    # Returns the default options to use.
    #
    # @return [Hash] the default aggregation options (empty by default)
    def default_options
      {}
    end
    protected :default_options

    # Returns the least value, which is the one to use on an empty
    # set.
    #
    # This method is intended to be overridden by subclasses; default
    # implementation returns nil.
    #
    # @return the least value for this summarizer
    def least
      nil
    end

    # This method is called on each aggregated tuple and must return
    # an updated _memo_ value. It can be seen as the block typically
    # given to Enumerable.inject.
    #
    # The default implementation collects the pre-value on the tuple
    # (calling the functor when it is a Proc, using it as a key on the
    # tuple otherwise) and delegates to _happens.
    #
    # @param memo the current aggregation value
    # @param tuple the current iterated tuple
    # @return updated memo value
    def happens(memo, tuple)
      value = @functor.is_a?(Proc) ? @functor.call(tuple) : tuple[@functor]
      _happens(memo, value)
    end

    # @see happens.
    #
    # This method is intended to be overridden and returns _value_
    # by default, making this summarizer a "Last(...)" summarizer.
    #
    # @param memo the current aggregation value
    # @param value the value extracted from the current tuple
    # @return updated memo value
    def _happens(memo, value)
      value
    end
    protected :_happens

    # This method finalizes an aggregation.
    #
    # Argument _memo_ is either _least_ or the result of aggregating
    # through _happens_. The default implementation simply returns
    # _memo_. The method is intended to be overridden for complex
    # aggregations that need stateful information such as `avg`.
    #
    # @param [Object] memo the current aggregation value
    # @return [Object] the aggregation value, as finalized
    def finalize(memo)
      memo
    end

    # Summarizes an enumeration of tuples.
    #
    # @param enum an enumerable of tuples
    # @return the computed summarization value
    def summarize(enum)
      finalize(enum.inject(least){|m,t| happens(m, t) })
    end

    # Returns the canonical summarizer name, that is, the downcased last
    # segment of the class name (e.g. `Bmg::Summarizer::Avg` => `:avg`).
    #
    # The last segment is taken by splitting on `::` so that classes defined
    # outside of any namespace, or whose name contains digits or underscores,
    # are supported too.
    #
    # @return [Symbol] the summarizer name
    def to_summarizer_name
      self.class.name.split('::').last.downcase.to_sym
    end

  end # class Summarizer
end # module Bmg
|
124
|
+
require_relative 'summarizer/count'
|
125
|
+
require_relative 'summarizer/sum'
|
126
|
+
require_relative 'summarizer/min'
|
127
|
+
require_relative 'summarizer/max'
|
128
|
+
require_relative 'summarizer/avg'
|
129
|
+
require_relative 'summarizer/variance'
|
130
|
+
require_relative 'summarizer/stddev'
|
131
|
+
require_relative 'summarizer/collect'
|
132
|
+
require_relative 'summarizer/concat'
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Average summarizer.
    #
    # Nil values are ignored: they contribute neither to the sum nor to the
    # count.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.avg(:qty).summarize(...)
    #
    class Avg < Summarizer

      # Returns [0.0, 0.0] as least value, that is, a [sum, count] pair.
      def least
        [0.0, 0.0]
      end

      # Collects one more value + the sum of all.
      #
      # @param memo [Array] the current [sum, count] pair
      # @param val the value to account for, ignored when nil
      # @return [Array] the updated [sum, count] pair
      def _happens(memo, val)
        # Skip nil instead of failing on `Float + nil`.
        return memo if val.nil?
        [memo.first + val, memo.last + 1]
      end

      # Finalizes the computation by dividing the sum by the count.
      #
      # When no value has been seen, this divides 0.0 by 0.0 and hence
      # returns NaN.
      #
      # @param memo [Array] the [sum, count] pair
      # @return the average
      def finalize(memo)
        memo.first / memo.last
      end

    end # class Avg

    # Factors an average summarizer
    def self.avg(*args, &bl)
      Avg.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Summarizer gathering every seen value into an array.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.collect(:qty).summarize(...)
    #
    class Collect < Summarizer

      # Starts from a fresh empty array.
      #
      # @return [Array] the least value, i.e. []
      def least
        []
      end

      # Appends `val` to the memo, in place.
      #
      # @param memo the values collected so far
      # @param val the value to append
      # @return the memo, with `val` appended
      def _happens(memo, val)
        memo << val
      end

    end # class Collect

    class << self

      # Factors a collect summarizer
      def collect(*args, &bl)
        Collect.new(*args, &bl)
      end

    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # String concatenation summarizer.
    #
    # Recognized options are `:before` and `:after` (added around the final
    # result) and `:between` (added between two consecutive values).
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.concat(:qty).summarize(...)
    #
    class Concat < Summarizer

      # Sets default options.
      #
      # @return [Hash] empty strings for :before, :after and :between
      def default_options
        {:before => "", :after => "", :between => ""}
      end

      # Returns least value (defaults to "").
      #
      # A fresh, mutable String is returned on every call because _happens
      # appends to the memo in place; handing out the literal itself would
      # break once string literals are frozen.
      #
      # @return [String] a new empty string
      def least
        "".dup
      end

      # Concatenates current memo with val.to_s, in place.
      #
      # The `:between` separator is only added when the memo is not empty,
      # hence no separator follows leading values whose `to_s` is empty.
      #
      # @param memo [String] the string built so far (mutated)
      # @param val the value to append
      # @return [String] the memo
      def _happens(memo, val)
        memo << options[:between].to_s unless memo.empty?
        memo << val.to_s
      end

      # Finalizes computation by surrounding the memo with the `:before`
      # and `:after` options.
      #
      # @param memo [String] the concatenated values
      # @return [String] a new string
      def finalize(memo)
        options[:before].to_s + memo + options[:after].to_s
      end

    end # class Concat

    # Factors a concatenation summarizer
    def self.concat(*args, &bl)
      Concat.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Summarizer counting the tuples it sees.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.count.summarize(...)
    #
    class Count < Summarizer

      # Counting starts at zero.
      #
      # @return [Integer] the least value, i.e. 0
      def least
        0
      end

      # Increments the memo by one; the tuple itself is not looked at.
      #
      # @param memo the number of tuples seen so far
      # @param tuple the current tuple (unused)
      # @return the memo plus one
      def happens(memo, tuple)
        memo + 1
      end

    end # class Count

    class << self

      # Factors a count summarizer
      def count(*args, &bl)
        Count.new(*args, &bl)
      end

    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Summarizer keeping the greatest value seen.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.max(:qty).summarize(...)
    #
    class Max < Summarizer

      # No maximum is known before any value is seen.
      #
      # @return [nil] the least value
      def least
        nil
      end

      # Picks the greater of memo and val, a nil on either side being
      # ignored in favor of the other operand.
      #
      # @param memo the greatest value so far, or nil
      # @param val the candidate value, or nil
      # @return the greatest of both
      def _happens(memo, val)
        return val if memo.nil?
        return memo if val.nil?

        if memo > val
          memo
        else
          val
        end
      end

    end # class Max

    class << self

      # Factors a max summarizer
      def max(*args, &bl)
        Max.new(*args, &bl)
      end

    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Summarizer keeping the smallest value seen.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.min(:qty).summarize(...)
    #
    class Min < Summarizer

      # No minimum is known before any value is seen.
      #
      # @return [nil] the least value
      def least
        nil
      end

      # Picks the smaller of memo and val, a nil on either side being
      # ignored in favor of the other operand.
      #
      # @param memo the smallest value so far, or nil
      # @param val the candidate value, or nil
      # @return the smallest of both
      def _happens(memo, val)
        return val if memo.nil?
        return memo if val.nil?

        if memo < val
          memo
        else
          val
        end
      end

    end # class Min

    class << self

      # Factors a min summarizer
      def min(*args, &bl)
        Min.new(*args, &bl)
      end

    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Standard deviation summarizer, built on top of Variance.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.stddev(:qty).summarize(...)
    #
    class Stddev < Variance

      # Finalizes the computation: the standard deviation is the square
      # root of what the superclass finalizes.
      #
      # @param memo the aggregation state, as expected by the superclass
      # @return [Float] the square root of the superclass result
      def finalize(memo)
        variance = super(memo)
        Math.sqrt(variance)
      end

    end # class Stddev

    class << self

      # Factors a standard deviation summarizer
      def stddev(*args, &bl)
        Stddev.new(*args, &bl)
      end

    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Summarizer adding up the values it sees.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.sum(:qty).summarize(...)
    #
    class Sum < Summarizer

      # Summing starts at zero.
      #
      # @return [Integer] the least value, i.e. 0
      def least
        0
      end

      # Adds val to the memo, a nil val counting for 0.
      #
      # @param memo the sum so far
      # @param val the value to add, or nil
      # @return the updated sum
      def _happens(memo, val)
        increment = val.nil? ? 0 : val
        memo + increment
      end

    end # class Sum

    class << self

      # Factors a sum summarizer
      def sum(*args, &bl)
        Sum.new(*args, &bl)
      end

    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Variance summarizer
    #
    # The variance is computed incrementally, the memo being a
    # [count, mean, m2] triple where m2 accumulates `delta * (val - mean)`
    # on every value. Nil values are ignored.
    #
    # Example:
    #
    #     # direct ruby usage
    #     Bmg::Summarizer.variance(:qty).summarize(...)
    #
    class Variance < Summarizer

      # Returns the least value, that is, the [count, mean, m2] triple
      # [0, 0.0, 0.0].
      def least
        [0, 0.0, 0.0]
      end

      # Aggregates on a tuple occurrence.
      #
      # @param memo [Array] the current [count, mean, m2] triple
      # @param val the value to account for, ignored when nil
      # @return [Array] the updated [count, mean, m2] triple
      def _happens(memo, val)
        # Skip nil instead of failing on `nil - mean`.
        return memo if val.nil?

        count, mean, m2 = memo
        count += 1
        delta = val - mean
        mean += (delta / count)
        # Uses the already updated mean, on purpose.
        m2 += delta*(val - mean)
        [count, mean, m2]
      end

      # Finalizes the computation by dividing m2 by the count.
      #
      # When no value has been seen, this divides 0.0 by 0 and hence
      # returns NaN.
      #
      # @param memo [Array] the [count, mean, m2] triple
      # @return the variance
      def finalize(memo)
        count, _mean, m2 = memo
        m2 / count
      end

    end # class Variance

    # Factors a variance summarizer
    def self.variance(*args, &bl)
      Variance.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
|