bmg 0.18.2 → 0.18.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/bmg/algebra.rb +18 -0
- data/lib/bmg/algebra/shortcuts.rb +8 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator.rb +2 -0
- data/lib/bmg/operator/allbut.rb +1 -0
- data/lib/bmg/operator/autosummarize.rb +1 -0
- data/lib/bmg/operator/autowrap.rb +1 -0
- data/lib/bmg/operator/constants.rb +1 -0
- data/lib/bmg/operator/extend.rb +1 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +10 -4
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +1 -0
- data/lib/bmg/operator/project.rb +1 -0
- data/lib/bmg/operator/rename.rb +6 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +1 -0
- data/lib/bmg/operator/ungroup.rb +61 -0
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/operator/unwrap.rb +47 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +6 -0
- data/lib/bmg/relation/in_memory.rb +0 -1
- data/lib/bmg/sequel/relation.rb +1 -0
- data/lib/bmg/sequel/translator.rb +9 -2
- data/lib/bmg/sql.rb +4 -1
- data/lib/bmg/sql/processor.rb +1 -0
- data/lib/bmg/sql/processor/transform.rb +105 -0
- data/lib/bmg/sql/relation.rb +20 -6
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support/keys.rb +5 -0
- data/lib/bmg/support/tuple_transformer.rb +23 -1
- data/lib/bmg/type.rb +19 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +25 -2
data/lib/bmg/reader/csv.rb
CHANGED
@@ -5,30 +5,36 @@ module Bmg
|
|
5
5
|
|
6
6
|
# Reader defaults; the options given to `initialize` are merged over them.
# `:smart` turns on separator/quote inference in `initialize` and is removed
# again by `csv_options` before the options are handed to ::CSV.
DEFAULT_OPTIONS = {
  headers: true,
  return_headers: false,
  smart: true
}
|
10
11
|
|
11
|
-
def initialize(type,
|
12
|
+
# Builds a CSV reader over a file path or an already opened stream.
#
# @param type the relation type, stored as-is
# @param path_or_io a file path, or an IO/StringIO to read from
# @param options [Hash] overrides merged over DEFAULT_OPTIONS
def initialize(type, path_or_io, options = {})
  @type = type
  @path_or_io = path_or_io
  @options = DEFAULT_OPTIONS.merge(options)
  # The inference helpers presumably sample the input through `text_portion`,
  # which reads via `with_io`. A stream handed in by the caller would be
  # consumed by that read and leave nothing for `each`, so inference only
  # runs for inputs that `with_io` opens itself. StringIO is not an IO
  # subclass, hence the explicit second check.
  caller_stream = @path_or_io.is_a?(IO) ||
                  (defined?(::StringIO) && @path_or_io.is_a?(::StringIO))
  if @options[:smart] && !caller_stream
    # `||=` keeps any separator/quote char the caller set explicitly.
    @options[:col_sep] ||= infer_col_sep
    @options[:quote_char] ||= infer_quote_char
  end
end
|
18
21
|
|
19
22
|
# Yields one tuple per CSV row; returns an Enumerator when called without
# a block.
def each
  return to_enum unless block_given?

  # Loaded on first use rather than at file load time.
  require 'csv'
  with_io do |stream|
    parser = ::CSV.new(stream, **csv_options)
    parser.each { |record| yield tuple(record) }
  end
end
|
25
31
|
|
26
32
|
# Describes this reader as a three-element array: the :csv tag, the input
# (path or stream) and the merged options.
def to_ast
  [:csv].push(@path_or_io, @options)
end
|
29
35
|
|
30
36
|
# Short human-readable form, e.g. "(csv some/file.csv)".
def to_s
  format("(csv %s)", @path_or_io)
end
|
33
39
|
alias :inspect :to_s
|
34
40
|
|
@@ -47,7 +53,16 @@ module Bmg
|
|
47
53
|
end
|
48
54
|
|
49
55
|
# Returns (and memoizes) a sample made of the first ten lines of the input.
#
# `readlines(10)` must not be used here: an Integer argument to
# IO#readlines is a per-line byte limit, not a number of lines, so it would
# read the entire input chopped into 10-byte pieces.
#
# @return [String] the sampled lines, each keeping its own line terminator
def text_portion
  @text_portion ||= with_io { |io| io.each_line.first(10).join }
end
|
58
|
+
|
59
|
+
# Calls the block with a readable stream for the reader's input and returns
# the block's result.
#
# An IO or StringIO given by the caller is passed through untouched (and is
# not closed here). Anything else is treated as a path and opened read-only
# for the duration of the block; File.open closes it afterwards.
def with_io(&bl)
  source = @path_or_io
  # StringIO is only referenced when it is actually loaded: naming the
  # constant unconditionally raises NameError when `stringio` has not been
  # required yet, and in that case `source` cannot be a StringIO anyway.
  already_open = source.is_a?(IO) ||
                 (defined?(::StringIO) && source.is_a?(::StringIO))
  return bl.call(source) if already_open

  File.open(source, "r", &bl)
end
|
52
67
|
|
53
68
|
# Finds the best candidate among `candidates` for a separator
|
@@ -61,6 +76,10 @@ module Bmg
|
|
61
76
|
snif.size > 0 ? snif[0][0] : default
|
62
77
|
end
|
63
78
|
|
79
|
+
# Options handed to ::CSV: a memoized copy of the reader options without
# the reader-only :smart key. The reader's own options are left untouched.
def csv_options
  @csv_options ||= @options.reject { |name, _| name == :smart }
end
|
82
|
+
|
64
83
|
end # class Csv
|
65
84
|
end # module Reader
|
66
85
|
end # module Bmg
|
data/lib/bmg/reader/excel.rb
CHANGED
@@ -4,7 +4,8 @@ module Bmg
|
|
4
4
|
include Reader
|
5
5
|
|
6
6
|
# Reader defaults. `row_num` is interpreted by `init_tuple`: true seeds each
# tuple with a :row_num attribute, a Symbol names that attribute, false
# leaves it out. NOTE(review): how `skip` is consumed is not visible in this
# excerpt — confirm against `each`.
DEFAULT_OPTIONS = { skip: 0, row_num: true }
|
9
10
|
|
10
11
|
def initialize(type, path, options = {})
|
@@ -14,6 +15,7 @@ module Bmg
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def each
|
18
|
+
return to_enum unless block_given?
|
17
19
|
require 'roo'
|
18
20
|
xlsx = Roo::Spreadsheet.open(@path, @options)
|
19
21
|
headers = nil
|
@@ -23,9 +25,13 @@ module Bmg
|
|
23
25
|
.each_with_index
|
24
26
|
.each do |row, i|
|
25
27
|
if i==0
|
26
|
-
headers = row.map
|
28
|
+
headers = row.map{|c| c.to_s.strip.to_sym }
|
27
29
|
else
|
28
|
-
|
30
|
+
init = init_tuple(i)
|
31
|
+
tuple = (0...headers.size)
|
32
|
+
.each_with_object(init){|i,t|
|
33
|
+
t[headers[i]] = row[i]
|
34
|
+
}
|
29
35
|
yield(tuple)
|
30
36
|
end
|
31
37
|
end
|
@@ -36,10 +42,23 @@ module Bmg
|
|
36
42
|
end
|
37
43
|
|
38
44
|
# Short human-readable form, e.g. "(excel some/file.xlsx)".
def to_s
  format("(excel %s)", @path)
end
|
41
47
|
alias :inspect :to_s
|
42
48
|
|
49
|
+
private
|
50
|
+
|
51
|
+
# Builds the hash a data row's tuple starts from, optionally seeded with
# the row index, as driven by the :row_num option.
#
# @param i the row index supplied by the caller
# @return [Hash] `{ name => i }`, or `{}` when row numbering is off
# @raise [ArgumentError] when :row_num is none of true/false/nil/Symbol/String
def init_tuple(i)
  case as = @options[:row_num]
  when true
    { :row_num => i }
  when false, nil
    # A missing option behaves like `false`; returning nil here would make
    # the caller fail later when it fills the tuple.
    {}
  when Symbol, String
    { as.to_sym => i }
  else
    raise ArgumentError, "Unexpected :row_num option `#{as.inspect}`"
  end
end
|
61
|
+
|
43
62
|
end # class Excel
|
44
63
|
end # module Reader
|
45
64
|
end # module Bmg
|
data/lib/bmg/relation.rb
CHANGED
data/lib/bmg/sequel/relation.rb
CHANGED
@@ -78,8 +78,15 @@ module Bmg
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# Translates a function-call node.
#
# `cast` is special-cased: the first argument is translated and `cast` is
# called on the result with the last element of the last argument node.
# Any other function is emitted through ::Sequel.function with every
# argument translated.
def on_func_call(sexpr)
  if sexpr.func_name == :cast
    operand = apply(sexpr.func_args.first)
    target_type = sexpr.func_args.last.last
    return operand.cast(target_type)
  end

  translated = sexpr.func_args.map { |arg| apply(arg) }
  ::Sequel.function(sexpr.func_name, *translated)
end
|
84
91
|
|
85
92
|
def on_summarizer(sexpr)
|
data/lib/bmg/sql.rb
CHANGED
data/lib/bmg/sql/processor.rb
CHANGED
@@ -0,0 +1,105 @@
|
|
1
|
+
module Bmg
  module Sql
    class Processor
      # Rewrites the items of a select list so that transformed attributes
      # are wrapped in `cast` function calls.
      class Transform < Processor

        # Splits a transformation into the part accepted by a predicate
        # block and the part it rejects.
        module SplitSupported
          extend(self)

          # Returns a `[supported, unsupported]` pair for `tr`; either side
          # is nil when it would be empty.
          #
          # * Array: the leading run of accepted steps versus everything
          #   from the first rejected step on; a one-element side collapses
          #   to that element.
          # * Hash: each value is split on its own and redistributed under
          #   the same key.
          # * Anything else: accepted or rejected as a whole.
          def split_supported(tr, &bl)
            if tr.is_a?(Array)
              accepted = tr.take_while { |step| bl.call(step) }
              [accepted, tr.drop(accepted.size)].map do |steps|
                if steps.empty?
                  nil
                elsif steps.size == 1
                  steps.first
                else
                  steps
                end
              end
            elsif tr.is_a?(Hash)
              supported = {}
              unsupported = {}
              tr.each do |key, value|
                mine, hers = _split_supported(value, &bl)
                supported[key] = mine unless mine.nil?
                unsupported[key] = hers unless hers.nil?
              end
              [supported, unsupported].map { |part| part.empty? ? nil : part }
            else
              _split_supported(tr, &bl)
            end
          end

          # Splits a single hash value or scalar: arrays are delegated back
          # to `split_supported`, anything else lands entirely on one side.
          def _split_supported(tr, &bl)
            return split_supported(tr, &bl) if tr.is_a?(Array)

            bl.call(tr) ? [tr, nil] : [nil, tr]
          end
        end # module SplitSupported

        # @raise [NotSupportedError] when any option is given
        def initialize(transformation, options, builder)
          raise NotSupportedError unless options.empty?
          super(builder)
          @transformation = transformation
        end
        attr_reader :transformation

        # Class-level shortcut to SplitSupported.split_supported.
        def self.split_supported(*args, &bl)
          SplitSupported.split_supported(*args, &bl)
        end

        # Keeps the first element of the node as-is and processes every
        # following child.
        def on_select_list(sexpr)
          sexpr.each_with_index.map do |node, position|
            if position == 0
              node
            else
              apply(node)
            end
          end
        end

        # Wraps the item's expression in one cast per transformation step;
        # items without a transformation are returned unchanged.
        def on_select_item(sexpr)
          attr_name = sexpr.as_name.to_sym
          target = transformation_for(attr_name)
          return sexpr if target.nil?
          raise NotSupportedError unless target.is_a?(Class) || target.is_a?(Array)

          # Steps are reversed because the outermost cast is built first.
          sexpr([:select_item, func_call_node(sexpr, Array(target).reverse), sexpr[2]])
        end

      private

        # Builds the nested cast node for `ts`, outermost type first.
        def func_call_node(sexpr, ts)
          _func_call_node(sexpr, ts.first, ts[1..-1])
        end

        # Casts to `head` whatever the remaining types in `tail` produce;
        # the innermost operand is the item's own expression, `sexpr[1]`.
        def _func_call_node(sexpr, head, tail)
          operand = tail.empty? ? sexpr[1] : _func_call_node(sexpr, tail.first, tail[1..-1])
          [:func_call, :cast, operand, [:literal, head]]
        end

        # Looks up what applies to attribute `as`: a Class or Array applies
        # to every attribute, a Hash is indexed by attribute name.
        def transformation_for(as)
          spec = transformation
          case spec
          when Hash
            spec[as]
          when Class, Array
            spec
          else
            raise Sql::NotSupportedError, "Unable to use `#{as}` for `transform`"
          end
        end

      end # class Transform
    end # class Processor
  end # module Sql
end # module Bmg
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -123,27 +123,41 @@ module Bmg
|
|
123
123
|
|
124
124
|
# Compiles a rename into the SQL expression: runs the Rename processor on
# the prepared expression and wraps the result in a new relation of `type`.
def _rename(type, renaming)
  prepared = before_use(self.expr)
  renamed = Processor::Rename.new(renaming, builder).call(prepared)
  _instance(type, builder, renamed)
end
|
129
129
|
|
130
130
|
# Compiles a restriction into the SQL expression: runs the Where processor
# on the prepared expression and wraps the result in a new relation of
# `type`.
def _restrict(type, predicate)
  prepared = before_use(self.expr)
  filtered = Processor::Where.new(predicate, builder).call(prepared)
  _instance(type, builder, filtered)
end
|
135
135
|
|
136
|
-
def _summarize(type, by,
|
137
|
-
summarization =
|
136
|
+
# Compiles a summarization to SQL when every summarizer can be compiled
# (see `can_compile_summarization?`); otherwise defers to the parent
# implementation.
def _summarize(type, by, defs)
  summarization = ::Bmg::Summarizer.summarization(defs)
  return super unless can_compile_summarization?(summarization)

  prepared = before_use(self.expr)
  summarized = Processor::Summarize.new(by, summarization, builder).call(prepared)
  _instance(type, builder, summarized)
end
|
146
146
|
|
147
|
+
# Compiles the part of a transformation made only of String, Integer,
# Float, Date or DateTime steps to SQL, and applies whatever remains on the
# resulting relation. Defers entirely to the parent implementation when
# nothing is compilable or when the SQL layer raises NotSupportedError.
def _transform(type, transformation, options)
  prepared = before_use(self.expr)
  compilable, leftover = Processor::Transform.split_supported(transformation) { |candidate|
    [String, Integer, Float, Date, DateTime].include?(candidate)
  }
  return super if compilable.nil?

  casted = Processor::Transform.new(compilable, options, builder).call(prepared)
  relation = _instance(type, builder, casted)
  leftover ? relation.transform(leftover, options) : relation
rescue Sql::NotSupportedError
  super
end
|
160
|
+
|
147
161
|
def can_compile_summarization?(summarization)
|
148
162
|
summarization.values.all?{|s|
|
149
163
|
[:avg, :count, :max, :min, :sum].include?(s.to_summarizer_name)
|
@@ -153,7 +167,7 @@ module Bmg
|
|
153
167
|
def _union(type, right, options)
|
154
168
|
if right_expr = extract_compatible_sexpr(right)
|
155
169
|
expr = before_use(self.expr)
|
156
|
-
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(
|
170
|
+
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(expr)
|
157
171
|
_instance(type, builder, expr)
|
158
172
|
else
|
159
173
|
super
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -50,6 +50,21 @@ module Bmg
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
# Turns summarization definitions into a Hash mapping each key to a
# summarizer.
#
# A Summarizer value is kept as-is, a Symbol calls the Summarizer class
# method of that name with the key as argument, and a Proc is wrapped
# through Summarizer.by_proc.
#
# @raise [ArgumentError] for any other kind of value
def self.summarization(defs)
  defs.each_with_object({}) do |(name, spec), summarizers|
    summarizers[name] =
      if spec.is_a?(Summarizer)
        spec
      elsif spec.is_a?(Symbol)
        Summarizer.send(spec, name)
      elsif spec.is_a?(Proc)
        Summarizer.by_proc(&spec)
      else
        raise ArgumentError, "Unexpected summarizer #{name} => #{spec}"
      end
  end
end
|
67
|
+
|
53
68
|
# Returns the default options to use
|
54
69
|
#
|
55
70
|
# @return the default aggregation options
|
@@ -80,7 +95,7 @@ module Bmg
|
|
80
95
|
# @param the current iterated tuple
|
81
96
|
# @return updated memo value
|
82
97
|
def happens(memo, tuple)
  # Value extraction is delegated to `extract_value`; `_happens` only ever
  # sees the extracted value.
  _happens(memo, extract_value(tuple))
end
|
86
101
|
|
@@ -119,6 +134,21 @@ module Bmg
|
|
119
134
|
self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
|
120
135
|
end
|
121
136
|
|
137
|
+
protected
|
138
|
+
|
139
|
+
# Extracts from `tuple` the value the summarizer works on, as driven by
# @functor: a Proc is called with the tuple, nil means the whole tuple, and
# anything else (typically a Symbol) is used as a key into the tuple.
def extract_value(tuple)
  case @functor
  when Proc then @functor.call(tuple)
  when nil then tuple
  else tuple[@functor]
  end
end
|
151
|
+
|
122
152
|
end # class Summarizer
|
123
153
|
end # module Bmg
|
124
154
|
require_relative 'summarizer/count'
|
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
|
|
128
158
|
require_relative 'summarizer/avg'
|
129
159
|
require_relative 'summarizer/variance'
|
130
160
|
require_relative 'summarizer/stddev'
|
161
|
+
require_relative 'summarizer/percentile'
|
131
162
|
require_relative 'summarizer/collect'
|
163
|
+
require_relative 'summarizer/distinct'
|
132
164
|
require_relative 'summarizer/concat'
|
165
|
+
require_relative 'summarizer/by_proc'
|
166
|
+
require_relative 'summarizer/multiple'
|
167
|
+
require_relative 'summarizer/value_by'
|
data/lib/bmg/summarizer/avg.rb
CHANGED
@@ -16,13 +16,13 @@ module Bmg
|
|
16
16
|
end
|
17
17
|
|
18
18
|
# Accounts for one more value: adds it to the running sum (first slot of
# the memo) and increments the count (last slot).
def _happens(memo, val)
  running_sum = memo.first + val
  seen = memo.last + 1
  [running_sum, seen]
end
|
22
22
|
|
23
23
|
# Finalizes the computation: the accumulated sum (first slot of the memo)
# divided by the number of values seen (last slot).
def finalize(memo)
  total = memo.first
  count = memo.last
  total / count
end
|
27
27
|
|
28
28
|
end # class Avg
|