bmg 0.18.2 → 0.18.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/bmg/algebra.rb +18 -0
- data/lib/bmg/algebra/shortcuts.rb +8 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator.rb +2 -0
- data/lib/bmg/operator/allbut.rb +1 -0
- data/lib/bmg/operator/autosummarize.rb +1 -0
- data/lib/bmg/operator/autowrap.rb +1 -0
- data/lib/bmg/operator/constants.rb +1 -0
- data/lib/bmg/operator/extend.rb +1 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +10 -4
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +1 -0
- data/lib/bmg/operator/project.rb +1 -0
- data/lib/bmg/operator/rename.rb +6 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +1 -0
- data/lib/bmg/operator/ungroup.rb +61 -0
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/operator/unwrap.rb +47 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +6 -0
- data/lib/bmg/relation/in_memory.rb +0 -1
- data/lib/bmg/sequel/relation.rb +1 -0
- data/lib/bmg/sequel/translator.rb +9 -2
- data/lib/bmg/sql.rb +4 -1
- data/lib/bmg/sql/processor.rb +1 -0
- data/lib/bmg/sql/processor/transform.rb +105 -0
- data/lib/bmg/sql/relation.rb +20 -6
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support/keys.rb +5 -0
- data/lib/bmg/support/tuple_transformer.rb +23 -1
- data/lib/bmg/type.rb +19 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +25 -2
data/lib/bmg/reader/csv.rb
CHANGED
@@ -5,30 +5,36 @@ module Bmg
|
|
5
5
|
|
6
6
|
# Options applied when the caller does not override them. `:smart`
# is consumed by this reader itself (it turns on separator and quote
# inference in `initialize`) and is stripped before the remaining
# options are handed to ::CSV.
DEFAULT_OPTIONS = {
  headers: true,
  return_headers: false,
  smart: true
}
|
10
11
|
|
11
|
-
# Builds a CSV reader.
#
# @param type the relation type to attach to this reader
# @param path_or_io either something `File.open` accepts, or an
#   already opened stream
# @param options overrides for DEFAULT_OPTIONS
#
# In smart mode, and unless the source is an IO, a missing
# `:col_sep` / `:quote_char` is inferred from the source.
def initialize(type, path_or_io, options = {})
  @type, @path_or_io = type, path_or_io
  @options = DEFAULT_OPTIONS.merge(options)
  return if !@options[:smart] || @path_or_io.is_a?(IO)

  @options[:col_sep] ||= infer_col_sep
  @options[:quote_char] ||= infer_quote_char
end
|
18
21
|
|
19
22
|
# Yields one tuple per CSV row. Returns an Enumerator when called
# without a block.
def each
  return to_enum unless block_given?

  # csv is loaded lazily, only when rows are actually read
  require 'csv'
  with_io do |io|
    parser = ::CSV.new(io, **csv_options)
    parser.each { |row| yield tuple(row) }
  end
end
|
25
31
|
|
26
32
|
# Returns the AST of this reader: the `:csv` tag followed by the
# source and the effective options.
def to_ast
  [:csv, @path_or_io, @options]
end
|
29
35
|
|
30
36
|
def to_s
|
31
|
-
"(csv #{
|
37
|
+
"(csv #{@path_or_io})"
|
32
38
|
end
|
33
39
|
alias :inspect :to_s
|
34
40
|
|
@@ -47,7 +53,16 @@ module Bmg
|
|
47
53
|
end
|
48
54
|
|
49
55
|
# Returns (and memoizes) a sample of the source: its first 10 lines,
# verbatim.
#
# `IO#readlines(10)` is deliberately not used: an Integer argument
# there is a per-line byte limit, not a line count, so it would read
# the whole source in 10-byte chunks.
#
# When the sample is read from the caller's own stream (rather than
# a file opened by `with_io`), the stream position is restored
# afterwards so that a later read still sees every row.
def text_portion
  @text_portion ||= with_io do |io|
    origin = begin
      io.pos if io.equal?(@path_or_io)
    rescue Errno::ESPIPE
      nil # non-seekable stream: nothing to restore
    end
    sample = io.each_line.first(10).join
    io.pos = origin if origin
    sample
  end
end
|
58
|
+
|
59
|
+
# Calls the block with a readable stream on the source.
#
# An IO or StringIO source is passed to the block as is, and is left
# open. Anything else is opened for reading through `File.open`,
# which closes the file when the block returns. Returns the block's
# result.
#
# StringIO is only looked up when it is loaded: a source cannot be a
# StringIO otherwise, and a bare reference would raise NameError for
# plain paths when `stringio` has not been required.
def with_io(&bl)
  stream = @path_or_io
  if stream.is_a?(IO) || (defined?(::StringIO) && stream.is_a?(::StringIO))
    bl.call(stream)
  else
    File.open(stream, "r", &bl)
  end
end
|
52
67
|
|
53
68
|
# Finds the best candidate among `candidates` for a separator
|
@@ -61,6 +76,10 @@ module Bmg
|
|
61
76
|
snif.size > 0 ? snif[0][0] : default
|
62
77
|
end
|
63
78
|
|
79
|
+
# Options handed to ::CSV: a memoized copy of the reader options
# without `:smart`, which only this reader understands.
def csv_options
  return @csv_options if @csv_options

  forwarded = @options.dup
  forwarded.delete(:smart)
  @csv_options = forwarded
end
|
82
|
+
|
64
83
|
end # class Csv
|
65
84
|
end # module Reader
|
66
85
|
end # module Bmg
|
data/lib/bmg/reader/excel.rb
CHANGED
@@ -4,7 +4,8 @@ module Bmg
|
|
4
4
|
include Reader
|
5
5
|
|
6
6
|
# Options applied when the caller does not override them.
# `:row_num` controls the row-number attribute that `init_tuple`
# adds to each tuple.
DEFAULT_OPTIONS = {
  :skip    => 0,
  :row_num => true
}
|
9
10
|
|
10
11
|
def initialize(type, path, options = {})
|
@@ -14,6 +15,7 @@ module Bmg
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def each
|
18
|
+
return to_enum unless block_given?
|
17
19
|
require 'roo'
|
18
20
|
xlsx = Roo::Spreadsheet.open(@path, @options)
|
19
21
|
headers = nil
|
@@ -23,9 +25,13 @@ module Bmg
|
|
23
25
|
.each_with_index
|
24
26
|
.each do |row, i|
|
25
27
|
if i==0
|
26
|
-
headers = row.map
|
28
|
+
headers = row.map{|c| c.to_s.strip.to_sym }
|
27
29
|
else
|
28
|
-
|
30
|
+
init = init_tuple(i)
|
31
|
+
tuple = (0...headers.size)
|
32
|
+
.each_with_object(init){|i,t|
|
33
|
+
t[headers[i]] = row[i]
|
34
|
+
}
|
29
35
|
yield(tuple)
|
30
36
|
end
|
31
37
|
end
|
@@ -36,10 +42,23 @@ module Bmg
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def to_s
|
39
|
-
"(excel #{path})"
|
45
|
+
"(excel #{@path})"
|
40
46
|
end
|
41
47
|
alias :inspect :to_s
|
42
48
|
|
49
|
+
private
|
50
|
+
|
51
|
+
# Builds the initial Hash of a tuple, according to the `:row_num`
# option:
#
# - true          -> `{ :row_num => i }`
# - false or nil  -> `{}` (no row number attribute)
# - Symbol/String -> `{ <that name, as a Symbol> => i }`
#
# @param i the row index
# @return [Hash] the tuple seed
# @raise [ArgumentError] on any other option value. Returning nil
#   here would only fail later, when the tuple is being filled.
def init_tuple(i)
  case as = @options[:row_num]
  when true
    { :row_num => i }
  when false, nil
    {}
  when Symbol, String
    { as.to_sym => i }
  else
    raise ArgumentError, "Unexpected `row_num` option: #{as.inspect}"
  end
end
|
61
|
+
|
43
62
|
end # class Excel
|
44
63
|
end # module Reader
|
45
64
|
end # module Bmg
|
data/lib/bmg/relation.rb
CHANGED
data/lib/bmg/sequel/relation.rb
CHANGED
@@ -78,8 +78,15 @@ module Bmg
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# Translates a function call node.
#
# `:cast` is special-cased: its first argument is translated and
# `cast` is called on the result, with the type found as the last
# element of the last argument. Any other function becomes a
# `::Sequel.function` call on the translated arguments.
def on_func_call(sexpr)
  if sexpr.func_name == :cast
    operand = apply(sexpr.func_args.first)
    target  = sexpr.func_args.last.last
    operand.cast(target)
  else
    translated = sexpr.func_args.map { |arg| apply(arg) }
    ::Sequel.function(sexpr.func_name, *translated)
  end
end
|
84
91
|
|
85
92
|
def on_summarizer(sexpr)
|
data/lib/bmg/sql.rb
CHANGED
data/lib/bmg/sql/processor.rb
CHANGED
@@ -0,0 +1,105 @@
|
|
1
|
+
module Bmg
  module Sql
    class Processor
      # Rewrites the items of a select list so that a `transform` made
      # of type conversions is expressed as `cast` function calls.
      #
      # A transformation is either a Class (applied to every item),
      # an Array (applied in sequence to every item) or a Hash mapping
      # attribute names to a Class or an Array.
      class Transform < Processor

        # Splits a transformation in two parts: what the block accepts
        # and what it rejects.
        module SplitSupported
          extend(self)

          # Returns a `[supported, unsupported]` pair, where each side
          # is nil when empty.
          #
          # - Array: the longest prefix accepted by the block is
          #   supported, the rest is not, since steps are sequential.
          #   A one-element side is unwrapped to that element.
          # - Hash: each value is split on its own and the key is kept
          #   on whichever sides are not nil.
          # - anything else: the block decides for the value itself.
          def split_supported(tr, &bl)
            case tr
            when Array
              cut = tr.find_index{|x| !bl.call(x) } || tr.size
              [tr[0...cut], tr[cut..-1]].map{|part|
                case part.size
                when 0 then nil
                when 1 then part.first
                else part
                end
              }
            when Hash
              sup, unsup = {}, {}
              tr.each do |k, v|
                mine, hers = _split_supported(v, &bl)
                sup[k] = mine unless mine.nil?
                unsup[k] = hers unless hers.nil?
              end
              [sup, unsup].map{|h| h.empty? ? nil : h }
            else
              _split_supported(tr, &bl)
            end
          end

          # Splits a Hash value or a scalar: Arrays go back through
          # `split_supported`, anything else is judged by the block.
          def _split_supported(tr, &bl)
            if tr.is_a?(Array)
              split_supported(tr, &bl)
            else
              bl.call(tr) ? [tr, nil] : [nil, tr]
            end
          end
        end # module SplitSupported

        # @param transformation the Class, Array or Hash to compile
        # @param options transform options; none is supported here
        # @param builder passed to the parent Processor
        # @raise NotSupportedError when `options` is not empty
        def initialize(transformation, options, builder)
          raise NotSupportedError unless options.empty?
          super(builder)
          @transformation = transformation
        end
        attr_reader :transformation

        # Shortcut to SplitSupported.split_supported.
        def self.split_supported(*args, &bl)
          SplitSupported.split_supported(*args, &bl)
        end

        # Keeps the first element of the node untouched and applies
        # this processor to every following child.
        def on_select_list(sexpr)
          sexpr.each_with_index.map{|child,index|
            index == 0 ? child : apply(child)
          }
        end

        # Wraps the expression of a select item in the casts requested
        # for its name. The item is returned as is when nothing is
        # requested for it.
        #
        # @raise NotSupportedError when what is requested is neither
        #   a Class nor an Array
        def on_select_item(sexpr)
          as = sexpr.as_name.to_sym
          case t = transformation_for(as)
          when NilClass
            sexpr
          when Class, Array
            sexpr([:select_item,
              func_call_node(sexpr, Array(t).reverse),
              sexpr[2]
            ])
          else
            raise NotSupportedError
          end
        end

      private

        # Builds the nested `cast` calls around `sexpr[1]`.
        #
        # `ts` lists the target types outermost first, i.e. in the
        # reverse order of application. An empty list yields
        # `sexpr[1]` unchanged, instead of failing on a nil tail.
        def func_call_node(sexpr, ts)
          ts.reverse.inject(sexpr[1]) do |inside, type|
            [:func_call,
              :cast,
              inside,
              [ :literal, type ] ]
          end
        end

        # Returns what the transformation requests for attribute `as`:
        # the transformation itself for a Class or an Array, the entry
        # under `as` for a Hash.
        #
        # @raise Sql::NotSupportedError for any other transformation
        def transformation_for(as)
          case t = transformation
          when Class then t
          when Hash  then t[as]
          when Array then t
          else
            raise Sql::NotSupportedError, "Unable to use `#{as}` for `transform`"
          end
        end

      end # class Transform
    end # class Processor
  end # module Sql
end # module Bmg
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -123,27 +123,41 @@ module Bmg
|
|
123
123
|
|
124
124
|
# Compiles a rename by running the Rename processor over the
# current expression, and returns a new relation on the result.
def _rename(type, renaming)
  prepared = before_use(self.expr)
  renamed  = Processor::Rename.new(renaming, builder).call(prepared)
  _instance(type, builder, renamed)
end
|
129
129
|
|
130
130
|
# Compiles a restriction by running the Where processor over the
# current expression, and returns a new relation on the result.
def _restrict(type, predicate)
  prepared   = before_use(self.expr)
  restricted = Processor::Where.new(predicate, builder).call(prepared)
  _instance(type, builder, restricted)
end
|
135
135
|
|
136
|
-
# Compiles a summarize when every summarizer can be compiled, and
# defers to the parent implementation otherwise.
def _summarize(type, by, defs)
  summarization = ::Bmg::Summarizer.summarization(defs)
  return super unless can_compile_summarization?(summarization)

  prepared   = before_use(self.expr)
  summarized = Processor::Summarize.new(by, summarization, builder).call(prepared)
  _instance(type, builder, summarized)
end
|
146
146
|
|
147
|
+
# Compiles the part of a transformation that only uses String,
# Integer, Float, Date or DateTime, and applies the remaining part
# (if any) on top of the compiled relation.
#
# Defers to the parent implementation when nothing can be compiled
# or when Sql::NotSupportedError is raised along the way.
def _transform(type, transformation, options)
  prepared = before_use(self.expr)
  compilable, leftover = Processor::Transform.split_supported(transformation) do |candidate|
    [String, Integer, Float, Date, DateTime].include?(candidate)
  end
  return super if compilable.nil?

  casted = Processor::Transform.new(compilable, options, builder).call(prepared)
  relation = _instance(type, builder, casted)
  leftover ? relation.transform(leftover, options) : relation
rescue Sql::NotSupportedError
  super
end
|
160
|
+
|
147
161
|
def can_compile_summarization?(summarization)
|
148
162
|
summarization.values.all?{|s|
|
149
163
|
[:avg, :count, :max, :min, :sum].include?(s.to_summarizer_name)
|
@@ -153,7 +167,7 @@ module Bmg
|
|
153
167
|
def _union(type, right, options)
|
154
168
|
if right_expr = extract_compatible_sexpr(right)
|
155
169
|
expr = before_use(self.expr)
|
156
|
-
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(
|
170
|
+
expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(expr)
|
157
171
|
_instance(type, builder, expr)
|
158
172
|
else
|
159
173
|
super
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -50,6 +50,21 @@ module Bmg
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
# Converts some summarization definitions to a Hash of
# summarizers.
#
# Each definition maps a name to one of:
# - a Summarizer, kept as is
# - a Symbol, the name of a Summarizer factory method, which is
#   called with the definition's name as argument
# - a Proc, wrapped through `Summarizer.by_proc`
#
# The factory is invoked with `public_send`, so that a Symbol can
# never reach a private method (e.g. Kernel's `exit` or `system`).
#
# @raise [ArgumentError] on any other kind of definition
def self.summarization(defs)
  defs.each_with_object({}) do |(k, v), summarizers|
    summarizers[k] = case v
    when Summarizer then v
    when Symbol     then Summarizer.public_send(v, k)
    when Proc       then Summarizer.by_proc(&v)
    else
      raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
    end
  end
end
|
67
|
+
|
53
68
|
# Returns the default options to use
|
54
69
|
#
|
55
70
|
# @return the default aggregation options
|
@@ -80,7 +95,7 @@ module Bmg
|
|
80
95
|
# @param the current iterated tuple
|
81
96
|
# @return updated memo value
|
82
97
|
def happens(memo, tuple)
  # the value is extracted first, then folded into the memo
  _happens(memo, extract_value(tuple))
end
|
86
101
|
|
@@ -119,6 +134,21 @@ module Bmg
|
|
119
134
|
self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
|
120
135
|
end
|
121
136
|
|
137
|
+
protected
|
138
|
+
|
139
|
+
# Extracts the value to aggregate from `tuple`, according to the
# functor of this summarizer:
#
# - a Proc is called with the tuple
# - nil yields the tuple itself
# - anything else (typically a Symbol) is used as a key into the
#   tuple
def extract_value(tuple)
  case @functor
  when Proc     then @functor.call(tuple)
  when NilClass then tuple
  else tuple[@functor]
  end
end
|
151
|
+
|
122
152
|
end # class Summarizer
|
123
153
|
end # module Bmg
|
124
154
|
require_relative 'summarizer/count'
|
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
|
|
128
158
|
require_relative 'summarizer/avg'
|
129
159
|
require_relative 'summarizer/variance'
|
130
160
|
require_relative 'summarizer/stddev'
|
161
|
+
require_relative 'summarizer/percentile'
|
131
162
|
require_relative 'summarizer/collect'
|
163
|
+
require_relative 'summarizer/distinct'
|
132
164
|
require_relative 'summarizer/concat'
|
165
|
+
require_relative 'summarizer/by_proc'
|
166
|
+
require_relative 'summarizer/multiple'
|
167
|
+
require_relative 'summarizer/value_by'
|
data/lib/bmg/summarizer/avg.rb
CHANGED
@@ -16,13 +16,13 @@ module Bmg
|
|
16
16
|
end
|
17
17
|
|
18
18
|
# Folds one more value into the memo: the first element grows by
# `val`, the last element by one.
def _happens(memo, val)
  total, seen = memo.first, memo.last
  [total + val, seen + 1]
end
|
22
22
|
|
23
23
|
# Finalizes the computation: divides the first element of the memo
# by its last element.
def finalize(memo)
  total, seen = memo.first, memo.last
  total / seen
end
|
27
27
|
|
28
28
|
end # class Avg
|