bmg 0.18.0 → 0.18.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/bmg/algebra.rb +1 -0
- data/lib/bmg/algebra/shortcuts.rb +6 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator/allbut.rb +27 -0
- data/lib/bmg/operator/autosummarize.rb +27 -4
- data/lib/bmg/operator/autowrap.rb +27 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +41 -2
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +2 -7
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +12 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +39 -1
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +18 -0
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +5 -0
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -3
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support.rb +1 -0
- data/lib/bmg/support/ordering.rb +20 -0
- data/lib/bmg/support/tuple_transformer.rb +10 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +23 -2
data/lib/bmg/operator/rxmatch.rb
CHANGED
@@ -13,7 +13,7 @@ module Bmg
|
|
13
13
|
@type = type
|
14
14
|
@operand = operand
|
15
15
|
@by = by
|
16
|
-
@summarization =
|
16
|
+
@summarization = Summarizer.summarization(summarization)
|
17
17
|
end
|
18
18
|
|
19
19
|
protected
|
@@ -23,6 +23,7 @@ module Bmg
|
|
23
23
|
public
|
24
24
|
|
25
25
|
def each
|
26
|
+
return to_enum unless block_given?
|
26
27
|
# summary key => summarization memo, starting with least
|
27
28
|
result = Hash.new{|h,k|
|
28
29
|
h[k] = Hash[@summarization.map{|k,v|
|
@@ -56,22 +57,6 @@ module Bmg
|
|
56
57
|
[ by, summarization ]
|
57
58
|
end
|
58
59
|
|
59
|
-
private
|
60
|
-
|
61
|
-
# Compile a summarization hash so that every value is a Summarizer
|
62
|
-
# instance
|
63
|
-
def self.compile(summarization)
|
64
|
-
Hash[summarization.map{|k,v|
|
65
|
-
summarizer = case v
|
66
|
-
when Summarizer then v
|
67
|
-
when Symbol then Summarizer.send(v, k)
|
68
|
-
else
|
69
|
-
raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
|
70
|
-
end
|
71
|
-
[ k, summarizer ]
|
72
|
-
}]
|
73
|
-
end
|
74
|
-
|
75
60
|
end # class Summarize
|
76
61
|
end # module Operator
|
77
62
|
end # module Bmg
|
@@ -23,11 +23,12 @@ module Bmg
|
|
23
23
|
|
24
24
|
protected
|
25
25
|
|
26
|
-
attr_reader :transformation
|
26
|
+
attr_reader :transformation, :options
|
27
27
|
|
28
28
|
public
|
29
29
|
|
30
30
|
def each
|
31
|
+
return to_enum unless block_given?
|
31
32
|
t = transformer
|
32
33
|
@operand.each do |tuple|
|
33
34
|
yield t.call(tuple)
|
@@ -40,6 +41,43 @@ module Bmg
|
|
40
41
|
|
41
42
|
protected ### optimization
|
42
43
|
|
44
|
+
def _allbut(type, butlist)
|
45
|
+
# `allbut` can always be pushed down the tree. unlike
|
46
|
+
# `extend` the Proc that might be used cannot use attributes
|
47
|
+
# in butlist, so it's safe to strip them away.
|
48
|
+
if transformer.knows_attrlist?
|
49
|
+
# We just need to clean the transformation
|
50
|
+
attrlist = transformer.to_attrlist
|
51
|
+
thrown = attrlist & butlist
|
52
|
+
t = transformation.dup.reject{|k,v| thrown.include?(k) }
|
53
|
+
operand.allbut(butlist).transform(t, options)
|
54
|
+
else
|
55
|
+
operand.allbut(butlist).transform(transformation, options)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def _project(type, attrlist)
|
60
|
+
if transformer.knows_attrlist?
|
61
|
+
t = transformation.dup.select{|k,v| attrlist.include?(k) }
|
62
|
+
operand.project(attrlist).transform(t, options)
|
63
|
+
else
|
64
|
+
operand.project(attrlist).transform(transformation, options)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def _restrict(type, predicate)
|
69
|
+
return super unless transformer.knows_attrlist?
|
70
|
+
top, bottom = predicate.and_split(transformer.to_attrlist)
|
71
|
+
if top == predicate
|
72
|
+
super
|
73
|
+
else
|
74
|
+
operand
|
75
|
+
.restrict(bottom)
|
76
|
+
.transform(transformation, options)
|
77
|
+
.restrict(top)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
43
81
|
protected ### inspect
|
44
82
|
|
45
83
|
def args
|
data/lib/bmg/operator/union.rb
CHANGED
data/lib/bmg/reader/csv.rb
CHANGED
@@ -5,30 +5,36 @@ module Bmg
|
|
5
5
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
7
|
:headers => true,
|
8
|
-
:return_headers => false
|
8
|
+
:return_headers => false,
|
9
|
+
:smart => true
|
9
10
|
}
|
10
11
|
|
11
|
-
def initialize(type,
|
12
|
+
def initialize(type, path_or_io, options = {})
|
12
13
|
@type = type
|
13
|
-
@
|
14
|
+
@path_or_io = path_or_io
|
14
15
|
@options = DEFAULT_OPTIONS.merge(options)
|
15
|
-
@options[:
|
16
|
-
|
16
|
+
if @options[:smart] && !@path_or_io.is_a?(IO)
|
17
|
+
@options[:col_sep] ||= infer_col_sep
|
18
|
+
@options[:quote_char] ||= infer_quote_char
|
19
|
+
end
|
17
20
|
end
|
18
21
|
|
19
22
|
def each
|
23
|
+
return to_enum unless block_given?
|
20
24
|
require 'csv'
|
21
|
-
|
22
|
-
|
25
|
+
with_io do |io|
|
26
|
+
::CSV.new(io, **csv_options).each do |row|
|
27
|
+
yield tuple(row)
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
26
32
|
def to_ast
|
27
|
-
[ :csv, @
|
33
|
+
[ :csv, @path_or_io, @options ]
|
28
34
|
end
|
29
35
|
|
30
36
|
def to_s
|
31
|
-
"(csv #{
|
37
|
+
"(csv #{@path_or_io})"
|
32
38
|
end
|
33
39
|
alias :inspect :to_s
|
34
40
|
|
@@ -47,7 +53,16 @@ module Bmg
|
|
47
53
|
end
|
48
54
|
|
49
55
|
def text_portion
|
50
|
-
@text_portion ||=
|
56
|
+
@text_portion ||= with_io{|io| io.readlines(10).join("\n") }
|
57
|
+
end
|
58
|
+
|
59
|
+
def with_io(&bl)
|
60
|
+
case @path_or_io
|
61
|
+
when IO, StringIO
|
62
|
+
bl.call(@path_or_io)
|
63
|
+
else
|
64
|
+
File.open(@path_or_io, "r", &bl)
|
65
|
+
end
|
51
66
|
end
|
52
67
|
|
53
68
|
# Finds the best candidate among `candidates` for a separator
|
@@ -61,6 +76,10 @@ module Bmg
|
|
61
76
|
snif.size > 0 ? snif[0][0] : default
|
62
77
|
end
|
63
78
|
|
79
|
+
def csv_options
|
80
|
+
@csv_options ||= @options.dup.tap{|opts| opts.delete(:smart) }
|
81
|
+
end
|
82
|
+
|
64
83
|
end # class Csv
|
65
84
|
end # module Reader
|
66
85
|
end # module Bmg
|
data/lib/bmg/reader/excel.rb
CHANGED
@@ -4,7 +4,8 @@ module Bmg
|
|
4
4
|
include Reader
|
5
5
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
|
-
skip: 0
|
7
|
+
skip: 0,
|
8
|
+
row_num: true
|
8
9
|
}
|
9
10
|
|
10
11
|
def initialize(type, path, options = {})
|
@@ -14,6 +15,7 @@ module Bmg
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def each
|
18
|
+
return to_enum unless block_given?
|
17
19
|
require 'roo'
|
18
20
|
xlsx = Roo::Spreadsheet.open(@path, @options)
|
19
21
|
headers = nil
|
@@ -23,9 +25,13 @@ module Bmg
|
|
23
25
|
.each_with_index
|
24
26
|
.each do |row, i|
|
25
27
|
if i==0
|
26
|
-
headers = row.map
|
28
|
+
headers = row.map{|c| c.to_s.strip.to_sym }
|
27
29
|
else
|
28
|
-
|
30
|
+
init = init_tuple(i)
|
31
|
+
tuple = (0...headers.size)
|
32
|
+
.each_with_object(init){|i,t|
|
33
|
+
t[headers[i]] = row[i]
|
34
|
+
}
|
29
35
|
yield(tuple)
|
30
36
|
end
|
31
37
|
end
|
@@ -36,10 +42,23 @@ module Bmg
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def to_s
|
39
|
-
"(excel #{path})"
|
45
|
+
"(excel #{@path})"
|
40
46
|
end
|
41
47
|
alias :inspect :to_s
|
42
48
|
|
49
|
+
private
|
50
|
+
|
51
|
+
def init_tuple(i)
|
52
|
+
case as = @options[:row_num]
|
53
|
+
when TrueClass
|
54
|
+
{ :row_num => i }
|
55
|
+
when FalseClass
|
56
|
+
{}
|
57
|
+
when Symbol
|
58
|
+
{ :"#{as}" => i }
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
43
62
|
end # class Excel
|
44
63
|
end # module Reader
|
45
64
|
end # module Bmg
|
data/lib/bmg/relation.rb
CHANGED
@@ -27,6 +27,12 @@ module Bmg
|
|
27
27
|
}
|
28
28
|
end
|
29
29
|
|
30
|
+
def with_type_attrlist
|
31
|
+
return self if type.knows_attrlist?
|
32
|
+
attrs = self.first.keys
|
33
|
+
with_type(type.with_attrlist(attrs))
|
34
|
+
end
|
35
|
+
|
30
36
|
def with_typecheck
|
31
37
|
dup.tap{|r|
|
32
38
|
r.type = r.type.with_typecheck
|
@@ -110,6 +116,18 @@ module Bmg
|
|
110
116
|
end
|
111
117
|
end
|
112
118
|
|
119
|
+
def count
|
120
|
+
if type.knows_keys?
|
121
|
+
project(type.keys.first)._count
|
122
|
+
else
|
123
|
+
self._count
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def _count
|
128
|
+
to_a.size
|
129
|
+
end
|
130
|
+
|
113
131
|
# Returns a json representation
|
114
132
|
def to_json(*args, &bl)
|
115
133
|
to_a.to_json(*args, &bl)
|
data/lib/bmg/relation/empty.rb
CHANGED
@@ -8,7 +8,6 @@ module Bmg
|
|
8
8
|
@type = type
|
9
9
|
end
|
10
10
|
attr_accessor :type
|
11
|
-
protected :type=
|
12
11
|
attr_reader :operand
|
13
12
|
|
14
13
|
public
|
@@ -17,6 +16,16 @@ module Bmg
|
|
17
16
|
@operand.each(&bl)
|
18
17
|
end
|
19
18
|
|
19
|
+
def _count
|
20
|
+
if operand.respond_to?(:count)
|
21
|
+
operand.count
|
22
|
+
elsif operand.respond_to?(:size)
|
23
|
+
operand.size
|
24
|
+
else
|
25
|
+
super
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
20
29
|
def to_ast
|
21
30
|
[ :in_memory, operand ]
|
22
31
|
end
|
data/lib/bmg/relation/spied.rb
CHANGED
data/lib/bmg/sequel/relation.rb
CHANGED
@@ -9,6 +9,7 @@ module Bmg
|
|
9
9
|
attr_reader :sequel_db
|
10
10
|
|
11
11
|
def each(&bl)
|
12
|
+
return to_enum unless block_given?
|
12
13
|
dataset.each(&bl)
|
13
14
|
end
|
14
15
|
|
@@ -33,6 +34,10 @@ module Bmg
|
|
33
34
|
base_table.update(arg)
|
34
35
|
end
|
35
36
|
|
37
|
+
def _count
|
38
|
+
dataset.count
|
39
|
+
end
|
40
|
+
|
36
41
|
def to_ast
|
37
42
|
[:sequel, dataset.sql]
|
38
43
|
end
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -10,7 +10,6 @@ module Bmg
|
|
10
10
|
end
|
11
11
|
|
12
12
|
attr_accessor :type
|
13
|
-
protected :type=
|
14
13
|
|
15
14
|
protected
|
16
15
|
|
@@ -134,8 +133,8 @@ module Bmg
|
|
134
133
|
_instance(type, builder, expr)
|
135
134
|
end
|
136
135
|
|
137
|
-
def _summarize(type, by,
|
138
|
-
summarization =
|
136
|
+
def _summarize(type, by, defs)
|
137
|
+
summarization = ::Bmg::Summarizer.summarization(defs)
|
139
138
|
if can_compile_summarization?(summarization)
|
140
139
|
expr = before_use(self.expr)
|
141
140
|
expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -50,6 +50,21 @@ module Bmg
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
# Converts some summarization definitions to a Hash of
|
54
|
+
# summarizers.
|
55
|
+
def self.summarization(defs)
|
56
|
+
Hash[defs.map{|k,v|
|
57
|
+
summarizer = case v
|
58
|
+
when Summarizer then v
|
59
|
+
when Symbol then Summarizer.send(v, k)
|
60
|
+
when Proc then Summarizer.by_proc(&v)
|
61
|
+
else
|
62
|
+
raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
|
63
|
+
end
|
64
|
+
[ k, summarizer ]
|
65
|
+
}]
|
66
|
+
end
|
67
|
+
|
53
68
|
# Returns the default options to use
|
54
69
|
#
|
55
70
|
# @return the default aggregation options
|
@@ -80,7 +95,7 @@ module Bmg
|
|
80
95
|
# @param the current iterated tuple
|
81
96
|
# @return updated memo value
|
82
97
|
def happens(memo, tuple)
|
83
|
-
value =
|
98
|
+
value = extract_value(tuple)
|
84
99
|
_happens(memo, value)
|
85
100
|
end
|
86
101
|
|
@@ -119,6 +134,21 @@ module Bmg
|
|
119
134
|
self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
|
120
135
|
end
|
121
136
|
|
137
|
+
protected
|
138
|
+
|
139
|
+
def extract_value(tuple)
|
140
|
+
value = case @functor
|
141
|
+
when Proc
|
142
|
+
@functor.call(tuple)
|
143
|
+
when NilClass
|
144
|
+
tuple
|
145
|
+
when Symbol
|
146
|
+
tuple[@functor]
|
147
|
+
else
|
148
|
+
tuple[@functor]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
122
152
|
end # class Summarizer
|
123
153
|
end # module Bmg
|
124
154
|
require_relative 'summarizer/count'
|
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
|
|
128
158
|
require_relative 'summarizer/avg'
|
129
159
|
require_relative 'summarizer/variance'
|
130
160
|
require_relative 'summarizer/stddev'
|
161
|
+
require_relative 'summarizer/percentile'
|
131
162
|
require_relative 'summarizer/collect'
|
163
|
+
require_relative 'summarizer/distinct'
|
132
164
|
require_relative 'summarizer/concat'
|
165
|
+
require_relative 'summarizer/by_proc'
|
166
|
+
require_relative 'summarizer/multiple'
|
167
|
+
require_relative 'summarizer/value_by'
|