bmg 0.18.1 → 0.18.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/bmg/algebra.rb +18 -0
- data/lib/bmg/algebra/shortcuts.rb +8 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator.rb +2 -0
- data/lib/bmg/operator/allbut.rb +9 -4
- data/lib/bmg/operator/autosummarize.rb +7 -0
- data/lib/bmg/operator/autowrap.rb +19 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +15 -0
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +1 -0
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +12 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +1 -0
- data/lib/bmg/operator/ungroup.rb +61 -0
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/operator/unwrap.rb +47 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +18 -0
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +5 -0
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -2
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support/keys.rb +5 -0
- data/lib/bmg/support/tuple_transformer.rb +10 -1
- data/lib/bmg/type.rb +19 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +24 -2
@@ -0,0 +1,61 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Ungroup operator.
    #
    # Flattens the relation-valued attributes listed in `attrs`: every operand
    # tuple is replaced by one tuple per nested tuple, the nested tuple being
    # merged into the outer one and the relation-valued attribute itself being
    # removed. Several attributes are flattened one after the other.
    #
    class Ungroup
      include Operator::Unary

      # @param type    the type of the resulting relation
      # @param operand the relation to ungroup
      # @param attrs   the list of relation-valued attribute names to flatten
      def initialize(type, operand, attrs)
        @type = type
        @operand = operand
        @attrs = attrs
      end

    protected

      attr_reader :attrs

    public

      # Yields every flattened tuple; returns an Enumerator when no block is
      # given.
      def each(&bl)
        return to_enum unless block_given?

        # Nothing to flatten: the result is the operand itself. Without this
        # guard `attrs[0]` is nil, `tuple[nil]` is looked up and every tuple
        # would silently be dropped.
        return operand.each(&bl) if attrs.empty?

        if type.knows_keys? && type.keys.any?{|k| (k & attrs).empty? }
          # Some known key shares no attribute with the ungrouped ones:
          # tuples are streamed without any duplicate removal.
          # NOTE(review): presumably such a key keeps flattened tuples
          # distinct -- confirm against the type inference rules.
          operand.each do |tuple|
            _each(tuple, attrs[0], attrs[1..-1], &bl)
          end
        else
          # Otherwise collect everything first and remove duplicates.
          with_dups = []
          operand.each do |tuple|
            _each(tuple, attrs[0], attrs[1..-1]){|flat|
              with_dups << flat
            }
          end
          with_dups.uniq.each(&bl)
        end
      end

      # Flattens `attr` on `tuple`, then recurses on the remaining `attrs`.
      # A missing or nil relation-valued attribute is treated as empty, hence
      # yields nothing for that tuple.
      def _each(tuple, attr, attrs, &bl)
        rva = tuple[attr] || []
        rva.each do |rvt|
          flat = tuple.merge(rvt)
          flat.delete(attr)
          if attrs.empty?
            yield(flat)
          else
            _each(flat, attrs[0], attrs[1..-1], &bl)
          end
        end
      end

      def to_ast
        [ :ungroup, operand.to_ast, attrs ]
      end

    protected ### inspect

      def args
        [ attrs ]
      end

    end # class Ungroup
  end # module Operator
end # module Bmg
|
data/lib/bmg/operator/union.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Unwrap operator.
    #
    # Replaces each tuple-valued attribute listed in `attrs` by the
    # attributes of the tuple it holds: the nested tuple is merged into the
    # outer one and the wrapping attribute is removed.
    #
    class Unwrap
      include Operator::Unary

      # @param type    the type of the resulting relation
      # @param operand the relation to unwrap
      # @param attrs   the list of tuple-valued attribute names to unwrap
      def initialize(type, operand, attrs)
        @type = type
        @operand = operand
        @attrs = attrs
      end

    protected

      attr_reader :attrs

    public

      # Yields one unwrapped tuple per operand tuple; returns an Enumerator
      # when no block is given.
      def each(&bl)
        return to_enum unless block_given?
        operand.each do |tuple|
          yield tuple_unwrap(tuple)
        end
      end

      def to_ast
        [ :unwrap, operand.to_ast, attrs ]
      end

    protected

      # Builds the unwrapped version of `tuple`, leaving `tuple` untouched.
      #
      # Nested tuples are always read from the original `tuple`. A missing or
      # nil wrapped attribute is treated as an empty tuple (instead of making
      # Hash#merge raise a TypeError), which mirrors how Ungroup treats a nil
      # relation-valued attribute.
      def tuple_unwrap(tuple)
        attrs.inject(tuple.dup){|unwrapped, attr|
          merged = unwrapped.merge(tuple[attr] || {})
          merged.delete(attr)
          merged
        }
      end

    protected ### inspect

      def args
        [ attrs ]
      end

    end # class Unwrap
  end # module Operator
end # module Bmg
|
data/lib/bmg/reader/csv.rb
CHANGED
@@ -5,30 +5,36 @@ module Bmg
|
|
5
5
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
7
|
:headers => true,
|
8
|
-
:return_headers => false
|
8
|
+
:return_headers => false,
|
9
|
+
:smart => true
|
9
10
|
}
|
10
11
|
|
11
|
-
def initialize(type,
|
12
|
+
def initialize(type, path_or_io, options = {})
|
12
13
|
@type = type
|
13
|
-
@
|
14
|
+
@path_or_io = path_or_io
|
14
15
|
@options = DEFAULT_OPTIONS.merge(options)
|
15
|
-
@options[:
|
16
|
-
|
16
|
+
if @options[:smart] && !@path_or_io.is_a?(IO)
|
17
|
+
@options[:col_sep] ||= infer_col_sep
|
18
|
+
@options[:quote_char] ||= infer_quote_char
|
19
|
+
end
|
17
20
|
end
|
18
21
|
|
19
22
|
def each
|
23
|
+
return to_enum unless block_given?
|
20
24
|
require 'csv'
|
21
|
-
|
22
|
-
|
25
|
+
with_io do |io|
|
26
|
+
::CSV.new(io, **csv_options).each do |row|
|
27
|
+
yield tuple(row)
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
26
32
|
def to_ast
|
27
|
-
[ :csv, @
|
33
|
+
[ :csv, @path_or_io, @options ]
|
28
34
|
end
|
29
35
|
|
30
36
|
def to_s
|
31
|
-
"(csv #{
|
37
|
+
"(csv #{@path_or_io})"
|
32
38
|
end
|
33
39
|
alias :inspect :to_s
|
34
40
|
|
@@ -47,7 +53,16 @@ module Bmg
|
|
47
53
|
end
|
48
54
|
|
49
55
|
# Returns the first ten lines of the input as a single String, memoized in
# `@text_portion`.
#
# `IO#readlines(10)` would not do that: an Integer argument is a per-line
# byte limit, so it reads the whole input in chunks of at most 10 bytes.
# `each_line.first(10)` stops reading after ten lines. Lines keep their own
# terminator, hence the plain `join`.
def text_portion
  @text_portion ||= with_io{|io| io.each_line.first(10).join }
end
|
58
|
+
|
59
|
+
# Calls the block with an IO-like object reading `@path_or_io` and returns
# the block result.
#
# * A StringIO is passed as is and its position is restored afterwards, so
#   that a first pass (e.g. sniffing a text portion) does not leave later
#   reads at end of stream.
# * An IO is passed as is; its position is left to the caller.
# * Anything else is taken as a path and opened read-only for the duration
#   of the block.
#
# `StringIO` is only referenced when the constant is defined: if it is not
# loaded, `@path_or_io` cannot be one, and a bare constant lookup would
# raise NameError.
def with_io(&bl)
  source = @path_or_io
  if defined?(::StringIO) && source.is_a?(::StringIO)
    start = source.pos
    begin
      bl.call(source)
    ensure
      source.pos = start unless source.closed?
    end
  elsif source.is_a?(IO)
    bl.call(source)
  else
    File.open(source, "r", &bl)
  end
end
|
52
67
|
|
53
68
|
# Finds the best candidate among `candidates` for a separator
|
@@ -61,6 +76,10 @@ module Bmg
|
|
61
76
|
snif.size > 0 ? snif[0][0] : default
|
62
77
|
end
|
63
78
|
|
79
|
+
# Options handed to ::CSV: a memoized copy of `@options` without the
# `:smart` entry. `@options` itself is left untouched.
def csv_options
  @csv_options ||= @options.reject{|name, _| name == :smart }
end
|
82
|
+
|
64
83
|
end # class Csv
|
65
84
|
end # module Reader
|
66
85
|
end # module Bmg
|
data/lib/bmg/reader/excel.rb
CHANGED
@@ -4,7 +4,8 @@ module Bmg
|
|
4
4
|
include Reader
|
5
5
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
|
-
skip: 0
|
7
|
+
skip: 0,
|
8
|
+
row_num: true
|
8
9
|
}
|
9
10
|
|
10
11
|
def initialize(type, path, options = {})
|
@@ -14,6 +15,7 @@ module Bmg
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def each
|
18
|
+
return to_enum unless block_given?
|
17
19
|
require 'roo'
|
18
20
|
xlsx = Roo::Spreadsheet.open(@path, @options)
|
19
21
|
headers = nil
|
@@ -23,9 +25,13 @@ module Bmg
|
|
23
25
|
.each_with_index
|
24
26
|
.each do |row, i|
|
25
27
|
if i==0
|
26
|
-
headers = row.map
|
28
|
+
headers = row.map{|c| c.to_s.strip.to_sym }
|
27
29
|
else
|
28
|
-
|
30
|
+
init = init_tuple(i)
|
31
|
+
tuple = (0...headers.size)
|
32
|
+
.each_with_object(init){|i,t|
|
33
|
+
t[headers[i]] = row[i]
|
34
|
+
}
|
29
35
|
yield(tuple)
|
30
36
|
end
|
31
37
|
end
|
@@ -36,10 +42,23 @@ module Bmg
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def to_s
|
39
|
-
"(excel #{path})"
|
45
|
+
"(excel #{@path})"
|
40
46
|
end
|
41
47
|
alias :inspect :to_s
|
42
48
|
|
49
|
+
private
|
50
|
+
|
51
|
+
# Builds the initial tuple for row number `i`, according to the `:row_num`
# option:
#
# * true            -> { :row_num => i }
# * false or nil    -> {} (no row number attribute)
# * Symbol / String -> { <that name> => i }
#
# @raise [ArgumentError] on any other option value, instead of returning
#   nil and letting the caller fail later on a nil tuple.
def init_tuple(i)
  case as = @options[:row_num]
  when true
    { :row_num => i }
  when false, nil
    {}
  when Symbol, String
    { as.to_sym => i }
  else
    raise ArgumentError, "Unexpected :row_num option `#{as.inspect}`"
  end
end
|
61
|
+
|
43
62
|
end # class Excel
|
44
63
|
end # module Reader
|
45
64
|
end # module Bmg
|
data/lib/bmg/relation.rb
CHANGED
@@ -27,6 +27,12 @@ module Bmg
|
|
27
27
|
}
|
28
28
|
end
|
29
29
|
|
30
|
+
# Returns a relation whose type knows its attribute list.
#
# When the type already knows it, self is returned. Otherwise the list is
# taken from the keys of the first tuple. An empty relation has no tuple to
# look at: self is returned unchanged instead of failing on `nil.keys`.
def with_type_attrlist
  return self if type.knows_attrlist?
  sample = first
  return self if sample.nil?
  with_type(type.with_attrlist(sample.keys))
end
|
35
|
+
|
30
36
|
def with_typecheck
|
31
37
|
dup.tap{|r|
|
32
38
|
r.type = r.type.with_typecheck
|
@@ -110,6 +116,18 @@ module Bmg
|
|
110
116
|
end
|
111
117
|
end
|
112
118
|
|
119
|
+
# Returns the number of tuples.
#
# When the type knows its keys, the count is taken on the projection over
# the first of them; otherwise on the relation itself.
# NOTE(review): presumably projecting on a key is an equivalent but cheaper
# count -- confirm.
def count
  return _count unless type.knows_keys?
  project(type.keys.first)._count
end
|
126
|
+
|
127
|
+
# Default counting strategy: materializes the tuples with `to_a` and
# returns how many there are.
def _count
  to_a.length
end
|
130
|
+
|
113
131
|
# Returns a json representation
|
114
132
|
def to_json(*args, &bl)
|
115
133
|
to_a.to_json(*args, &bl)
|
data/lib/bmg/relation/empty.rb
CHANGED
@@ -8,7 +8,6 @@ module Bmg
|
|
8
8
|
@type = type
|
9
9
|
end
|
10
10
|
attr_accessor :type
|
11
|
-
protected :type=
|
12
11
|
attr_reader :operand
|
13
12
|
|
14
13
|
public
|
@@ -17,6 +16,16 @@ module Bmg
|
|
17
16
|
@operand.each(&bl)
|
18
17
|
end
|
19
18
|
|
19
|
+
# Counts the tuples by asking the operand directly when it can answer:
# `count` is tried first, then `size`. Falls back to the inherited
# implementation when the operand responds to neither.
def _count
  return operand.count if operand.respond_to?(:count)
  return operand.size if operand.respond_to?(:size)
  super
end
|
28
|
+
|
20
29
|
def to_ast
|
21
30
|
[ :in_memory, operand ]
|
22
31
|
end
|
data/lib/bmg/relation/spied.rb
CHANGED
data/lib/bmg/sequel/relation.rb
CHANGED
@@ -9,6 +9,7 @@ module Bmg
|
|
9
9
|
attr_reader :sequel_db
|
10
10
|
|
11
11
|
def each(&bl)
|
12
|
+
return to_enum unless block_given?
|
12
13
|
dataset.each(&bl)
|
13
14
|
end
|
14
15
|
|
@@ -33,6 +34,10 @@ module Bmg
|
|
33
34
|
base_table.update(arg)
|
34
35
|
end
|
35
36
|
|
37
|
+
# Delegates the counting to the underlying dataset.
# NOTE(review): presumably this lets the database count instead of loading
# the tuples -- confirm against the dataset API.
def _count
  dataset.count
end
|
40
|
+
|
36
41
|
def to_ast
|
37
42
|
[:sequel, dataset.sql]
|
38
43
|
end
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -133,8 +133,8 @@ module Bmg
|
|
133
133
|
_instance(type, builder, expr)
|
134
134
|
end
|
135
135
|
|
136
|
-
def _summarize(type, by,
|
137
|
-
summarization =
|
136
|
+
def _summarize(type, by, defs)
|
137
|
+
summarization = ::Bmg::Summarizer.summarization(defs)
|
138
138
|
if can_compile_summarization?(summarization)
|
139
139
|
expr = before_use(self.expr)
|
140
140
|
expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -50,6 +50,21 @@ module Bmg
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
# Converts some summarization definitions to a Hash of
# summarizers.
#
# `defs` maps result attribute names to one of:
#
# * a Summarizer, kept as is;
# * a Symbol, naming a public Summarizer factory that is called with the
#   attribute name;
# * a Proc, passed as the block of `Summarizer.by_proc`.
#
# The factory is invoked with `public_send`, so a definition cannot reach
# private methods.
#
# @raise [ArgumentError] on any other kind of definition
def self.summarization(defs)
  defs.each_with_object({}) do |(name, spec), summarizers|
    summarizers[name] = case spec
    when Summarizer then spec
    when Symbol     then Summarizer.public_send(spec, name)
    when Proc       then Summarizer.by_proc(&spec)
    else
      raise ArgumentError, "Unexpected summarizer #{name} => #{spec}"
    end
  end
end
|
67
|
+
|
53
68
|
# Returns the default options to use
|
54
69
|
#
|
55
70
|
# @return the default aggregation options
|
@@ -80,7 +95,7 @@ module Bmg
|
|
80
95
|
# @param the current iterated tuple
|
81
96
|
# @return updated memo value
|
82
97
|
def happens(memo, tuple)
|
83
|
-
value =
|
98
|
+
value = extract_value(tuple)
|
84
99
|
_happens(memo, value)
|
85
100
|
end
|
86
101
|
|
@@ -119,6 +134,21 @@ module Bmg
|
|
119
134
|
self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
|
120
135
|
end
|
121
136
|
|
137
|
+
protected
|
138
|
+
|
139
|
+
# Extracts from `tuple` the value to be summarized, according to
# `@functor`:
#
# * a Proc is called with the tuple;
# * nil means the tuple itself;
# * anything else (typically a Symbol) is used as a key into the tuple.
def extract_value(tuple)
  case @functor
  when Proc     then @functor.call(tuple)
  when NilClass then tuple
  else               tuple[@functor]
  end
end
|
151
|
+
|
122
152
|
end # class Summarizer
|
123
153
|
end # module Bmg
|
124
154
|
require_relative 'summarizer/count'
|
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
|
|
128
158
|
require_relative 'summarizer/avg'
|
129
159
|
require_relative 'summarizer/variance'
|
130
160
|
require_relative 'summarizer/stddev'
|
161
|
+
require_relative 'summarizer/percentile'
|
131
162
|
require_relative 'summarizer/collect'
|
163
|
+
require_relative 'summarizer/distinct'
|
132
164
|
require_relative 'summarizer/concat'
|
165
|
+
require_relative 'summarizer/by_proc'
|
166
|
+
require_relative 'summarizer/multiple'
|
167
|
+
require_relative 'summarizer/value_by'
|