bmg 0.17.7 → 0.18.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -3
- data/README.md +236 -57
- data/lib/bmg.rb +6 -0
- data/lib/bmg/algebra.rb +1 -0
- data/lib/bmg/algebra/shortcuts.rb +14 -0
- data/lib/bmg/operator/allbut.rb +27 -0
- data/lib/bmg/operator/autosummarize.rb +27 -4
- data/lib/bmg/operator/autowrap.rb +27 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +41 -2
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +2 -7
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +7 -0
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +39 -1
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/reader.rb +1 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +22 -3
- data/lib/bmg/reader/text_file.rb +56 -0
- data/lib/bmg/relation.rb +31 -2
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +6 -1
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -3
- data/lib/bmg/summarizer.rb +29 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +47 -0
- data/lib/bmg/support.rb +2 -0
- data/lib/bmg/support/ordering.rb +20 -0
- data/lib/bmg/support/output_preferences.rb +44 -0
- data/lib/bmg/support/tuple_algebra.rb +6 -0
- data/lib/bmg/support/tuple_transformer.rb +4 -5
- data/lib/bmg/version.rb +2 -2
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +7 -7
- data/lib/bmg/writer/xlsx.rb +68 -0
- data/tasks/test.rake +9 -2
- metadata +37 -15
data/lib/bmg/operator/union.rb
CHANGED
data/lib/bmg/reader.rb
CHANGED
data/lib/bmg/reader/csv.rb
CHANGED
@@ -5,30 +5,36 @@ module Bmg
|
|
5
5
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
7
|
:headers => true,
|
8
|
-
:return_headers => false
|
8
|
+
:return_headers => false,
|
9
|
+
:smart => true
|
9
10
|
}
|
10
11
|
|
11
|
-
def initialize(type,
|
12
|
+
def initialize(type, path_or_io, options = {})
|
12
13
|
@type = type
|
13
|
-
@
|
14
|
+
@path_or_io = path_or_io
|
14
15
|
@options = DEFAULT_OPTIONS.merge(options)
|
15
|
-
@options[:
|
16
|
-
|
16
|
+
if @options[:smart] && !@path_or_io.is_a?(IO)
|
17
|
+
@options[:col_sep] ||= infer_col_sep
|
18
|
+
@options[:quote_char] ||= infer_quote_char
|
19
|
+
end
|
17
20
|
end
|
18
21
|
|
19
22
|
def each
|
23
|
+
return to_enum unless block_given?
|
20
24
|
require 'csv'
|
21
|
-
|
22
|
-
|
25
|
+
with_io do |io|
|
26
|
+
::CSV.new(io, csv_options).each do |row|
|
27
|
+
yield tuple(row)
|
28
|
+
end
|
23
29
|
end
|
24
30
|
end
|
25
31
|
|
26
32
|
def to_ast
|
27
|
-
[ :csv, @
|
33
|
+
[ :csv, @path_or_io, @options ]
|
28
34
|
end
|
29
35
|
|
30
36
|
def to_s
|
31
|
-
"(csv #{
|
37
|
+
"(csv #{@path_or_io})"
|
32
38
|
end
|
33
39
|
alias :inspect :to_s
|
34
40
|
|
@@ -47,7 +53,16 @@ module Bmg
|
|
47
53
|
end
|
48
54
|
|
49
55
|
def text_portion
|
50
|
-
@text_portion ||=
|
56
|
+
@text_portion ||= with_io{|io| io.readlines(10).join("\n") }
|
57
|
+
end
|
58
|
+
|
59
|
+
def with_io(&bl)
|
60
|
+
case @path_or_io
|
61
|
+
when IO, StringIO
|
62
|
+
bl.call(@path_or_io)
|
63
|
+
else
|
64
|
+
File.open(@path_or_io, "r", &bl)
|
65
|
+
end
|
51
66
|
end
|
52
67
|
|
53
68
|
# Finds the best candidate among `candidates` for a separator
|
@@ -61,6 +76,10 @@ module Bmg
|
|
61
76
|
snif.size > 0 ? snif[0][0] : default
|
62
77
|
end
|
63
78
|
|
79
|
+
def csv_options
|
80
|
+
@csv_options ||= @options.dup.tap{|opts| opts.delete(:smart) }
|
81
|
+
end
|
82
|
+
|
64
83
|
end # class Csv
|
65
84
|
end # module Reader
|
66
85
|
end # module Bmg
|
data/lib/bmg/reader/excel.rb
CHANGED
@@ -4,7 +4,8 @@ module Bmg
|
|
4
4
|
include Reader
|
5
5
|
|
6
6
|
DEFAULT_OPTIONS = {
|
7
|
-
skip: 0
|
7
|
+
skip: 0,
|
8
|
+
row_num: true
|
8
9
|
}
|
9
10
|
|
10
11
|
def initialize(type, path, options = {})
|
@@ -14,6 +15,7 @@ module Bmg
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def each
|
18
|
+
return to_enum unless block_given?
|
17
19
|
require 'roo'
|
18
20
|
xlsx = Roo::Spreadsheet.open(@path, @options)
|
19
21
|
headers = nil
|
@@ -25,7 +27,11 @@ module Bmg
|
|
25
27
|
if i==0
|
26
28
|
headers = row.map(&:to_sym)
|
27
29
|
else
|
28
|
-
|
30
|
+
init = init_tuple(i)
|
31
|
+
tuple = (0...headers.size)
|
32
|
+
.each_with_object(init){|i,t|
|
33
|
+
t[headers[i]] = row[i]
|
34
|
+
}
|
29
35
|
yield(tuple)
|
30
36
|
end
|
31
37
|
end
|
@@ -36,10 +42,23 @@ module Bmg
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def to_s
|
39
|
-
"(excel #{path})"
|
45
|
+
"(excel #{@path})"
|
40
46
|
end
|
41
47
|
alias :inspect :to_s
|
42
48
|
|
49
|
+
private
|
50
|
+
|
51
|
+
def init_tuple(i)
|
52
|
+
case as = @options[:row_num]
|
53
|
+
when TrueClass
|
54
|
+
{ :row_num => i }
|
55
|
+
when FalseClass
|
56
|
+
{}
|
57
|
+
when Symbol
|
58
|
+
{ :"#{as}" => i }
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
43
62
|
end # class Excel
|
44
63
|
end # module Reader
|
45
64
|
end # module Bmg
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Reader
|
3
|
+
class TextFile
|
4
|
+
include Reader
|
5
|
+
|
6
|
+
DEFAULT_OPTIONS = {
|
7
|
+
strip: true,
|
8
|
+
parse: nil
|
9
|
+
}
|
10
|
+
|
11
|
+
def initialize(type, path, options = {})
|
12
|
+
options = { parse: options } if options.is_a?(Regexp)
|
13
|
+
@path = path
|
14
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
15
|
+
@type = infer_type(type)
|
16
|
+
end
|
17
|
+
attr_reader :path, :options
|
18
|
+
|
19
|
+
public # Relation
|
20
|
+
|
21
|
+
def each
|
22
|
+
path.each_line.each_with_index do |text, line|
|
23
|
+
text = text.strip if strip?
|
24
|
+
parsed = parse(text)
|
25
|
+
yield({line: 1+line}.merge(parsed)) if parsed
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def infer_type(base)
|
32
|
+
return base unless base == Bmg::Type::ANY
|
33
|
+
attr_list = if rx = options[:parse]
|
34
|
+
[:line] + rx.names.map(&:to_sym)
|
35
|
+
else
|
36
|
+
[:line, :text]
|
37
|
+
end
|
38
|
+
base
|
39
|
+
.with_attrlist(attr_list)
|
40
|
+
.with_keys([[:line]])
|
41
|
+
end
|
42
|
+
|
43
|
+
def strip?
|
44
|
+
options[:strip]
|
45
|
+
end
|
46
|
+
|
47
|
+
def parse(text)
|
48
|
+
return { text: text } unless rx = options[:parse]
|
49
|
+
if match = rx.match(text)
|
50
|
+
TupleAlgebra.symbolize_keys(match.named_captures)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
end # class TextFile
|
55
|
+
end # module Reader
|
56
|
+
end # module Bmg
|
data/lib/bmg/relation.rb
CHANGED
@@ -17,6 +17,22 @@ module Bmg
|
|
17
17
|
self
|
18
18
|
end
|
19
19
|
|
20
|
+
def type
|
21
|
+
Bmg::Type::ANY
|
22
|
+
end
|
23
|
+
|
24
|
+
def with_type(type)
|
25
|
+
dup.tap{|r|
|
26
|
+
r.type = type
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
def with_type_attrlist
|
31
|
+
return self if type.knows_attrlist?
|
32
|
+
attrs = self.first.keys
|
33
|
+
with_type(type.with_attrlist(attrs))
|
34
|
+
end
|
35
|
+
|
20
36
|
def with_typecheck
|
21
37
|
dup.tap{|r|
|
22
38
|
r.type = r.type.with_typecheck
|
@@ -100,6 +116,18 @@ module Bmg
|
|
100
116
|
end
|
101
117
|
end
|
102
118
|
|
119
|
+
def count
|
120
|
+
if type.knows_keys?
|
121
|
+
project(type.keys.first)._count
|
122
|
+
else
|
123
|
+
self._count
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def _count
|
128
|
+
to_a.size
|
129
|
+
end
|
130
|
+
|
103
131
|
# Returns a json representation
|
104
132
|
def to_json(*args, &bl)
|
105
133
|
to_a.to_json(*args, &bl)
|
@@ -113,9 +141,10 @@ module Bmg
|
|
113
141
|
# When no string_or_io is used, the method uses a string.
|
114
142
|
#
|
115
143
|
# The method always returns the string_or_io.
|
116
|
-
def to_csv(options = {}, string_or_io = nil)
|
144
|
+
def to_csv(options = {}, string_or_io = nil, preferences = nil)
|
117
145
|
options, string_or_io = {}, options unless options.is_a?(Hash)
|
118
|
-
|
146
|
+
string_or_io, preferences = nil, string_or_io if string_or_io.is_a?(Hash)
|
147
|
+
Writer::Csv.new(options, preferences).call(self, string_or_io)
|
119
148
|
end
|
120
149
|
|
121
150
|
# Converts to an sexpr expression.
|
data/lib/bmg/relation/empty.rb
CHANGED
@@ -8,7 +8,6 @@ module Bmg
|
|
8
8
|
@type = type
|
9
9
|
end
|
10
10
|
attr_accessor :type
|
11
|
-
protected :type=
|
12
11
|
attr_reader :operand
|
13
12
|
|
14
13
|
public
|
@@ -17,6 +16,16 @@ module Bmg
|
|
17
16
|
@operand.each(&bl)
|
18
17
|
end
|
19
18
|
|
19
|
+
def _count
|
20
|
+
if operand.respond_to?(:count)
|
21
|
+
operand.count
|
22
|
+
elsif operand.respond_to?(:size)
|
23
|
+
operand.size
|
24
|
+
else
|
25
|
+
super
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
20
29
|
def to_ast
|
21
30
|
[ :in_memory, operand ]
|
22
31
|
end
|
data/lib/bmg/relation/spied.rb
CHANGED
@@ -24,10 +24,15 @@ module Bmg
|
|
24
24
|
protected :type=
|
25
25
|
|
26
26
|
def each(&bl)
|
27
|
-
spy.call(self)
|
27
|
+
spy.call(self) if bl
|
28
28
|
operand.each(&bl)
|
29
29
|
end
|
30
30
|
|
31
|
+
def count
|
32
|
+
spy.call(self) if bl
|
33
|
+
operand.count
|
34
|
+
end
|
35
|
+
|
31
36
|
def to_ast
|
32
37
|
[ :spied, operand.to_ast, spy ]
|
33
38
|
end
|
data/lib/bmg/sequel/relation.rb
CHANGED
@@ -9,6 +9,7 @@ module Bmg
|
|
9
9
|
attr_reader :sequel_db
|
10
10
|
|
11
11
|
def each(&bl)
|
12
|
+
return to_enum unless block_given?
|
12
13
|
dataset.each(&bl)
|
13
14
|
end
|
14
15
|
|
@@ -33,6 +34,10 @@ module Bmg
|
|
33
34
|
base_table.update(arg)
|
34
35
|
end
|
35
36
|
|
37
|
+
def _count
|
38
|
+
dataset.count
|
39
|
+
end
|
40
|
+
|
36
41
|
def to_ast
|
37
42
|
[:sequel, dataset.sql]
|
38
43
|
end
|
data/lib/bmg/sql/relation.rb
CHANGED
@@ -10,7 +10,6 @@ module Bmg
|
|
10
10
|
end
|
11
11
|
|
12
12
|
attr_accessor :type
|
13
|
-
protected :type=
|
14
13
|
|
15
14
|
protected
|
16
15
|
|
@@ -134,8 +133,8 @@ module Bmg
|
|
134
133
|
_instance(type, builder, expr)
|
135
134
|
end
|
136
135
|
|
137
|
-
def _summarize(type, by,
|
138
|
-
summarization =
|
136
|
+
def _summarize(type, by, defs)
|
137
|
+
summarization = ::Bmg::Summarizer.summarization(defs)
|
139
138
|
if can_compile_summarization?(summarization)
|
140
139
|
expr = before_use(self.expr)
|
141
140
|
expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
|
data/lib/bmg/summarizer.rb
CHANGED
@@ -50,6 +50,21 @@ module Bmg
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
# Converts some summarization definitions to a Hash of
|
54
|
+
# summarizers.
|
55
|
+
def self.summarization(defs)
|
56
|
+
Hash[defs.map{|k,v|
|
57
|
+
summarizer = case v
|
58
|
+
when Summarizer then v
|
59
|
+
when Symbol then Summarizer.send(v, k)
|
60
|
+
when Proc then Summarizer.by_proc(&v)
|
61
|
+
else
|
62
|
+
raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
|
63
|
+
end
|
64
|
+
[ k, summarizer ]
|
65
|
+
}]
|
66
|
+
end
|
67
|
+
|
53
68
|
# Returns the default options to use
|
54
69
|
#
|
55
70
|
# @return the default aggregation options
|
@@ -80,7 +95,16 @@ module Bmg
|
|
80
95
|
# @param the current iterated tuple
|
81
96
|
# @return updated memo value
|
82
97
|
def happens(memo, tuple)
|
83
|
-
value =
|
98
|
+
value = case @functor
|
99
|
+
when Proc
|
100
|
+
@functor.call(tuple)
|
101
|
+
when NilClass
|
102
|
+
tuple
|
103
|
+
when Symbol
|
104
|
+
tuple[@functor]
|
105
|
+
else
|
106
|
+
tuple[@functor]
|
107
|
+
end
|
84
108
|
_happens(memo, value)
|
85
109
|
end
|
86
110
|
|
@@ -128,5 +152,9 @@ require_relative 'summarizer/max'
|
|
128
152
|
require_relative 'summarizer/avg'
|
129
153
|
require_relative 'summarizer/variance'
|
130
154
|
require_relative 'summarizer/stddev'
|
155
|
+
require_relative 'summarizer/percentile'
|
131
156
|
require_relative 'summarizer/collect'
|
157
|
+
require_relative 'summarizer/distinct'
|
132
158
|
require_relative 'summarizer/concat'
|
159
|
+
require_relative 'summarizer/by_proc'
|
160
|
+
require_relative 'summarizer/multiple'
|
data/lib/bmg/summarizer/avg.rb
CHANGED
@@ -16,13 +16,13 @@ module Bmg
|
|
16
16
|
end
|
17
17
|
|
18
18
|
# Collects one more value + the sum of all
|
19
|
-
def _happens(memo, val)
|
19
|
+
def _happens(memo, val)
|
20
20
|
[memo.first + val, memo.last + 1]
|
21
21
|
end
|
22
22
|
|
23
23
|
# Finalizes the computation.
|
24
|
-
def finalize(memo)
|
25
|
-
memo.first / memo.last
|
24
|
+
def finalize(memo)
|
25
|
+
memo.first / memo.last
|
26
26
|
end
|
27
27
|
|
28
28
|
end # class Avg
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Generic summarizer that takes a Proc à la each_with_object.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
|
10
|
+
#
|
11
|
+
class ByProc < Summarizer
|
12
|
+
|
13
|
+
def initialize(least, by_proc)
|
14
|
+
@least = least
|
15
|
+
@by_proc = by_proc
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns the least value given at construction.
|
19
|
+
def least()
|
20
|
+
@least
|
21
|
+
end
|
22
|
+
|
23
|
+
# Calls the proc with the value and the current memo
|
24
|
+
def happens(memo, val)
|
25
|
+
@by_proc.call(val, memo)
|
26
|
+
end
|
27
|
+
|
28
|
+
def finalize(memo)
|
29
|
+
memo
|
30
|
+
end
|
31
|
+
|
32
|
+
end # class ByProc
|
33
|
+
|
34
|
+
# Factors a by_proc summarizer
|
35
|
+
def self.by_proc(least = nil, proc = nil, &bl)
|
36
|
+
least, proc = nil, least if least.is_a?(Proc)
|
37
|
+
ByProc.new(least, proc || bl)
|
38
|
+
end
|
39
|
+
|
40
|
+
end # class Summarizer
|
41
|
+
end # module Bmg
|