bmg 0.18.2 → 0.18.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/bmg/algebra.rb +18 -0
- data/lib/bmg/algebra/shortcuts.rb +8 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator.rb +2 -0
- data/lib/bmg/operator/allbut.rb +1 -0
- data/lib/bmg/operator/autosummarize.rb +1 -0
- data/lib/bmg/operator/autowrap.rb +1 -0
- data/lib/bmg/operator/constants.rb +1 -0
- data/lib/bmg/operator/extend.rb +1 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +10 -4
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +1 -0
- data/lib/bmg/operator/project.rb +1 -0
- data/lib/bmg/operator/rename.rb +6 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +1 -0
- data/lib/bmg/operator/ungroup.rb +61 -0
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/operator/unwrap.rb +47 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +6 -0
- data/lib/bmg/relation/in_memory.rb +0 -1
- data/lib/bmg/sequel/relation.rb +1 -0
- data/lib/bmg/sequel/translator.rb +9 -2
- data/lib/bmg/sql.rb +4 -1
- data/lib/bmg/sql/processor.rb +1 -0
- data/lib/bmg/sql/processor/transform.rb +105 -0
- data/lib/bmg/sql/relation.rb +20 -6
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support/keys.rb +5 -0
- data/lib/bmg/support/tuple_transformer.rb +23 -1
- data/lib/bmg/type.rb +19 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +25 -2
@@ -0,0 +1,41 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Generic summarizer that takes a Proc à la each_with_object.
    #
    # The proc receives `(value, memo)` and its return value is what
    # `happens` returns.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
    #
    class ByProc < Summarizer

      # least::   seed value of the memo (may be nil)
      # by_proc:: callable invoked as `by_proc.call(val, memo)`
      def initialize(least, by_proc)
        @least = least
        @by_proc = by_proc
      end

      # Returns the seed value given at construction.
      #
      # A mutable seed (e.g. `[]` or `{}`) is shallow-copied on every call,
      # so that a proc mutating its memo in place cannot leak state from one
      # summarization into the next. Frozen seeds (nil, numbers, symbols,
      # frozen objects) cannot be mutated in place and are returned as is.
      def least
        @least.frozen? ? @least : @least.dup
      end

      # Calls the proc with `(val, memo)` and returns its result.
      def happens(memo, val)
        @by_proc.call(val, memo)
      end

      # Returns the memo untouched.
      def finalize(memo)
        memo
      end

    end # class ByProc

    # Factors a by_proc summarizer.
    #
    # Supported call forms:
    #
    #   by_proc{|t,memo| ... }         # nil seed, block
    #   by_proc(a_proc)                # nil seed, proc given first
    #   by_proc(least){|t,memo| ... }  # explicit seed, block
    #   by_proc(least, a_proc)         # explicit seed, explicit proc
    #
    # Raises ArgumentError when neither a proc nor a block is given.
    def self.by_proc(least = nil, proc = nil, &bl)
      # A Proc in first position is the callable, not the seed.
      least, proc = nil, least if least.is_a?(Proc)
      callable = proc || bl
      raise ArgumentError, "Summarizer.by_proc: a proc or a block is required" if callable.nil?
      ByProc.new(least, callable)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Collects the distinct values seen, returned as an array on finalize.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.distinct(:qty).summarize(...)
    #
    class Distinct < Summarizer

      # Starts from a fresh empty Hash; its keys are the values seen so far.
      def least
        Hash.new
      end

      # Records `val` as a key of the memo Hash and returns that same Hash.
      def _happens(memo, val)
        memo.tap { |seen| seen[val] = true }
      end

      # Returns the recorded values, in the order they were first seen.
      def finalize(memo)
        memo.keys
      end

    end # class Distinct

    # Factors a distinct summarizer; all arguments go to Distinct.new.
    def self.distinct(*args, &bl)
      Distinct.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # A summarizer that runs several named summarizers side by side and
    # exposes their results as one Hash (name => result).
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
    #
    class Multiple < Summarizer

      # `defs` is handed to Summarizer.summarization (defined outside this
      # file); the result is iterated with each_pair as name/summarizer pairs.
      def initialize(defs)
        @summarization = Summarizer.summarization(defs)
      end

      # Returns a Hash mapping each name to its summarizer's least value.
      def least
        per_summarizer { |_name, summarizer| summarizer.least }
      end

      # Feeds `val` to every summarizer, each with its own entry of `memo`,
      # and returns a new Hash with the updated entries.
      def happens(memo, val)
        per_summarizer { |name, summarizer| summarizer.happens(memo[name], val) }
      end

      # Finalizes every entry of `memo` with its own summarizer.
      def finalize(memo)
        per_summarizer { |name, summarizer| summarizer.finalize(memo[name]) }
      end

    private

      # Builds a new Hash: name => value yielded for that name/summarizer.
      def per_summarizer
        result = {}
        @summarization.each_pair.each do |name, summarizer|
          result[name] = yield(name, summarizer)
        end
        result
      end

    end # class Multiple

    # Factors a multiple summarizer from a Hash of definitions.
    def self.multiple(defs)
      Multiple.new(defs)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Percentile summarizer.
    #
    # Collects every value, then on finalize picks one value (discrete
    # variant) or the mean of two neighbouring values (continuous variant)
    # from the sorted collection, at the position given by the percentile.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.percentile(:qty, 50).summarize(...)
    #
    class Percentile < Summarizer

      DEFAULT_OPTIONS = {
        :variant => :continuous
      }

      # Arguments are recognized by class, in any order:
      #
      # - the first Numeric is the percentile, e.g. 50 or 12.5 (default: 50)
      # - the first Symbol, or else the block, is the functor passed to super
      # - all Hashes are merged, in order, on top of DEFAULT_OPTIONS
      #   (recognized here: `:variant`, either :continuous or anything else
      #   for the discrete behavior)
      def initialize(*args, &bl)
        # Numeric rather than Integer: fractional percentiles must not be
        # silently ignored in favour of the default.
        @nth = args.find{|a| a.is_a?(Numeric) } || 50
        functor = args.find{|a| a.is_a?(Symbol) } || bl
        options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
          memo.merge(opts)
        }.dup
        super(functor, options)
      end

      # Returns [] as least value.
      def least
        []
      end

      # Collects the value at the end of the memo array.
      def _happens(memo, val)
        memo << val
      end

      # Finalizes the computation.
      #
      # Returns nil for an empty memo. Otherwise, with
      # `index = size * nth / 100`, two positions of the sorted values are
      # computed (both clamped to the valid range):
      #
      #   below = floor(index) - 1
      #   above = ceil(index) - 1   (floor(index) when index is a whole number)
      #
      # The continuous variant returns the mean of the values at `below` and
      # `above` as a Float; any other variant returns the value at `below`.
      #
      # NOTE(review): for an odd-sized memo and nth = 50 the continuous
      # variant averages the middle value with its predecessor instead of
      # returning the middle value. Confirm this is intended.
      def finalize(memo)
        return nil if memo.empty?
        index = memo.size.to_f * (@nth.to_f / 100.0)
        floor, ceil = index.floor, index.ceil
        ceil += 1 if floor == ceil
        below = [floor - 1, 0].max
        above = [[ceil - 1, memo.size - 1].min, 0].max
        sorted = memo.sort
        if options[:variant] == :continuous
          (sorted[above] + sorted[below]) / 2.0
        else
          sorted[below]
        end
      end

    end # class Percentile

    # Factors a percentile summarizer (continuous unless told otherwise).
    def self.percentile(*args, &bl)
      Percentile.new(*args, &bl)
    end

    # Factors a percentile summarizer, forcing the continuous variant.
    def self.percentile_cont(*args, &bl)
      Percentile.new(*(args + [{:variant => :continuous}]), &bl)
    end

    # Factors a percentile summarizer, forcing the discrete variant.
    def self.percentile_disc(*args, &bl)
      Percentile.new(*(args + [{:variant => :discrete}]), &bl)
    end

    # Factors a 50th percentile summarizer. The 50 is appended last, so a
    # number already present in `args` takes precedence.
    def self.median(*args, &bl)
      Percentile.new(*(args + [50]), &bl)
    end

    # Same as `median`, forcing the continuous variant.
    def self.median_cont(*args, &bl)
      Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
    end

    # Same as `median`, forcing the discrete variant.
    def self.median_disc(*args, &bl)
      Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # ValueBy summarizer.
    #
    # Builds a Hash whose keys are the values of the `:by` attribute of the
    # summarized tuples and whose values are the values extracted from them.
    #
    # Options read by this class:
    #
    # - `:by`        attribute whose value becomes the key
    # - `:symbolize` when truthy, keys are converted with `to_sym`
    # - `:series`    keys that must always be present in the result
    # - `:default`   value used for the `:series` keys that were not seen
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
    #
    class ValueBy < Summarizer

      DEFAULT_OPTIONS = {
        :symbolize => false
      }

      # Returns {} as least value.
      def least
        {}
      end

      # Collects the value of `tuple` under its `:by` key, mutating and
      # returning `memo`.
      #
      # Raises MisuseError when that key is already present in `memo`.
      def happens(memo, tuple)
        by = tuple[options[:by]]
        by = by.to_sym if by && options[:symbolize]
        misuse!(tuple, memo) if memo.has_key?(by)
        memo.tap{|m|
          m[by] = extract_value(tuple)
        }
      end

      # Finalizes the computation: collected values on top of the defaults
      # for the declared series.
      def finalize(memo)
        default_tuple.merge(memo)
      end

    private

      # Maps every `:series` entry (symbolized if requested) to `:default`.
      def default_tuple
        (options[:series] || []).each_with_object({}){|s,ss|
          s_def = options[:default]
          s = s.to_sym if s && options[:symbolize]
          ss[s] = s_def
        }
      end

      # Raises a MisuseError showing the offending tuple and the memo so far,
      # each on its own line.
      def misuse!(tuple, memo)
        msg = "Summarizer.value_by: summarization key + the serie must form a candidate key\n" \
              "  Tuple: #{tuple.inspect}\n" \
              "  Memo: #{memo.inspect}"
        raise MisuseError, msg
      end

    end # class ValueBy

    # Factors a value_by summarizer; all arguments go to ValueBy.new.
    def self.value_by(*args, &bl)
      ValueBy.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
|
data/lib/bmg/support/keys.rb
CHANGED
@@ -32,7 +32,16 @@ module Bmg
|
|
32
32
|
}
|
33
33
|
when Hash
|
34
34
|
with.each_with_object(tuple.dup){|(k,v),dup|
|
35
|
-
|
35
|
+
case k
|
36
|
+
when Symbol
|
37
|
+
dup[k] = transform_attr(dup[k], v)
|
38
|
+
when Class
|
39
|
+
dup.keys.each do |attrname|
|
40
|
+
dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
|
41
|
+
end
|
42
|
+
else
|
43
|
+
raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
|
44
|
+
end
|
36
45
|
}
|
37
46
|
when Array
|
38
47
|
with.inject(tuple){|dup,on|
|
@@ -50,6 +59,19 @@ module Bmg
|
|
50
59
|
when Regexp
|
51
60
|
m = with.match(value.to_s)
|
52
61
|
m.nil? ? m : m.to_s
|
62
|
+
when Class
|
63
|
+
return value if value.nil?
|
64
|
+
if with.respond_to?(:parse)
|
65
|
+
with.parse(value)
|
66
|
+
elsif with == Integer
|
67
|
+
Integer(value)
|
68
|
+
elsif with == Float
|
69
|
+
Float(value)
|
70
|
+
elsif with == String
|
71
|
+
value.to_s
|
72
|
+
else
|
73
|
+
raise ArgumentError, "#{with} should respond to `parse`"
|
74
|
+
end
|
53
75
|
when Proc
|
54
76
|
with.call(value)
|
55
77
|
when Hash
|
data/lib/bmg/type.rb
CHANGED
@@ -82,7 +82,7 @@ module Bmg
|
|
82
82
|
|
83
83
|
def with_keys(keys)
|
84
84
|
dup.tap{|x|
|
85
|
-
x.keys = Keys.new(keys)
|
85
|
+
x.keys = keys ? Keys.new(keys) : nil
|
86
86
|
}
|
87
87
|
end
|
88
88
|
|
@@ -266,6 +266,15 @@ module Bmg
|
|
266
266
|
}
|
267
267
|
end
|
268
268
|
|
269
|
+
# Returns the type resulting from an ungroup on `attrlist`: a copy of this
# type whose attribute list and keys are reset to nil (unknown) and whose
# predicate is reset to the tautology.
#
# When the type is typechecked and knows its attributes, `attrlist` is first
# validated through `known_attributes!`.
def ungroup(attrlist)
known_attributes!(attrlist) if typechecked? && knows_attrlist?
ungrouped = dup
ungrouped.attrlist = nil
ungrouped.predicate = Predicate.tautology
ungrouped.keys = nil
ungrouped
end
|
277
|
+
|
269
278
|
def union(other)
|
270
279
|
if typechecked? && knows_attrlist? && other.knows_attrlist?
|
271
280
|
missing = self.attrlist - other.attrlist
|
@@ -280,6 +289,15 @@ module Bmg
|
|
280
289
|
}
|
281
290
|
end
|
282
291
|
|
292
|
+
# Returns the type resulting from an unwrap on `attrlist`: a copy of this
# type whose attribute list is reset to nil (unknown), whose predicate is the
# last part of `predicate.and_split(attrlist)` and, when keys are known,
# whose keys are recomputed through `_keys.unwrap`.
#
# When the type is typechecked and knows its attributes, `attrlist` is first
# validated through `known_attributes!`.
def unwrap(attrlist)
known_attributes!(attrlist) if typechecked? && knows_attrlist?
unwrapped = dup
unwrapped.attrlist = nil
unwrapped.predicate = predicate.and_split(attrlist).last
# Keys are computed last: the copy handed over already carries its new
# attribute list and predicate.
unwrapped.keys = self._keys.unwrap(self, unwrapped, attrlist) if knows_keys?
unwrapped
end
|
300
|
+
|
283
301
|
private
|
284
302
|
|
285
303
|
def known_attributes!(attrs)
|
data/lib/bmg/version.rb
CHANGED
data/lib/bmg/writer.rb
CHANGED
@@ -1 +1,17 @@
|
|
1
|
+
module Bmg
  module Writer

  protected

    # Infers the ordered list of headers to output.
    #
    # `from` may be a Type that knows its attribute list (its `to_attrlist`
    # is used) or a Hash (its keys are used). The list is then ordered
    # through `output_preferences.order_attrlist`, which the including class
    # must provide.
    #
    # Returns nil when no attribute list can be inferred from `from`.
    def infer_headers(from)
      attrlist =
        if from.is_a?(Type) && from.knows_attrlist?
          from.to_attrlist
        elsif from.is_a?(Hash)
          from.keys
        end
      return nil unless attrlist

      output_preferences.order_attrlist(attrlist)
    end

  end # module Writer
end # module Bmg
|
1
17
|
require_relative 'writer/csv'
|
data/lib/bmg/writer/csv.rb
CHANGED
@@ -19,24 +19,14 @@ module Bmg
|
|
19
19
|
relation.each do |tuple|
|
20
20
|
if csv.nil?
|
21
21
|
headers = infer_headers(tuple) if headers.nil?
|
22
|
-
|
22
|
+
csv_opts = csv_options.merge(headers: headers)
|
23
|
+
csv = CSV.new(string_or_io, **csv_opts)
|
23
24
|
end
|
24
25
|
csv << headers.map{|h| tuple[h] }
|
25
26
|
end
|
26
27
|
to_s ? string_or_io.string : string_or_io
|
27
28
|
end
|
28
29
|
|
29
|
-
private
|
30
|
-
|
31
|
-
def infer_headers(from)
|
32
|
-
attrlist = if from.is_a?(Type) && from.knows_attrlist?
|
33
|
-
from.to_attrlist
|
34
|
-
elsif from.is_a?(Hash)
|
35
|
-
from.keys
|
36
|
-
end
|
37
|
-
attrlist ? output_preferences.order_attrlist(attrlist) : nil
|
38
|
-
end
|
39
|
-
|
40
30
|
end # class Csv
|
41
31
|
end # module Writer
|
42
32
|
end # module Bmg
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Bmg
  module Writer
    #
    # Writes a relation to an Excel (.xlsx) file through the `write_xlsx`
    # gem, which is only required when `call` is invoked.
    #
    # Row 0 of the worksheet receives the headers, the tuples follow from
    # row 1. Nothing is written to the worksheet for an empty relation.
    #
    class Xlsx
      include Writer

      DEFAULT_OPTIONS = {
      }

      # csv_options::        Hash merged on top of DEFAULT_OPTIONS (the name
      #                      is kept for interface compatibility)
      # output_preferences:: passed through OutputPreferences.dress
      def initialize(csv_options, output_preferences = nil)
        @csv_options = DEFAULT_OPTIONS.merge(csv_options)
        @output_preferences = OutputPreferences.dress(output_preferences)
      end
      attr_reader :csv_options, :output_preferences

      # Writes `relation` to the file at `path` and returns `path`.
      #
      # Works on a copy of the writer, so that the workbook state of one
      # call is not kept on the receiver.
      def call(relation, path)
        require 'write_xlsx'
        dup._call(relation, path)
      end

    protected
      attr_reader :workbook, :worksheet

      def _call(relation, path)
        @workbook = WriteXLSX.new(path)
        @worksheet = workbook.add_worksheet

        # Headers come from the relation type when it knows them, otherwise
        # from the first tuple.
        headers = infer_headers(relation.type)
        relation.each_with_index do |tuple, row|
          headers = infer_headers(tuple) if headers.nil?
          write_header_row(headers) if row == 0
          headers.each_with_index do |h, col|
            meth, *args = write_pair(tuple[h])
            worksheet.send(meth, 1 + row, col, *args)
          end
        end

        workbook.close
        path
      end

      # Writes the header names on the first worksheet row.
      def write_header_row(headers)
        headers.each_with_index do |h, col|
          worksheet.write_string(0, col, h)
        end
      end

      # Returns the worksheet method to call for `value`, followed by the
      # arguments to pass after the row and column: numbers and dates keep
      # their type, anything else is written as its `to_s`.
      def write_pair(value)
        case value
        when Numeric
          [:write_number, value]
        when Date
          [:write_date_time, value, date_format]
        else
          [:write_string, value.to_s]
        end
      end

      # Cell format used for dates, created once per workbook.
      def date_format
        @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
      end

    end # class Xlsx
  end # module Writer
  module Relation

    # Writes the relation to an xlsx file and returns the path.
    #
    # Supported call forms:
    #
    #   to_xlsx(path)
    #   to_xlsx(options, path)
    #   to_xlsx(options, path, preferences)
    def to_xlsx(options = {}, path = nil, preferences = nil)
      # A non-Hash first argument is the path.
      options, path = {}, options unless options.is_a?(Hash)
      Writer::Xlsx.new(options, preferences).call(self, path)
    end

  end # module Relation
end # module Bmg
|