bmg 0.18.0 → 0.18.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/bmg/algebra.rb +1 -0
- data/lib/bmg/algebra/shortcuts.rb +6 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator/allbut.rb +27 -0
- data/lib/bmg/operator/autosummarize.rb +27 -4
- data/lib/bmg/operator/autowrap.rb +27 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +41 -2
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +2 -7
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +12 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +39 -1
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +18 -0
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +5 -0
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -3
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support.rb +1 -0
- data/lib/bmg/support/ordering.rb +20 -0
- data/lib/bmg/support/tuple_transformer.rb +10 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +23 -2
data/lib/bmg/summarizer/avg.rb
CHANGED
@@ -16,13 +16,13 @@ module Bmg
|
|
16
16
|
end
|
17
17
|
|
18
18
|
# Collects one more value + the sum of all
|
19
|
-
def _happens(memo, val)
|
19
|
+
def _happens(memo, val)
|
20
20
|
[memo.first + val, memo.last + 1]
|
21
21
|
end
|
22
22
|
|
23
23
|
# Finalizes the computation.
|
24
|
-
def finalize(memo)
|
25
|
-
memo.first / memo.last
|
24
|
+
def finalize(memo)
|
25
|
+
memo.first / memo.last
|
26
26
|
end
|
27
27
|
|
28
28
|
end # class Avg
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Generic summarizer that takes a Proc à la each_with_object.
    #
    # The proc receives `(value, memo)`; `happens` returns whatever the
    # proc returns.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
    #
    class ByProc < Summarizer

      # Creates a summarizer from an initial memo and a callable.
      #
      # @param least [Object] the initial memo, returned by `least`
      # @param by_proc [#call] callable invoked as `by_proc.call(val, memo)`
      # @raise [ArgumentError] when no callable is given
      def initialize(least, by_proc)
        # Fail fast: without this check the error would only show up later,
        # as a NoMethodError on nil inside `happens`.
        if by_proc.nil?
          raise ArgumentError, "Summarizer.by_proc: a proc or a block is required"
        end
        @least = least
        @by_proc = by_proc
      end

      # Returns the initial memo given at construction (nil by default
      # when built through `Summarizer.by_proc`).
      def least
        @least
      end

      # Delegates to the proc, passing the value first and the memo second.
      def happens(memo, val)
        @by_proc.call(val, memo)
      end

      # Returns the memo unchanged.
      def finalize(memo)
        memo
      end

    end # class ByProc

    # Factors a by_proc summarizer.
    #
    # Supported call shapes:
    #
    #   by_proc{|t,memo| ... }        # block only, least is nil
    #   by_proc(a_proc)               # proc only, least is nil
    #   by_proc(least){|t,memo| ... } # initial memo + block
    #   by_proc(least, a_proc)        # initial memo + proc
    #
    # An explicit proc takes precedence over the block.
    def self.by_proc(least = nil, proc = nil, &bl)
      # A Proc in first position is the callable, not the initial memo.
      if least.is_a?(Proc)
        least, proc = nil, least
      end
      ByProc.new(least, proc || bl)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Collect the distinct values as an array.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.distinct(:qty).summarize(...)
|
10
|
+
#
|
11
|
+
class Distinct < Summarizer
|
12
|
+
|
13
|
+
# Returns {} as least value (a Hash whose keys are the distinct values).
|
14
|
+
def least()
|
15
|
+
{}
|
16
|
+
end
|
17
|
+
|
18
|
+
# Registers val as a key of the memo Hash
|
19
|
+
def _happens(memo, val)
|
20
|
+
memo[val] = true
|
21
|
+
memo
|
22
|
+
end
|
23
|
+
|
24
|
+
def finalize(memo)
|
25
|
+
memo.keys
|
26
|
+
end
|
27
|
+
|
28
|
+
end # class Distinct
|
29
|
+
|
30
|
+
# Factors a distinct summarizer
|
31
|
+
def self.distinct(*args, &bl)
|
32
|
+
Distinct.new(*args, &bl)
|
33
|
+
end
|
34
|
+
|
35
|
+
end # class Summarizer
|
36
|
+
end # module Bmg
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# A summarizer that collects multiple summarization as a wrapped
|
5
|
+
# tuple.
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
#
|
9
|
+
# # direct ruby usage
|
10
|
+
# Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
|
11
|
+
#
|
12
|
+
class Multiple < Summarizer
|
13
|
+
|
14
|
+
def initialize(defs)
|
15
|
+
@summarization = Summarizer.summarization(defs)
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns a Hash mapping each key to the least value of its summarizer.
|
19
|
+
def least()
|
20
|
+
@summarization.each_pair.each_with_object({}){|(k,v),memo|
|
21
|
+
memo[k] = v.least
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
# Forwards val to each sub-summarizer and returns the new memo Hash
|
26
|
+
def happens(memo, val)
|
27
|
+
@summarization.each_pair.each_with_object({}){|(k,v),memo2|
|
28
|
+
memo2[k] = v.happens(memo[k], val)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
def finalize(memo)
|
33
|
+
@summarization.each_pair.each_with_object({}){|(k,v),memo2|
|
34
|
+
memo2[k] = v.finalize(memo[k])
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
end # class Multiple
|
39
|
+
|
40
|
+
# Factors a multiple summarizer
|
41
|
+
def self.multiple(defs)
|
42
|
+
Multiple.new(defs)
|
43
|
+
end
|
44
|
+
|
45
|
+
end # class Summarizer
|
46
|
+
end # module Bmg
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # Percentile summarizer.
    #
    # Collects every value, then picks (variant :discrete) or averages
    # (variant :continuous, the default) values of the sorted collection
    # around the requested rank.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.percentile(:qty, 50).summarize(...)
    #
    class Percentile < Summarizer

      DEFAULT_OPTIONS = {
        :variant => :continuous
      }

      # Arguments are recognized by type, in any order:
      #
      # - a Numeric is the percentile to compute (defaults to 50),
      # - a Symbol is the functor (the block is used otherwise),
      # - Hashes are options, merged over DEFAULT_OPTIONS.
      def initialize(*args, &bl)
        # Numeric (not just Integer) so that e.g. 12.5 or 99.9 is not
        # silently replaced by the default 50.
        @nth = args.find{|a| a.is_a?(Numeric) } || 50
        functor = args.find{|a| a.is_a?(Symbol) } || bl
        options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
          memo.merge(opts)
        }.dup
        super(functor, options)
      end

      # Returns [] as least value.
      def least
        []
      end

      # Collects the value (appended to the memo array in place).
      def _happens(memo, val)
        memo << val
      end

      # Finalizes the computation.
      #
      # Returns nil when no value was collected. Otherwise returns the
      # average (as a Float) of the two sorted values around the rank for
      # the :continuous variant, or the lower one for any other variant.
      #
      # NOTE(review): the rank is `size * nth / 100` (1-based), so the
      # median of [1, 2, 3] is 1.5 (continuous) or 1 (discrete) -- confirm
      # this is the intended convention.
      def finalize(memo)
        return nil if memo.empty?
        index = memo.size.to_f * (@nth.to_f / 100.0)
        floor, ceil = index.floor, index.ceil
        # An integral rank sits between two values: look at the next one too.
        ceil += 1 if floor == ceil
        # Convert 1-based ranks to array indexes, clamped to valid bounds.
        below = [floor - 1, 0].max
        above = [[ceil - 1, memo.size - 1].min, 0].max
        sorted = memo.sort
        if options[:variant] == :continuous
          (sorted[above] + sorted[below]) / 2.0
        else
          sorted[below]
        end
      end

    end # class Percentile

    # Factors a percentile summarizer (see Percentile#initialize).
    def self.percentile(*args, &bl)
      Percentile.new(*args, &bl)
    end

    # Same as `percentile`, forcing the :continuous variant.
    def self.percentile_cont(*args, &bl)
      Percentile.new(*(args + [{:variant => :continuous}]), &bl)
    end

    # Same as `percentile`, forcing the :discrete variant.
    def self.percentile_disc(*args, &bl)
      Percentile.new(*(args + [{:variant => :discrete}]), &bl)
    end

    # Factors a percentile summarizer with 50 appended to the arguments.
    def self.median(*args, &bl)
      Percentile.new(*(args + [50]), &bl)
    end

    # Same as `median`, forcing the :continuous variant.
    def self.median_cont(*args, &bl)
      Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
    end

    # Same as `median`, forcing the :discrete variant.
    def self.median_disc(*args, &bl)
      Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
    end

  end # class Summarizer
end # module Bmg
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Bmg
  class Summarizer
    #
    # ValueBy summarizer.
    #
    # Builds a Hash whose keys are the values of the `:by` attribute and
    # whose values are the values extracted from the corresponding tuples.
    #
    # Options read by this class:
    #
    # - :by        attribute giving the key of each tuple
    # - :symbolize when truthy, keys are converted with `to_sym`
    # - :series    keys to pre-fill in the final tuple
    # - :default   value used for the pre-filled :series keys
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
    #
    class ValueBy < Summarizer

      DEFAULT_OPTIONS = {
        :symbolize => false
      }

      # Returns {} as least value.
      def least
        {}
      end

      # Collects the value of `tuple` under its `:by` key (the memo is
      # updated in place and returned).
      #
      # Raises MisuseError when that key has already been seen.
      def happens(memo, tuple)
        by = tuple[options[:by]]
        by = by.to_sym if by && options[:symbolize]
        misuse!(tuple, memo) if memo.has_key?(by)
        memo[by] = extract_value(tuple)
        memo
      end

      # Finalizes the computation: collected values override the defaults
      # of the pre-filled series.
      def finalize(memo)
        default_tuple.merge(memo)
      end

    private

      # Builds a Hash mapping each of the :series to the :default value.
      #
      # NOTE(review): with :symbolize, it is the default *value* that gets
      # symbolized here, not the serie name -- confirm this is intended.
      def default_tuple
        (options[:series] || []).each_with_object({}){|s,ss|
          s_def = options[:default]
          s_def = s_def.to_sym if s_def && options[:symbolize]
          ss[s] = s_def
        }
      end

      # Raises a MisuseError showing the offending tuple and the current
      # memo, each on its own line.
      def misuse!(tuple, memo)
        msg = [
          "Summarizer.value_by: summarization key + the serie must form a candidate key",
          "  Tuple: #{tuple.inspect}",
          "  Memo: #{memo.inspect}"
        ].join("\n")
        raise MisuseError, msg
      end

    end # class ValueBy

    # Factors a value_by summarizer.
    def self.value_by(*args, &bl)
      ValueBy.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
|
data/lib/bmg/support.rb
CHANGED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Bmg
  #
  # An ordering over tuples, described as a list of attributes, each
  # optionally paired with a direction.
  #
  # Example:
  #
  #   Bmg::Ordering.new([[:name, :asc], [:age, :desc]])
  #
  class Ordering

    # @param attrs [Array] list of `[attribute, direction]` pairs; any
    #   direction other than :desc is treated as ascending
    def initialize(attrs)
      @attrs = attrs
    end
    attr_reader :attrs

    # Returns a lambda comparing two tuples attribute by attribute, in the
    # order of `attrs`. The lambda returns the first non-zero comparison
    # (negated for :desc), or 0 when the tuples are equal on all attributes.
    #
    # @raise [ArgumentError] (from the lambda) when two values cannot be
    #   compared, i.e. when `<=>` returns nil
    def comparator
      ->(t1, t2) {
        attrs.each do |(attr,direction)|
          left, right = t1[attr], t2[attr]
          c = left <=> right
          # Without this check, ascending order would return nil while
          # descending order would fail with a NoMethodError on `-nil`.
          if c.nil?
            raise ArgumentError, "comparison of #{left.inspect} with #{right.inspect} failed (attribute #{attr.inspect})"
          end
          next if c == 0
          return (direction == :desc ? -c : c)
        end
        0
      }
    end

  end # class Ordering
end # module Bmg
|
@@ -32,7 +32,16 @@ module Bmg
|
|
32
32
|
}
|
33
33
|
when Hash
|
34
34
|
with.each_with_object(tuple.dup){|(k,v),dup|
|
35
|
-
|
35
|
+
case k
|
36
|
+
when Symbol
|
37
|
+
dup[k] = transform_attr(dup[k], v)
|
38
|
+
when Class
|
39
|
+
dup.keys.each do |attrname|
|
40
|
+
dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
|
41
|
+
end
|
42
|
+
else
|
43
|
+
raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
|
44
|
+
end
|
36
45
|
}
|
37
46
|
when Array
|
38
47
|
with.inject(tuple){|dup,on|
|
data/lib/bmg/version.rb
CHANGED
data/lib/bmg/writer.rb
CHANGED
@@ -1 +1,17 @@
|
|
1
|
+
module Bmg
  module Writer

  protected

    # Infers the ordered list of headers from `from`.
    #
    # The attribute list is taken from a Type that knows it, or from the
    # keys of a Hash; it is then ordered through the output preferences.
    # Returns nil when no attribute list can be found.
    def infer_headers(from)
      attrlist = nil
      if from.is_a?(Type) && from.knows_attrlist?
        attrlist = from.to_attrlist
      elsif from.is_a?(Hash)
        attrlist = from.keys
      end
      return nil unless attrlist
      output_preferences.order_attrlist(attrlist)
    end

  end # module Writer
end # module Bmg
|
1
17
|
require_relative 'writer/csv'
|
data/lib/bmg/writer/csv.rb
CHANGED
@@ -19,24 +19,14 @@ module Bmg
|
|
19
19
|
relation.each do |tuple|
|
20
20
|
if csv.nil?
|
21
21
|
headers = infer_headers(tuple) if headers.nil?
|
22
|
-
|
22
|
+
csv_opts = csv_options.merge(headers: headers)
|
23
|
+
csv = CSV.new(string_or_io, **csv_opts)
|
23
24
|
end
|
24
25
|
csv << headers.map{|h| tuple[h] }
|
25
26
|
end
|
26
27
|
to_s ? string_or_io.string : string_or_io
|
27
28
|
end
|
28
29
|
|
29
|
-
private
|
30
|
-
|
31
|
-
def infer_headers(from)
|
32
|
-
attrlist = if from.is_a?(Type) && from.knows_attrlist?
|
33
|
-
from.to_attrlist
|
34
|
-
elsif from.is_a?(Hash)
|
35
|
-
from.keys
|
36
|
-
end
|
37
|
-
attrlist ? output_preferences.order_attrlist(attrlist) : nil
|
38
|
-
end
|
39
|
-
|
40
30
|
end # class Csv
|
41
31
|
end # module Writer
|
42
32
|
end # module Bmg
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Bmg
  module Writer
    #
    # Writes a relation to an .xlsx file through the write_xlsx gem.
    #
    # The first worksheet row receives the headers, then each tuple is
    # written on its own row.
    #
    class Xlsx
      include Writer

      DEFAULT_OPTIONS = {
      }

      def initialize(csv_options, output_preferences = nil)
        @csv_options = DEFAULT_OPTIONS.merge(csv_options)
        @output_preferences = OutputPreferences.dress(output_preferences)
      end
      attr_reader :csv_options, :output_preferences

      # Writes `relation` to the file at `path` and returns `path`.
      #
      # The work is done on a copy of the writer, so the workbook state
      # does not stay on the receiver.
      def call(relation, path)
        # Loaded lazily, like write_xlsx: `write_pair` matches on Date.
        require 'date'
        require 'write_xlsx'
        dup._call(relation, path)
      end

    protected
      attr_reader :workbook, :worksheet

      def _call(relation, path)
        @workbook = WriteXLSX.new(path)
        @worksheet = workbook.add_worksheet

        headers = infer_headers(relation.type)
        relation.each_with_index do |tuple, row|
          # Fall back on the first tuple when the type gives no headers.
          headers = infer_headers(tuple) if headers.nil?
          if row == 0
            headers.each_with_index do |h, col|
              worksheet.write_string(0, col, h.to_s)
            end
          end
          headers.each_with_index do |h, col|
            meth, *args = write_pair(tuple[h])
            # Data rows start at 1, row 0 holds the headers.
            worksheet.send(meth, 1 + row, col, *args)
          end
        end

        workbook.close
        path
      end

      # Returns the worksheet method to call for `value`, followed by the
      # arguments to pass after the row and column.
      def write_pair(value)
        case value
        when Numeric
          [:write_number, value]
        when Date
          # write_date_time is documented to take an ISO8601 string
          # (yyyy-mm-ddThh:mm:ss), not a Date object.
          [:write_date_time, value.strftime('%Y-%m-%dT%H:%M:%S'), date_format]
        else
          [:write_string, value.to_s]
        end
      end

      # Cell format used for dates, created once per workbook.
      def date_format
        @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
      end

    end # class Xlsx
  end # module Writer
  module Relation

    # Writes the relation as an .xlsx file at `path` and returns `path`.
    #
    # The options may be omitted, i.e. `to_xlsx(path)` is supported.
    def to_xlsx(options = {}, path = nil, preferences = nil)
      options, path = {}, options unless options.is_a?(Hash)
      Writer::Xlsx.new(options, preferences).call(self, path)
    end

  end # module Relation
end # module Bmg
|