bmg 0.18.0 → 0.18.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/bmg/algebra.rb +1 -0
- data/lib/bmg/algebra/shortcuts.rb +6 -0
- data/lib/bmg/error.rb +3 -0
- data/lib/bmg/operator/allbut.rb +27 -0
- data/lib/bmg/operator/autosummarize.rb +27 -4
- data/lib/bmg/operator/autowrap.rb +27 -0
- data/lib/bmg/operator/constants.rb +7 -0
- data/lib/bmg/operator/extend.rb +7 -0
- data/lib/bmg/operator/group.rb +1 -0
- data/lib/bmg/operator/image.rb +41 -2
- data/lib/bmg/operator/join.rb +1 -0
- data/lib/bmg/operator/matching.rb +1 -0
- data/lib/bmg/operator/not_matching.rb +1 -0
- data/lib/bmg/operator/page.rb +2 -7
- data/lib/bmg/operator/project.rb +3 -2
- data/lib/bmg/operator/rename.rb +12 -5
- data/lib/bmg/operator/restrict.rb +1 -0
- data/lib/bmg/operator/rxmatch.rb +1 -0
- data/lib/bmg/operator/summarize.rb +2 -17
- data/lib/bmg/operator/transform.rb +39 -1
- data/lib/bmg/operator/union.rb +1 -0
- data/lib/bmg/reader/csv.rb +29 -10
- data/lib/bmg/reader/excel.rb +23 -4
- data/lib/bmg/relation.rb +18 -0
- data/lib/bmg/relation/empty.rb +4 -0
- data/lib/bmg/relation/in_memory.rb +10 -1
- data/lib/bmg/relation/materialized.rb +6 -0
- data/lib/bmg/relation/spied.rb +5 -0
- data/lib/bmg/sequel/relation.rb +5 -0
- data/lib/bmg/sql/relation.rb +2 -3
- data/lib/bmg/summarizer.rb +36 -1
- data/lib/bmg/summarizer/avg.rb +3 -3
- data/lib/bmg/summarizer/by_proc.rb +41 -0
- data/lib/bmg/summarizer/distinct.rb +36 -0
- data/lib/bmg/summarizer/multiple.rb +46 -0
- data/lib/bmg/summarizer/percentile.rb +79 -0
- data/lib/bmg/summarizer/value_by.rb +62 -0
- data/lib/bmg/support.rb +1 -0
- data/lib/bmg/support/ordering.rb +20 -0
- data/lib/bmg/support/tuple_transformer.rb +10 -1
- data/lib/bmg/version.rb +1 -1
- data/lib/bmg/writer.rb +16 -0
- data/lib/bmg/writer/csv.rb +2 -12
- data/lib/bmg/writer/xlsx.rb +68 -0
- metadata +23 -2
data/lib/bmg/summarizer/avg.rb
CHANGED
@@ -16,13 +16,13 @@ module Bmg
|
|
16
16
|
end
|
17
17
|
|
18
18
|
# Collects one more value + the sum of all
|
19
|
-
def _happens(memo, val)
|
19
|
+
def _happens(memo, val)
|
20
20
|
[memo.first + val, memo.last + 1]
|
21
21
|
end
|
22
22
|
|
23
23
|
# Finalizes the computation.
|
24
|
-
def finalize(memo)
|
25
|
-
memo.first / memo.last
|
24
|
+
def finalize(memo)
|
25
|
+
memo.first / memo.last
|
26
26
|
end
|
27
27
|
|
28
28
|
end # class Avg
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Generic summarizer that takes a Proc à la each_with_object.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
|
10
|
+
#
|
11
|
+
class ByProc < Summarizer
|
12
|
+
|
13
|
+
def initialize(least, by_proc)
|
14
|
+
@least = least
|
15
|
+
@by_proc = by_proc
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns the least value that was given at construction.
|
19
|
+
def least
|
20
|
+
@least
|
21
|
+
end
|
22
|
+
|
23
|
+
# Calls the proc with val and the current memo, returning its result
|
24
|
+
def happens(memo, val)
|
25
|
+
@by_proc.call(val, memo)
|
26
|
+
end
|
27
|
+
|
28
|
+
def finalize(memo)
|
29
|
+
memo
|
30
|
+
end
|
31
|
+
|
32
|
+
end # class ByProc
|
33
|
+
|
34
|
+
# Factors a by_proc summarizer
|
35
|
+
def self.by_proc(least = nil, proc = nil, &bl)
|
36
|
+
least, proc = nil, least if least.is_a?(Proc)
|
37
|
+
ByProc.new(least, proc || bl)
|
38
|
+
end
|
39
|
+
|
40
|
+
end # class Summarizer
|
41
|
+
end # module Bmg
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Collect the distinct values as an array.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.distinct(:qty).summarize(...)
|
10
|
+
#
|
11
|
+
class Distinct < Summarizer
|
12
|
+
|
13
|
+
# Returns {} as least value.
|
14
|
+
def least()
|
15
|
+
{}
|
16
|
+
end
|
17
|
+
|
18
|
+
# Registers val as a key of the memo hash
|
19
|
+
def _happens(memo, val)
|
20
|
+
memo[val] = true
|
21
|
+
memo
|
22
|
+
end
|
23
|
+
|
24
|
+
def finalize(memo)
|
25
|
+
memo.keys
|
26
|
+
end
|
27
|
+
|
28
|
+
end # class Distinct
|
29
|
+
|
30
|
+
# Factors a distinct summarizer
|
31
|
+
def self.distinct(*args, &bl)
|
32
|
+
Distinct.new(*args, &bl)
|
33
|
+
end
|
34
|
+
|
35
|
+
end # class Summarizer
|
36
|
+
end # module Bmg
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# A summarizer that collects multiple summarizations as a wrapped
|
5
|
+
# tuple.
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
#
|
9
|
+
# # direct ruby usage
|
10
|
+
# Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
|
11
|
+
#
|
12
|
+
class Multiple < Summarizer
|
13
|
+
|
14
|
+
def initialize(defs)
|
15
|
+
@summarization = Summarizer.summarization(defs)
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns a hash holding the least value of each sub-summarizer.
|
19
|
+
def least()
|
20
|
+
@summarization.each_pair.each_with_object({}){|(k,v),memo|
|
21
|
+
memo[k] = v.least
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
25
|
+
# Passes val to each sub-summarizer, collecting the new memos in a hash
|
26
|
+
def happens(memo, val)
|
27
|
+
@summarization.each_pair.each_with_object({}){|(k,v),memo2|
|
28
|
+
memo2[k] = v.happens(memo[k], val)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
def finalize(memo)
|
33
|
+
@summarization.each_pair.each_with_object({}){|(k,v),memo2|
|
34
|
+
memo2[k] = v.finalize(memo[k])
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
end # class Multiple
|
39
|
+
|
40
|
+
# Factors a multiple summarizer
|
41
|
+
def self.multiple(defs)
|
42
|
+
Multiple.new(defs)
|
43
|
+
end
|
44
|
+
|
45
|
+
end # class Summarizer
|
46
|
+
end # module Bmg
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# Percentile summarizer.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.percentile(:qty, 50).summarize(...)
|
10
|
+
#
|
11
|
+
class Percentile < Summarizer
|
12
|
+
|
13
|
+
DEFAULT_OPTIONS = {
|
14
|
+
:variant => :continuous
|
15
|
+
}
|
16
|
+
|
17
|
+
def initialize(*args, &bl)
|
18
|
+
@nth = args.find{|a| a.is_a?(Integer) } || 50
|
19
|
+
functor = args.find{|a| a.is_a?(Symbol) } || bl
|
20
|
+
options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
|
21
|
+
memo.merge(opts)
|
22
|
+
}.dup
|
23
|
+
super(functor, options)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Returns [] as least value.
|
27
|
+
def least()
|
28
|
+
[]
|
29
|
+
end
|
30
|
+
|
31
|
+
# Collects the value
|
32
|
+
def _happens(memo, val)
|
33
|
+
memo << val
|
34
|
+
end
|
35
|
+
|
36
|
+
# Finalizes the computation.
|
37
|
+
def finalize(memo)
|
38
|
+
return nil if memo.empty?
|
39
|
+
index = memo.size.to_f * (@nth.to_f / 100.0)
|
40
|
+
floor, ceil = index.floor, index.ceil
|
41
|
+
ceil +=1 if floor == ceil
|
42
|
+
below = [floor - 1, 0].max
|
43
|
+
above = [[ceil - 1, memo.size - 1].min, 0].max
|
44
|
+
sorted = memo.sort
|
45
|
+
if options[:variant] == :continuous
|
46
|
+
(sorted[above] + sorted[below]) / 2.0
|
47
|
+
else
|
48
|
+
sorted[below]
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
end # class Percentile
|
53
|
+
|
54
|
+
def self.percentile(*args, &bl)
|
55
|
+
Percentile.new(*args, &bl)
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.percentile_cont(*args, &bl)
|
59
|
+
Percentile.new(*(args + [{:variant => :continuous}]), &bl)
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.percentile_disc(*args, &bl)
|
63
|
+
Percentile.new(*(args + [{:variant => :discrete}]), &bl)
|
64
|
+
end
|
65
|
+
|
66
|
+
def self.median(*args, &bl)
|
67
|
+
Percentile.new(*(args + [50]), &bl)
|
68
|
+
end
|
69
|
+
|
70
|
+
def self.median_cont(*args, &bl)
|
71
|
+
Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.median_disc(*args, &bl)
|
75
|
+
Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
|
76
|
+
end
|
77
|
+
|
78
|
+
end # class Summarizer
|
79
|
+
end # module Bmg
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Summarizer
|
3
|
+
#
|
4
|
+
# ValueBy summarizer.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# # direct ruby usage
|
9
|
+
# Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
|
10
|
+
#
|
11
|
+
class ValueBy < Summarizer
|
12
|
+
|
13
|
+
DEFAULT_OPTIONS = {
|
14
|
+
:symbolize => false
|
15
|
+
}
|
16
|
+
|
17
|
+
# Returns {} as least value.
|
18
|
+
def least
|
19
|
+
{}
|
20
|
+
end
|
21
|
+
|
22
|
+
# Collects the value
|
23
|
+
def happens(memo, tuple)
|
24
|
+
by = tuple[options[:by]]
|
25
|
+
by = by.to_sym if by && options[:symbolize]
|
26
|
+
misuse!(tuple, memo) if memo.has_key?(by)
|
27
|
+
memo.tap{|m|
|
28
|
+
m[by] = extract_value(tuple)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
# Finalizes the computation.
|
33
|
+
def finalize(memo)
|
34
|
+
default_tuple.merge(memo)
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def default_tuple
|
40
|
+
(options[:series] || []).each_with_object({}){|s,ss|
|
41
|
+
s_def = options[:default]
|
42
|
+
s_def = s_def.to_sym if s_def && options[:symbolize]
|
43
|
+
ss[s] = s_def
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
def misuse!(tuple, memo)
|
48
|
+
msg = "Summarizer.value_by: summarization key + the serie must form be a candidate key"
|
49
|
+
msg += "\n"
|
50
|
+
msg += " Tuple: #{tuple.inspect}"
|
51
|
+
msg += " Memo: #{memo.inspect}"
|
52
|
+
raise MisuseError, msg
|
53
|
+
end
|
54
|
+
|
55
|
+
end # class ValueBy
|
56
|
+
|
57
|
+
def self.value_by(*args, &bl)
|
58
|
+
ValueBy.new(*args, &bl)
|
59
|
+
end
|
60
|
+
|
61
|
+
end # class Summarizer
|
62
|
+
end # module Bmg
|
data/lib/bmg/support.rb
CHANGED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Bmg
|
2
|
+
class Ordering
|
3
|
+
|
4
|
+
def initialize(attrs)
|
5
|
+
@attrs = attrs
|
6
|
+
end
|
7
|
+
attr_reader :attrs
|
8
|
+
|
9
|
+
def comparator
|
10
|
+
->(t1, t2) {
|
11
|
+
attrs.each do |(attr,direction)|
|
12
|
+
c = t1[attr] <=> t2[attr]
|
13
|
+
return (direction == :desc ? -c : c) unless c==0
|
14
|
+
end
|
15
|
+
0
|
16
|
+
}
|
17
|
+
end
|
18
|
+
|
19
|
+
end # class Ordering
|
20
|
+
end # module Bmg
|
@@ -32,7 +32,16 @@ module Bmg
|
|
32
32
|
}
|
33
33
|
when Hash
|
34
34
|
with.each_with_object(tuple.dup){|(k,v),dup|
|
35
|
-
|
35
|
+
case k
|
36
|
+
when Symbol
|
37
|
+
dup[k] = transform_attr(dup[k], v)
|
38
|
+
when Class
|
39
|
+
dup.keys.each do |attrname|
|
40
|
+
dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
|
41
|
+
end
|
42
|
+
else
|
43
|
+
raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
|
44
|
+
end
|
36
45
|
}
|
37
46
|
when Array
|
38
47
|
with.inject(tuple){|dup,on|
|
data/lib/bmg/version.rb
CHANGED
data/lib/bmg/writer.rb
CHANGED
@@ -1 +1,17 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Writer
|
3
|
+
|
4
|
+
protected
|
5
|
+
|
6
|
+
def infer_headers(from)
|
7
|
+
attrlist = if from.is_a?(Type) && from.knows_attrlist?
|
8
|
+
from.to_attrlist
|
9
|
+
elsif from.is_a?(Hash)
|
10
|
+
from.keys
|
11
|
+
end
|
12
|
+
attrlist ? output_preferences.order_attrlist(attrlist) : nil
|
13
|
+
end
|
14
|
+
|
15
|
+
end # module Writer
|
16
|
+
end # module Bmg
|
1
17
|
require_relative 'writer/csv'
|
data/lib/bmg/writer/csv.rb
CHANGED
@@ -19,24 +19,14 @@ module Bmg
|
|
19
19
|
relation.each do |tuple|
|
20
20
|
if csv.nil?
|
21
21
|
headers = infer_headers(tuple) if headers.nil?
|
22
|
-
|
22
|
+
csv_opts = csv_options.merge(headers: headers)
|
23
|
+
csv = CSV.new(string_or_io, **csv_opts)
|
23
24
|
end
|
24
25
|
csv << headers.map{|h| tuple[h] }
|
25
26
|
end
|
26
27
|
to_s ? string_or_io.string : string_or_io
|
27
28
|
end
|
28
29
|
|
29
|
-
private
|
30
|
-
|
31
|
-
def infer_headers(from)
|
32
|
-
attrlist = if from.is_a?(Type) && from.knows_attrlist?
|
33
|
-
from.to_attrlist
|
34
|
-
elsif from.is_a?(Hash)
|
35
|
-
from.keys
|
36
|
-
end
|
37
|
-
attrlist ? output_preferences.order_attrlist(attrlist) : nil
|
38
|
-
end
|
39
|
-
|
40
30
|
end # class Csv
|
41
31
|
end # module Writer
|
42
32
|
end # module Bmg
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Bmg
|
2
|
+
module Writer
|
3
|
+
class Xlsx
|
4
|
+
include Writer
|
5
|
+
|
6
|
+
DEFAULT_OPTIONS = {
|
7
|
+
}
|
8
|
+
|
9
|
+
def initialize(csv_options, output_preferences = nil)
|
10
|
+
@csv_options = DEFAULT_OPTIONS.merge(csv_options)
|
11
|
+
@output_preferences = OutputPreferences.dress(output_preferences)
|
12
|
+
end
|
13
|
+
attr_reader :csv_options, :output_preferences
|
14
|
+
|
15
|
+
def call(relation, path)
|
16
|
+
require 'write_xlsx'
|
17
|
+
dup._call(relation, path)
|
18
|
+
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
attr_reader :workbook, :worksheet
|
22
|
+
|
23
|
+
def _call(relation, path)
|
24
|
+
@workbook = WriteXLSX.new(path)
|
25
|
+
@worksheet = workbook.add_worksheet
|
26
|
+
|
27
|
+
headers = infer_headers(relation.type)
|
28
|
+
relation.each_with_index do |tuple,i|
|
29
|
+
headers = infer_headers(tuple) if headers.nil?
|
30
|
+
headers.each_with_index do |h,i|
|
31
|
+
worksheet.write_string(0, i, h)
|
32
|
+
end if i == 0
|
33
|
+
headers.each_with_index do |h,j|
|
34
|
+
meth, *args = write_pair(tuple[h])
|
35
|
+
worksheet.send(meth, 1+i, j, *args)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
workbook.close
|
40
|
+
path
|
41
|
+
end
|
42
|
+
|
43
|
+
def write_pair(value)
|
44
|
+
case value
|
45
|
+
when Numeric
|
46
|
+
[:write_number, value]
|
47
|
+
when Date
|
48
|
+
[:write_date_time, value, date_format]
|
49
|
+
else
|
50
|
+
[:write_string, value.to_s]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def date_format
|
55
|
+
@date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
|
56
|
+
end
|
57
|
+
|
58
|
+
end # class Xlsx
|
59
|
+
end # module Writer
|
60
|
+
module Relation
|
61
|
+
|
62
|
+
def to_xlsx(options = {}, path = nil, preferences = nil)
|
63
|
+
options, path = {}, options unless options.is_a?(Hash)
|
64
|
+
Writer::Xlsx.new(options, preferences).call(self, path)
|
65
|
+
end
|
66
|
+
|
67
|
+
end # module Relation
|
68
|
+
end # module Bmg
|