bmg 0.18.0 → 0.18.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/lib/bmg/algebra.rb +1 -0
  4. data/lib/bmg/algebra/shortcuts.rb +6 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator/allbut.rb +27 -0
  7. data/lib/bmg/operator/autosummarize.rb +27 -4
  8. data/lib/bmg/operator/autowrap.rb +27 -0
  9. data/lib/bmg/operator/constants.rb +7 -0
  10. data/lib/bmg/operator/extend.rb +7 -0
  11. data/lib/bmg/operator/group.rb +1 -0
  12. data/lib/bmg/operator/image.rb +41 -2
  13. data/lib/bmg/operator/join.rb +1 -0
  14. data/lib/bmg/operator/matching.rb +1 -0
  15. data/lib/bmg/operator/not_matching.rb +1 -0
  16. data/lib/bmg/operator/page.rb +2 -7
  17. data/lib/bmg/operator/project.rb +3 -2
  18. data/lib/bmg/operator/rename.rb +12 -5
  19. data/lib/bmg/operator/restrict.rb +1 -0
  20. data/lib/bmg/operator/rxmatch.rb +1 -0
  21. data/lib/bmg/operator/summarize.rb +2 -17
  22. data/lib/bmg/operator/transform.rb +39 -1
  23. data/lib/bmg/operator/union.rb +1 -0
  24. data/lib/bmg/reader/csv.rb +29 -10
  25. data/lib/bmg/reader/excel.rb +23 -4
  26. data/lib/bmg/relation.rb +18 -0
  27. data/lib/bmg/relation/empty.rb +4 -0
  28. data/lib/bmg/relation/in_memory.rb +10 -1
  29. data/lib/bmg/relation/materialized.rb +6 -0
  30. data/lib/bmg/relation/spied.rb +5 -0
  31. data/lib/bmg/sequel/relation.rb +5 -0
  32. data/lib/bmg/sql/relation.rb +2 -3
  33. data/lib/bmg/summarizer.rb +36 -1
  34. data/lib/bmg/summarizer/avg.rb +3 -3
  35. data/lib/bmg/summarizer/by_proc.rb +41 -0
  36. data/lib/bmg/summarizer/distinct.rb +36 -0
  37. data/lib/bmg/summarizer/multiple.rb +46 -0
  38. data/lib/bmg/summarizer/percentile.rb +79 -0
  39. data/lib/bmg/summarizer/value_by.rb +62 -0
  40. data/lib/bmg/support.rb +1 -0
  41. data/lib/bmg/support/ordering.rb +20 -0
  42. data/lib/bmg/support/tuple_transformer.rb +10 -1
  43. data/lib/bmg/version.rb +1 -1
  44. data/lib/bmg/writer.rb +16 -0
  45. data/lib/bmg/writer/csv.rb +2 -12
  46. data/lib/bmg/writer/xlsx.rb +68 -0
  47. metadata +23 -2
@@ -16,13 +16,13 @@ module Bmg
16
16
  end
17
17
 
18
18
  # Collects one more value + the sum of all
19
- def _happens(memo, val)
19
+ def _happens(memo, val)
20
20
  [memo.first + val, memo.last + 1]
21
21
  end
22
22
 
23
23
  # Finalizes the computation.
24
- def finalize(memo)
25
- memo.first / memo.last
24
+ def finalize(memo)
25
+ memo.first / memo.last
26
26
  end
27
27
 
28
28
  end # class Avg
@@ -0,0 +1,41 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Generic summarizer that takes a Proc à la each_with_object.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
10
+ #
11
+ class ByProc < Summarizer
12
+
13
+ def initialize(least, by_proc)
14
+ @least = least
15
+ @by_proc = by_proc
16
+ end
17
+
18
+ # Returns the least value supplied at construction.
19
+ def least
20
+ @least
21
+ end
22
+
23
+ # Delegates to the proc, called with (val, memo), and returns its result
24
+ def happens(memo, val)
25
+ @by_proc.call(val, memo)
26
+ end
27
+
28
+ def finalize(memo)
29
+ memo
30
+ end
31
+
32
+ end # class ByProc
33
+
34
+ # Factors a by_proc summarizer; `least` may be omitted (proc or block only)
35
+ def self.by_proc(least = nil, proc = nil, &bl)
36
+ least, proc = nil, least if least.is_a?(Proc)
37
+ ByProc.new(least, proc || bl)
38
+ end
39
+
40
+ end # class Summarizer
41
+ end # module Bmg
@@ -0,0 +1,36 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Collect the distinct values as an array.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.distinct(:qty).summarize(...)
10
+ #
11
+ class Distinct < Summarizer
12
+
13
+ # Returns {} as least value.
14
+ def least()
15
+ {}
16
+ end
17
+
18
+ # Records val as a key of the memo hash
19
+ def _happens(memo, val)
20
+ memo[val] = true
21
+ memo
22
+ end
23
+
24
+ def finalize(memo)
25
+ memo.keys
26
+ end
27
+
28
+ end # class Distinct
29
+
30
+ # Factors a distinct summarizer
31
+ def self.distinct(*args, &bl)
32
+ Distinct.new(*args, &bl)
33
+ end
34
+
35
+ end # class Summarizer
36
+ end # module Bmg
@@ -0,0 +1,46 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # A summarizer that collects multiple summarizations as a wrapped
5
+ # tuple.
6
+ #
7
+ # Example:
8
+ #
9
+ # # direct ruby usage
10
+ # Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
11
+ #
12
+ class Multiple < Summarizer
13
+
14
+ def initialize(defs)
15
+ @summarization = Summarizer.summarization(defs)
16
+ end
17
+
18
+ # Returns a hash mapping each key to its summarizer's least value.
19
+ def least()
20
+ @summarization.each_pair.each_with_object({}){|(k,v),memo|
21
+ memo[k] = v.least
22
+ }
23
+ end
24
+
25
+ # Feeds val to every summarizer, returning a new memo hash
26
+ def happens(memo, val)
27
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
28
+ memo2[k] = v.happens(memo[k], val)
29
+ }
30
+ end
31
+
32
+ def finalize(memo)
33
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
34
+ memo2[k] = v.finalize(memo[k])
35
+ }
36
+ end
37
+
38
+ end # class Multiple
39
+
40
+ # Factors a multiple summarizer
41
+ def self.multiple(defs)
42
+ Multiple.new(defs)
43
+ end
44
+
45
+ end # class Summarizer
46
+ end # module Bmg
@@ -0,0 +1,79 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Percentile summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.percentile(:qty, 50).summarize(...)
10
+ #
11
+ class Percentile < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :variant => :continuous
15
+ }
16
+
17
+ def initialize(*args, &bl)
18
+ @nth = args.find{|a| a.is_a?(Integer) } || 50
19
+ functor = args.find{|a| a.is_a?(Symbol) } || bl
20
+ options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
21
+ memo.merge(opts)
22
+ }.dup
23
+ super(functor, options)
24
+ end
25
+
26
+ # Returns [] as least value.
27
+ def least()
28
+ []
29
+ end
30
+
31
+ # Collects the value
32
+ def _happens(memo, val)
33
+ memo << val
34
+ end
35
+
36
+ # Finalizes the computation.
37
+ def finalize(memo)
38
+ return nil if memo.empty?
39
+ index = memo.size.to_f * (@nth.to_f / 100.0)
40
+ floor, ceil = index.floor, index.ceil
41
+ ceil +=1 if floor == ceil
42
+ below = [floor - 1, 0].max
43
+ above = [[ceil - 1, memo.size - 1].min, 0].max
44
+ sorted = memo.sort
45
+ if options[:variant] == :continuous
46
+ (sorted[above] + sorted[below]) / 2.0
47
+ else
48
+ sorted[below]
49
+ end
50
+ end
51
+
52
+ end # class Percentile
53
+
54
+ def self.percentile(*args, &bl)
55
+ Percentile.new(*args, &bl)
56
+ end
57
+
58
+ def self.percentile_cont(*args, &bl)
59
+ Percentile.new(*(args + [{:variant => :continuous}]), &bl)
60
+ end
61
+
62
+ def self.percentile_disc(*args, &bl)
63
+ Percentile.new(*(args + [{:variant => :discrete}]), &bl)
64
+ end
65
+
66
+ def self.median(*args, &bl)
67
+ Percentile.new(*(args + [50]), &bl)
68
+ end
69
+
70
+ def self.median_cont(*args, &bl)
71
+ Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
72
+ end
73
+
74
+ def self.median_disc(*args, &bl)
75
+ Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
76
+ end
77
+
78
+ end # class Summarizer
79
+ end # module Bmg
@@ -0,0 +1,62 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # ValueBy summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
10
+ #
11
+ class ValueBy < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :symbolize => false
15
+ }
16
+
17
+ # Returns {} as least value.
18
+ def least
19
+ {}
20
+ end
21
+
22
+ # Collects the value
23
+ def happens(memo, tuple)
24
+ by = tuple[options[:by]]
25
+ by = by.to_sym if by && options[:symbolize]
26
+ misuse!(tuple, memo) if memo.has_key?(by)
27
+ memo.tap{|m|
28
+ m[by] = extract_value(tuple)
29
+ }
30
+ end
31
+
32
+ # Finalizes the computation.
33
+ def finalize(memo)
34
+ default_tuple.merge(memo)
35
+ end
36
+
37
+ private
38
+
39
+ def default_tuple
40
+ (options[:series] || []).each_with_object({}){|s,ss|
41
+ s_def = options[:default]
42
+ s_def = s_def.to_sym if s_def && options[:symbolize]
43
+ ss[s] = s_def
44
+ }
45
+ end
46
+
47
+ def misuse!(tuple, memo)
48
+ msg = "Summarizer.value_by: summarization key + the serie must form be a candidate key"
49
+ msg += "\n"
50
+ msg += " Tuple: #{tuple.inspect}"
51
+ msg += " Memo: #{memo.inspect}"
52
+ raise MisuseError, msg
53
+ end
54
+
55
+ end # class ValueBy
56
+
57
+ def self.value_by(*args, &bl)
58
+ ValueBy.new(*args, &bl)
59
+ end
60
+
61
+ end # class Summarizer
62
+ end # module Bmg
data/lib/bmg/support.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require_relative 'support/tuple_algebra'
2
2
  require_relative 'support/tuple_transformer'
3
3
  require_relative 'support/keys'
4
+ require_relative 'support/ordering'
4
5
  require_relative 'support/output_preferences'
@@ -0,0 +1,20 @@
1
+ module Bmg
2
+ class Ordering
3
+
4
+ def initialize(attrs)
5
+ @attrs = attrs
6
+ end
7
+ attr_reader :attrs
8
+
9
+ def comparator
10
+ ->(t1, t2) {
11
+ attrs.each do |(attr,direction)|
12
+ c = t1[attr] <=> t2[attr]
13
+ return (direction == :desc ? -c : c) unless c==0
14
+ end
15
+ 0
16
+ }
17
+ end
18
+
19
+ end # class Ordering
20
+ end # module Bmg
@@ -32,7 +32,16 @@ module Bmg
32
32
  }
33
33
  when Hash
34
34
  with.each_with_object(tuple.dup){|(k,v),dup|
35
- dup[k] = transform_attr(dup[k], v)
35
+ case k
36
+ when Symbol
37
+ dup[k] = transform_attr(dup[k], v)
38
+ when Class
39
+ dup.keys.each do |attrname|
40
+ dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
41
+ end
42
+ else
43
+ raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
44
+ end
36
45
  }
37
46
  when Array
38
47
  with.inject(tuple){|dup,on|
data/lib/bmg/version.rb CHANGED
@@ -2,7 +2,7 @@ module Bmg
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 18
5
- TINY = 0
5
+ TINY = 5
6
6
  end
7
7
  VERSION = "#{Version::MAJOR}.#{Version::MINOR}.#{Version::TINY}"
8
8
  end
data/lib/bmg/writer.rb CHANGED
@@ -1 +1,17 @@
1
+ module Bmg
2
+ module Writer
3
+
4
+ protected
5
+
6
+ def infer_headers(from)
7
+ attrlist = if from.is_a?(Type) && from.knows_attrlist?
8
+ from.to_attrlist
9
+ elsif from.is_a?(Hash)
10
+ from.keys
11
+ end
12
+ attrlist ? output_preferences.order_attrlist(attrlist) : nil
13
+ end
14
+
15
+ end # module Writer
16
+ end # module Bmg
1
17
  require_relative 'writer/csv'
@@ -19,24 +19,14 @@ module Bmg
19
19
  relation.each do |tuple|
20
20
  if csv.nil?
21
21
  headers = infer_headers(tuple) if headers.nil?
22
- csv = CSV.new(string_or_io, csv_options.merge(headers: headers))
22
+ csv_opts = csv_options.merge(headers: headers)
23
+ csv = CSV.new(string_or_io, **csv_opts)
23
24
  end
24
25
  csv << headers.map{|h| tuple[h] }
25
26
  end
26
27
  to_s ? string_or_io.string : string_or_io
27
28
  end
28
29
 
29
- private
30
-
31
- def infer_headers(from)
32
- attrlist = if from.is_a?(Type) && from.knows_attrlist?
33
- from.to_attrlist
34
- elsif from.is_a?(Hash)
35
- from.keys
36
- end
37
- attrlist ? output_preferences.order_attrlist(attrlist) : nil
38
- end
39
-
40
30
  end # class Csv
41
31
  end # module Writer
42
32
  end # module Bmg
@@ -0,0 +1,68 @@
1
+ module Bmg
2
+ module Writer
3
+ class Xlsx
4
+ include Writer
5
+
6
+ DEFAULT_OPTIONS = {
7
+ }
8
+
9
+ def initialize(csv_options, output_preferences = nil)
10
+ @csv_options = DEFAULT_OPTIONS.merge(csv_options)
11
+ @output_preferences = OutputPreferences.dress(output_preferences)
12
+ end
13
+ attr_reader :csv_options, :output_preferences
14
+
15
+ def call(relation, path)
16
+ require 'write_xlsx'
17
+ dup._call(relation, path)
18
+ end
19
+
20
+ protected
21
+ attr_reader :workbook, :worksheet
22
+
23
+ def _call(relation, path)
24
+ @workbook = WriteXLSX.new(path)
25
+ @worksheet = workbook.add_worksheet
26
+
27
+ headers = infer_headers(relation.type)
28
+ relation.each_with_index do |tuple,i|
29
+ headers = infer_headers(tuple) if headers.nil?
30
+ headers.each_with_index do |h,i|
31
+ worksheet.write_string(0, i, h)
32
+ end if i == 0
33
+ headers.each_with_index do |h,j|
34
+ meth, *args = write_pair(tuple[h])
35
+ worksheet.send(meth, 1+i, j, *args)
36
+ end
37
+ end
38
+
39
+ workbook.close
40
+ path
41
+ end
42
+
43
+ def write_pair(value)
44
+ case value
45
+ when Numeric
46
+ [:write_number, value]
47
+ when Date
48
+ [:write_date_time, value, date_format]
49
+ else
50
+ [:write_string, value.to_s]
51
+ end
52
+ end
53
+
54
+ def date_format
55
+ @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
56
+ end
57
+
58
+ end # class Xlsx
59
+ end # module Writer
60
+ module Relation
61
+
62
+ def to_xlsx(options = {}, path = nil, preferences = nil)
63
+ options, path = {}, options unless options.is_a?(Hash)
64
+ Writer::Xlsx.new(options, preferences).call(self, path)
65
+ end
66
+
67
+ end # module Relation
68
+ end # module Bmg