bmg 0.18.2 → 0.18.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/lib/bmg/algebra.rb +18 -0
  4. data/lib/bmg/algebra/shortcuts.rb +8 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator.rb +2 -0
  7. data/lib/bmg/operator/allbut.rb +1 -0
  8. data/lib/bmg/operator/autosummarize.rb +1 -0
  9. data/lib/bmg/operator/autowrap.rb +1 -0
  10. data/lib/bmg/operator/constants.rb +1 -0
  11. data/lib/bmg/operator/extend.rb +1 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +10 -4
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +1 -0
  18. data/lib/bmg/operator/project.rb +1 -0
  19. data/lib/bmg/operator/rename.rb +6 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +1 -0
  24. data/lib/bmg/operator/ungroup.rb +61 -0
  25. data/lib/bmg/operator/union.rb +1 -0
  26. data/lib/bmg/operator/unwrap.rb +47 -0
  27. data/lib/bmg/reader/csv.rb +29 -10
  28. data/lib/bmg/reader/excel.rb +23 -4
  29. data/lib/bmg/relation.rb +6 -0
  30. data/lib/bmg/relation/in_memory.rb +0 -1
  31. data/lib/bmg/sequel/relation.rb +1 -0
  32. data/lib/bmg/sequel/translator.rb +9 -2
  33. data/lib/bmg/sql.rb +4 -1
  34. data/lib/bmg/sql/processor.rb +1 -0
  35. data/lib/bmg/sql/processor/transform.rb +105 -0
  36. data/lib/bmg/sql/relation.rb +20 -6
  37. data/lib/bmg/summarizer.rb +36 -1
  38. data/lib/bmg/summarizer/avg.rb +3 -3
  39. data/lib/bmg/summarizer/by_proc.rb +41 -0
  40. data/lib/bmg/summarizer/distinct.rb +36 -0
  41. data/lib/bmg/summarizer/multiple.rb +46 -0
  42. data/lib/bmg/summarizer/percentile.rb +79 -0
  43. data/lib/bmg/summarizer/value_by.rb +62 -0
  44. data/lib/bmg/support/keys.rb +5 -0
  45. data/lib/bmg/support/tuple_transformer.rb +23 -1
  46. data/lib/bmg/type.rb +19 -1
  47. data/lib/bmg/version.rb +1 -1
  48. data/lib/bmg/writer.rb +16 -0
  49. data/lib/bmg/writer/csv.rb +2 -12
  50. data/lib/bmg/writer/xlsx.rb +68 -0
  51. metadata +25 -2
@@ -0,0 +1,41 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Generic summarizer that takes a Proc à la each_with_object.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
10
+ #
11
+ class ByProc < Summarizer
12
+
13
+ def initialize(least, by_proc)
14
+ @least = least
15
+ @by_proc = by_proc
16
+ end
17
+
18
+ # Returns the least value given at construction.
19
+ def least
20
+ @least
21
+ end
22
+
23
+ # Calls the proc with val and memo, and returns its result.
24
+ def happens(memo, val)
25
+ @by_proc.call(val, memo)
26
+ end
27
+
28
+ def finalize(memo)
29
+ memo
30
+ end
31
+
32
+ end # class ByProc
33
+
34
+ # Factors a by_proc summarizer
35
+ def self.by_proc(least = nil, proc = nil, &bl)
36
+ least, proc = nil, least if least.is_a?(Proc)
37
+ ByProc.new(least, proc || bl)
38
+ end
39
+
40
+ end # class Summarizer
41
+ end # module Bmg
@@ -0,0 +1,36 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Collect the distinct values as an array.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.distinct(:qty).summarize(...)
10
+ #
11
+ class Distinct < Summarizer
12
+
13
+ # Returns {} as least value.
14
+ def least()
15
+ {}
16
+ end
17
+
18
+ # Records val as a key of the memo hash
19
+ def _happens(memo, val)
20
+ memo[val] = true
21
+ memo
22
+ end
23
+
24
+ def finalize(memo)
25
+ memo.keys
26
+ end
27
+
28
+ end # class Distinct
29
+
30
+ # Factors a distinct summarizer
31
+ def self.distinct(*args, &bl)
32
+ Distinct.new(*args, &bl)
33
+ end
34
+
35
+ end # class Summarizer
36
+ end # module Bmg
@@ -0,0 +1,46 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # A summarizer that collects multiple summarization as a wrapped
5
+ # tuple.
6
+ #
7
+ # Example:
8
+ #
9
+ # # direct ruby usage
10
+ # Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
11
+ #
12
+ class Multiple < Summarizer
13
+
14
+ def initialize(defs)
15
+ @summarization = Summarizer.summarization(defs)
16
+ end
17
+
18
+ # Returns a hash mapping each key to its summarizer's least value.
19
+ def least()
20
+ @summarization.each_pair.each_with_object({}){|(k,v),memo|
21
+ memo[k] = v.least
22
+ }
23
+ end
24
+
25
+ # Passes val to each summarizer and returns a new memo hash
26
+ def happens(memo, val)
27
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
28
+ memo2[k] = v.happens(memo[k], val)
29
+ }
30
+ end
31
+
32
+ def finalize(memo)
33
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
34
+ memo2[k] = v.finalize(memo[k])
35
+ }
36
+ end
37
+
38
+ end # class Multiple
39
+
40
+ # Factors a multiple summarizer
41
+ def self.multiple(defs)
42
+ Multiple.new(defs)
43
+ end
44
+
45
+ end # class Summarizer
46
+ end # module Bmg
@@ -0,0 +1,79 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Percentile summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.percentile(:qty, 50).summarize(...)
10
+ #
11
+ class Percentile < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :variant => :continuous
15
+ }
16
+
17
+ def initialize(*args, &bl)
18
+ @nth = args.find{|a| a.is_a?(Integer) } || 50
19
+ functor = args.find{|a| a.is_a?(Symbol) } || bl
20
+ options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
21
+ memo.merge(opts)
22
+ }.dup
23
+ super(functor, options)
24
+ end
25
+
26
+ # Returns [] as least value.
27
+ def least()
28
+ []
29
+ end
30
+
31
+ # Collects the value
32
+ def _happens(memo, val)
33
+ memo << val
34
+ end
35
+
36
+ # Finalizes the computation.
37
+ def finalize(memo)
38
+ return nil if memo.empty?
39
+ index = memo.size.to_f * (@nth.to_f / 100.0)
40
+ floor, ceil = index.floor, index.ceil
41
+ ceil +=1 if floor == ceil
42
+ below = [floor - 1, 0].max
43
+ above = [[ceil - 1, memo.size - 1].min, 0].max
44
+ sorted = memo.sort
45
+ if options[:variant] == :continuous
46
+ (sorted[above] + sorted[below]) / 2.0
47
+ else
48
+ sorted[below]
49
+ end
50
+ end
51
+
52
+ end # class Percentile
53
+
54
+ def self.percentile(*args, &bl)
55
+ Percentile.new(*args, &bl)
56
+ end
57
+
58
+ def self.percentile_cont(*args, &bl)
59
+ Percentile.new(*(args + [{:variant => :continuous}]), &bl)
60
+ end
61
+
62
+ def self.percentile_disc(*args, &bl)
63
+ Percentile.new(*(args + [{:variant => :discrete}]), &bl)
64
+ end
65
+
66
+ def self.median(*args, &bl)
67
+ Percentile.new(*(args + [50]), &bl)
68
+ end
69
+
70
+ def self.median_cont(*args, &bl)
71
+ Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
72
+ end
73
+
74
+ def self.median_disc(*args, &bl)
75
+ Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
76
+ end
77
+
78
+ end # class Summarizer
79
+ end # module Bmg
@@ -0,0 +1,62 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # ValueBy summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
10
+ #
11
+ class ValueBy < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :symbolize => false
15
+ }
16
+
17
+ # Returns {} as least value.
18
+ def least
19
+ {}
20
+ end
21
+
22
+ # Collects the value
23
+ def happens(memo, tuple)
24
+ by = tuple[options[:by]]
25
+ by = by.to_sym if by && options[:symbolize]
26
+ misuse!(tuple, memo) if memo.has_key?(by)
27
+ memo.tap{|m|
28
+ m[by] = extract_value(tuple)
29
+ }
30
+ end
31
+
32
+ # Finalizes the computation.
33
+ def finalize(memo)
34
+ default_tuple.merge(memo)
35
+ end
36
+
37
+ private
38
+
39
+ def default_tuple
40
+ (options[:series] || []).each_with_object({}){|s,ss|
41
+ s_def = options[:default]
42
+ s = s.to_sym if s && options[:symbolize]
43
+ ss[s] = s_def
44
+ }
45
+ end
46
+
47
+ def misuse!(tuple, memo)
48
+ msg = "Summarizer.value_by: summarization key + the serie must form be a candidate key"
49
+ msg += "\n"
50
+ msg += " Tuple: #{tuple.inspect}"
51
+ msg += " Memo: #{memo.inspect}"
52
+ raise MisuseError, msg
53
+ end
54
+
55
+ end # class ValueBy
56
+
57
+ def self.value_by(*args, &bl)
58
+ ValueBy.new(*args, &bl)
59
+ end
60
+
61
+ end # class Summarizer
62
+ end # module Bmg
@@ -69,6 +69,11 @@ module Bmg
69
69
  Keys.new(shared, false)
70
70
  end
71
71
 
72
+ def unwrap(oldtype, newtype, attrs)
73
+ untouched = @keys.select{|k| (attrs & k).empty? }
74
+ Keys.new(untouched, false)
75
+ end
76
+
72
77
  public ## usuals
73
78
 
74
79
  def to_a
@@ -32,7 +32,16 @@ module Bmg
32
32
  }
33
33
  when Hash
34
34
  with.each_with_object(tuple.dup){|(k,v),dup|
35
- dup[k] = transform_attr(dup[k], v)
35
+ case k
36
+ when Symbol
37
+ dup[k] = transform_attr(dup[k], v)
38
+ when Class
39
+ dup.keys.each do |attrname|
40
+ dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
41
+ end
42
+ else
43
+ raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
44
+ end
36
45
  }
37
46
  when Array
38
47
  with.inject(tuple){|dup,on|
@@ -50,6 +59,19 @@ module Bmg
50
59
  when Regexp
51
60
  m = with.match(value.to_s)
52
61
  m.nil? ? m : m.to_s
62
+ when Class
63
+ return value if value.nil?
64
+ if with.respond_to?(:parse)
65
+ with.parse(value)
66
+ elsif with == Integer
67
+ Integer(value)
68
+ elsif with == Float
69
+ Float(value)
70
+ elsif with == String
71
+ value.to_s
72
+ else
73
+ raise ArgumentError, "#{with} should respond to `parse`"
74
+ end
53
75
  when Proc
54
76
  with.call(value)
55
77
  when Hash
data/lib/bmg/type.rb CHANGED
@@ -82,7 +82,7 @@ module Bmg
82
82
 
83
83
  def with_keys(keys)
84
84
  dup.tap{|x|
85
- x.keys = Keys.new(keys)
85
+ x.keys = keys ? Keys.new(keys) : nil
86
86
  }
87
87
  end
88
88
 
@@ -266,6 +266,15 @@ module Bmg
266
266
  }
267
267
  end
268
268
 
269
+ def ungroup(attrlist)
270
+ known_attributes!(attrlist) if typechecked? && knows_attrlist?
271
+ dup.tap{|x|
272
+ x.attrlist = nil
273
+ x.predicate = Predicate.tautology
274
+ x.keys = nil
275
+ }
276
+ end
277
+
269
278
  def union(other)
270
279
  if typechecked? && knows_attrlist? && other.knows_attrlist?
271
280
  missing = self.attrlist - other.attrlist
@@ -280,6 +289,15 @@ module Bmg
280
289
  }
281
290
  end
282
291
 
292
+ def unwrap(attrlist)
293
+ known_attributes!(attrlist) if typechecked? && knows_attrlist?
294
+ dup.tap{|x|
295
+ x.attrlist = nil
296
+ x.predicate = predicate.and_split(attrlist).last
297
+ x.keys = self._keys.unwrap(self, x, attrlist) if knows_keys?
298
+ }
299
+ end
300
+
283
301
  private
284
302
 
285
303
  def known_attributes!(attrs)
data/lib/bmg/version.rb CHANGED
@@ -2,7 +2,7 @@ module Bmg
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 18
5
- TINY = 2
5
+ TINY = 7
6
6
  end
7
7
  VERSION = "#{Version::MAJOR}.#{Version::MINOR}.#{Version::TINY}"
8
8
  end
data/lib/bmg/writer.rb CHANGED
@@ -1 +1,17 @@
1
+ module Bmg
2
+ module Writer
3
+
4
+ protected
5
+
6
+ def infer_headers(from)
7
+ attrlist = if from.is_a?(Type) && from.knows_attrlist?
8
+ from.to_attrlist
9
+ elsif from.is_a?(Hash)
10
+ from.keys
11
+ end
12
+ attrlist ? output_preferences.order_attrlist(attrlist) : nil
13
+ end
14
+
15
+ end # module Writer
16
+ end # module Bmg
1
17
  require_relative 'writer/csv'
@@ -19,24 +19,14 @@ module Bmg
19
19
  relation.each do |tuple|
20
20
  if csv.nil?
21
21
  headers = infer_headers(tuple) if headers.nil?
22
- csv = CSV.new(string_or_io, csv_options.merge(headers: headers))
22
+ csv_opts = csv_options.merge(headers: headers)
23
+ csv = CSV.new(string_or_io, **csv_opts)
23
24
  end
24
25
  csv << headers.map{|h| tuple[h] }
25
26
  end
26
27
  to_s ? string_or_io.string : string_or_io
27
28
  end
28
29
 
29
- private
30
-
31
- def infer_headers(from)
32
- attrlist = if from.is_a?(Type) && from.knows_attrlist?
33
- from.to_attrlist
34
- elsif from.is_a?(Hash)
35
- from.keys
36
- end
37
- attrlist ? output_preferences.order_attrlist(attrlist) : nil
38
- end
39
-
40
30
  end # class Csv
41
31
  end # module Writer
42
32
  end # module Bmg
@@ -0,0 +1,68 @@
1
+ module Bmg
2
+ module Writer
3
+ class Xlsx
4
+ include Writer
5
+
6
+ DEFAULT_OPTIONS = {
7
+ }
8
+
9
+ def initialize(csv_options, output_preferences = nil)
10
+ @csv_options = DEFAULT_OPTIONS.merge(csv_options)
11
+ @output_preferences = OutputPreferences.dress(output_preferences)
12
+ end
13
+ attr_reader :csv_options, :output_preferences
14
+
15
+ def call(relation, path)
16
+ require 'write_xlsx'
17
+ dup._call(relation, path)
18
+ end
19
+
20
+ protected
21
+ attr_reader :workbook, :worksheet
22
+
23
+ def _call(relation, path)
24
+ @workbook = WriteXLSX.new(path)
25
+ @worksheet = workbook.add_worksheet
26
+
27
+ headers = infer_headers(relation.type)
28
+ relation.each_with_index do |tuple,i|
29
+ headers = infer_headers(tuple) if headers.nil?
30
+ headers.each_with_index do |h,i|
31
+ worksheet.write_string(0, i, h)
32
+ end if i == 0
33
+ headers.each_with_index do |h,j|
34
+ meth, *args = write_pair(tuple[h])
35
+ worksheet.send(meth, 1+i, j, *args)
36
+ end
37
+ end
38
+
39
+ workbook.close
40
+ path
41
+ end
42
+
43
+ def write_pair(value)
44
+ case value
45
+ when Numeric
46
+ [:write_number, value]
47
+ when Date
48
+ [:write_date_time, value, date_format]
49
+ else
50
+ [:write_string, value.to_s]
51
+ end
52
+ end
53
+
54
+ def date_format
55
+ @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
56
+ end
57
+
58
+ end # class Xlsx
59
+ end # module Writer
60
+ module Relation
61
+
62
+ def to_xlsx(options = {}, path = nil, preferences = nil)
63
+ options, path = {}, options unless options.is_a?(Hash)
64
+ Writer::Xlsx.new(options, preferences).call(self, path)
65
+ end
66
+
67
+ end # module Relation
68
+ end # module Bmg