bmg 0.18.2 → 0.18.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/lib/bmg/algebra.rb +18 -0
  4. data/lib/bmg/algebra/shortcuts.rb +8 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator.rb +2 -0
  7. data/lib/bmg/operator/allbut.rb +1 -0
  8. data/lib/bmg/operator/autosummarize.rb +1 -0
  9. data/lib/bmg/operator/autowrap.rb +1 -0
  10. data/lib/bmg/operator/constants.rb +1 -0
  11. data/lib/bmg/operator/extend.rb +1 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +10 -4
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +1 -0
  18. data/lib/bmg/operator/project.rb +1 -0
  19. data/lib/bmg/operator/rename.rb +6 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +1 -0
  24. data/lib/bmg/operator/ungroup.rb +61 -0
  25. data/lib/bmg/operator/union.rb +1 -0
  26. data/lib/bmg/operator/unwrap.rb +47 -0
  27. data/lib/bmg/reader/csv.rb +29 -10
  28. data/lib/bmg/reader/excel.rb +23 -4
  29. data/lib/bmg/relation.rb +6 -0
  30. data/lib/bmg/relation/in_memory.rb +0 -1
  31. data/lib/bmg/sequel/relation.rb +1 -0
  32. data/lib/bmg/sequel/translator.rb +9 -2
  33. data/lib/bmg/sql.rb +4 -1
  34. data/lib/bmg/sql/processor.rb +1 -0
  35. data/lib/bmg/sql/processor/transform.rb +105 -0
  36. data/lib/bmg/sql/relation.rb +20 -6
  37. data/lib/bmg/summarizer.rb +36 -1
  38. data/lib/bmg/summarizer/avg.rb +3 -3
  39. data/lib/bmg/summarizer/by_proc.rb +41 -0
  40. data/lib/bmg/summarizer/distinct.rb +36 -0
  41. data/lib/bmg/summarizer/multiple.rb +46 -0
  42. data/lib/bmg/summarizer/percentile.rb +79 -0
  43. data/lib/bmg/summarizer/value_by.rb +62 -0
  44. data/lib/bmg/support/keys.rb +5 -0
  45. data/lib/bmg/support/tuple_transformer.rb +23 -1
  46. data/lib/bmg/type.rb +19 -1
  47. data/lib/bmg/version.rb +1 -1
  48. data/lib/bmg/writer.rb +16 -0
  49. data/lib/bmg/writer/csv.rb +2 -12
  50. data/lib/bmg/writer/xlsx.rb +68 -0
  51. metadata +25 -2
@@ -0,0 +1,41 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Generic summarizer that takes a Proc à la each_with_object.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.by_proc{|t,memo| ... }.summarize(...)
10
+ #
11
+ class ByProc < Summarizer
12
+
13
+ def initialize(least, by_proc)
14
+ @least = least
15
+ @by_proc = by_proc
16
+ end
17
+
18
+ # Returns the least value given at construction.
19
+ def least
20
+ @least
21
+ end
22
+
23
+ # Calls the proc with val and the memo, and returns its result
24
+ def happens(memo, val)
25
+ @by_proc.call(val, memo)
26
+ end
27
+
28
+ def finalize(memo)
29
+ memo
30
+ end
31
+
32
+ end # class ByProc
33
+
34
+ # Factors a by_proc summarizer
35
+ def self.by_proc(least = nil, proc = nil, &bl)
36
+ least, proc = nil, least if least.is_a?(Proc)
37
+ ByProc.new(least, proc || bl)
38
+ end
39
+
40
+ end # class Summarizer
41
+ end # module Bmg
@@ -0,0 +1,36 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Collect the distinct values as an array.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.distinct(:qty).summarize(...)
10
+ #
11
+ class Distinct < Summarizer
12
+
13
+ # Returns {} as least value.
14
+ def least()
15
+ {}
16
+ end
17
+
18
+ # Registers val as a key of the memo hash
19
+ def _happens(memo, val)
20
+ memo[val] = true
21
+ memo
22
+ end
23
+
24
+ def finalize(memo)
25
+ memo.keys
26
+ end
27
+
28
+ end # class Distinct
29
+
30
+ # Factors a distinct summarizer
31
+ def self.distinct(*args, &bl)
32
+ Distinct.new(*args, &bl)
33
+ end
34
+
35
+ end # class Summarizer
36
+ end # module Bmg
@@ -0,0 +1,46 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # A summarizer that collects multiple summarizations as a wrapped
5
+ # tuple.
6
+ #
7
+ # Example:
8
+ #
9
+ # # direct ruby usage
10
+ # Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
11
+ #
12
+ class Multiple < Summarizer
13
+
14
+ def initialize(defs)
15
+ @summarization = Summarizer.summarization(defs)
16
+ end
17
+
18
+ # Returns a hash mapping each key to its summarizer's least value.
19
+ def least()
20
+ @summarization.each_pair.each_with_object({}){|(k,v),memo|
21
+ memo[k] = v.least
22
+ }
23
+ end
24
+
25
+ # Feeds val to each sub-summarizer and returns the new memo hash
26
+ def happens(memo, val)
27
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
28
+ memo2[k] = v.happens(memo[k], val)
29
+ }
30
+ end
31
+
32
+ def finalize(memo)
33
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
34
+ memo2[k] = v.finalize(memo[k])
35
+ }
36
+ end
37
+
38
+ end # class Multiple
39
+
40
+ # Factors a multiple summarizer
41
+ def self.multiple(defs)
42
+ Multiple.new(defs)
43
+ end
44
+
45
+ end # class Summarizer
46
+ end # module Bmg
@@ -0,0 +1,79 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Percentile summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.percentile(:qty, 50).summarize(...)
10
+ #
11
+ class Percentile < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :variant => :continuous
15
+ }
16
+
17
+ def initialize(*args, &bl)
18
+ @nth = args.find{|a| a.is_a?(Integer) } || 50
19
+ functor = args.find{|a| a.is_a?(Symbol) } || bl
20
+ options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
21
+ memo.merge(opts)
22
+ }.dup
23
+ super(functor, options)
24
+ end
25
+
26
+ # Returns [] as least value.
27
+ def least()
28
+ []
29
+ end
30
+
31
+ # Collects the value
32
+ def _happens(memo, val)
33
+ memo << val
34
+ end
35
+
36
+ # Finalizes the computation.
37
+ def finalize(memo)
38
+ return nil if memo.empty?
39
+ index = memo.size.to_f * (@nth.to_f / 100.0)
40
+ floor, ceil = index.floor, index.ceil
41
+ ceil +=1 if floor == ceil
42
+ below = [floor - 1, 0].max
43
+ above = [[ceil - 1, memo.size - 1].min, 0].max
44
+ sorted = memo.sort
45
+ if options[:variant] == :continuous
46
+ (sorted[above] + sorted[below]) / 2.0
47
+ else
48
+ sorted[below]
49
+ end
50
+ end
51
+
52
+ end # class Percentile
53
+
54
+ def self.percentile(*args, &bl)
55
+ Percentile.new(*args, &bl)
56
+ end
57
+
58
+ def self.percentile_cont(*args, &bl)
59
+ Percentile.new(*(args + [{:variant => :continuous}]), &bl)
60
+ end
61
+
62
+ def self.percentile_disc(*args, &bl)
63
+ Percentile.new(*(args + [{:variant => :discrete}]), &bl)
64
+ end
65
+
66
+ def self.median(*args, &bl)
67
+ Percentile.new(*(args + [50]), &bl)
68
+ end
69
+
70
+ def self.median_cont(*args, &bl)
71
+ Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
72
+ end
73
+
74
+ def self.median_disc(*args, &bl)
75
+ Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
76
+ end
77
+
78
+ end # class Summarizer
79
+ end # module Bmg
@@ -0,0 +1,62 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # ValueBy summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.value_by(:qty, :by => :serie).summarize(...)
10
+ #
11
+ class ValueBy < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :symbolize => false
15
+ }
16
+
17
+ # Returns {} as least value.
18
+ def least
19
+ {}
20
+ end
21
+
22
+ # Collects the value
23
+ def happens(memo, tuple)
24
+ by = tuple[options[:by]]
25
+ by = by.to_sym if by && options[:symbolize]
26
+ misuse!(tuple, memo) if memo.has_key?(by)
27
+ memo.tap{|m|
28
+ m[by] = extract_value(tuple)
29
+ }
30
+ end
31
+
32
+ # Finalizes the computation.
33
+ def finalize(memo)
34
+ default_tuple.merge(memo)
35
+ end
36
+
37
+ private
38
+
39
+ def default_tuple
40
+ (options[:series] || []).each_with_object({}){|s,ss|
41
+ s_def = options[:default]
42
+ s = s.to_sym if s && options[:symbolize]
43
+ ss[s] = s_def
44
+ }
45
+ end
46
+
47
+ def misuse!(tuple, memo)
48
+ msg = "Summarizer.value_by: summarization key + the serie must form be a candidate key"
49
+ msg += "\n"
50
+ msg += " Tuple: #{tuple.inspect}"
51
+ msg += " Memo: #{memo.inspect}"
52
+ raise MisuseError, msg
53
+ end
54
+
55
+ end # class ValueBy
56
+
57
+ def self.value_by(*args, &bl)
58
+ ValueBy.new(*args, &bl)
59
+ end
60
+
61
+ end # class Summarizer
62
+ end # module Bmg
@@ -69,6 +69,11 @@ module Bmg
69
69
  Keys.new(shared, false)
70
70
  end
71
71
 
72
+ def unwrap(oldtype, newtype, attrs)
73
+ untouched = @keys.select{|k| (attrs & k).empty? }
74
+ Keys.new(untouched, false)
75
+ end
76
+
72
77
  public ## usuals
73
78
 
74
79
  def to_a
@@ -32,7 +32,16 @@ module Bmg
32
32
  }
33
33
  when Hash
34
34
  with.each_with_object(tuple.dup){|(k,v),dup|
35
- dup[k] = transform_attr(dup[k], v)
35
+ case k
36
+ when Symbol
37
+ dup[k] = transform_attr(dup[k], v)
38
+ when Class
39
+ dup.keys.each do |attrname|
40
+ dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
41
+ end
42
+ else
43
+ raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
44
+ end
36
45
  }
37
46
  when Array
38
47
  with.inject(tuple){|dup,on|
@@ -50,6 +59,19 @@ module Bmg
50
59
  when Regexp
51
60
  m = with.match(value.to_s)
52
61
  m.nil? ? m : m.to_s
62
+ when Class
63
+ return value if value.nil?
64
+ if with.respond_to?(:parse)
65
+ with.parse(value)
66
+ elsif with == Integer
67
+ Integer(value)
68
+ elsif with == Float
69
+ Float(value)
70
+ elsif with == String
71
+ value.to_s
72
+ else
73
+ raise ArgumentError, "#{with} should respond to `parse`"
74
+ end
53
75
  when Proc
54
76
  with.call(value)
55
77
  when Hash
data/lib/bmg/type.rb CHANGED
@@ -82,7 +82,7 @@ module Bmg
82
82
 
83
83
  def with_keys(keys)
84
84
  dup.tap{|x|
85
- x.keys = Keys.new(keys)
85
+ x.keys = keys ? Keys.new(keys) : nil
86
86
  }
87
87
  end
88
88
 
@@ -266,6 +266,15 @@ module Bmg
266
266
  }
267
267
  end
268
268
 
269
+ def ungroup(attrlist)
270
+ known_attributes!(attrlist) if typechecked? && knows_attrlist?
271
+ dup.tap{|x|
272
+ x.attrlist = nil
273
+ x.predicate = Predicate.tautology
274
+ x.keys = nil
275
+ }
276
+ end
277
+
269
278
  def union(other)
270
279
  if typechecked? && knows_attrlist? && other.knows_attrlist?
271
280
  missing = self.attrlist - other.attrlist
@@ -280,6 +289,15 @@ module Bmg
280
289
  }
281
290
  end
282
291
 
292
+ def unwrap(attrlist)
293
+ known_attributes!(attrlist) if typechecked? && knows_attrlist?
294
+ dup.tap{|x|
295
+ x.attrlist = nil
296
+ x.predicate = predicate.and_split(attrlist).last
297
+ x.keys = self._keys.unwrap(self, x, attrlist) if knows_keys?
298
+ }
299
+ end
300
+
283
301
  private
284
302
 
285
303
  def known_attributes!(attrs)
data/lib/bmg/version.rb CHANGED
@@ -2,7 +2,7 @@ module Bmg
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 18
5
- TINY = 2
5
+ TINY = 7
6
6
  end
7
7
  VERSION = "#{Version::MAJOR}.#{Version::MINOR}.#{Version::TINY}"
8
8
  end
data/lib/bmg/writer.rb CHANGED
@@ -1 +1,17 @@
1
+ module Bmg
2
+ module Writer
3
+
4
+ protected
5
+
6
+ def infer_headers(from)
7
+ attrlist = if from.is_a?(Type) && from.knows_attrlist?
8
+ from.to_attrlist
9
+ elsif from.is_a?(Hash)
10
+ from.keys
11
+ end
12
+ attrlist ? output_preferences.order_attrlist(attrlist) : nil
13
+ end
14
+
15
+ end # module Writer
16
+ end # module Bmg
1
17
  require_relative 'writer/csv'
@@ -19,24 +19,14 @@ module Bmg
19
19
  relation.each do |tuple|
20
20
  if csv.nil?
21
21
  headers = infer_headers(tuple) if headers.nil?
22
- csv = CSV.new(string_or_io, csv_options.merge(headers: headers))
22
+ csv_opts = csv_options.merge(headers: headers)
23
+ csv = CSV.new(string_or_io, **csv_opts)
23
24
  end
24
25
  csv << headers.map{|h| tuple[h] }
25
26
  end
26
27
  to_s ? string_or_io.string : string_or_io
27
28
  end
28
29
 
29
- private
30
-
31
- def infer_headers(from)
32
- attrlist = if from.is_a?(Type) && from.knows_attrlist?
33
- from.to_attrlist
34
- elsif from.is_a?(Hash)
35
- from.keys
36
- end
37
- attrlist ? output_preferences.order_attrlist(attrlist) : nil
38
- end
39
-
40
30
  end # class Csv
41
31
  end # module Writer
42
32
  end # module Bmg
@@ -0,0 +1,68 @@
1
+ module Bmg
2
+ module Writer
3
+ class Xlsx
4
+ include Writer
5
+
6
+ DEFAULT_OPTIONS = {
7
+ }
8
+
9
+ def initialize(csv_options, output_preferences = nil)
10
+ @csv_options = DEFAULT_OPTIONS.merge(csv_options)
11
+ @output_preferences = OutputPreferences.dress(output_preferences)
12
+ end
13
+ attr_reader :csv_options, :output_preferences
14
+
15
+ def call(relation, path)
16
+ require 'write_xlsx'
17
+ dup._call(relation, path)
18
+ end
19
+
20
+ protected
21
+ attr_reader :workbook, :worksheet
22
+
23
+ def _call(relation, path)
24
+ @workbook = WriteXLSX.new(path)
25
+ @worksheet = workbook.add_worksheet
26
+
27
+ headers = infer_headers(relation.type)
28
+ relation.each_with_index do |tuple,i|
29
+ headers = infer_headers(tuple) if headers.nil?
30
+ headers.each_with_index do |h,i|
31
+ worksheet.write_string(0, i, h)
32
+ end if i == 0
33
+ headers.each_with_index do |h,j|
34
+ meth, *args = write_pair(tuple[h])
35
+ worksheet.send(meth, 1+i, j, *args)
36
+ end
37
+ end
38
+
39
+ workbook.close
40
+ path
41
+ end
42
+
43
+ def write_pair(value)
44
+ case value
45
+ when Numeric
46
+ [:write_number, value]
47
+ when Date
48
+ [:write_date_time, value, date_format]
49
+ else
50
+ [:write_string, value.to_s]
51
+ end
52
+ end
53
+
54
+ def date_format
55
+ @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
56
+ end
57
+
58
+ end # class Xlsx
59
+ end # module Writer
60
+ module Relation
61
+
62
+ def to_xlsx(options = {}, path = nil, preferences = nil)
63
+ options, path = {}, options unless options.is_a?(Hash)
64
+ Writer::Xlsx.new(options, preferences).call(self, path)
65
+ end
66
+
67
+ end # module Relation
68
+ end # module Bmg