bmg 0.18.2 → 0.18.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/lib/bmg/algebra.rb +18 -0
  4. data/lib/bmg/algebra/shortcuts.rb +8 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator.rb +2 -0
  7. data/lib/bmg/operator/allbut.rb +1 -0
  8. data/lib/bmg/operator/autosummarize.rb +1 -0
  9. data/lib/bmg/operator/autowrap.rb +1 -0
  10. data/lib/bmg/operator/constants.rb +1 -0
  11. data/lib/bmg/operator/extend.rb +1 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +10 -4
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +1 -0
  18. data/lib/bmg/operator/project.rb +1 -0
  19. data/lib/bmg/operator/rename.rb +6 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +1 -0
  24. data/lib/bmg/operator/ungroup.rb +61 -0
  25. data/lib/bmg/operator/union.rb +1 -0
  26. data/lib/bmg/operator/unwrap.rb +47 -0
  27. data/lib/bmg/reader/csv.rb +29 -10
  28. data/lib/bmg/reader/excel.rb +23 -4
  29. data/lib/bmg/relation.rb +6 -0
  30. data/lib/bmg/relation/in_memory.rb +0 -1
  31. data/lib/bmg/sequel/relation.rb +1 -0
  32. data/lib/bmg/sequel/translator.rb +9 -2
  33. data/lib/bmg/sql.rb +4 -1
  34. data/lib/bmg/sql/processor.rb +1 -0
  35. data/lib/bmg/sql/processor/transform.rb +105 -0
  36. data/lib/bmg/sql/relation.rb +20 -6
  37. data/lib/bmg/summarizer.rb +36 -1
  38. data/lib/bmg/summarizer/avg.rb +3 -3
  39. data/lib/bmg/summarizer/by_proc.rb +41 -0
  40. data/lib/bmg/summarizer/distinct.rb +36 -0
  41. data/lib/bmg/summarizer/multiple.rb +46 -0
  42. data/lib/bmg/summarizer/percentile.rb +79 -0
  43. data/lib/bmg/summarizer/value_by.rb +62 -0
  44. data/lib/bmg/support/keys.rb +5 -0
  45. data/lib/bmg/support/tuple_transformer.rb +23 -1
  46. data/lib/bmg/type.rb +19 -1
  47. data/lib/bmg/version.rb +1 -1
  48. data/lib/bmg/writer.rb +16 -0
  49. data/lib/bmg/writer/csv.rb +2 -12
  50. data/lib/bmg/writer/xlsx.rb +68 -0
  51. metadata +25 -2
@@ -5,30 +5,36 @@ module Bmg
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
7
  :headers => true,
8
- :return_headers => false
8
+ :return_headers => false,
9
+ :smart => true
9
10
  }
10
11
 
11
- def initialize(type, path, options = {})
12
+ def initialize(type, path_or_io, options = {})
12
13
  @type = type
13
- @path = path
14
+ @path_or_io = path_or_io
14
15
  @options = DEFAULT_OPTIONS.merge(options)
15
- @options[:col_sep] ||= infer_col_sep
16
- @options[:quote_char] ||= infer_quote_char
16
+ if @options[:smart] && !@path_or_io.is_a?(IO)
17
+ @options[:col_sep] ||= infer_col_sep
18
+ @options[:quote_char] ||= infer_quote_char
19
+ end
17
20
  end
18
21
 
19
22
  def each
23
+ return to_enum unless block_given?
20
24
  require 'csv'
21
- ::CSV.foreach(@path, @options) do |row|
22
- yield tuple(row)
25
+ with_io do |io|
26
+ ::CSV.new(io, **csv_options).each do |row|
27
+ yield tuple(row)
28
+ end
23
29
  end
24
30
  end
25
31
 
26
32
  def to_ast
27
- [ :csv, @path, @options ]
33
+ [ :csv, @path_or_io, @options ]
28
34
  end
29
35
 
30
36
  def to_s
31
- "(csv #{path})"
37
+ "(csv #{@path_or_io})"
32
38
  end
33
39
  alias :inspect :to_s
34
40
 
@@ -47,7 +53,16 @@ module Bmg
47
53
  end
48
54
 
49
55
  def text_portion
50
- @text_portion ||= File.foreach(@path).first(10).join("\n")
56
+ @text_portion ||= with_io{|io| io.readlines(10).join("\n") }
57
+ end
58
+
59
+ def with_io(&bl)
60
+ case @path_or_io
61
+ when IO, StringIO
62
+ bl.call(@path_or_io)
63
+ else
64
+ File.open(@path_or_io, "r", &bl)
65
+ end
51
66
  end
52
67
 
53
68
  # Finds the best candidate among `candidates` for a separator
@@ -61,6 +76,10 @@ module Bmg
61
76
  snif.size > 0 ? snif[0][0] : default
62
77
  end
63
78
 
79
+ def csv_options
80
+ @csv_options ||= @options.dup.tap{|opts| opts.delete(:smart) }
81
+ end
82
+
64
83
  end # class Csv
65
84
  end # module Reader
66
85
  end # module Bmg
@@ -4,7 +4,8 @@ module Bmg
4
4
  include Reader
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
- skip: 0
7
+ skip: 0,
8
+ row_num: true
8
9
  }
9
10
 
10
11
  def initialize(type, path, options = {})
@@ -14,6 +15,7 @@ module Bmg
14
15
  end
15
16
 
16
17
  def each
18
+ return to_enum unless block_given?
17
19
  require 'roo'
18
20
  xlsx = Roo::Spreadsheet.open(@path, @options)
19
21
  headers = nil
@@ -23,9 +25,13 @@ module Bmg
23
25
  .each_with_index
24
26
  .each do |row, i|
25
27
  if i==0
26
- headers = row.map(&:to_sym)
28
+ headers = row.map{|c| c.to_s.strip.to_sym }
27
29
  else
28
- tuple = (0...headers.size).each_with_object({}){|i,t| t[headers[i]] = row[i] }
30
+ init = init_tuple(i)
31
+ tuple = (0...headers.size)
32
+ .each_with_object(init){|i,t|
33
+ t[headers[i]] = row[i]
34
+ }
29
35
  yield(tuple)
30
36
  end
31
37
  end
@@ -36,10 +42,23 @@ module Bmg
36
42
  end
37
43
 
38
44
  def to_s
39
- "(excel #{path})"
45
+ "(excel #{@path})"
40
46
  end
41
47
  alias :inspect :to_s
42
48
 
49
+ private
50
+
51
+ def init_tuple(i)
52
+ case as = @options[:row_num]
53
+ when TrueClass
54
+ { :row_num => i }
55
+ when FalseClass
56
+ {}
57
+ when Symbol
58
+ { :"#{as}" => i }
59
+ end
60
+ end
61
+
43
62
  end # class Excel
44
63
  end # module Reader
45
64
  end # module Bmg
data/lib/bmg/relation.rb CHANGED
@@ -27,6 +27,12 @@ module Bmg
27
27
  }
28
28
  end
29
29
 
30
+ def with_type_attrlist
31
+ return self if type.knows_attrlist?
32
+ attrs = self.first.keys
33
+ with_type(type.with_attrlist(attrs))
34
+ end
35
+
30
36
  def with_typecheck
31
37
  dup.tap{|r|
32
38
  r.type = r.type.with_typecheck
@@ -8,7 +8,6 @@ module Bmg
8
8
  @type = type
9
9
  end
10
10
  attr_accessor :type
11
- protected :type=
12
11
  attr_reader :operand
13
12
 
14
13
  public
@@ -9,6 +9,7 @@ module Bmg
9
9
  attr_reader :sequel_db
10
10
 
11
11
  def each(&bl)
12
+ return to_enum unless block_given?
12
13
  dataset.each(&bl)
13
14
  end
14
15
 
@@ -78,8 +78,15 @@ module Bmg
78
78
  end
79
79
 
80
80
  def on_func_call(sexpr)
81
- args = sexpr.func_args.map{|fa| apply(fa) }
82
- ::Sequel.function(sexpr.func_name, *args)
81
+ case sexpr.func_name
82
+ when :cast
83
+ to_cast = apply(sexpr.func_args.first)
84
+ type = sexpr.func_args.last.last
85
+ to_cast.cast(type)
86
+ else
87
+ args = sexpr.func_args.map{|fa| apply(fa) }
88
+ ::Sequel.function(sexpr.func_name, *args)
89
+ end
83
90
  end
84
91
 
85
92
  def on_summarizer(sexpr)
data/lib/bmg/sql.rb CHANGED
@@ -2,7 +2,10 @@ require 'sexpr'
2
2
  module Bmg
3
3
 
4
4
  module Sql
5
- end
5
+
6
+ class NotSupportedError < Bmg::Error; end
7
+
8
+ end # module Sql
6
9
 
7
10
  def sql(table, type = Type::ANY)
8
11
  builder = Sql::Builder.new
@@ -85,4 +85,5 @@ require_relative 'processor/semi_join'
85
85
  require_relative 'processor/flatten'
86
86
  require_relative 'processor/requalify'
87
87
  require_relative 'processor/summarize'
88
+ require_relative 'processor/transform'
88
89
  require_relative 'processor/bind'
@@ -0,0 +1,105 @@
1
+ module Bmg
2
+ module Sql
3
+ class Processor
4
+ class Transform < Processor
5
+
6
+ module SplitSupported
7
+ extend(self)
8
+
9
+ def split_supported(tr, &bl)
10
+ case tr
11
+ when Array
12
+ i = tr.find_index{|x| !bl.call(x) } || tr.size
13
+ [tr[0...i], tr[i..-1]].map{|a|
14
+ case a.size
15
+ when 0 then nil
16
+ when 1 then a.first
17
+ else a
18
+ end
19
+ }
20
+ when Hash
21
+ tr.inject([{}, {}]){|(sup,unsup),(k,v)|
22
+ mine, hers = _split_supported(v, &bl)
23
+ [
24
+ sup.merge(k => mine),
25
+ unsup.merge(k => hers)
26
+ ].map(&:compact)
27
+ }.map{|h| h.empty? ? nil : h }
28
+ else
29
+ _split_supported(tr, &bl)
30
+ end
31
+ end
32
+
33
+ def _split_supported(tr, &bl)
34
+ if tr.is_a?(Array)
35
+ split_supported(tr, &bl)
36
+ else
37
+ bl.call(tr) ? [tr, nil] : [nil, tr]
38
+ end
39
+ end
40
+ end # module SplitSupported
41
+
42
+ def initialize(transformation, options, builder)
43
+ raise NotSupportedError unless options.empty?
44
+ super(builder)
45
+ @transformation = transformation
46
+ end
47
+ attr_reader :transformation
48
+
49
+ def self.split_supported(*args, &bl)
50
+ SplitSupported.split_supported(*args, &bl)
51
+ end
52
+
53
+ def on_select_list(sexpr)
54
+ sexpr.each_with_index.map{|child,index|
55
+ index == 0 ? child : apply(child)
56
+ }
57
+ end
58
+
59
+ def on_select_item(sexpr)
60
+ as = sexpr.as_name.to_sym
61
+ case t = transformation_for(as)
62
+ when NilClass
63
+ sexpr
64
+ when Class, Array
65
+ sexpr([:select_item,
66
+ func_call_node(sexpr, Array(t).reverse),
67
+ sexpr[2]
68
+ ])
69
+ else
70
+ raise NotSupportedError
71
+ end
72
+ end
73
+
74
+ private
75
+
76
+ def func_call_node(sexpr, ts)
77
+ _func_call_node(sexpr, ts.first, ts[1..-1])
78
+ end
79
+
80
+ def _func_call_node(sexpr, head, tail)
81
+ inside = if tail.empty?
82
+ sexpr[1]
83
+ else
84
+ _func_call_node(sexpr, tail.first, tail[1..-1])
85
+ end
86
+ [:func_call,
87
+ :cast,
88
+ inside,
89
+ [ :literal, head ] ]
90
+ end
91
+
92
+ def transformation_for(as)
93
+ case t = transformation
94
+ when Class then t
95
+ when Hash then t[as]
96
+ when Array then t
97
+ else
98
+ raise Sql::NotSupportedError, "Unable to use `#{as}` for `transform`"
99
+ end
100
+ end
101
+
102
+ end # class Transform
103
+ end # class Processor
104
+ end # module Sql
105
+ end # module Bmg
@@ -123,27 +123,41 @@ module Bmg
123
123
 
124
124
  def _rename(type, renaming)
125
125
  expr = before_use(self.expr)
126
- expr = Processor::Rename.new(renaming, builder).call(self.expr)
126
+ expr = Processor::Rename.new(renaming, builder).call(expr)
127
127
  _instance(type, builder, expr)
128
128
  end
129
129
 
130
130
  def _restrict(type, predicate)
131
131
  expr = before_use(self.expr)
132
- expr = Processor::Where.new(predicate, builder).call(self.expr)
132
+ expr = Processor::Where.new(predicate, builder).call(expr)
133
133
  _instance(type, builder, expr)
134
134
  end
135
135
 
136
- def _summarize(type, by, summarization)
137
- summarization = Operator::Summarize.compile(summarization)
136
+ def _summarize(type, by, defs)
137
+ summarization = ::Bmg::Summarizer.summarization(defs)
138
138
  if can_compile_summarization?(summarization)
139
139
  expr = before_use(self.expr)
140
- expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
140
+ expr = Processor::Summarize.new(by, summarization, builder).call(expr)
141
141
  _instance(type, builder, expr)
142
142
  else
143
143
  super
144
144
  end
145
145
  end
146
146
 
147
+ def _transform(type, transformation, options)
148
+ expr = before_use(self.expr)
149
+ sup, unsup = Processor::Transform.split_supported(transformation){|x|
150
+ [String, Integer, Float, Date, DateTime].include?(x)
151
+ }
152
+ return super if sup.nil?
153
+ expr = Processor::Transform.new(sup, options, builder).call(expr)
154
+ result = _instance(type, builder, expr)
155
+ result = result.transform(unsup, options) if unsup
156
+ result
157
+ rescue Sql::NotSupportedError
158
+ super
159
+ end
160
+
147
161
  def can_compile_summarization?(summarization)
148
162
  summarization.values.all?{|s|
149
163
  [:avg, :count, :max, :min, :sum].include?(s.to_summarizer_name)
@@ -153,7 +167,7 @@ module Bmg
153
167
  def _union(type, right, options)
154
168
  if right_expr = extract_compatible_sexpr(right)
155
169
  expr = before_use(self.expr)
156
- expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(self.expr)
170
+ expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(expr)
157
171
  _instance(type, builder, expr)
158
172
  else
159
173
  super
@@ -50,6 +50,21 @@ module Bmg
50
50
  end
51
51
  end
52
52
 
53
+ # Converts some summarization definitions to a Hash of
54
+ # summarizers.
55
+ def self.summarization(defs)
56
+ Hash[defs.map{|k,v|
57
+ summarizer = case v
58
+ when Summarizer then v
59
+ when Symbol then Summarizer.send(v, k)
60
+ when Proc then Summarizer.by_proc(&v)
61
+ else
62
+ raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
63
+ end
64
+ [ k, summarizer ]
65
+ }]
66
+ end
67
+
53
68
  # Returns the default options to use
54
69
  #
55
70
  # @return the default aggregation options
@@ -80,7 +95,7 @@ module Bmg
80
95
  # @param the current iterated tuple
81
96
  # @return updated memo value
82
97
  def happens(memo, tuple)
83
- value = @functor.is_a?(Proc) ? @functor.call(tuple) : tuple[@functor]
98
+ value = extract_value(tuple)
84
99
  _happens(memo, value)
85
100
  end
86
101
 
@@ -119,6 +134,21 @@ module Bmg
119
134
  self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
120
135
  end
121
136
 
137
+ protected
138
+
139
+ def extract_value(tuple)
140
+ value = case @functor
141
+ when Proc
142
+ @functor.call(tuple)
143
+ when NilClass
144
+ tuple
145
+ when Symbol
146
+ tuple[@functor]
147
+ else
148
+ tuple[@functor]
149
+ end
150
+ end
151
+
122
152
  end # class Summarizer
123
153
  end # module Bmg
124
154
  require_relative 'summarizer/count'
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
128
158
  require_relative 'summarizer/avg'
129
159
  require_relative 'summarizer/variance'
130
160
  require_relative 'summarizer/stddev'
161
+ require_relative 'summarizer/percentile'
131
162
  require_relative 'summarizer/collect'
163
+ require_relative 'summarizer/distinct'
132
164
  require_relative 'summarizer/concat'
165
+ require_relative 'summarizer/by_proc'
166
+ require_relative 'summarizer/multiple'
167
+ require_relative 'summarizer/value_by'
@@ -16,13 +16,13 @@ module Bmg
16
16
  end
17
17
 
18
18
  # Collects one more value + the sum of all
19
- def _happens(memo, val)
19
+ def _happens(memo, val)
20
20
  [memo.first + val, memo.last + 1]
21
21
  end
22
22
 
23
23
  # Finalizes the computation.
24
- def finalize(memo)
25
- memo.first / memo.last
24
+ def finalize(memo)
25
+ memo.first / memo.last
26
26
  end
27
27
 
28
28
  end # class Avg