bmg 0.18.2 → 0.18.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/lib/bmg/algebra.rb +18 -0
  4. data/lib/bmg/algebra/shortcuts.rb +8 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator.rb +2 -0
  7. data/lib/bmg/operator/allbut.rb +1 -0
  8. data/lib/bmg/operator/autosummarize.rb +1 -0
  9. data/lib/bmg/operator/autowrap.rb +1 -0
  10. data/lib/bmg/operator/constants.rb +1 -0
  11. data/lib/bmg/operator/extend.rb +1 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +10 -4
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +1 -0
  18. data/lib/bmg/operator/project.rb +1 -0
  19. data/lib/bmg/operator/rename.rb +6 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +1 -0
  24. data/lib/bmg/operator/ungroup.rb +61 -0
  25. data/lib/bmg/operator/union.rb +1 -0
  26. data/lib/bmg/operator/unwrap.rb +47 -0
  27. data/lib/bmg/reader/csv.rb +29 -10
  28. data/lib/bmg/reader/excel.rb +23 -4
  29. data/lib/bmg/relation.rb +6 -0
  30. data/lib/bmg/relation/in_memory.rb +0 -1
  31. data/lib/bmg/sequel/relation.rb +1 -0
  32. data/lib/bmg/sequel/translator.rb +9 -2
  33. data/lib/bmg/sql.rb +4 -1
  34. data/lib/bmg/sql/processor.rb +1 -0
  35. data/lib/bmg/sql/processor/transform.rb +105 -0
  36. data/lib/bmg/sql/relation.rb +20 -6
  37. data/lib/bmg/summarizer.rb +36 -1
  38. data/lib/bmg/summarizer/avg.rb +3 -3
  39. data/lib/bmg/summarizer/by_proc.rb +41 -0
  40. data/lib/bmg/summarizer/distinct.rb +36 -0
  41. data/lib/bmg/summarizer/multiple.rb +46 -0
  42. data/lib/bmg/summarizer/percentile.rb +79 -0
  43. data/lib/bmg/summarizer/value_by.rb +62 -0
  44. data/lib/bmg/support/keys.rb +5 -0
  45. data/lib/bmg/support/tuple_transformer.rb +23 -1
  46. data/lib/bmg/type.rb +19 -1
  47. data/lib/bmg/version.rb +1 -1
  48. data/lib/bmg/writer.rb +16 -0
  49. data/lib/bmg/writer/csv.rb +2 -12
  50. data/lib/bmg/writer/xlsx.rb +68 -0
  51. metadata +25 -2
data/lib/bmg/reader/csv.rb CHANGED
@@ -5,30 +5,36 @@ module Bmg
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
7
  :headers => true,
8
- :return_headers => false
8
+ :return_headers => false,
9
+ :smart => true
9
10
  }
10
11
 
11
- def initialize(type, path, options = {})
12
+ def initialize(type, path_or_io, options = {})
12
13
  @type = type
13
- @path = path
14
+ @path_or_io = path_or_io
14
15
  @options = DEFAULT_OPTIONS.merge(options)
15
- @options[:col_sep] ||= infer_col_sep
16
- @options[:quote_char] ||= infer_quote_char
16
+ if @options[:smart] && !@path_or_io.is_a?(IO)
17
+ @options[:col_sep] ||= infer_col_sep
18
+ @options[:quote_char] ||= infer_quote_char
19
+ end
17
20
  end
18
21
 
19
22
  def each
23
+ return to_enum unless block_given?
20
24
  require 'csv'
21
- ::CSV.foreach(@path, @options) do |row|
22
- yield tuple(row)
25
+ with_io do |io|
26
+ ::CSV.new(io, **csv_options).each do |row|
27
+ yield tuple(row)
28
+ end
23
29
  end
24
30
  end
25
31
 
26
32
  def to_ast
27
- [ :csv, @path, @options ]
33
+ [ :csv, @path_or_io, @options ]
28
34
  end
29
35
 
30
36
  def to_s
31
- "(csv #{path})"
37
+ "(csv #{@path_or_io})"
32
38
  end
33
39
  alias :inspect :to_s
34
40
 
@@ -47,7 +53,16 @@ module Bmg
47
53
  end
48
54
 
49
55
  def text_portion
50
- @text_portion ||= File.foreach(@path).first(10).join("\n")
56
+ @text_portion ||= with_io{|io| io.readlines(10).join("\n") }
57
+ end
58
+
59
+ def with_io(&bl)
60
+ case @path_or_io
61
+ when IO, StringIO
62
+ bl.call(@path_or_io)
63
+ else
64
+ File.open(@path_or_io, "r", &bl)
65
+ end
51
66
  end
52
67
 
53
68
  # Finds the best candidate among `candidates` for a separator
@@ -61,6 +76,10 @@ module Bmg
61
76
  snif.size > 0 ? snif[0][0] : default
62
77
  end
63
78
 
79
+ def csv_options
80
+ @csv_options ||= @options.dup.tap{|opts| opts.delete(:smart) }
81
+ end
82
+
64
83
  end # class Csv
65
84
  end # module Reader
66
85
  end # module Bmg
data/lib/bmg/reader/excel.rb CHANGED
@@ -4,7 +4,8 @@ module Bmg
4
4
  include Reader
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
- skip: 0
7
+ skip: 0,
8
+ row_num: true
8
9
  }
9
10
 
10
11
  def initialize(type, path, options = {})
@@ -14,6 +15,7 @@ module Bmg
14
15
  end
15
16
 
16
17
  def each
18
+ return to_enum unless block_given?
17
19
  require 'roo'
18
20
  xlsx = Roo::Spreadsheet.open(@path, @options)
19
21
  headers = nil
@@ -23,9 +25,13 @@ module Bmg
23
25
  .each_with_index
24
26
  .each do |row, i|
25
27
  if i==0
26
- headers = row.map(&:to_sym)
28
+ headers = row.map{|c| c.to_s.strip.to_sym }
27
29
  else
28
- tuple = (0...headers.size).each_with_object({}){|i,t| t[headers[i]] = row[i] }
30
+ init = init_tuple(i)
31
+ tuple = (0...headers.size)
32
+ .each_with_object(init){|i,t|
33
+ t[headers[i]] = row[i]
34
+ }
29
35
  yield(tuple)
30
36
  end
31
37
  end
@@ -36,10 +42,23 @@ module Bmg
36
42
  end
37
43
 
38
44
  def to_s
39
- "(excel #{path})"
45
+ "(excel #{@path})"
40
46
  end
41
47
  alias :inspect :to_s
42
48
 
49
+ private
50
+
51
+ def init_tuple(i)
52
+ case as = @options[:row_num]
53
+ when TrueClass
54
+ { :row_num => i }
55
+ when FalseClass
56
+ {}
57
+ when Symbol
58
+ { :"#{as}" => i }
59
+ end
60
+ end
61
+
43
62
  end # class Excel
44
63
  end # module Reader
45
64
  end # module Bmg
data/lib/bmg/relation.rb CHANGED
@@ -27,6 +27,12 @@ module Bmg
27
27
  }
28
28
  end
29
29
 
30
+ def with_type_attrlist
31
+ return self if type.knows_attrlist?
32
+ attrs = self.first.keys
33
+ with_type(type.with_attrlist(attrs))
34
+ end
35
+
30
36
  def with_typecheck
31
37
  dup.tap{|r|
32
38
  r.type = r.type.with_typecheck
data/lib/bmg/relation/in_memory.rb CHANGED
@@ -8,7 +8,6 @@ module Bmg
8
8
  @type = type
9
9
  end
10
10
  attr_accessor :type
11
- protected :type=
12
11
  attr_reader :operand
13
12
 
14
13
  public
data/lib/bmg/sequel/relation.rb CHANGED
@@ -9,6 +9,7 @@ module Bmg
9
9
  attr_reader :sequel_db
10
10
 
11
11
  def each(&bl)
12
+ return to_enum unless block_given?
12
13
  dataset.each(&bl)
13
14
  end
14
15
 
data/lib/bmg/sequel/translator.rb CHANGED
@@ -78,8 +78,15 @@ module Bmg
78
78
  end
79
79
 
80
80
  def on_func_call(sexpr)
81
- args = sexpr.func_args.map{|fa| apply(fa) }
82
- ::Sequel.function(sexpr.func_name, *args)
81
+ case sexpr.func_name
82
+ when :cast
83
+ to_cast = apply(sexpr.func_args.first)
84
+ type = sexpr.func_args.last.last
85
+ to_cast.cast(type)
86
+ else
87
+ args = sexpr.func_args.map{|fa| apply(fa) }
88
+ ::Sequel.function(sexpr.func_name, *args)
89
+ end
83
90
  end
84
91
 
85
92
  def on_summarizer(sexpr)
data/lib/bmg/sql.rb CHANGED
@@ -2,7 +2,10 @@ require 'sexpr'
2
2
  module Bmg
3
3
 
4
4
  module Sql
5
- end
5
+
6
+ class NotSupportedError < Bmg::Error; end
7
+
8
+ end # module Sql
6
9
 
7
10
  def sql(table, type = Type::ANY)
8
11
  builder = Sql::Builder.new
data/lib/bmg/sql/processor.rb CHANGED
@@ -85,4 +85,5 @@ require_relative 'processor/semi_join'
85
85
  require_relative 'processor/flatten'
86
86
  require_relative 'processor/requalify'
87
87
  require_relative 'processor/summarize'
88
+ require_relative 'processor/transform'
88
89
  require_relative 'processor/bind'
data/lib/bmg/sql/processor/transform.rb ADDED
@@ -0,0 +1,105 @@
1
+ module Bmg
2
+ module Sql
3
+ class Processor
4
+ class Transform < Processor
5
+
6
+ module SplitSupported
7
+ extend(self)
8
+
9
+ def split_supported(tr, &bl)
10
+ case tr
11
+ when Array
12
+ i = tr.find_index{|x| !bl.call(x) } || tr.size
13
+ [tr[0...i], tr[i..-1]].map{|a|
14
+ case a.size
15
+ when 0 then nil
16
+ when 1 then a.first
17
+ else a
18
+ end
19
+ }
20
+ when Hash
21
+ tr.inject([{}, {}]){|(sup,unsup),(k,v)|
22
+ mine, hers = _split_supported(v, &bl)
23
+ [
24
+ sup.merge(k => mine),
25
+ unsup.merge(k => hers)
26
+ ].map(&:compact)
27
+ }.map{|h| h.empty? ? nil : h }
28
+ else
29
+ _split_supported(tr, &bl)
30
+ end
31
+ end
32
+
33
+ def _split_supported(tr, &bl)
34
+ if tr.is_a?(Array)
35
+ split_supported(tr, &bl)
36
+ else
37
+ bl.call(tr) ? [tr, nil] : [nil, tr]
38
+ end
39
+ end
40
+ end # module SplitSupported
41
+
42
+ def initialize(transformation, options, builder)
43
+ raise NotSupportedError unless options.empty?
44
+ super(builder)
45
+ @transformation = transformation
46
+ end
47
+ attr_reader :transformation
48
+
49
+ def self.split_supported(*args, &bl)
50
+ SplitSupported.split_supported(*args, &bl)
51
+ end
52
+
53
+ def on_select_list(sexpr)
54
+ sexpr.each_with_index.map{|child,index|
55
+ index == 0 ? child : apply(child)
56
+ }
57
+ end
58
+
59
+ def on_select_item(sexpr)
60
+ as = sexpr.as_name.to_sym
61
+ case t = transformation_for(as)
62
+ when NilClass
63
+ sexpr
64
+ when Class, Array
65
+ sexpr([:select_item,
66
+ func_call_node(sexpr, Array(t).reverse),
67
+ sexpr[2]
68
+ ])
69
+ else
70
+ raise NotSupportedError
71
+ end
72
+ end
73
+
74
+ private
75
+
76
+ def func_call_node(sexpr, ts)
77
+ _func_call_node(sexpr, ts.first, ts[1..-1])
78
+ end
79
+
80
+ def _func_call_node(sexpr, head, tail)
81
+ inside = if tail.empty?
82
+ sexpr[1]
83
+ else
84
+ _func_call_node(sexpr, tail.first, tail[1..-1])
85
+ end
86
+ [:func_call,
87
+ :cast,
88
+ inside,
89
+ [ :literal, head ] ]
90
+ end
91
+
92
+ def transformation_for(as)
93
+ case t = transformation
94
+ when Class then t
95
+ when Hash then t[as]
96
+ when Array then t
97
+ else
98
+ raise Sql::NotSupportedError, "Unable to use `#{as}` for `transform`"
99
+ end
100
+ end
101
+
102
+ end # class Transform
103
+ end # class Processor
104
+ end # module Sql
105
+ end # module Bmg
data/lib/bmg/sql/relation.rb CHANGED
@@ -123,27 +123,41 @@ module Bmg
123
123
 
124
124
  def _rename(type, renaming)
125
125
  expr = before_use(self.expr)
126
- expr = Processor::Rename.new(renaming, builder).call(self.expr)
126
+ expr = Processor::Rename.new(renaming, builder).call(expr)
127
127
  _instance(type, builder, expr)
128
128
  end
129
129
 
130
130
  def _restrict(type, predicate)
131
131
  expr = before_use(self.expr)
132
- expr = Processor::Where.new(predicate, builder).call(self.expr)
132
+ expr = Processor::Where.new(predicate, builder).call(expr)
133
133
  _instance(type, builder, expr)
134
134
  end
135
135
 
136
- def _summarize(type, by, summarization)
137
- summarization = Operator::Summarize.compile(summarization)
136
+ def _summarize(type, by, defs)
137
+ summarization = ::Bmg::Summarizer.summarization(defs)
138
138
  if can_compile_summarization?(summarization)
139
139
  expr = before_use(self.expr)
140
- expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
140
+ expr = Processor::Summarize.new(by, summarization, builder).call(expr)
141
141
  _instance(type, builder, expr)
142
142
  else
143
143
  super
144
144
  end
145
145
  end
146
146
 
147
+ def _transform(type, transformation, options)
148
+ expr = before_use(self.expr)
149
+ sup, unsup = Processor::Transform.split_supported(transformation){|x|
150
+ [String, Integer, Float, Date, DateTime].include?(x)
151
+ }
152
+ return super if sup.nil?
153
+ expr = Processor::Transform.new(sup, options, builder).call(expr)
154
+ result = _instance(type, builder, expr)
155
+ result = result.transform(unsup, options) if unsup
156
+ result
157
+ rescue Sql::NotSupportedError
158
+ super
159
+ end
160
+
147
161
  def can_compile_summarization?(summarization)
148
162
  summarization.values.all?{|s|
149
163
  [:avg, :count, :max, :min, :sum].include?(s.to_summarizer_name)
@@ -153,7 +167,7 @@ module Bmg
153
167
  def _union(type, right, options)
154
168
  if right_expr = extract_compatible_sexpr(right)
155
169
  expr = before_use(self.expr)
156
- expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(self.expr)
170
+ expr = Processor::Merge.new(:union, !!options[:all], right_expr, builder).call(expr)
157
171
  _instance(type, builder, expr)
158
172
  else
159
173
  super
data/lib/bmg/summarizer.rb CHANGED
@@ -50,6 +50,21 @@ module Bmg
50
50
  end
51
51
  end
52
52
 
53
+ # Converts some summarization definitions to a Hash of
54
+ # summarizers.
55
+ def self.summarization(defs)
56
+ Hash[defs.map{|k,v|
57
+ summarizer = case v
58
+ when Summarizer then v
59
+ when Symbol then Summarizer.send(v, k)
60
+ when Proc then Summarizer.by_proc(&v)
61
+ else
62
+ raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
63
+ end
64
+ [ k, summarizer ]
65
+ }]
66
+ end
67
+
53
68
  # Returns the default options to use
54
69
  #
55
70
  # @return the default aggregation options
@@ -80,7 +95,7 @@ module Bmg
80
95
  # @param the current iterated tuple
81
96
  # @return updated memo value
82
97
  def happens(memo, tuple)
83
- value = @functor.is_a?(Proc) ? @functor.call(tuple) : tuple[@functor]
98
+ value = extract_value(tuple)
84
99
  _happens(memo, value)
85
100
  end
86
101
 
@@ -119,6 +134,21 @@ module Bmg
119
134
  self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
120
135
  end
121
136
 
137
+ protected
138
+
139
+ def extract_value(tuple)
140
+ value = case @functor
141
+ when Proc
142
+ @functor.call(tuple)
143
+ when NilClass
144
+ tuple
145
+ when Symbol
146
+ tuple[@functor]
147
+ else
148
+ tuple[@functor]
149
+ end
150
+ end
151
+
122
152
  end # class Summarizer
123
153
  end # module Bmg
124
154
  require_relative 'summarizer/count'
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
128
158
  require_relative 'summarizer/avg'
129
159
  require_relative 'summarizer/variance'
130
160
  require_relative 'summarizer/stddev'
161
+ require_relative 'summarizer/percentile'
131
162
  require_relative 'summarizer/collect'
163
+ require_relative 'summarizer/distinct'
132
164
  require_relative 'summarizer/concat'
165
+ require_relative 'summarizer/by_proc'
166
+ require_relative 'summarizer/multiple'
167
+ require_relative 'summarizer/value_by'
data/lib/bmg/summarizer/avg.rb CHANGED
@@ -16,13 +16,13 @@ module Bmg
16
16
  end
17
17
 
18
18
  # Collects one more value + the sum of all
19
- def _happens(memo, val)
19
+ def _happens(memo, val)
20
20
  [memo.first + val, memo.last + 1]
21
21
  end
22
22
 
23
23
  # Finalizes the computation.
24
- def finalize(memo)
25
- memo.first / memo.last
24
+ def finalize(memo)
25
+ memo.first / memo.last
26
26
  end
27
27
 
28
28
  end # class Avg