bmg 0.18.0 → 0.18.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/lib/bmg/algebra.rb +1 -0
  4. data/lib/bmg/algebra/shortcuts.rb +6 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator/allbut.rb +27 -0
  7. data/lib/bmg/operator/autosummarize.rb +27 -4
  8. data/lib/bmg/operator/autowrap.rb +27 -0
  9. data/lib/bmg/operator/constants.rb +7 -0
  10. data/lib/bmg/operator/extend.rb +7 -0
  11. data/lib/bmg/operator/group.rb +1 -0
  12. data/lib/bmg/operator/image.rb +41 -2
  13. data/lib/bmg/operator/join.rb +1 -0
  14. data/lib/bmg/operator/matching.rb +1 -0
  15. data/lib/bmg/operator/not_matching.rb +1 -0
  16. data/lib/bmg/operator/page.rb +2 -7
  17. data/lib/bmg/operator/project.rb +3 -2
  18. data/lib/bmg/operator/rename.rb +12 -5
  19. data/lib/bmg/operator/restrict.rb +1 -0
  20. data/lib/bmg/operator/rxmatch.rb +1 -0
  21. data/lib/bmg/operator/summarize.rb +2 -17
  22. data/lib/bmg/operator/transform.rb +39 -1
  23. data/lib/bmg/operator/union.rb +1 -0
  24. data/lib/bmg/reader/csv.rb +29 -10
  25. data/lib/bmg/reader/excel.rb +23 -4
  26. data/lib/bmg/relation.rb +18 -0
  27. data/lib/bmg/relation/empty.rb +4 -0
  28. data/lib/bmg/relation/in_memory.rb +10 -1
  29. data/lib/bmg/relation/materialized.rb +6 -0
  30. data/lib/bmg/relation/spied.rb +5 -0
  31. data/lib/bmg/sequel/relation.rb +5 -0
  32. data/lib/bmg/sql/relation.rb +2 -3
  33. data/lib/bmg/summarizer.rb +36 -1
  34. data/lib/bmg/summarizer/avg.rb +3 -3
  35. data/lib/bmg/summarizer/by_proc.rb +41 -0
  36. data/lib/bmg/summarizer/distinct.rb +36 -0
  37. data/lib/bmg/summarizer/multiple.rb +46 -0
  38. data/lib/bmg/summarizer/percentile.rb +79 -0
  39. data/lib/bmg/summarizer/value_by.rb +62 -0
  40. data/lib/bmg/support.rb +1 -0
  41. data/lib/bmg/support/ordering.rb +20 -0
  42. data/lib/bmg/support/tuple_transformer.rb +10 -1
  43. data/lib/bmg/version.rb +1 -1
  44. data/lib/bmg/writer.rb +16 -0
  45. data/lib/bmg/writer/csv.rb +2 -12
  46. data/lib/bmg/writer/xlsx.rb +68 -0
  47. metadata +23 -2
@@ -26,6 +26,7 @@ module Bmg
26
26
  end
27
27
 
28
28
  def each
29
+ return to_enum unless block_given?
29
30
  @operand.each do |tuple|
30
31
  yield(tuple) if @predicate.evaluate(tuple)
31
32
  end
@@ -32,6 +32,7 @@ module Bmg
32
32
  public
33
33
 
34
34
  def each
35
+ return to_enum unless block_given?
35
36
  @operand.each do |tuple|
36
37
  against = attrs.map{|a| tuple[a] }.join(" ")
37
38
  matcher = self.matcher
@@ -13,7 +13,7 @@ module Bmg
13
13
  @type = type
14
14
  @operand = operand
15
15
  @by = by
16
- @summarization = Summarize.compile(summarization)
16
+ @summarization = Summarizer.summarization(summarization)
17
17
  end
18
18
 
19
19
  protected
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  # summary key => summarization memo, starting with least
27
28
  result = Hash.new{|h,k|
28
29
  h[k] = Hash[@summarization.map{|k,v|
@@ -56,22 +57,6 @@ module Bmg
56
57
  [ by, summarization ]
57
58
  end
58
59
 
59
- private
60
-
61
- # Compile a summarization hash so that every value is a Summarizer
62
- # instance
63
- def self.compile(summarization)
64
- Hash[summarization.map{|k,v|
65
- summarizer = case v
66
- when Summarizer then v
67
- when Symbol then Summarizer.send(v, k)
68
- else
69
- raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
70
- end
71
- [ k, summarizer ]
72
- }]
73
- end
74
-
75
60
  end # class Summarize
76
61
  end # module Operator
77
62
  end # module Bmg
@@ -23,11 +23,12 @@ module Bmg
23
23
 
24
24
  protected
25
25
 
26
- attr_reader :transformation
26
+ attr_reader :transformation, :options
27
27
 
28
28
  public
29
29
 
30
30
  def each
31
+ return to_enum unless block_given?
31
32
  t = transformer
32
33
  @operand.each do |tuple|
33
34
  yield t.call(tuple)
@@ -40,6 +41,43 @@ module Bmg
40
41
 
41
42
  protected ### optimization
42
43
 
44
+ def _allbut(type, butlist)
45
+ # `allbut` can always be pushed down the tree. unlike
46
+ # `extend` the Proc that might be used cannot use attributes
47
+ # in butlist, so it's safe to strip them away.
48
+ if transformer.knows_attrlist?
49
+ # We just need to clean the transformation
50
+ attrlist = transformer.to_attrlist
51
+ thrown = attrlist & butlist
52
+ t = transformation.dup.reject{|k,v| thrown.include?(k) }
53
+ operand.allbut(butlist).transform(t, options)
54
+ else
55
+ operand.allbut(butlist).transform(transformation, options)
56
+ end
57
+ end
58
+
59
+ def _project(type, attrlist)
60
+ if transformer.knows_attrlist?
61
+ t = transformation.dup.select{|k,v| attrlist.include?(k) }
62
+ operand.project(attrlist).transform(t, options)
63
+ else
64
+ operand.project(attrlist).transform(transformation, options)
65
+ end
66
+ end
67
+
68
+ def _restrict(type, predicate)
69
+ return super unless transformer.knows_attrlist?
70
+ top, bottom = predicate.and_split(transformer.to_attrlist)
71
+ if top == predicate
72
+ super
73
+ else
74
+ operand
75
+ .restrict(bottom)
76
+ .transform(transformation, options)
77
+ .restrict(top)
78
+ end
79
+ end
80
+
43
81
  protected ### inspect
44
82
 
45
83
  def args
@@ -37,6 +37,7 @@ module Bmg
37
37
  end
38
38
 
39
39
  def each(&bl)
40
+ return to_enum unless block_given?
40
41
  if all?
41
42
  operands.each do |op|
42
43
  op.each(&bl)
@@ -5,30 +5,36 @@ module Bmg
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
7
  :headers => true,
8
- :return_headers => false
8
+ :return_headers => false,
9
+ :smart => true
9
10
  }
10
11
 
11
- def initialize(type, path, options = {})
12
+ def initialize(type, path_or_io, options = {})
12
13
  @type = type
13
- @path = path
14
+ @path_or_io = path_or_io
14
15
  @options = DEFAULT_OPTIONS.merge(options)
15
- @options[:col_sep] ||= infer_col_sep
16
- @options[:quote_char] ||= infer_quote_char
16
+ if @options[:smart] && !@path_or_io.is_a?(IO)
17
+ @options[:col_sep] ||= infer_col_sep
18
+ @options[:quote_char] ||= infer_quote_char
19
+ end
17
20
  end
18
21
 
19
22
  def each
23
+ return to_enum unless block_given?
20
24
  require 'csv'
21
- ::CSV.foreach(@path, @options) do |row|
22
- yield tuple(row)
25
+ with_io do |io|
26
+ ::CSV.new(io, **csv_options).each do |row|
27
+ yield tuple(row)
28
+ end
23
29
  end
24
30
  end
25
31
 
26
32
  def to_ast
27
- [ :csv, @path, @options ]
33
+ [ :csv, @path_or_io, @options ]
28
34
  end
29
35
 
30
36
  def to_s
31
- "(csv #{path})"
37
+ "(csv #{@path_or_io})"
32
38
  end
33
39
  alias :inspect :to_s
34
40
 
@@ -47,7 +53,16 @@ module Bmg
47
53
  end
48
54
 
49
55
  def text_portion
50
- @text_portion ||= File.foreach(@path).first(10).join("\n")
56
+ @text_portion ||= with_io{|io| io.readlines(10).join("\n") }
57
+ end
58
+
59
+ def with_io(&bl)
60
+ case @path_or_io
61
+ when IO, StringIO
62
+ bl.call(@path_or_io)
63
+ else
64
+ File.open(@path_or_io, "r", &bl)
65
+ end
51
66
  end
52
67
 
53
68
  # Finds the best candidate among `candidates` for a separator
@@ -61,6 +76,10 @@ module Bmg
61
76
  snif.size > 0 ? snif[0][0] : default
62
77
  end
63
78
 
79
+ def csv_options
80
+ @csv_options ||= @options.dup.tap{|opts| opts.delete(:smart) }
81
+ end
82
+
64
83
  end # class Csv
65
84
  end # module Reader
66
85
  end # module Bmg
@@ -4,7 +4,8 @@ module Bmg
4
4
  include Reader
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
- skip: 0
7
+ skip: 0,
8
+ row_num: true
8
9
  }
9
10
 
10
11
  def initialize(type, path, options = {})
@@ -14,6 +15,7 @@ module Bmg
14
15
  end
15
16
 
16
17
  def each
18
+ return to_enum unless block_given?
17
19
  require 'roo'
18
20
  xlsx = Roo::Spreadsheet.open(@path, @options)
19
21
  headers = nil
@@ -23,9 +25,13 @@ module Bmg
23
25
  .each_with_index
24
26
  .each do |row, i|
25
27
  if i==0
26
- headers = row.map(&:to_sym)
28
+ headers = row.map{|c| c.to_s.strip.to_sym }
27
29
  else
28
- tuple = (0...headers.size).each_with_object({}){|i,t| t[headers[i]] = row[i] }
30
+ init = init_tuple(i)
31
+ tuple = (0...headers.size)
32
+ .each_with_object(init){|i,t|
33
+ t[headers[i]] = row[i]
34
+ }
29
35
  yield(tuple)
30
36
  end
31
37
  end
@@ -36,10 +42,23 @@ module Bmg
36
42
  end
37
43
 
38
44
  def to_s
39
- "(excel #{path})"
45
+ "(excel #{@path})"
40
46
  end
41
47
  alias :inspect :to_s
42
48
 
49
+ private
50
+
51
+ def init_tuple(i)
52
+ case as = @options[:row_num]
53
+ when TrueClass
54
+ { :row_num => i }
55
+ when FalseClass
56
+ {}
57
+ when Symbol
58
+ { :"#{as}" => i }
59
+ end
60
+ end
61
+
43
62
  end # class Excel
44
63
  end # module Reader
45
64
  end # module Bmg
data/lib/bmg/relation.rb CHANGED
@@ -27,6 +27,12 @@ module Bmg
27
27
  }
28
28
  end
29
29
 
30
+ def with_type_attrlist
31
+ return self if type.knows_attrlist?
32
+ attrs = self.first.keys
33
+ with_type(type.with_attrlist(attrs))
34
+ end
35
+
30
36
  def with_typecheck
31
37
  dup.tap{|r|
32
38
  r.type = r.type.with_typecheck
@@ -110,6 +116,18 @@ module Bmg
110
116
  end
111
117
  end
112
118
 
119
+ def count
120
+ if type.knows_keys?
121
+ project(type.keys.first)._count
122
+ else
123
+ self._count
124
+ end
125
+ end
126
+
127
+ def _count
128
+ to_a.size
129
+ end
130
+
113
131
  # Returns a json representation
114
132
  def to_json(*args, &bl)
115
133
  to_a.to_json(*args, &bl)
@@ -19,6 +19,10 @@ module Bmg
19
19
  def each(&bl)
20
20
  end
21
21
 
22
+ def _count
23
+ 0
24
+ end
25
+
22
26
  def to_ast
23
27
  [ :empty ]
24
28
  end
@@ -8,7 +8,6 @@ module Bmg
8
8
  @type = type
9
9
  end
10
10
  attr_accessor :type
11
- protected :type=
12
11
  attr_reader :operand
13
12
 
14
13
  public
@@ -17,6 +16,16 @@ module Bmg
17
16
  @operand.each(&bl)
18
17
  end
19
18
 
19
+ def _count
20
+ if operand.respond_to?(:count)
21
+ operand.count
22
+ elsif operand.respond_to?(:size)
23
+ operand.size
24
+ else
25
+ super
26
+ end
27
+ end
28
+
20
29
  def to_ast
21
30
  [ :in_memory, operand ]
22
31
  end
@@ -16,6 +16,12 @@ module Bmg
16
16
  end
17
17
  protected :type=
18
18
 
19
+ public
20
+
21
+ def _count
22
+ operand._count
23
+ end
24
+
19
25
  public
20
26
 
21
27
  def each(&bl)
@@ -28,6 +28,11 @@ module Bmg
28
28
  operand.each(&bl)
29
29
  end
30
30
 
31
+ def count
32
+ spy.call(self) if bl
33
+ operand.count
34
+ end
35
+
31
36
  def to_ast
32
37
  [ :spied, operand.to_ast, spy ]
33
38
  end
@@ -9,6 +9,7 @@ module Bmg
9
9
  attr_reader :sequel_db
10
10
 
11
11
  def each(&bl)
12
+ return to_enum unless block_given?
12
13
  dataset.each(&bl)
13
14
  end
14
15
 
@@ -33,6 +34,10 @@ module Bmg
33
34
  base_table.update(arg)
34
35
  end
35
36
 
37
+ def _count
38
+ dataset.count
39
+ end
40
+
36
41
  def to_ast
37
42
  [:sequel, dataset.sql]
38
43
  end
@@ -10,7 +10,6 @@ module Bmg
10
10
  end
11
11
 
12
12
  attr_accessor :type
13
- protected :type=
14
13
 
15
14
  protected
16
15
 
@@ -134,8 +133,8 @@ module Bmg
134
133
  _instance(type, builder, expr)
135
134
  end
136
135
 
137
- def _summarize(type, by, summarization)
138
- summarization = Operator::Summarize.compile(summarization)
136
+ def _summarize(type, by, defs)
137
+ summarization = ::Bmg::Summarizer.summarization(defs)
139
138
  if can_compile_summarization?(summarization)
140
139
  expr = before_use(self.expr)
141
140
  expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
@@ -50,6 +50,21 @@ module Bmg
50
50
  end
51
51
  end
52
52
 
53
+ # Converts some summarization definitions to a Hash of
54
+ # summarizers.
55
+ def self.summarization(defs)
56
+ Hash[defs.map{|k,v|
57
+ summarizer = case v
58
+ when Summarizer then v
59
+ when Symbol then Summarizer.send(v, k)
60
+ when Proc then Summarizer.by_proc(&v)
61
+ else
62
+ raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
63
+ end
64
+ [ k, summarizer ]
65
+ }]
66
+ end
67
+
53
68
  # Returns the default options to use
54
69
  #
55
70
  # @return the default aggregation options
@@ -80,7 +95,7 @@ module Bmg
80
95
  # @param the current iterated tuple
81
96
  # @return updated memo value
82
97
  def happens(memo, tuple)
83
- value = @functor.is_a?(Proc) ? @functor.call(tuple) : tuple[@functor]
98
+ value = extract_value(tuple)
84
99
  _happens(memo, value)
85
100
  end
86
101
 
@@ -119,6 +134,21 @@ module Bmg
119
134
  self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
120
135
  end
121
136
 
137
+ protected
138
+
139
+ def extract_value(tuple)
140
+ value = case @functor
141
+ when Proc
142
+ @functor.call(tuple)
143
+ when NilClass
144
+ tuple
145
+ when Symbol
146
+ tuple[@functor]
147
+ else
148
+ tuple[@functor]
149
+ end
150
+ end
151
+
122
152
  end # class Summarizer
123
153
  end # module Bmg
124
154
  require_relative 'summarizer/count'
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
128
158
  require_relative 'summarizer/avg'
129
159
  require_relative 'summarizer/variance'
130
160
  require_relative 'summarizer/stddev'
161
+ require_relative 'summarizer/percentile'
131
162
  require_relative 'summarizer/collect'
163
+ require_relative 'summarizer/distinct'
132
164
  require_relative 'summarizer/concat'
165
+ require_relative 'summarizer/by_proc'
166
+ require_relative 'summarizer/multiple'
167
+ require_relative 'summarizer/value_by'