bmg 0.18.1 → 0.18.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/lib/bmg/algebra.rb +18 -0
  4. data/lib/bmg/algebra/shortcuts.rb +8 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator.rb +2 -0
  7. data/lib/bmg/operator/allbut.rb +9 -4
  8. data/lib/bmg/operator/autosummarize.rb +7 -0
  9. data/lib/bmg/operator/autowrap.rb +19 -0
  10. data/lib/bmg/operator/constants.rb +7 -0
  11. data/lib/bmg/operator/extend.rb +7 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +15 -0
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +1 -0
  18. data/lib/bmg/operator/project.rb +3 -2
  19. data/lib/bmg/operator/rename.rb +12 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +1 -0
  24. data/lib/bmg/operator/ungroup.rb +61 -0
  25. data/lib/bmg/operator/union.rb +1 -0
  26. data/lib/bmg/operator/unwrap.rb +47 -0
  27. data/lib/bmg/reader/csv.rb +29 -10
  28. data/lib/bmg/reader/excel.rb +23 -4
  29. data/lib/bmg/relation.rb +18 -0
  30. data/lib/bmg/relation/empty.rb +4 -0
  31. data/lib/bmg/relation/in_memory.rb +10 -1
  32. data/lib/bmg/relation/materialized.rb +6 -0
  33. data/lib/bmg/relation/spied.rb +5 -0
  34. data/lib/bmg/sequel/relation.rb +5 -0
  35. data/lib/bmg/sql/relation.rb +2 -2
  36. data/lib/bmg/summarizer.rb +36 -1
  37. data/lib/bmg/summarizer/avg.rb +3 -3
  38. data/lib/bmg/summarizer/by_proc.rb +41 -0
  39. data/lib/bmg/summarizer/distinct.rb +36 -0
  40. data/lib/bmg/summarizer/multiple.rb +46 -0
  41. data/lib/bmg/summarizer/percentile.rb +79 -0
  42. data/lib/bmg/summarizer/value_by.rb +62 -0
  43. data/lib/bmg/support/keys.rb +5 -0
  44. data/lib/bmg/support/tuple_transformer.rb +10 -1
  45. data/lib/bmg/type.rb +19 -1
  46. data/lib/bmg/version.rb +1 -1
  47. data/lib/bmg/writer.rb +16 -0
  48. data/lib/bmg/writer/csv.rb +2 -12
  49. data/lib/bmg/writer/xlsx.rb +68 -0
  50. metadata +24 -2
@@ -28,6 +28,7 @@ module Bmg
28
28
  public
29
29
 
30
30
  def each
31
+ return to_enum unless block_given?
31
32
  t = transformer
32
33
  @operand.each do |tuple|
33
34
  yield t.call(tuple)
@@ -0,0 +1,61 @@
1
+ module Bmg
2
+ module Operator
3
+ class Ungroup
4
+ include Operator::Unary
5
+
6
+ def initialize(type, operand, attrs)
7
+ @type = type
8
+ @operand = operand
9
+ @attrs = attrs
10
+ end
11
+
12
+ protected
13
+
14
+ attr_reader :attrs
15
+
16
+ public
17
+
18
+ def each(&bl)
19
+ return to_enum unless block_given?
20
+ if type.knows_keys? && type.keys.any?{|k| (k & attrs).empty? }
21
+ operand.each do |tuple|
22
+ _each(tuple, attrs[0], attrs[1..-1], &bl)
23
+ end
24
+ else
25
+ with_dups = []
26
+ operand.each do |tuple|
27
+ _each(tuple, attrs[0], attrs[1..-1]){|t|
28
+ with_dups << t
29
+ }
30
+ end
31
+ with_dups.uniq.each(&bl)
32
+ end
33
+ end
34
+
35
+ def _each(tuple, attr, attrs, &bl)
36
+ rva = tuple[attr] || []
37
+ rva.each do |rvt|
38
+ t = tuple.merge(rvt).tap{|t| t.delete(attr) }
39
+ if attrs.empty?
40
+ yield(t)
41
+ else
42
+ _each(t, attrs[0], attrs[1..-1], &bl)
43
+ end
44
+ end
45
+ end
46
+
47
+ def to_ast
48
+ [ :ungroup, operand.to_ast, attrs ]
49
+ end
50
+
51
+ protected
52
+
53
+ protected ### inspect
54
+
55
+ def args
56
+ [ attrs ]
57
+ end
58
+
59
+ end # class Ungroup
60
+ end # module Operator
61
+ end # module Bmg
@@ -37,6 +37,7 @@ module Bmg
37
37
  end
38
38
 
39
39
  def each(&bl)
40
+ return to_enum unless block_given?
40
41
  if all?
41
42
  operands.each do |op|
42
43
  op.each(&bl)
@@ -0,0 +1,47 @@
1
+ module Bmg
2
+ module Operator
3
+ class Unwrap
4
+ include Operator::Unary
5
+
6
+ def initialize(type, operand, attrs)
7
+ @type = type
8
+ @operand = operand
9
+ @attrs = attrs
10
+ end
11
+
12
+ protected
13
+
14
+ attr_reader :attrs
15
+
16
+ public
17
+
18
+ def each(&bl)
19
+ return to_enum unless block_given?
20
+ operand.each do |tuple|
21
+ yield tuple_unwrap(tuple)
22
+ end
23
+ end
24
+
25
+ def to_ast
26
+ [ :unwrap, operand.to_ast, attrs ]
27
+ end
28
+
29
+ protected
30
+
31
+ def tuple_unwrap(tuple)
32
+ attrs.inject(tuple.dup){|t,attr|
33
+ t.merge(tuple[attr]).tap{|t2|
34
+ t2.delete(attr)
35
+ }
36
+ }
37
+ end
38
+
39
+ protected ### inspect
40
+
41
+ def args
42
+ [ attrs ]
43
+ end
44
+
45
+ end # class Unwrap
46
+ end # module Operator
47
+ end # module Bmg
@@ -5,30 +5,36 @@ module Bmg
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
7
  :headers => true,
8
- :return_headers => false
8
+ :return_headers => false,
9
+ :smart => true
9
10
  }
10
11
 
11
- def initialize(type, path, options = {})
12
+ def initialize(type, path_or_io, options = {})
12
13
  @type = type
13
- @path = path
14
+ @path_or_io = path_or_io
14
15
  @options = DEFAULT_OPTIONS.merge(options)
15
- @options[:col_sep] ||= infer_col_sep
16
- @options[:quote_char] ||= infer_quote_char
16
+ if @options[:smart] && !@path_or_io.is_a?(IO)
17
+ @options[:col_sep] ||= infer_col_sep
18
+ @options[:quote_char] ||= infer_quote_char
19
+ end
17
20
  end
18
21
 
19
22
  def each
23
+ return to_enum unless block_given?
20
24
  require 'csv'
21
- ::CSV.foreach(@path, @options) do |row|
22
- yield tuple(row)
25
+ with_io do |io|
26
+ ::CSV.new(io, **csv_options).each do |row|
27
+ yield tuple(row)
28
+ end
23
29
  end
24
30
  end
25
31
 
26
32
  def to_ast
27
- [ :csv, @path, @options ]
33
+ [ :csv, @path_or_io, @options ]
28
34
  end
29
35
 
30
36
  def to_s
31
- "(csv #{path})"
37
+ "(csv #{@path_or_io})"
32
38
  end
33
39
  alias :inspect :to_s
34
40
 
@@ -47,7 +53,16 @@ module Bmg
47
53
  end
48
54
 
49
55
  def text_portion
50
- @text_portion ||= File.foreach(@path).first(10).join("\n")
56
+ @text_portion ||= with_io{|io| io.readlines(10).join("\n") }
57
+ end
58
+
59
+ def with_io(&bl)
60
+ case @path_or_io
61
+ when IO, StringIO
62
+ bl.call(@path_or_io)
63
+ else
64
+ File.open(@path_or_io, "r", &bl)
65
+ end
51
66
  end
52
67
 
53
68
  # Finds the best candidate among `candidates` for a separator
@@ -61,6 +76,10 @@ module Bmg
61
76
  snif.size > 0 ? snif[0][0] : default
62
77
  end
63
78
 
79
+ def csv_options
80
+ @csv_options ||= @options.dup.tap{|opts| opts.delete(:smart) }
81
+ end
82
+
64
83
  end # class Csv
65
84
  end # module Reader
66
85
  end # module Bmg
@@ -4,7 +4,8 @@ module Bmg
4
4
  include Reader
5
5
 
6
6
  DEFAULT_OPTIONS = {
7
- skip: 0
7
+ skip: 0,
8
+ row_num: true
8
9
  }
9
10
 
10
11
  def initialize(type, path, options = {})
@@ -14,6 +15,7 @@ module Bmg
14
15
  end
15
16
 
16
17
  def each
18
+ return to_enum unless block_given?
17
19
  require 'roo'
18
20
  xlsx = Roo::Spreadsheet.open(@path, @options)
19
21
  headers = nil
@@ -23,9 +25,13 @@ module Bmg
23
25
  .each_with_index
24
26
  .each do |row, i|
25
27
  if i==0
26
- headers = row.map(&:to_sym)
28
+ headers = row.map{|c| c.to_s.strip.to_sym }
27
29
  else
28
- tuple = (0...headers.size).each_with_object({}){|i,t| t[headers[i]] = row[i] }
30
+ init = init_tuple(i)
31
+ tuple = (0...headers.size)
32
+ .each_with_object(init){|i,t|
33
+ t[headers[i]] = row[i]
34
+ }
29
35
  yield(tuple)
30
36
  end
31
37
  end
@@ -36,10 +42,23 @@ module Bmg
36
42
  end
37
43
 
38
44
  def to_s
39
- "(excel #{path})"
45
+ "(excel #{@path})"
40
46
  end
41
47
  alias :inspect :to_s
42
48
 
49
+ private
50
+
51
+ def init_tuple(i)
52
+ case as = @options[:row_num]
53
+ when TrueClass
54
+ { :row_num => i }
55
+ when FalseClass
56
+ {}
57
+ when Symbol
58
+ { :"#{as}" => i }
59
+ end
60
+ end
61
+
43
62
  end # class Excel
44
63
  end # module Reader
45
64
  end # module Bmg
data/lib/bmg/relation.rb CHANGED
@@ -27,6 +27,12 @@ module Bmg
27
27
  }
28
28
  end
29
29
 
30
+ def with_type_attrlist
31
+ return self if type.knows_attrlist?
32
+ attrs = self.first.keys
33
+ with_type(type.with_attrlist(attrs))
34
+ end
35
+
30
36
  def with_typecheck
31
37
  dup.tap{|r|
32
38
  r.type = r.type.with_typecheck
@@ -110,6 +116,18 @@ module Bmg
110
116
  end
111
117
  end
112
118
 
119
+ def count
120
+ if type.knows_keys?
121
+ project(type.keys.first)._count
122
+ else
123
+ self._count
124
+ end
125
+ end
126
+
127
+ def _count
128
+ to_a.size
129
+ end
130
+
113
131
  # Returns a json representation
114
132
  def to_json(*args, &bl)
115
133
  to_a.to_json(*args, &bl)
@@ -19,6 +19,10 @@ module Bmg
19
19
  def each(&bl)
20
20
  end
21
21
 
22
+ def _count
23
+ 0
24
+ end
25
+
22
26
  def to_ast
23
27
  [ :empty ]
24
28
  end
@@ -8,7 +8,6 @@ module Bmg
8
8
  @type = type
9
9
  end
10
10
  attr_accessor :type
11
- protected :type=
12
11
  attr_reader :operand
13
12
 
14
13
  public
@@ -17,6 +16,16 @@ module Bmg
17
16
  @operand.each(&bl)
18
17
  end
19
18
 
19
+ def _count
20
+ if operand.respond_to?(:count)
21
+ operand.count
22
+ elsif operand.respond_to?(:size)
23
+ operand.size
24
+ else
25
+ super
26
+ end
27
+ end
28
+
20
29
  def to_ast
21
30
  [ :in_memory, operand ]
22
31
  end
@@ -16,6 +16,12 @@ module Bmg
16
16
  end
17
17
  protected :type=
18
18
 
19
+ public
20
+
21
+ def _count
22
+ operand._count
23
+ end
24
+
19
25
  public
20
26
 
21
27
  def each(&bl)
@@ -28,6 +28,11 @@ module Bmg
28
28
  operand.each(&bl)
29
29
  end
30
30
 
31
+ def count
32
+ spy.call(self) if bl
33
+ operand.count
34
+ end
35
+
31
36
  def to_ast
32
37
  [ :spied, operand.to_ast, spy ]
33
38
  end
@@ -9,6 +9,7 @@ module Bmg
9
9
  attr_reader :sequel_db
10
10
 
11
11
  def each(&bl)
12
+ return to_enum unless block_given?
12
13
  dataset.each(&bl)
13
14
  end
14
15
 
@@ -33,6 +34,10 @@ module Bmg
33
34
  base_table.update(arg)
34
35
  end
35
36
 
37
+ def _count
38
+ dataset.count
39
+ end
40
+
36
41
  def to_ast
37
42
  [:sequel, dataset.sql]
38
43
  end
@@ -133,8 +133,8 @@ module Bmg
133
133
  _instance(type, builder, expr)
134
134
  end
135
135
 
136
- def _summarize(type, by, summarization)
137
- summarization = Operator::Summarize.compile(summarization)
136
+ def _summarize(type, by, defs)
137
+ summarization = ::Bmg::Summarizer.summarization(defs)
138
138
  if can_compile_summarization?(summarization)
139
139
  expr = before_use(self.expr)
140
140
  expr = Processor::Summarize.new(by, summarization, builder).call(self.expr)
@@ -50,6 +50,21 @@ module Bmg
50
50
  end
51
51
  end
52
52
 
53
+ # Converts some summarization definitions to a Hash of
54
+ # summarizers.
55
+ def self.summarization(defs)
56
+ Hash[defs.map{|k,v|
57
+ summarizer = case v
58
+ when Summarizer then v
59
+ when Symbol then Summarizer.send(v, k)
60
+ when Proc then Summarizer.by_proc(&v)
61
+ else
62
+ raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
63
+ end
64
+ [ k, summarizer ]
65
+ }]
66
+ end
67
+
53
68
  # Returns the default options to use
54
69
  #
55
70
  # @return the default aggregation options
@@ -80,7 +95,7 @@ module Bmg
80
95
  # @param the current iterated tuple
81
96
  # @return updated memo value
82
97
  def happens(memo, tuple)
83
- value = @functor.is_a?(Proc) ? @functor.call(tuple) : tuple[@functor]
98
+ value = extract_value(tuple)
84
99
  _happens(memo, value)
85
100
  end
86
101
 
@@ -119,6 +134,21 @@ module Bmg
119
134
  self.class.name.downcase[/::([a-z]+)$/, 1].to_sym
120
135
  end
121
136
 
137
+ protected
138
+
139
+ def extract_value(tuple)
140
+ value = case @functor
141
+ when Proc
142
+ @functor.call(tuple)
143
+ when NilClass
144
+ tuple
145
+ when Symbol
146
+ tuple[@functor]
147
+ else
148
+ tuple[@functor]
149
+ end
150
+ end
151
+
122
152
  end # class Summarizer
123
153
  end # module Bmg
124
154
  require_relative 'summarizer/count'
@@ -128,5 +158,10 @@ require_relative 'summarizer/max'
128
158
  require_relative 'summarizer/avg'
129
159
  require_relative 'summarizer/variance'
130
160
  require_relative 'summarizer/stddev'
161
+ require_relative 'summarizer/percentile'
131
162
  require_relative 'summarizer/collect'
163
+ require_relative 'summarizer/distinct'
132
164
  require_relative 'summarizer/concat'
165
+ require_relative 'summarizer/by_proc'
166
+ require_relative 'summarizer/multiple'
167
+ require_relative 'summarizer/value_by'