bmg 0.18.1 → 0.18.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/lib/bmg/algebra.rb +18 -0
  4. data/lib/bmg/algebra/shortcuts.rb +8 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator.rb +2 -0
  7. data/lib/bmg/operator/allbut.rb +9 -4
  8. data/lib/bmg/operator/autosummarize.rb +7 -0
  9. data/lib/bmg/operator/autowrap.rb +19 -0
  10. data/lib/bmg/operator/constants.rb +7 -0
  11. data/lib/bmg/operator/extend.rb +7 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +15 -0
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +1 -0
  18. data/lib/bmg/operator/project.rb +3 -2
  19. data/lib/bmg/operator/rename.rb +12 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +1 -0
  24. data/lib/bmg/operator/ungroup.rb +61 -0
  25. data/lib/bmg/operator/union.rb +1 -0
  26. data/lib/bmg/operator/unwrap.rb +47 -0
  27. data/lib/bmg/reader/csv.rb +29 -10
  28. data/lib/bmg/reader/excel.rb +23 -4
  29. data/lib/bmg/relation.rb +18 -0
  30. data/lib/bmg/relation/empty.rb +4 -0
  31. data/lib/bmg/relation/in_memory.rb +10 -1
  32. data/lib/bmg/relation/materialized.rb +6 -0
  33. data/lib/bmg/relation/spied.rb +5 -0
  34. data/lib/bmg/sequel/relation.rb +5 -0
  35. data/lib/bmg/sql/relation.rb +2 -2
  36. data/lib/bmg/summarizer.rb +36 -1
  37. data/lib/bmg/summarizer/avg.rb +3 -3
  38. data/lib/bmg/summarizer/by_proc.rb +41 -0
  39. data/lib/bmg/summarizer/distinct.rb +36 -0
  40. data/lib/bmg/summarizer/multiple.rb +46 -0
  41. data/lib/bmg/summarizer/percentile.rb +79 -0
  42. data/lib/bmg/summarizer/value_by.rb +62 -0
  43. data/lib/bmg/support/keys.rb +5 -0
  44. data/lib/bmg/support/tuple_transformer.rb +10 -1
  45. data/lib/bmg/type.rb +19 -1
  46. data/lib/bmg/version.rb +1 -1
  47. data/lib/bmg/writer.rb +16 -0
  48. data/lib/bmg/writer/csv.rb +2 -12
  49. data/lib/bmg/writer/xlsx.rb +68 -0
  50. metadata +24 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e47d2972990cc85ae0a3b99fa6a79caf97176e46c7d70cdbbbb35ffdb3df21a
4
- data.tar.gz: dd7ddf0c123a9347f517da44cc923ec9664c8a1fd12b9e8f0ed08677be2dcdd4
3
+ metadata.gz: 482e0bb06c06e4afc620696491cf75f788902240c75f9c82d71ce9b0db4deee1
4
+ data.tar.gz: f49609c7cf929149d91bb1c29703fd4e5fb7d861073b38ad916a851214ec20b9
5
5
  SHA512:
6
- metadata.gz: 929f4c67a01d756b484e9ae27e5465173a4a1850686a173a8ab293dbe1e1ba01899c1ab21a8e2f29251378d0354287e379e7573ae551d3aef0583a0690640375
7
- data.tar.gz: 3a92ab6c7708badf0464a94c496000eb55d233fb979bfd7128b71f3229347fa56c38be381815dbbc7eb0f741b58138f89551080e7670b3b044dfbba6571ac892
6
+ metadata.gz: a4068309c373faa50bfc4549648ee26e3220187192a529ee8c6c890424c52b4d05103c2ebf7b137354c1c8a82a756eafbdb5c6b266f489c3c6e0a0b9923a4d15
7
+ data.tar.gz: ba7a0046919e2d796c58cae326f3d4cf7b86f6d577e34f79395c04286c4bf463cb4c349cb2a5ce4611fbd6e310ebf521736fb190bcc1d1c2c42c51ccc175fb2e
data/README.md CHANGED
@@ -234,7 +234,11 @@ t.transform(:to_s) # all-attrs transformation
234
234
  t.transform(&:to_s) # similar, but Proc-driven
235
235
  t.transform(:foo => :upcase, ...) # specific-attrs transformation
236
236
  t.transform([:to_s, :upcase]) # chain-transformation
237
+ r.ungroup([:a, :b, ...]) # ungroup relation-valued attributes within parent tuple
238
+ r.ungroup(:a) # shortcut over ungroup([:a])
237
239
  r.union(right) # relational union
240
+ r.unwrap([:a, :b, ...]) # merge tuple-valued attributes within parent tuple
241
+ r.unwrap(:a) # shortcut over unwrap([:a])
238
242
  r.where(predicate) # alias for restrict(predicate)
239
243
  ```
240
244
 
data/lib/bmg/algebra.rb CHANGED
@@ -183,6 +183,15 @@ module Bmg
183
183
  end
184
184
  protected :_transform
185
185
 
186
+ def ungroup(attrs)
187
+ _ungroup self.type.ungroup(attrs), attrs
188
+ end
189
+
190
+ def _ungroup(type, attrs)
191
+ Operator::Ungroup.new(type, self, attrs)
192
+ end
193
+ protected :_ungroup
194
+
186
195
  def union(other, options = {})
187
196
  return self if other.is_a?(Relation::Empty)
188
197
  _union self.type.union(other.type), other, options
@@ -193,6 +202,15 @@ module Bmg
193
202
  end
194
203
  protected :_union
195
204
 
205
+ def unwrap(attrs)
206
+ _unwrap self.type.unwrap(attrs), attrs
207
+ end
208
+
209
+ def _unwrap(type, attrs)
210
+ Operator::Unwrap.new(type, self, attrs)
211
+ end
212
+ protected :_unwrap
213
+
196
214
  def spied(spy)
197
215
  return self if spy.nil?
198
216
  Relation::Spied.new(self, spy)
@@ -69,6 +69,14 @@ module Bmg
69
69
  self.not_matching(right.rename(renaming), on.keys)
70
70
  end
71
71
 
72
+ def ungroup(attr)
73
+ super(attr.is_a?(Symbol) ? [attr] : attr)
74
+ end
75
+
76
+ def unwrap(attr)
77
+ super(attr.is_a?(Symbol) ? [attr] : attr)
78
+ end
79
+
72
80
  end # module Shortcuts
73
81
  end # module Algebra
74
82
  end # module Bmg
data/lib/bmg/error.rb CHANGED
@@ -16,4 +16,7 @@ module Bmg
16
16
  # while unknown
17
17
  class UnknownAttributesError < Error; end
18
18
 
19
+ # Raised when an operator is badly used
20
+ class MisuseError < Error; end
21
+
19
22
  end
data/lib/bmg/operator.rb CHANGED
@@ -47,4 +47,6 @@ require_relative 'operator/restrict'
47
47
  require_relative 'operator/rxmatch'
48
48
  require_relative 'operator/summarize'
49
49
  require_relative 'operator/transform'
50
+ require_relative 'operator/ungroup'
50
51
  require_relative 'operator/union'
52
+ require_relative 'operator/unwrap'
@@ -30,6 +30,7 @@ module Bmg
30
30
  public
31
31
 
32
32
  def each
33
+ return to_enum unless block_given?
33
34
  seen = {}
34
35
  @operand.each do |tuple|
35
36
  allbuted = tuple_allbut(tuple)
@@ -74,15 +75,19 @@ module Bmg
74
75
  operand.matching(right, on).allbut(butlist)
75
76
  end
76
77
 
77
- def _restrict(type, predicate)
78
- operand.restrict(predicate).allbut(butlist)
79
- end
80
-
81
78
  def _page(type, ordering, page_index, options)
82
79
  return super unless self.preserving_key?
83
80
  operand.page(ordering, page_index, options).allbut(butlist)
84
81
  end
85
82
 
83
+ def _project(type, attrlist)
84
+ operand.project(attrlist)
85
+ end
86
+
87
+ def _restrict(type, predicate)
88
+ operand.restrict(predicate).allbut(butlist)
89
+ end
90
+
86
91
  protected ### inspect
87
92
 
88
93
  def preserving_key?
@@ -41,6 +41,7 @@ module Bmg
41
41
  end
42
42
 
43
43
  def each(&bl)
44
+ return to_enum unless block_given?
44
45
  h = {}
45
46
  @operand.each do |tuple|
46
47
  key = key(tuple)
@@ -57,6 +58,12 @@ module Bmg
57
58
  [:autosummarize, operand.to_ast, by.dup, sums.dup]
58
59
  end
59
60
 
61
+ public ### for internal reasons
62
+
63
+ def _count
64
+ operand._count
65
+ end
66
+
60
67
  protected
61
68
 
62
69
  def _restrict(type, predicate)
@@ -43,6 +43,7 @@ module Bmg
43
43
  end
44
44
 
45
45
  def each
46
+ return to_enum unless block_given?
46
47
  @operand.each do |tuple|
47
48
  yield autowrap_tuple(tuple)
48
49
  end
@@ -52,6 +53,12 @@ module Bmg
52
53
  [ :autowrap, operand.to_ast, @original_options.dup ]
53
54
  end
54
55
 
56
+ public ### for internal reasons
57
+
58
+ def _count
59
+ operand._count
60
+ end
61
+
55
62
  protected ### optimization
56
63
 
57
64
  def _autowrap(type, opts)
@@ -92,6 +99,8 @@ module Bmg
92
99
  else
93
100
  super
94
101
  end
102
+ rescue UnknownAttributesError
103
+ super
95
104
  end
96
105
 
97
106
  def _page(type, ordering, page_index, opts)
@@ -105,6 +114,16 @@ module Bmg
105
114
  super
106
115
  end
107
116
 
117
+ def _project(type, attrlist)
118
+ if (wrapped_roots! & attrlist).empty?
119
+ operand.project(attrlist).autowrap(options)
120
+ else
121
+ super
122
+ end
123
+ rescue UnknownAttributesError
124
+ super
125
+ end
126
+
108
127
  def _rename(type, renaming)
109
128
  # 1. Can't optimize if renaming applies to a wrapped one
110
129
  return super unless (wrapped_roots! & renaming.keys).empty?
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  @operand.each do |tuple|
27
28
  yield extend_it(tuple)
28
29
  end
@@ -54,6 +55,12 @@ module Bmg
54
55
  [ :constants, operand.to_ast, constants.dup ]
55
56
  end
56
57
 
58
+ public ### for internal reasons
59
+
60
+ def _count
61
+ operand._count
62
+ end
63
+
57
64
  protected ### optimization
58
65
 
59
66
  def _page(type, ordering, page_index, options)
@@ -26,6 +26,7 @@ module Bmg
26
26
  public
27
27
 
28
28
  def each
29
+ return to_enum unless block_given?
29
30
  @operand.each do |tuple|
30
31
  yield extend_it(tuple)
31
32
  end
@@ -53,6 +54,12 @@ module Bmg
53
54
  [ :extend, operand.to_ast, extension.dup ]
54
55
  end
55
56
 
57
+ public ### for internal reasons
58
+
59
+ def _count
60
+ operand._count
61
+ end
62
+
56
63
  protected ### optimization
57
64
 
58
65
  def _allbut(type, butlist)
@@ -32,6 +32,7 @@ module Bmg
32
32
  public
33
33
 
34
34
  def each(&bl)
35
+ return to_enum unless block_given?
35
36
  index = Hash.new{|h,k| h[k] = k.merge(as => empty_group) }
36
37
  operand.each do |tuple|
37
38
  key = TupleAlgebra.allbut(tuple, attrs)
@@ -46,6 +46,7 @@ module Bmg
46
46
  public
47
47
 
48
48
  def each(*args, &bl)
49
+ return to_enum unless block_given?
49
50
  (options[:jit_optimized] ? self : jit_optimize)._each(*args, &bl)
50
51
  end
51
52
 
@@ -155,6 +156,12 @@ module Bmg
155
156
  end
156
157
  end
157
158
 
159
+ public ### for internal reasons
160
+
161
+ def _count
162
+ left._count
163
+ end
164
+
158
165
  protected ### optimization
159
166
 
160
167
  def _allbut(type, butlist)
@@ -185,6 +192,14 @@ module Bmg
185
192
  end
186
193
  end
187
194
 
195
+ def _project(type, attrlist)
196
+ if attrlist.include?(as)
197
+ super
198
+ else
199
+ left.project(attrlist)
200
+ end
201
+ end
202
+
188
203
  def _restrict(type, predicate)
189
204
  on_as, rest = predicate.and_split([as])
190
205
  if rest.tautology?
@@ -25,6 +25,7 @@ module Bmg
25
25
  public
26
26
 
27
27
  def each
28
+ return to_enum unless block_given?
28
29
  index = Hash.new
29
30
  right.each_with_object(index) do |t, index|
30
31
  key = tuple_project(t, on)
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  index = Hash.new
27
28
  right.each_with_object(index) do |t, index|
28
29
  key = tuple_project(t, on)
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  index = Hash.new
27
28
  right.each_with_object(index) do |t, index|
28
29
  key = tuple_project(t, on)
@@ -30,6 +30,7 @@ module Bmg
30
30
  public
31
31
 
32
32
  def each(&bl)
33
+ return to_enum unless block_given?
33
34
  page_size = options[:page_size]
34
35
  @operand.to_a
35
36
  .sort(&comparator)
@@ -29,9 +29,10 @@ module Bmg
29
29
  public
30
30
 
31
31
  def each
32
+ return to_enum unless block_given?
32
33
  seen = {}
33
34
  @operand.each do |tuple|
34
- projected = project(tuple)
35
+ projected = tuple_project(tuple)
35
36
  unless seen.has_key?(projected)
36
37
  yield(projected)
37
38
  seen[projected] = true
@@ -74,7 +75,7 @@ module Bmg
74
75
 
75
76
  private
76
77
 
77
- def project(tuple)
78
+ def tuple_project(tuple)
78
79
  tuple.dup.delete_if{|k,_| !@attrlist.include?(k) }
79
80
  end
80
81
 
@@ -29,16 +29,17 @@ module Bmg
29
29
  public
30
30
 
31
31
  def each
32
+ return to_enum unless block_given?
32
33
  @operand.each do |tuple|
33
- yield rename(tuple, renaming)
34
+ yield rename_tuple(tuple, renaming)
34
35
  end
35
36
  end
36
37
 
37
38
  def insert(arg)
38
39
  case arg
39
- when Hash then operand.insert(rename(arg, reverse_renaming))
40
+ when Hash then operand.insert(rename_tuple(arg, reverse_renaming))
40
41
  when Relation then operand.insert(arg.rename(reverse_renaming))
41
- when Enumerable then operand.insert(arg.map{|t| rename(t, reverse_renaming) })
42
+ when Enumerable then operand.insert(arg.map{|t| rename_tuple(t, reverse_renaming) })
42
43
  else
43
44
  super
44
45
  end
@@ -46,7 +47,7 @@ module Bmg
46
47
 
47
48
  def update(arg)
48
49
  case arg
49
- when Hash then operand.update(rename(arg, reverse_renaming))
50
+ when Hash then operand.update(rename_tuple(arg, reverse_renaming))
50
51
  else
51
52
  super
52
53
  end
@@ -60,6 +61,12 @@ module Bmg
60
61
  [ :rename, operand.to_ast, renaming.dup ]
61
62
  end
62
63
 
64
+ public ### for internal reasons
65
+
66
+ def _count
67
+ operand._count
68
+ end
69
+
63
70
  protected ### optimization
64
71
 
65
72
  def _page(type, ordering, page_index, options)
@@ -82,7 +89,7 @@ module Bmg
82
89
 
83
90
  private
84
91
 
85
- def rename(tuple, renaming)
92
+ def rename_tuple(tuple, renaming)
86
93
  tuple.each_with_object({}){|(k,v),h|
87
94
  h[renaming[k] || k] = v
88
95
  h
@@ -26,6 +26,7 @@ module Bmg
26
26
  end
27
27
 
28
28
  def each
29
+ return to_enum unless block_given?
29
30
  @operand.each do |tuple|
30
31
  yield(tuple) if @predicate.evaluate(tuple)
31
32
  end
@@ -32,6 +32,7 @@ module Bmg
32
32
  public
33
33
 
34
34
  def each
35
+ return to_enum unless block_given?
35
36
  @operand.each do |tuple|
36
37
  against = attrs.map{|a| tuple[a] }.join(" ")
37
38
  matcher = self.matcher
@@ -13,7 +13,7 @@ module Bmg
13
13
  @type = type
14
14
  @operand = operand
15
15
  @by = by
16
- @summarization = Summarize.compile(summarization)
16
+ @summarization = Summarizer.summarization(summarization)
17
17
  end
18
18
 
19
19
  protected
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  # summary key => summarization memo, starting with least
27
28
  result = Hash.new{|h,k|
28
29
  h[k] = Hash[@summarization.map{|k,v|
@@ -56,22 +57,6 @@ module Bmg
56
57
  [ by, summarization ]
57
58
  end
58
59
 
59
- private
60
-
61
- # Compile a summarization hash so that every value is a Summarizer
62
- # instance
63
- def self.compile(summarization)
64
- Hash[summarization.map{|k,v|
65
- summarizer = case v
66
- when Summarizer then v
67
- when Symbol then Summarizer.send(v, k)
68
- else
69
- raise ArgumentError, "Unexpected summarizer #{k} => #{v}"
70
- end
71
- [ k, summarizer ]
72
- }]
73
- end
74
-
75
60
  end # class Summarize
76
61
  end # module Operator
77
62
  end # module Bmg