bmg 0.18.0 → 0.18.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/lib/bmg/algebra.rb +1 -0
  4. data/lib/bmg/algebra/shortcuts.rb +6 -0
  5. data/lib/bmg/error.rb +3 -0
  6. data/lib/bmg/operator/allbut.rb +27 -0
  7. data/lib/bmg/operator/autosummarize.rb +27 -4
  8. data/lib/bmg/operator/autowrap.rb +27 -0
  9. data/lib/bmg/operator/constants.rb +7 -0
  10. data/lib/bmg/operator/extend.rb +7 -0
  11. data/lib/bmg/operator/group.rb +1 -0
  12. data/lib/bmg/operator/image.rb +41 -2
  13. data/lib/bmg/operator/join.rb +1 -0
  14. data/lib/bmg/operator/matching.rb +1 -0
  15. data/lib/bmg/operator/not_matching.rb +1 -0
  16. data/lib/bmg/operator/page.rb +2 -7
  17. data/lib/bmg/operator/project.rb +3 -2
  18. data/lib/bmg/operator/rename.rb +12 -5
  19. data/lib/bmg/operator/restrict.rb +1 -0
  20. data/lib/bmg/operator/rxmatch.rb +1 -0
  21. data/lib/bmg/operator/summarize.rb +2 -17
  22. data/lib/bmg/operator/transform.rb +39 -1
  23. data/lib/bmg/operator/union.rb +1 -0
  24. data/lib/bmg/reader/csv.rb +29 -10
  25. data/lib/bmg/reader/excel.rb +23 -4
  26. data/lib/bmg/relation.rb +18 -0
  27. data/lib/bmg/relation/empty.rb +4 -0
  28. data/lib/bmg/relation/in_memory.rb +10 -1
  29. data/lib/bmg/relation/materialized.rb +6 -0
  30. data/lib/bmg/relation/spied.rb +5 -0
  31. data/lib/bmg/sequel/relation.rb +5 -0
  32. data/lib/bmg/sql/relation.rb +2 -3
  33. data/lib/bmg/summarizer.rb +36 -1
  34. data/lib/bmg/summarizer/avg.rb +3 -3
  35. data/lib/bmg/summarizer/by_proc.rb +41 -0
  36. data/lib/bmg/summarizer/distinct.rb +36 -0
  37. data/lib/bmg/summarizer/multiple.rb +46 -0
  38. data/lib/bmg/summarizer/percentile.rb +79 -0
  39. data/lib/bmg/summarizer/value_by.rb +62 -0
  40. data/lib/bmg/support.rb +1 -0
  41. data/lib/bmg/support/ordering.rb +20 -0
  42. data/lib/bmg/support/tuple_transformer.rb +10 -1
  43. data/lib/bmg/version.rb +1 -1
  44. data/lib/bmg/writer.rb +16 -0
  45. data/lib/bmg/writer/csv.rb +2 -12
  46. data/lib/bmg/writer/xlsx.rb +68 -0
  47. metadata +23 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1ce59d00b630f644e5716eaff17a83116c1342ea0b6ba7d9174b1f5f4eadd6e
4
- data.tar.gz: 2156d1a8eb88999e749f434a8e94d2b4e70c636fadb10470493f19bb805a19a3
3
+ metadata.gz: 6569e9038c83dda887da734d97ef58f9ff6e94984c78e8eae993561cde3bbfcc
4
+ data.tar.gz: 85d65ca717992a132e94ae17dbdf2cc671a161c495eb5be240c3d4e342e90c3f
5
5
  SHA512:
6
- metadata.gz: 8e3e714d698ff7c47c2f61b4056c73acaa39983306402eeb54d1957b90959af51fcf20a543c5c17242d260fe835b33c19dd960e3cd4920a66d49ea81a22ee4b0
7
- data.tar.gz: 89fe1cc4c3157adf7373fb755e0cf7eb5dd8d93f7061558454d27649b44aca558f936f006e3238dce410ffdfe29821d59feac3b31b5d64241ffc76192bba149f
6
+ metadata.gz: fbeb5215e5942626dd5115d95c091602daa57128014bc0833c38c3460697ef9dbc59b4ccd7566d4d3604e8d7a8c5e7e4812159a5bb89d3dad477e6a7b75545d8
7
+ data.tar.gz: 9e8a9eaaf3699d28890cb15faef5b12360f9bf26c5bb0e9bf42ccedba0b7b5a3c8c2eb627f8b1a4e154c8d992426f47ed82153e9f110634f0ba6aa7ceddd1353
data/README.md CHANGED
@@ -213,6 +213,7 @@ r.extend(x: ->(t){ ... }, ...) # add computed attributes
213
213
  r.exclude(predicate) # shortcut for restrict(!predicate)
214
214
  r.group([:a, :b, ...], :x) # relation-valued attribute from attributes
215
215
  r.image(right, :x, [:a, :b, ...]) # relation-valued attribute from another relation
216
+ r.images({:x => r1, :y => r2}, [:a, ...]) # shortcut over image(r1, :x, ...).image(r2, :y, ...)
216
217
  r.join(right, [:a, :b, ...]) # natural join on a join key
217
218
  r.join(right, :a => :x, :b => :y, ...) # natural join after right reversed renaming
218
219
  r.left_join(right, [:a, :b, ...], {...}) # left join with optional default right tuple
data/lib/bmg/algebra.rb CHANGED
@@ -174,6 +174,7 @@ module Bmg
174
174
 
175
175
  def transform(transformation = nil, options = {}, &proc)
176
176
  transformation, options = proc, (transformation || {}) unless proc.nil?
177
+ return self if transformation.is_a?(Hash) && transformation.empty?
177
178
  _transform(self.type.transform(transformation, options), transformation, options)
178
179
  end
179
180
 
@@ -39,6 +39,12 @@ module Bmg
39
39
  self.image(right.rename(renaming), as, on.keys, options)
40
40
  end
41
41
 
42
+ def images(rights, on = [], options = {})
43
+ rights.each_pair.inject(self){|memo,(as,right)|
44
+ memo.image(right, as, on, options)
45
+ }
46
+ end
47
+
42
48
  def join(right, on = [])
43
49
  return super unless on.is_a?(Hash)
44
50
  renaming = Hash[on.map{|k,v| [v,k] }]
data/lib/bmg/error.rb CHANGED
@@ -16,4 +16,7 @@ module Bmg
16
16
  # while unknown
17
17
  class UnknownAttributesError < Error; end
18
18
 
19
+ # Raised when an operator is badly used
20
+ class MisuseError < Error; end
21
+
19
22
  end
@@ -30,6 +30,7 @@ module Bmg
30
30
  public
31
31
 
32
32
  def each
33
+ return to_enum unless block_given?
33
34
  seen = {}
34
35
  @operand.each do |tuple|
35
36
  allbuted = tuple_allbut(tuple)
@@ -63,12 +64,38 @@ module Bmg
63
64
 
64
65
  protected ### optimization
65
66
 
67
+ def _allbut(type, butlist)
68
+ operand.allbut(self.butlist|butlist)
69
+ end
70
+
71
+ def _matching(type, right, on)
72
+ # Always possible to push the matching, since by construction
73
+ # `on` can only use attributes that have not been thrown away,
74
+ # hence they exist on `operand` too.
75
+ operand.matching(right, on).allbut(butlist)
76
+ end
77
+
78
+ def _page(type, ordering, page_index, options)
79
+ return super unless self.preserving_key?
80
+ operand.page(ordering, page_index, options).allbut(butlist)
81
+ end
82
+
83
+ def _project(type, attrlist)
84
+ operand.project(attrlist)
85
+ end
86
+
66
87
  def _restrict(type, predicate)
67
88
  operand.restrict(predicate).allbut(butlist)
68
89
  end
69
90
 
70
91
  protected ### inspect
71
92
 
93
+ def preserving_key?
94
+ operand.type.knows_keys? && operand.type.keys.find{|k|
95
+ (k & butlist).empty?
96
+ }
97
+ end
98
+
72
99
  def args
73
100
  [ butlist ]
74
101
  end
@@ -24,7 +24,24 @@ module Bmg
24
24
 
25
25
  public
26
26
 
27
+ def self.same(*args)
28
+ Same.new(*args)
29
+ end
30
+
31
+ def self.group(*args)
32
+ Group.new(*args)
33
+ end
34
+
35
+ def self.y_by_x(*args)
36
+ YByX.new(*args)
37
+ end
38
+
39
+ def self.ys_by_x(*args)
40
+ YsByX.new(*args)
41
+ end
42
+
27
43
  def each(&bl)
44
+ return to_enum unless block_given?
28
45
  h = {}
29
46
  @operand.each do |tuple|
30
47
  key = key(tuple)
@@ -41,6 +58,12 @@ module Bmg
41
58
  [:autosummarize, operand.to_ast, by.dup, sums.dup]
42
59
  end
43
60
 
61
+ public ### for internal reasons
62
+
63
+ def _count
64
+ operand._count
65
+ end
66
+
44
67
  protected
45
68
 
46
69
  def _restrict(type, predicate)
@@ -175,11 +198,11 @@ module Bmg
175
198
  end
176
199
 
177
200
  def init(v)
178
- [v]
201
+ v.nil? ? [] : [v]
179
202
  end
180
203
 
181
204
  def sum(v1, v2)
182
- v1 << v2
205
+ v2.nil? ? v1 : (v1 << v2)
183
206
  end
184
207
 
185
208
  def term(v)
@@ -211,11 +234,11 @@ module Bmg
211
234
  end
212
235
 
213
236
  def init(v)
214
- [v]
237
+ v.nil? ? [] : [v]
215
238
  end
216
239
 
217
240
  def sum(v1, v2)
218
- v1 << v2
241
+ v2.nil? ? v1 : (v1 << v2)
219
242
  end
220
243
 
221
244
  def term(v)
@@ -43,6 +43,7 @@ module Bmg
43
43
  end
44
44
 
45
45
  def each
46
+ return to_enum unless block_given?
46
47
  @operand.each do |tuple|
47
48
  yield autowrap_tuple(tuple)
48
49
  end
@@ -52,6 +53,12 @@ module Bmg
52
53
  [ :autowrap, operand.to_ast, @original_options.dup ]
53
54
  end
54
55
 
56
+ public ### for internal reasons
57
+
58
+ def _count
59
+ operand._count
60
+ end
61
+
55
62
  protected ### optimization
56
63
 
57
64
  def _autowrap(type, opts)
@@ -86,6 +93,16 @@ module Bmg
86
93
  false
87
94
  end
88
95
 
96
+ def _matching(type, right, on)
97
+ if (wrapped_roots! & on).empty?
98
+ operand.matching(right, on).autowrap(options)
99
+ else
100
+ super
101
+ end
102
+ rescue UnknownAttributesError
103
+ super
104
+ end
105
+
89
106
  def _page(type, ordering, page_index, opts)
90
107
  attrs = ordering.map{|(a,d)| a }
91
108
  if (wrapped_roots! & attrs).empty?
@@ -97,6 +114,16 @@ module Bmg
97
114
  super
98
115
  end
99
116
 
117
+ def _project(type, attrlist)
118
+ if (wrapped_roots! & attrlist).empty?
119
+ operand.project(attrlist).autowrap(options)
120
+ else
121
+ super
122
+ end
123
+ rescue UnknownAttributesError
124
+ super
125
+ end
126
+
100
127
  def _rename(type, renaming)
101
128
  # 1. Can't optimize if renaming applies to a wrapped one
102
129
  return super unless (wrapped_roots! & renaming.keys).empty?
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  @operand.each do |tuple|
27
28
  yield extend_it(tuple)
28
29
  end
@@ -54,6 +55,12 @@ module Bmg
54
55
  [ :constants, operand.to_ast, constants.dup ]
55
56
  end
56
57
 
58
+ public ### for internal reasons
59
+
60
+ def _count
61
+ operand._count
62
+ end
63
+
57
64
  protected ### optimization
58
65
 
59
66
  def _page(type, ordering, page_index, options)
@@ -26,6 +26,7 @@ module Bmg
26
26
  public
27
27
 
28
28
  def each
29
+ return to_enum unless block_given?
29
30
  @operand.each do |tuple|
30
31
  yield extend_it(tuple)
31
32
  end
@@ -53,6 +54,12 @@ module Bmg
53
54
  [ :extend, operand.to_ast, extension.dup ]
54
55
  end
55
56
 
57
+ public ### for internal reasons
58
+
59
+ def _count
60
+ operand._count
61
+ end
62
+
56
63
  protected ### optimization
57
64
 
58
65
  def _allbut(type, butlist)
@@ -32,6 +32,7 @@ module Bmg
32
32
  public
33
33
 
34
34
  def each(&bl)
35
+ return to_enum unless block_given?
35
36
  index = Hash.new{|h,k| h[k] = k.merge(as => empty_group) }
36
37
  operand.each do |tuple|
37
38
  key = TupleAlgebra.allbut(tuple, attrs)
@@ -46,6 +46,7 @@ module Bmg
46
46
  public
47
47
 
48
48
  def each(*args, &bl)
49
+ return to_enum unless block_given?
49
50
  (options[:jit_optimized] ? self : jit_optimize)._each(*args, &bl)
50
51
  end
51
52
 
@@ -99,9 +100,10 @@ module Bmg
99
100
  key = tuple_project(t, on)
100
101
  index[key].operand << tuple_image(t, on)
101
102
  end
102
- if options[:array]
103
+ if opt = options[:array]
104
+ sorter = to_sorter(opt)
103
105
  index = index.each_with_object({}) do |(k,v),ix|
104
- ix[k] = v.to_a
106
+ ix[k] = sorter ? v.to_a.sort(&sorter) : v.to_a
105
107
  end
106
108
  end
107
109
  index
@@ -154,8 +156,32 @@ module Bmg
154
156
  end
155
157
  end
156
158
 
159
+ public ### for internal reasons
160
+
161
+ def _count
162
+ left._count
163
+ end
164
+
157
165
  protected ### optimization
158
166
 
167
+ def _allbut(type, butlist)
168
+ if butlist.include?(as)
169
+ left.allbut(butlist - [as])
170
+ elsif (butlist & on).empty?
171
+ left.allbut(butlist).image(right, as, on, options)
172
+ else
173
+ super
174
+ end
175
+ end
176
+
177
+ def _matching(type, m_right, m_on)
178
+ if m_on.include?(as)
179
+ super
180
+ else
181
+ left.matching(m_right, m_on).image(right, as, on, options)
182
+ end
183
+ end
184
+
159
185
  def _page(type, ordering, page_index, opts)
160
186
  if ordering.map{|(k,v)| k}.include?(as)
161
187
  super
@@ -166,6 +192,14 @@ module Bmg
166
192
  end
167
193
  end
168
194
 
195
+ def _project(type, attrlist)
196
+ if attrlist.include?(as)
197
+ super
198
+ else
199
+ left.project(attrlist)
200
+ end
201
+ end
202
+
169
203
  def _restrict(type, predicate)
170
204
  on_as, rest = predicate.and_split([as])
171
205
  if rest.tautology?
@@ -227,6 +261,11 @@ module Bmg
227
261
  Relation::InMemory.new(image_type, Set.new)
228
262
  end
229
263
 
264
+ def to_sorter(opt)
265
+ return nil unless opt.is_a?(Array)
266
+ Ordering.new(opt).comparator
267
+ end
268
+
230
269
  public
231
270
 
232
271
  def to_s
@@ -25,6 +25,7 @@ module Bmg
25
25
  public
26
26
 
27
27
  def each
28
+ return to_enum unless block_given?
28
29
  index = Hash.new
29
30
  right.each_with_object(index) do |t, index|
30
31
  key = tuple_project(t, on)
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  index = Hash.new
27
28
  right.each_with_object(index) do |t, index|
28
29
  key = tuple_project(t, on)
@@ -23,6 +23,7 @@ module Bmg
23
23
  public
24
24
 
25
25
  def each
26
+ return to_enum unless block_given?
26
27
  index = Hash.new
27
28
  right.each_with_object(index) do |t, index|
28
29
  key = tuple_project(t, on)
@@ -30,6 +30,7 @@ module Bmg
30
30
  public
31
31
 
32
32
  def each(&bl)
33
+ return to_enum unless block_given?
33
34
  page_size = options[:page_size]
34
35
  @operand.to_a
35
36
  .sort(&comparator)
@@ -45,13 +46,7 @@ module Bmg
45
46
  protected ### inspect
46
47
 
47
48
  def comparator
48
- ->(t1, t2) {
49
- ordering.each do |(attr,direction)|
50
- c = t1[attr] <=> t2[attr]
51
- return (direction == :desc ? -c : c) unless c==0
52
- end
53
- 0
54
- }
49
+ Ordering.new(@ordering).comparator
55
50
  end
56
51
 
57
52
  def args
@@ -29,9 +29,10 @@ module Bmg
29
29
  public
30
30
 
31
31
  def each
32
+ return to_enum unless block_given?
32
33
  seen = {}
33
34
  @operand.each do |tuple|
34
- projected = project(tuple)
35
+ projected = tuple_project(tuple)
35
36
  unless seen.has_key?(projected)
36
37
  yield(projected)
37
38
  seen[projected] = true
@@ -74,7 +75,7 @@ module Bmg
74
75
 
75
76
  private
76
77
 
77
- def project(tuple)
78
+ def tuple_project(tuple)
78
79
  tuple.dup.delete_if{|k,_| !@attrlist.include?(k) }
79
80
  end
80
81
 
@@ -29,16 +29,17 @@ module Bmg
29
29
  public
30
30
 
31
31
  def each
32
+ return to_enum unless block_given?
32
33
  @operand.each do |tuple|
33
- yield rename(tuple, renaming)
34
+ yield rename_tuple(tuple, renaming)
34
35
  end
35
36
  end
36
37
 
37
38
  def insert(arg)
38
39
  case arg
39
- when Hash then operand.insert(rename(arg, reverse_renaming))
40
+ when Hash then operand.insert(rename_tuple(arg, reverse_renaming))
40
41
  when Relation then operand.insert(arg.rename(reverse_renaming))
41
- when Enumerable then operand.insert(arg.map{|t| rename(t, reverse_renaming) })
42
+ when Enumerable then operand.insert(arg.map{|t| rename_tuple(t, reverse_renaming) })
42
43
  else
43
44
  super
44
45
  end
@@ -46,7 +47,7 @@ module Bmg
46
47
 
47
48
  def update(arg)
48
49
  case arg
49
- when Hash then operand.update(rename(arg, reverse_renaming))
50
+ when Hash then operand.update(rename_tuple(arg, reverse_renaming))
50
51
  else
51
52
  super
52
53
  end
@@ -60,6 +61,12 @@ module Bmg
60
61
  [ :rename, operand.to_ast, renaming.dup ]
61
62
  end
62
63
 
64
+ public ### for internal reasons
65
+
66
+ def _count
67
+ operand._count
68
+ end
69
+
63
70
  protected ### optimization
64
71
 
65
72
  def _page(type, ordering, page_index, options)
@@ -82,7 +89,7 @@ module Bmg
82
89
 
83
90
  private
84
91
 
85
- def rename(tuple, renaming)
92
+ def rename_tuple(tuple, renaming)
86
93
  tuple.each_with_object({}){|(k,v),h|
87
94
  h[renaming[k] || k] = v
88
95
  h