bud 0.9.4 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/History.txt +106 -0
- data/README.md +6 -4
- data/Rakefile +91 -0
- data/bin/budlabel +63 -0
- data/bin/budplot +18 -8
- data/bin/budtimelines +2 -2
- data/bin/budvis +7 -1
- data/docs/README.md +8 -17
- data/docs/cheat.md +112 -13
- data/docs/getstarted.md +97 -84
- data/docs/operational.md +3 -3
- data/examples/basics/paths.rb +2 -2
- data/examples/chat/README.md +2 -0
- data/examples/chat/chat.rb +3 -2
- data/examples/chat/chat_protocol.rb +2 -2
- data/examples/chat/chat_server.rb +3 -2
- data/lib/bud.rb +229 -114
- data/lib/bud/aggs.rb +20 -4
- data/lib/bud/bud_meta.rb +83 -73
- data/lib/bud/collections.rb +306 -120
- data/lib/bud/depanalysis.rb +3 -4
- data/lib/bud/executor/README.rescan +2 -1
- data/lib/bud/executor/elements.rb +96 -95
- data/lib/bud/executor/group.rb +35 -32
- data/lib/bud/executor/join.rb +164 -183
- data/lib/bud/graphs.rb +3 -3
- data/lib/bud/labeling/bloomgraph.rb +47 -0
- data/lib/bud/labeling/budplot_style.rb +53 -0
- data/lib/bud/labeling/labeling.rb +288 -0
- data/lib/bud/lattice-core.rb +595 -0
- data/lib/bud/lattice-lib.rb +422 -0
- data/lib/bud/monkeypatch.rb +68 -32
- data/lib/bud/rebl.rb +28 -10
- data/lib/bud/rewrite.rb +361 -152
- data/lib/bud/server.rb +16 -8
- data/lib/bud/source.rb +21 -18
- data/lib/bud/state.rb +93 -4
- data/lib/bud/storage/zookeeper.rb +45 -33
- data/lib/bud/version.rb +3 -0
- data/lib/bud/viz.rb +10 -12
- data/lib/bud/viz_util.rb +8 -3
- metadata +107 -108
data/lib/bud/executor/join.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
require 'bud/executor/elements'
|
2
|
-
require 'set'
|
3
2
|
|
4
|
-
$EMPTY = []
|
5
3
|
module Bud
|
6
4
|
class PushSHJoin < PushStatefulElement
|
7
5
|
attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
|
8
6
|
|
9
7
|
def initialize(rellist, bud_instance, preds=nil) # :nodoc: all
|
10
8
|
@rels = rellist
|
11
|
-
@relnames = @rels.map{|r| r.
|
9
|
+
@relnames = @rels.map{|r| r.qualified_tabname}
|
12
10
|
@cols = []
|
13
11
|
@bud_instance = bud_instance
|
14
12
|
@origpreds = preds
|
15
|
-
@localpreds =
|
13
|
+
@localpreds = []
|
16
14
|
@selfjoins = []
|
15
|
+
@keys = []
|
16
|
+
@key_attnos = [[], []]
|
17
17
|
@missing_keys = Set.new
|
18
18
|
|
19
19
|
# if any elements on rellist are PushSHJoins, suck up their contents
|
20
20
|
@all_rels_below = []
|
21
|
-
|
21
|
+
@rels.each do |r|
|
22
22
|
if r.class <= PushSHJoin
|
23
23
|
@all_rels_below += r.all_rels_below
|
24
24
|
preds += r.origpreds
|
@@ -26,12 +26,13 @@ module Bud
|
|
26
26
|
@all_rels_below << r
|
27
27
|
end
|
28
28
|
end
|
29
|
+
@left_is_array = @all_rels_below.length > 2
|
29
30
|
|
30
31
|
# check for self-joins: we currently only handle 2 instances of the same
|
31
32
|
# table per rule
|
32
33
|
counts = @all_rels_below.reduce({}) do |memo, r|
|
33
|
-
memo[r.
|
34
|
-
memo[r.
|
34
|
+
memo[r.qualified_tabname] ||= 0
|
35
|
+
memo[r.qualified_tabname] += 1
|
35
36
|
memo
|
36
37
|
end
|
37
38
|
counts.each do |name, cnt|
|
@@ -39,31 +40,12 @@ module Bud
|
|
39
40
|
@selfjoins << name if cnt == 2
|
40
41
|
end
|
41
42
|
|
42
|
-
# derive schema: one column for each table.
|
43
|
-
# duplicated inputs get distinguishing numeral
|
44
|
-
@cols = []
|
45
|
-
index = 0
|
46
|
-
retval = @all_rels_below.reduce({}) do |memo, r|
|
47
|
-
index += 1
|
48
|
-
memo[r.tabname.to_s] ||= 0
|
49
|
-
newstr = r.tabname.to_s + ((memo[r.tabname.to_s] > 0) ? ("_" + memo[r.tabname.to_s].to_s) : "")
|
50
|
-
@cols << newstr.to_sym
|
51
|
-
memo[r.tabname.to_s] += 1
|
52
|
-
memo
|
53
|
-
end
|
54
|
-
|
55
43
|
setup_preds(preds) unless preds.empty?
|
56
44
|
setup_state
|
57
45
|
|
58
46
|
super(@tabname, @bud_instance, nil, @cols)
|
59
47
|
end
|
60
48
|
|
61
|
-
public
|
62
|
-
def copy_on_write
|
63
|
-
@refcount -= 1
|
64
|
-
return Bud::PushSHJoin.new(@all_rels_below, @bud_instance, [])
|
65
|
-
end
|
66
|
-
|
67
49
|
public
|
68
50
|
def state_id # :nodoc: all
|
69
51
|
object_id
|
@@ -77,7 +59,7 @@ module Bud
|
|
77
59
|
private
|
78
60
|
def setup_state
|
79
61
|
sid = state_id
|
80
|
-
@tabname = ("(" + @all_rels_below.map{|r| r.
|
62
|
+
@tabname = ("(" + @all_rels_below.map{|r| r.qualified_tabname}.join('*') +"):"+sid.to_s).to_sym
|
81
63
|
@hash_tables = [{}, {}]
|
82
64
|
end
|
83
65
|
|
@@ -87,25 +69,26 @@ module Bud
|
|
87
69
|
# print "setting up preds for #{@relnames.inspect}(#{self.object_id}): "
|
88
70
|
allpreds = disambiguate_preds(preds)
|
89
71
|
allpreds = canonicalize_localpreds(@rels, allpreds)
|
90
|
-
|
72
|
+
|
73
|
+
# check for refs to collections that aren't being joined
|
91
74
|
unless @rels[0].class <= Bud::PushSHJoin
|
92
|
-
tabnames = @rels.map{ |r| r.tabname }
|
93
75
|
allpreds.each do |p|
|
94
|
-
unless
|
76
|
+
unless @relnames.include? p[0][0]
|
95
77
|
raise Bud::CompileError, "illegal predicate: collection #{p[0][0]} is not being joined"
|
96
78
|
end
|
97
|
-
unless
|
79
|
+
unless @relnames.include? p[1][0]
|
98
80
|
raise Bud::CompileError, "illegal predicate: collection #{p[1][0]} is not being joined"
|
99
81
|
end
|
100
82
|
end
|
101
83
|
end
|
84
|
+
|
102
85
|
@localpreds = allpreds.reject do |p|
|
103
86
|
# reject if it doesn't match the right (leaf node) of the join
|
104
87
|
# or reject if it does match, but it can be evaluated by a lower join
|
105
|
-
# i.e. one that also has this table on the right (
|
106
|
-
p[1][0] != @rels[1].
|
107
|
-
or (p[0][0] != @rels[1].
|
108
|
-
and p[1][0] == @rels[1].
|
88
|
+
# i.e. one that also has this table on the right (leaf node)
|
89
|
+
p[1][0] != @rels[1].qualified_tabname \
|
90
|
+
or (p[0][0] != @rels[1].qualified_tabname \
|
91
|
+
and p[1][0] == @rels[1].qualified_tabname and @selfjoins.include? @rels[1].qualified_tabname)
|
109
92
|
end
|
110
93
|
|
111
94
|
# only allow preds on the same table name if they're on a self-joined table
|
@@ -115,9 +98,9 @@ module Bud
|
|
115
98
|
end
|
116
99
|
end
|
117
100
|
|
118
|
-
@localpreds += allpreds.
|
119
|
-
p
|
120
|
-
end
|
101
|
+
@localpreds += allpreds.select do |p|
|
102
|
+
p[0][0] == p[1][0] and (p[1][0] == @rels[0].qualified_tabname or p[1][0] == @rels[1].qualified_tabname)
|
103
|
+
end
|
121
104
|
otherpreds = allpreds - @localpreds
|
122
105
|
unless otherpreds.empty?
|
123
106
|
unless @rels[0].class <= Bud::PushSHJoin
|
@@ -126,29 +109,27 @@ module Bud
|
|
126
109
|
@rels[0].setup_preds(otherpreds)
|
127
110
|
end
|
128
111
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
@keys
|
133
|
-
else
|
134
|
-
@keys = []
|
112
|
+
@localpreds.each do |lp|
|
113
|
+
right_offset = lp[1][1]
|
114
|
+
left_subtuple, left_offset = join_offset(lp[0])
|
115
|
+
@keys << [[left_subtuple, left_offset], [1, right_offset]]
|
135
116
|
end
|
117
|
+
|
118
|
+
# Optimize for a common case. When we're just fetching key values from
|
119
|
+
# an input tuple, look up the column offsets we need to fetch for each
|
120
|
+
# input. This doesn't apply when we're computing the key for the left
|
121
|
+
# input and @left_is_array is true.
|
122
|
+
@key_attnos = []
|
123
|
+
@key_attnos[0] = @keys.map {|k| k[0][1]}
|
124
|
+
@key_attnos[1] = @keys.map {|k| k[1][1]}
|
136
125
|
end
|
137
126
|
|
138
127
|
public
|
139
128
|
def invalidate_cache
|
140
129
|
@rels.each_with_index do |source_elem, i|
|
141
130
|
if source_elem.rescan
|
142
|
-
puts "#{
|
131
|
+
puts "#{qualified_tabname} rel:#{i}(#{source_elem.qualified_tabname}) invalidated" if $BUD_DEBUG
|
143
132
|
@hash_tables[i] = {}
|
144
|
-
if i == 0
|
145
|
-
# Only if i == 0 because outer joins in Bloom are left outer joins.
|
146
|
-
# If i == 1, missing_keys will be corrected when items are populated
|
147
|
-
# in the rhs fork.
|
148
|
-
# XXX This is not modular. We are doing invalidation work for outer
|
149
|
-
# joins, which is part of a separate module PushSHOuterJoin.
|
150
|
-
@missing_keys.clear
|
151
|
-
end
|
152
133
|
end
|
153
134
|
end
|
154
135
|
end
|
@@ -163,7 +144,7 @@ module Bud
|
|
163
144
|
# referenced in entry.
|
164
145
|
subtuple = 0
|
165
146
|
all_rels_below[0..all_rels_below.length-1].each_with_index do |t,i|
|
166
|
-
if t.
|
147
|
+
if t.qualified_tabname == name
|
167
148
|
subtuple = i
|
168
149
|
break
|
169
150
|
end
|
@@ -181,7 +162,7 @@ module Bud
|
|
181
162
|
elsif k.class <= Array
|
182
163
|
[k,v]
|
183
164
|
elsif k.class <= Symbol
|
184
|
-
if @all_rels_below
|
165
|
+
if @all_rels_below.length == 2
|
185
166
|
[find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
|
186
167
|
else
|
187
168
|
[find_attr_match(k), find_attr_match(v)]
|
@@ -204,20 +185,23 @@ module Bud
|
|
204
185
|
dorels = (rel.nil? ? @all_rels_below : [rel])
|
205
186
|
match = nil
|
206
187
|
dorels.each do |r|
|
207
|
-
|
208
|
-
|
209
|
-
|
188
|
+
r_name = r.qualified_tabname
|
189
|
+
tbl = bud_instance.toplevel.tables[r_name]
|
190
|
+
match ||= r if tbl.respond_to?(aname)
|
191
|
+
if tbl.respond_to?(aname) and match != r
|
192
|
+
raise Bud::CompileError, "ambiguous attribute :#{aname} in both #{match.qualified_tabname} and #{r_name}"
|
210
193
|
end
|
211
194
|
end
|
212
195
|
if match.nil?
|
213
|
-
|
196
|
+
rel_names = dorels.map{|t| t.qualified_tabname.to_s}.to_s
|
197
|
+
raise Bud::CompileError, "attribute :#{aname} not found in any of #{rel_names}"
|
214
198
|
end
|
215
|
-
|
199
|
+
match.send(aname)
|
216
200
|
end
|
217
201
|
|
202
|
+
# decompose each pred into a binary pred
|
218
203
|
protected
|
219
204
|
def decomp_preds(*preds) # :nodoc:all
|
220
|
-
# decompose each pred into a binary pred
|
221
205
|
return nil if preds.empty? or preds == [nil]
|
222
206
|
newpreds = []
|
223
207
|
preds.each do |p|
|
@@ -230,40 +214,11 @@ module Bud
|
|
230
214
|
|
231
215
|
protected
|
232
216
|
def canonicalize_localpreds(rel_list, preds) # :nodoc:all
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
end
|
238
|
-
|
239
|
-
private
|
240
|
-
# right is a tuple
|
241
|
-
# left is a tuple or an array (combo) of joined tuples.
|
242
|
-
def test_locals(left, left_is_array, right, *skips)
|
243
|
-
retval = true
|
244
|
-
if (skips and @localpreds.length > skips.length)
|
245
|
-
# check remainder of the predicates
|
246
|
-
@localpreds.each do |pred|
|
247
|
-
# skip skips
|
248
|
-
next if (skips.include? pred)
|
249
|
-
# assumption of left-deep joins here
|
250
|
-
if pred[1][0] != @rels[1].tabname
|
251
|
-
raise Bud::Error, "expected rhs table to be #{@rels[1].tabname}, not #{pred[1][0]}"
|
252
|
-
end
|
253
|
-
rfield = right[pred[1][1]]
|
254
|
-
if left_is_array
|
255
|
-
ix, off = join_offset(pred[0])
|
256
|
-
lfield = left[ix][off]
|
257
|
-
else
|
258
|
-
lfield = left[pred[0][1]]
|
259
|
-
end
|
260
|
-
if lfield != rfield
|
261
|
-
retval = false
|
262
|
-
break
|
263
|
-
end
|
264
|
-
end
|
217
|
+
second_rel = rel_list[1].qualified_tabname
|
218
|
+
preds.map do |p|
|
219
|
+
# reverse if lhs is second_rel *unless* it's a self-join!
|
220
|
+
(p[0][0] == second_rel and p[0][0] != p[1][0]) ? p.reverse : p
|
265
221
|
end
|
266
|
-
return retval
|
267
222
|
end
|
268
223
|
|
269
224
|
undef do_insert
|
@@ -276,30 +231,29 @@ module Bud
|
|
276
231
|
# again if we didn't rescan now.
|
277
232
|
replay_join if @rescan
|
278
233
|
|
279
|
-
|
234
|
+
source_tbl = source.qualified_tabname
|
235
|
+
if @selfjoins.include? source_tbl
|
280
236
|
offsets = []
|
281
|
-
@relnames.each_with_index{|r,i| offsets << i if r ==
|
237
|
+
@relnames.each_with_index{|r,i| offsets << i if r == source_tbl}
|
282
238
|
else
|
283
|
-
offsets = [@relnames.index(
|
284
|
-
end
|
285
|
-
raise Bud::Error, "item #{item.inspect} inserted into join from unknown source #{source.elem_name}" if offsets == $EMPTY
|
286
|
-
offsets.each do |offset|
|
287
|
-
insert_item(item, offset)
|
239
|
+
offsets = [@relnames.index(source_tbl)]
|
288
240
|
end
|
241
|
+
|
242
|
+
offsets.each {|offset| insert_item(item, offset)}
|
289
243
|
end
|
290
244
|
|
291
245
|
protected
|
292
246
|
def insert_item(item, offset)
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
the_key = item[@keys[0][0]][@keys[0][1]]
|
299
|
-
else
|
300
|
-
the_key = item[@keys[offset][1]]
|
247
|
+
# assumes left-deep trees
|
248
|
+
if @left_is_array and offset == 0
|
249
|
+
the_key = @keys.map do |k|
|
250
|
+
left_subtuple, left_offset = k.first
|
251
|
+
item[left_subtuple][left_offset]
|
301
252
|
end
|
253
|
+
else
|
254
|
+
the_key = item.values_at(*@key_attnos[offset])
|
302
255
|
end
|
256
|
+
|
303
257
|
#build
|
304
258
|
# puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
|
305
259
|
if (@hash_tables[offset][the_key] ||= Set.new).add? item
|
@@ -347,29 +301,42 @@ module Bud
|
|
347
301
|
left = m
|
348
302
|
right = item
|
349
303
|
end
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
end
|
304
|
+
|
305
|
+
# FIX: reduce arrays being created
|
306
|
+
result = @left_is_array ? left + [right] : [left, right]
|
307
|
+
push_out(result)
|
355
308
|
end
|
356
309
|
end
|
357
310
|
|
358
311
|
####
|
359
312
|
# and now, the Bloom-facing methods
|
360
313
|
# given a * expression over n collections, form all combinations of items
|
361
|
-
# subject to an array of predicates,
|
362
|
-
# currently supports two options for equijoin predicates:
|
314
|
+
# subject to an array of predicates, +preds+.
|
315
|
+
# currently supports two syntax options for equijoin predicates:
|
363
316
|
# general form: an array of arrays capturing a conjunction of equiv. classes
|
364
317
|
# [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
|
365
318
|
# common form: a hash capturing equality of a column on left with one on right.
|
366
319
|
# :col1 => :col2 (same as lefttable.col1 => righttable.col2)
|
367
320
|
public
|
368
321
|
def pairs(*preds, &blk)
|
369
|
-
|
370
|
-
|
371
|
-
|
322
|
+
if @cols.nil?
|
323
|
+
# derive schema if needed: one column for each table. duplicated inputs
|
324
|
+
# get distinguishing numeral.
|
325
|
+
#
|
326
|
+
# XXX: actually, this seems completely bogus. The schema for the output
|
327
|
+
# of the join should depend on the join's *targetlist*.
|
328
|
+
@cols = []
|
329
|
+
retval = @all_rels_below.reduce({}) do |memo, r|
|
330
|
+
r_name = r.qualified_tabname.to_s
|
331
|
+
memo[r_name] ||= 0
|
332
|
+
newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
|
333
|
+
@cols << newstr.to_sym
|
334
|
+
memo[r_name] += 1
|
335
|
+
memo
|
336
|
+
end
|
337
|
+
setup_accessors
|
372
338
|
end
|
339
|
+
|
373
340
|
@origpreds = preds
|
374
341
|
setup_preds(preds) unless preds.empty?
|
375
342
|
# given new preds, the state for the join will be different. set it up again.
|
@@ -383,25 +350,32 @@ module Bud
|
|
383
350
|
# matches in the 2nd, nil-pad it and include it in the output.
|
384
351
|
public
|
385
352
|
def outer(*preds, &blk)
|
353
|
+
if @all_rels_below.length > 2
|
354
|
+
raise Bud::Error, "outer joins cannot be used with more than 2 join relations"
|
355
|
+
end
|
386
356
|
pairs(*preds, &blk)
|
387
357
|
self.extend(Bud::PushSHOuterJoin)
|
388
358
|
end
|
389
359
|
|
390
360
|
public
|
391
|
-
def
|
392
|
-
|
393
|
-
|
361
|
+
def lefts(*preds, &blk)
|
362
|
+
if blk.nil?
|
363
|
+
@cols = @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols
|
364
|
+
setup_accessors
|
365
|
+
end
|
394
366
|
pairs(*preds) do |x,y|
|
395
|
-
blk.nil? ?
|
367
|
+
blk.nil? ? x : blk.call(x)
|
396
368
|
end
|
397
369
|
end
|
398
370
|
|
399
371
|
public
|
400
|
-
def
|
401
|
-
|
402
|
-
|
372
|
+
def rights(*preds, &blk)
|
373
|
+
if blk.nil?
|
374
|
+
@cols = @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols
|
375
|
+
setup_accessors
|
376
|
+
end
|
403
377
|
pairs(*preds) do |x,y|
|
404
|
-
blk.nil? ?
|
378
|
+
blk.nil? ? y : blk.call(y)
|
405
379
|
end
|
406
380
|
end
|
407
381
|
|
@@ -432,13 +406,13 @@ module Bud
|
|
432
406
|
public
|
433
407
|
def flatten(*preds, &blk)
|
434
408
|
if blk.nil?
|
435
|
-
@cols = dupfree_schema(@
|
409
|
+
@cols = dupfree_schema(@rels[0].cols + @rels[1].cols)
|
436
410
|
else
|
437
411
|
@cols = []
|
438
412
|
end
|
439
413
|
setup_accessors
|
440
414
|
pairs(*preds) do |x,y|
|
441
|
-
blk.nil? ? x
|
415
|
+
blk.nil? ? x + y : blk.call(x + y)
|
442
416
|
end
|
443
417
|
end
|
444
418
|
|
@@ -468,17 +442,11 @@ module Bud
|
|
468
442
|
end
|
469
443
|
|
470
444
|
module PushSHOuterJoin
|
445
|
+
# XXX: duplicates code from PushSHJoin
|
471
446
|
private
|
472
447
|
def insert_item(item, offset)
|
473
|
-
|
474
|
-
|
475
|
-
else
|
476
|
-
if all_rels_below.length > 2 and offset == 1
|
477
|
-
the_key = item[@keys[1][0]][@keys[1][1]]
|
478
|
-
else
|
479
|
-
the_key = item[@keys[offset][1]]
|
480
|
-
end
|
481
|
-
end
|
448
|
+
the_key = item.values_at(*@key_attnos[offset])
|
449
|
+
|
482
450
|
#build
|
483
451
|
# puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
|
484
452
|
if (@hash_tables[offset][the_key] ||= Set.new).add? item
|
@@ -489,7 +457,8 @@ module Bud
|
|
489
457
|
if the_matches.nil? and offset == 0 # only doing Left Outer Join right now
|
490
458
|
@missing_keys << the_key
|
491
459
|
else
|
492
|
-
|
460
|
+
# no longer missing, no matter which side this tuple is on
|
461
|
+
@missing_keys.delete(the_key)
|
493
462
|
process_matches(item, the_matches, offset) unless the_matches.nil?
|
494
463
|
end
|
495
464
|
end
|
@@ -508,12 +477,22 @@ module Bud
|
|
508
477
|
|
509
478
|
private
|
510
479
|
def push_missing
|
480
|
+
left_hash = @hash_tables[0]
|
481
|
+
null_tuple = @rels[1].null_tuple
|
511
482
|
@missing_keys.each do |key|
|
512
|
-
|
513
|
-
push_out([t,
|
483
|
+
left_hash[key].each do |t|
|
484
|
+
push_out([t, null_tuple])
|
514
485
|
end
|
515
486
|
end
|
516
487
|
end
|
488
|
+
|
489
|
+
public
|
490
|
+
def invalidate_cache
|
491
|
+
super
|
492
|
+
# Only need to check the left join rel because outer joins in Bloom are
|
493
|
+
# left outer joins.
|
494
|
+
@missing_keys.clear if @rels.first.rescan
|
495
|
+
end
|
517
496
|
end
|
518
497
|
|
519
498
|
|
@@ -527,17 +506,18 @@ module Bud
|
|
527
506
|
# first flush, at which point we are sure to have seen all the t-side tuples
|
528
507
|
# in this tick.
|
529
508
|
class PushNotIn < PushStatefulElement
|
530
|
-
def initialize(rellist, bud_instance, preds
|
509
|
+
def initialize(rellist, bud_instance, preds, &blk) # :nodoc: all
|
531
510
|
@lhs, @rhs = rellist
|
532
511
|
@lhs_keycols = nil
|
533
512
|
@rhs_keycols = nil
|
534
|
-
name_in = "#{@lhs.
|
535
|
-
super(name_in, bud_instance)
|
513
|
+
name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}".to_sym
|
514
|
+
super(name_in, bud_instance, nil, @lhs.schema)
|
536
515
|
setup_preds(preds) unless preds.empty?
|
537
516
|
@rhs_rcvd = false
|
538
517
|
@hash_tables = [{},{}]
|
539
518
|
if @lhs_keycols.nil? and blk.nil?
|
540
|
-
#
|
519
|
+
# Pointwise comparison. Could use zip, but it creates an array for each
|
520
|
+
# field pair.
|
541
521
|
blk = lambda {|lhs, rhs|
|
542
522
|
lhs.to_a == rhs.to_a
|
543
523
|
}
|
@@ -547,7 +527,7 @@ module Bud
|
|
547
527
|
|
548
528
|
def setup_preds(preds)
|
549
529
|
# This is simpler than PushSHJoin's setup_preds, because notin is a binary
|
550
|
-
# operator where both lhs and rhs are collections.
|
530
|
+
# operator where both lhs and rhs are collections. preds is an array of
|
551
531
|
# hash_pairs. For now assume that the attributes are in the same order as
|
552
532
|
# the tables.
|
553
533
|
@lhs_keycols, @rhs_keycols = preds.reduce([[], []]) do |memo, item|
|
@@ -559,21 +539,25 @@ module Bud
|
|
559
539
|
memo
|
560
540
|
end
|
561
541
|
end
|
542
|
+
|
562
543
|
def find_col(colspec, rel)
|
563
|
-
|
544
|
+
case colspec
|
545
|
+
when Symbol
|
546
|
+
unless rel.respond_to? colspec
|
547
|
+
raise Bud::Error, "attribute :#{colspec} not found in #{rel.qualified_tabname}"
|
548
|
+
end
|
564
549
|
col_desc = rel.send(colspec)
|
565
|
-
|
566
|
-
elsif colspec.is_a? Array
|
550
|
+
when Array
|
567
551
|
col_desc = colspec
|
568
552
|
else
|
569
553
|
raise Bud::Error, "symbol or column spec expected. Got #{colspec}"
|
570
554
|
end
|
571
|
-
col_desc[1] # col_desc is of the form [tabname, colnum, colname]
|
555
|
+
col_desc[1] # col_desc is of the form [tabname, colnum, colname, seqno]
|
572
556
|
end
|
573
557
|
|
574
558
|
def get_key(item, offset)
|
575
|
-
keycols = offset == 0 ? @lhs_keycols : @rhs_keycols
|
576
|
-
keycols.nil? ?
|
559
|
+
keycols = (offset == 0 ? @lhs_keycols : @rhs_keycols)
|
560
|
+
keycols.nil? ? [] : item.values_at(*keycols)
|
577
561
|
end
|
578
562
|
|
579
563
|
public
|
@@ -582,11 +566,21 @@ module Bud
|
|
582
566
|
end
|
583
567
|
|
584
568
|
def insert(item, source)
|
585
|
-
|
569
|
+
if source == @lhs && source == @rhs # Self join
|
570
|
+
do_insert(item, 0)
|
571
|
+
do_insert(item, 1)
|
572
|
+
else
|
573
|
+
offset = source == @lhs ? 0 : 1
|
574
|
+
do_insert(item, offset)
|
575
|
+
end
|
576
|
+
end
|
577
|
+
|
578
|
+
def do_insert(item, offset)
|
586
579
|
key = get_key(item, offset)
|
587
580
|
(@hash_tables[offset][key] ||= Set.new).add item
|
588
581
|
if @rhs_rcvd and offset == 0
|
589
|
-
|
582
|
+
rhs_values = @hash_tables[1][key]
|
583
|
+
process_match(item, rhs_values)
|
590
584
|
end
|
591
585
|
end
|
592
586
|
|
@@ -596,19 +590,15 @@ module Bud
|
|
596
590
|
# growing any more, until the next tick.
|
597
591
|
unless @rhs_rcvd
|
598
592
|
@rhs_rcvd = true
|
593
|
+
rhs_hash = @hash_tables[1]
|
599
594
|
@hash_tables[0].each do |key,values|
|
600
|
-
|
595
|
+
rhs_values = rhs_hash[key]
|
596
|
+
values.each {|item| process_match(item, rhs_values)}
|
601
597
|
end
|
602
598
|
end
|
603
599
|
end
|
604
600
|
|
605
|
-
def push_lhs(key, lhs_item)
|
606
|
-
rhs_values = @hash_tables[1][key]
|
607
|
-
process_match(lhs_item, rhs_values)
|
608
|
-
end
|
609
|
-
|
610
601
|
def process_match(lhs_item, rhs_values)
|
611
|
-
exclude = true
|
612
602
|
if rhs_values.nil?
|
613
603
|
# no corresponding rhs. Include in output
|
614
604
|
exclude = false
|
@@ -616,33 +606,24 @@ module Bud
|
|
616
606
|
# for any lhs * rhs pair, if block returns true, do not push lhs. lhs is pushed
|
617
607
|
# only if there is no match (anti-join)
|
618
608
|
exclude = rhs_values.any?{|rhs_item| @blk.call(lhs_item, rhs_item)}
|
609
|
+
else
|
610
|
+
exclude = true
|
619
611
|
end
|
620
|
-
unless exclude
|
621
|
-
push_out(lhs_item)
|
622
|
-
end
|
623
|
-
end
|
624
612
|
|
625
|
-
|
626
|
-
def push_out(item)
|
627
|
-
@outputs.each do |ou|
|
628
|
-
if ou.class <= Bud::PushElement
|
629
|
-
ou.insert(item, self)
|
630
|
-
elsif ou.class <= Bud::BudCollection
|
631
|
-
ou.do_insert(item, ou.new_delta)
|
632
|
-
else
|
633
|
-
raise Bud::Error, "expected either a PushElement or a BudCollection"
|
634
|
-
end
|
635
|
-
end
|
636
|
-
# for all the following, o is a BudCollection
|
637
|
-
@deletes.each{|o| o.pending_delete([item])}
|
638
|
-
@delete_keys.each{|o| o.pending_delete_keys([item])}
|
639
|
-
@pendings.each{|o| o.pending_merge([item])}
|
613
|
+
push_out(lhs_item, false) unless exclude
|
640
614
|
end
|
641
615
|
|
642
616
|
def invalidate_cache
|
643
|
-
|
644
|
-
|
645
|
-
@
|
617
|
+
raise Bud::Error if @rhs_rcvd # sanity check; should already be reset
|
618
|
+
|
619
|
+
if @lhs.rescan
|
620
|
+
puts "#{tabname} rel:#{@lhs.qualified_tabname} invalidated" if $BUD_DEBUG
|
621
|
+
@hash_tables[0] = {}
|
622
|
+
end
|
623
|
+
if @rhs.rescan
|
624
|
+
puts "#{tabname} rel:#{@rhs.qualified_tabname} invalidated" if $BUD_DEBUG
|
625
|
+
@hash_tables[1] = {}
|
626
|
+
end
|
646
627
|
end
|
647
628
|
|
648
629
|
def stratum_end
|