bud 0.9.4 → 0.9.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/History.txt +106 -0
- data/README.md +6 -4
- data/Rakefile +91 -0
- data/bin/budlabel +63 -0
- data/bin/budplot +18 -8
- data/bin/budtimelines +2 -2
- data/bin/budvis +7 -1
- data/docs/README.md +8 -17
- data/docs/cheat.md +112 -13
- data/docs/getstarted.md +97 -84
- data/docs/operational.md +3 -3
- data/examples/basics/paths.rb +2 -2
- data/examples/chat/README.md +2 -0
- data/examples/chat/chat.rb +3 -2
- data/examples/chat/chat_protocol.rb +2 -2
- data/examples/chat/chat_server.rb +3 -2
- data/lib/bud.rb +229 -114
- data/lib/bud/aggs.rb +20 -4
- data/lib/bud/bud_meta.rb +83 -73
- data/lib/bud/collections.rb +306 -120
- data/lib/bud/depanalysis.rb +3 -4
- data/lib/bud/executor/README.rescan +2 -1
- data/lib/bud/executor/elements.rb +96 -95
- data/lib/bud/executor/group.rb +35 -32
- data/lib/bud/executor/join.rb +164 -183
- data/lib/bud/graphs.rb +3 -3
- data/lib/bud/labeling/bloomgraph.rb +47 -0
- data/lib/bud/labeling/budplot_style.rb +53 -0
- data/lib/bud/labeling/labeling.rb +288 -0
- data/lib/bud/lattice-core.rb +595 -0
- data/lib/bud/lattice-lib.rb +422 -0
- data/lib/bud/monkeypatch.rb +68 -32
- data/lib/bud/rebl.rb +28 -10
- data/lib/bud/rewrite.rb +361 -152
- data/lib/bud/server.rb +16 -8
- data/lib/bud/source.rb +21 -18
- data/lib/bud/state.rb +93 -4
- data/lib/bud/storage/zookeeper.rb +45 -33
- data/lib/bud/version.rb +3 -0
- data/lib/bud/viz.rb +10 -12
- data/lib/bud/viz_util.rb +8 -3
- metadata +107 -108
data/lib/bud/executor/join.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
require 'bud/executor/elements'
|
2
|
-
require 'set'
|
3
2
|
|
4
|
-
$EMPTY = []
|
5
3
|
module Bud
|
6
4
|
class PushSHJoin < PushStatefulElement
|
7
5
|
attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
|
8
6
|
|
9
7
|
def initialize(rellist, bud_instance, preds=nil) # :nodoc: all
|
10
8
|
@rels = rellist
|
11
|
-
@relnames = @rels.map{|r| r.
|
9
|
+
@relnames = @rels.map{|r| r.qualified_tabname}
|
12
10
|
@cols = []
|
13
11
|
@bud_instance = bud_instance
|
14
12
|
@origpreds = preds
|
15
|
-
@localpreds =
|
13
|
+
@localpreds = []
|
16
14
|
@selfjoins = []
|
15
|
+
@keys = []
|
16
|
+
@key_attnos = [[], []]
|
17
17
|
@missing_keys = Set.new
|
18
18
|
|
19
19
|
# if any elements on rellist are PushSHJoins, suck up their contents
|
20
20
|
@all_rels_below = []
|
21
|
-
|
21
|
+
@rels.each do |r|
|
22
22
|
if r.class <= PushSHJoin
|
23
23
|
@all_rels_below += r.all_rels_below
|
24
24
|
preds += r.origpreds
|
@@ -26,12 +26,13 @@ module Bud
|
|
26
26
|
@all_rels_below << r
|
27
27
|
end
|
28
28
|
end
|
29
|
+
@left_is_array = @all_rels_below.length > 2
|
29
30
|
|
30
31
|
# check for self-joins: we currently only handle 2 instances of the same
|
31
32
|
# table per rule
|
32
33
|
counts = @all_rels_below.reduce({}) do |memo, r|
|
33
|
-
memo[r.
|
34
|
-
memo[r.
|
34
|
+
memo[r.qualified_tabname] ||= 0
|
35
|
+
memo[r.qualified_tabname] += 1
|
35
36
|
memo
|
36
37
|
end
|
37
38
|
counts.each do |name, cnt|
|
@@ -39,31 +40,12 @@ module Bud
|
|
39
40
|
@selfjoins << name if cnt == 2
|
40
41
|
end
|
41
42
|
|
42
|
-
# derive schema: one column for each table.
|
43
|
-
# duplicated inputs get distinguishing numeral
|
44
|
-
@cols = []
|
45
|
-
index = 0
|
46
|
-
retval = @all_rels_below.reduce({}) do |memo, r|
|
47
|
-
index += 1
|
48
|
-
memo[r.tabname.to_s] ||= 0
|
49
|
-
newstr = r.tabname.to_s + ((memo[r.tabname.to_s] > 0) ? ("_" + memo[r.tabname.to_s].to_s) : "")
|
50
|
-
@cols << newstr.to_sym
|
51
|
-
memo[r.tabname.to_s] += 1
|
52
|
-
memo
|
53
|
-
end
|
54
|
-
|
55
43
|
setup_preds(preds) unless preds.empty?
|
56
44
|
setup_state
|
57
45
|
|
58
46
|
super(@tabname, @bud_instance, nil, @cols)
|
59
47
|
end
|
60
48
|
|
61
|
-
public
|
62
|
-
def copy_on_write
|
63
|
-
@refcount -= 1
|
64
|
-
return Bud::PushSHJoin.new(@all_rels_below, @bud_instance, [])
|
65
|
-
end
|
66
|
-
|
67
49
|
public
|
68
50
|
def state_id # :nodoc: all
|
69
51
|
object_id
|
@@ -77,7 +59,7 @@ module Bud
|
|
77
59
|
private
|
78
60
|
def setup_state
|
79
61
|
sid = state_id
|
80
|
-
@tabname = ("(" + @all_rels_below.map{|r| r.
|
62
|
+
@tabname = ("(" + @all_rels_below.map{|r| r.qualified_tabname}.join('*') +"):"+sid.to_s).to_sym
|
81
63
|
@hash_tables = [{}, {}]
|
82
64
|
end
|
83
65
|
|
@@ -87,25 +69,26 @@ module Bud
|
|
87
69
|
# print "setting up preds for #{@relnames.inspect}(#{self.object_id}): "
|
88
70
|
allpreds = disambiguate_preds(preds)
|
89
71
|
allpreds = canonicalize_localpreds(@rels, allpreds)
|
90
|
-
|
72
|
+
|
73
|
+
# check for refs to collections that aren't being joined
|
91
74
|
unless @rels[0].class <= Bud::PushSHJoin
|
92
|
-
tabnames = @rels.map{ |r| r.tabname }
|
93
75
|
allpreds.each do |p|
|
94
|
-
unless
|
76
|
+
unless @relnames.include? p[0][0]
|
95
77
|
raise Bud::CompileError, "illegal predicate: collection #{p[0][0]} is not being joined"
|
96
78
|
end
|
97
|
-
unless
|
79
|
+
unless @relnames.include? p[1][0]
|
98
80
|
raise Bud::CompileError, "illegal predicate: collection #{p[1][0]} is not being joined"
|
99
81
|
end
|
100
82
|
end
|
101
83
|
end
|
84
|
+
|
102
85
|
@localpreds = allpreds.reject do |p|
|
103
86
|
# reject if it doesn't match the right (leaf node) of the join
|
104
87
|
# or reject if it does match, but it can be evaluated by a lower join
|
105
|
-
# i.e. one that also has this table on the right (
|
106
|
-
p[1][0] != @rels[1].
|
107
|
-
or (p[0][0] != @rels[1].
|
108
|
-
and p[1][0] == @rels[1].
|
88
|
+
# i.e. one that also has this table on the right (leaf node)
|
89
|
+
p[1][0] != @rels[1].qualified_tabname \
|
90
|
+
or (p[0][0] != @rels[1].qualified_tabname \
|
91
|
+
and p[1][0] == @rels[1].qualified_tabname and @selfjoins.include? @rels[1].qualified_tabname)
|
109
92
|
end
|
110
93
|
|
111
94
|
# only allow preds on the same table name if they're on a self-joined table
|
@@ -115,9 +98,9 @@ module Bud
|
|
115
98
|
end
|
116
99
|
end
|
117
100
|
|
118
|
-
@localpreds += allpreds.
|
119
|
-
p
|
120
|
-
end
|
101
|
+
@localpreds += allpreds.select do |p|
|
102
|
+
p[0][0] == p[1][0] and (p[1][0] == @rels[0].qualified_tabname or p[1][0] == @rels[1].qualified_tabname)
|
103
|
+
end
|
121
104
|
otherpreds = allpreds - @localpreds
|
122
105
|
unless otherpreds.empty?
|
123
106
|
unless @rels[0].class <= Bud::PushSHJoin
|
@@ -126,29 +109,27 @@ module Bud
|
|
126
109
|
@rels[0].setup_preds(otherpreds)
|
127
110
|
end
|
128
111
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
@keys
|
133
|
-
else
|
134
|
-
@keys = []
|
112
|
+
@localpreds.each do |lp|
|
113
|
+
right_offset = lp[1][1]
|
114
|
+
left_subtuple, left_offset = join_offset(lp[0])
|
115
|
+
@keys << [[left_subtuple, left_offset], [1, right_offset]]
|
135
116
|
end
|
117
|
+
|
118
|
+
# Optimize for a common case. When we're just fetching key values from
|
119
|
+
# an input tuple, lookup the column offsets we need to fetch for each
|
120
|
+
# input. This doesn't apply when we're computing the key for the left
|
121
|
+
# input and @left_is_array is true.
|
122
|
+
@key_attnos = []
|
123
|
+
@key_attnos[0] = @keys.map {|k| k[0][1]}
|
124
|
+
@key_attnos[1] = @keys.map {|k| k[1][1]}
|
136
125
|
end
|
137
126
|
|
138
127
|
public
|
139
128
|
def invalidate_cache
|
140
129
|
@rels.each_with_index do |source_elem, i|
|
141
130
|
if source_elem.rescan
|
142
|
-
puts "#{
|
131
|
+
puts "#{qualified_tabname} rel:#{i}(#{source_elem.qualified_tabname}) invalidated" if $BUD_DEBUG
|
143
132
|
@hash_tables[i] = {}
|
144
|
-
if i == 0
|
145
|
-
# Only if i == 0 because outer joins in Bloom are left outer joins.
|
146
|
-
# If i == 1, missing_keys will be corrected when items are populated
|
147
|
-
# in the rhs fork.
|
148
|
-
# XXX This is not modular. We are doing invalidation work for outer
|
149
|
-
# joins, which is part of a separate module PushSHOuterJoin.
|
150
|
-
@missing_keys.clear
|
151
|
-
end
|
152
133
|
end
|
153
134
|
end
|
154
135
|
end
|
@@ -163,7 +144,7 @@ module Bud
|
|
163
144
|
# referenced in entry.
|
164
145
|
subtuple = 0
|
165
146
|
all_rels_below[0..all_rels_below.length-1].each_with_index do |t,i|
|
166
|
-
if t.
|
147
|
+
if t.qualified_tabname == name
|
167
148
|
subtuple = i
|
168
149
|
break
|
169
150
|
end
|
@@ -181,7 +162,7 @@ module Bud
|
|
181
162
|
elsif k.class <= Array
|
182
163
|
[k,v]
|
183
164
|
elsif k.class <= Symbol
|
184
|
-
if @all_rels_below
|
165
|
+
if @all_rels_below.length == 2
|
185
166
|
[find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
|
186
167
|
else
|
187
168
|
[find_attr_match(k), find_attr_match(v)]
|
@@ -204,20 +185,23 @@ module Bud
|
|
204
185
|
dorels = (rel.nil? ? @all_rels_below : [rel])
|
205
186
|
match = nil
|
206
187
|
dorels.each do |r|
|
207
|
-
|
208
|
-
|
209
|
-
|
188
|
+
r_name = r.qualified_tabname
|
189
|
+
tbl = bud_instance.toplevel.tables[r_name]
|
190
|
+
match ||= r if tbl.respond_to?(aname)
|
191
|
+
if tbl.respond_to?(aname) and match != r
|
192
|
+
raise Bud::CompileError, "ambiguous attribute :#{aname} in both #{match.qualified_tabname} and #{r_name}"
|
210
193
|
end
|
211
194
|
end
|
212
195
|
if match.nil?
|
213
|
-
|
196
|
+
rel_names = dorels.map{|t| t.qualified_tabname.to_s}.to_s
|
197
|
+
raise Bud::CompileError, "attribute :#{aname} not found in any of #{rel_names}"
|
214
198
|
end
|
215
|
-
|
199
|
+
match.send(aname)
|
216
200
|
end
|
217
201
|
|
202
|
+
# decompose each pred into a binary pred
|
218
203
|
protected
|
219
204
|
def decomp_preds(*preds) # :nodoc:all
|
220
|
-
# decompose each pred into a binary pred
|
221
205
|
return nil if preds.empty? or preds == [nil]
|
222
206
|
newpreds = []
|
223
207
|
preds.each do |p|
|
@@ -230,40 +214,11 @@ module Bud
|
|
230
214
|
|
231
215
|
protected
|
232
216
|
def canonicalize_localpreds(rel_list, preds) # :nodoc:all
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
end
|
238
|
-
|
239
|
-
private
|
240
|
-
# right is a tuple
|
241
|
-
# left is a tuple or an array (combo) of joined tuples.
|
242
|
-
def test_locals(left, left_is_array, right, *skips)
|
243
|
-
retval = true
|
244
|
-
if (skips and @localpreds.length > skips.length)
|
245
|
-
# check remainder of the predicates
|
246
|
-
@localpreds.each do |pred|
|
247
|
-
# skip skips
|
248
|
-
next if (skips.include? pred)
|
249
|
-
# assumption of left-deep joins here
|
250
|
-
if pred[1][0] != @rels[1].tabname
|
251
|
-
raise Bud::Error, "expected rhs table to be #{@rels[1].tabname}, not #{pred[1][0]}"
|
252
|
-
end
|
253
|
-
rfield = right[pred[1][1]]
|
254
|
-
if left_is_array
|
255
|
-
ix, off = join_offset(pred[0])
|
256
|
-
lfield = left[ix][off]
|
257
|
-
else
|
258
|
-
lfield = left[pred[0][1]]
|
259
|
-
end
|
260
|
-
if lfield != rfield
|
261
|
-
retval = false
|
262
|
-
break
|
263
|
-
end
|
264
|
-
end
|
217
|
+
second_rel = rel_list[1].qualified_tabname
|
218
|
+
preds.map do |p|
|
219
|
+
# reverse if lhs is second_rel *unless* it's a self-join!
|
220
|
+
(p[0][0] == second_rel and p[0][0] != p[1][0]) ? p.reverse : p
|
265
221
|
end
|
266
|
-
return retval
|
267
222
|
end
|
268
223
|
|
269
224
|
undef do_insert
|
@@ -276,30 +231,29 @@ module Bud
|
|
276
231
|
# again if we didn't rescan now.
|
277
232
|
replay_join if @rescan
|
278
233
|
|
279
|
-
|
234
|
+
source_tbl = source.qualified_tabname
|
235
|
+
if @selfjoins.include? source_tbl
|
280
236
|
offsets = []
|
281
|
-
@relnames.each_with_index{|r,i| offsets << i if r ==
|
237
|
+
@relnames.each_with_index{|r,i| offsets << i if r == source_tbl}
|
282
238
|
else
|
283
|
-
offsets = [@relnames.index(
|
284
|
-
end
|
285
|
-
raise Bud::Error, "item #{item.inspect} inserted into join from unknown source #{source.elem_name}" if offsets == $EMPTY
|
286
|
-
offsets.each do |offset|
|
287
|
-
insert_item(item, offset)
|
239
|
+
offsets = [@relnames.index(source_tbl)]
|
288
240
|
end
|
241
|
+
|
242
|
+
offsets.each {|offset| insert_item(item, offset)}
|
289
243
|
end
|
290
244
|
|
291
245
|
protected
|
292
246
|
def insert_item(item, offset)
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
the_key = item[@keys[0][0]][@keys[0][1]]
|
299
|
-
else
|
300
|
-
the_key = item[@keys[offset][1]]
|
247
|
+
# assumes left-deep trees
|
248
|
+
if @left_is_array and offset == 0
|
249
|
+
the_key = @keys.map do |k|
|
250
|
+
left_subtuple, left_offset = k.first
|
251
|
+
item[left_subtuple][left_offset]
|
301
252
|
end
|
253
|
+
else
|
254
|
+
the_key = item.values_at(*@key_attnos[offset])
|
302
255
|
end
|
256
|
+
|
303
257
|
#build
|
304
258
|
# puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
|
305
259
|
if (@hash_tables[offset][the_key] ||= Set.new).add? item
|
@@ -347,29 +301,42 @@ module Bud
|
|
347
301
|
left = m
|
348
302
|
right = item
|
349
303
|
end
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
end
|
304
|
+
|
305
|
+
# FIX: reduce arrays being created
|
306
|
+
result = @left_is_array ? left + [right] : [left, right]
|
307
|
+
push_out(result)
|
355
308
|
end
|
356
309
|
end
|
357
310
|
|
358
311
|
####
|
359
312
|
# and now, the Bloom-facing methods
|
360
313
|
# given a * expression over n collections, form all combinations of items
|
361
|
-
# subject to an array of predicates,
|
362
|
-
# currently supports two options for equijoin predicates:
|
314
|
+
# subject to an array of predicates, +preds+.
|
315
|
+
# currently supports two syntax options for equijoin predicates:
|
363
316
|
# general form: an array of arrays capturing a conjunction of equiv. classes
|
364
317
|
# [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
|
365
318
|
# common form: a hash capturing equality of a column on left with one on right.
|
366
319
|
# :col1 => :col2 (same as lefttable.col1 => righttable.col2)
|
367
320
|
public
|
368
321
|
def pairs(*preds, &blk)
|
369
|
-
|
370
|
-
|
371
|
-
|
322
|
+
if @cols.nil?
|
323
|
+
# derive schema if needed: one column for each table. duplicated inputs
|
324
|
+
# get distinguishing numeral.
|
325
|
+
#
|
326
|
+
# XXX: actually, this seems completely bogus. The schema for the output
|
327
|
+
# of the join should depend on the join's *targetlist*.
|
328
|
+
@cols = []
|
329
|
+
retval = @all_rels_below.reduce({}) do |memo, r|
|
330
|
+
r_name = r.qualified_tabname.to_s
|
331
|
+
memo[r_name] ||= 0
|
332
|
+
newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
|
333
|
+
@cols << newstr.to_sym
|
334
|
+
memo[r_name] += 1
|
335
|
+
memo
|
336
|
+
end
|
337
|
+
setup_accessors
|
372
338
|
end
|
339
|
+
|
373
340
|
@origpreds = preds
|
374
341
|
setup_preds(preds) unless preds.empty?
|
375
342
|
# given new preds, the state for the join will be different. set it up again.
|
@@ -383,25 +350,32 @@ module Bud
|
|
383
350
|
# matches in the 2nd, nil-pad it and include it in the output.
|
384
351
|
public
|
385
352
|
def outer(*preds, &blk)
|
353
|
+
if @all_rels_below.length > 2
|
354
|
+
raise Bud::Error, "outer joins cannot be used with more than 2 join relations"
|
355
|
+
end
|
386
356
|
pairs(*preds, &blk)
|
387
357
|
self.extend(Bud::PushSHOuterJoin)
|
388
358
|
end
|
389
359
|
|
390
360
|
public
|
391
|
-
def
|
392
|
-
|
393
|
-
|
361
|
+
def lefts(*preds, &blk)
|
362
|
+
if blk.nil?
|
363
|
+
@cols = @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols
|
364
|
+
setup_accessors
|
365
|
+
end
|
394
366
|
pairs(*preds) do |x,y|
|
395
|
-
blk.nil? ?
|
367
|
+
blk.nil? ? x : blk.call(x)
|
396
368
|
end
|
397
369
|
end
|
398
370
|
|
399
371
|
public
|
400
|
-
def
|
401
|
-
|
402
|
-
|
372
|
+
def rights(*preds, &blk)
|
373
|
+
if blk.nil?
|
374
|
+
@cols = @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols
|
375
|
+
setup_accessors
|
376
|
+
end
|
403
377
|
pairs(*preds) do |x,y|
|
404
|
-
blk.nil? ?
|
378
|
+
blk.nil? ? y : blk.call(y)
|
405
379
|
end
|
406
380
|
end
|
407
381
|
|
@@ -432,13 +406,13 @@ module Bud
|
|
432
406
|
public
|
433
407
|
def flatten(*preds, &blk)
|
434
408
|
if blk.nil?
|
435
|
-
@cols = dupfree_schema(@
|
409
|
+
@cols = dupfree_schema(@rels[0].cols + @rels[1].cols)
|
436
410
|
else
|
437
411
|
@cols = []
|
438
412
|
end
|
439
413
|
setup_accessors
|
440
414
|
pairs(*preds) do |x,y|
|
441
|
-
blk.nil? ? x
|
415
|
+
blk.nil? ? x + y : blk.call(x + y)
|
442
416
|
end
|
443
417
|
end
|
444
418
|
|
@@ -468,17 +442,11 @@ module Bud
|
|
468
442
|
end
|
469
443
|
|
470
444
|
module PushSHOuterJoin
|
445
|
+
# XXX: duplicates code from PushSHJoin
|
471
446
|
private
|
472
447
|
def insert_item(item, offset)
|
473
|
-
|
474
|
-
|
475
|
-
else
|
476
|
-
if all_rels_below.length > 2 and offset == 1
|
477
|
-
the_key = item[@keys[1][0]][@keys[1][1]]
|
478
|
-
else
|
479
|
-
the_key = item[@keys[offset][1]]
|
480
|
-
end
|
481
|
-
end
|
448
|
+
the_key = item.values_at(*@key_attnos[offset])
|
449
|
+
|
482
450
|
#build
|
483
451
|
# puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
|
484
452
|
if (@hash_tables[offset][the_key] ||= Set.new).add? item
|
@@ -489,7 +457,8 @@ module Bud
|
|
489
457
|
if the_matches.nil? and offset == 0 # only doing Left Outer Join right now
|
490
458
|
@missing_keys << the_key
|
491
459
|
else
|
492
|
-
|
460
|
+
# no longer missing no matter which side this tuple is
|
461
|
+
@missing_keys.delete(the_key)
|
493
462
|
process_matches(item, the_matches, offset) unless the_matches.nil?
|
494
463
|
end
|
495
464
|
end
|
@@ -508,12 +477,22 @@ module Bud
|
|
508
477
|
|
509
478
|
private
|
510
479
|
def push_missing
|
480
|
+
left_hash = @hash_tables[0]
|
481
|
+
null_tuple = @rels[1].null_tuple
|
511
482
|
@missing_keys.each do |key|
|
512
|
-
|
513
|
-
push_out([t,
|
483
|
+
left_hash[key].each do |t|
|
484
|
+
push_out([t, null_tuple])
|
514
485
|
end
|
515
486
|
end
|
516
487
|
end
|
488
|
+
|
489
|
+
public
|
490
|
+
def invalidate_cache
|
491
|
+
super
|
492
|
+
# Only if need to check left join rel because outer joins in Bloom are
|
493
|
+
# left outer joins.
|
494
|
+
@missing_keys.clear if @rels.first.rescan
|
495
|
+
end
|
517
496
|
end
|
518
497
|
|
519
498
|
|
@@ -527,17 +506,18 @@ module Bud
|
|
527
506
|
# first flush, at which point we are sure to have seen all the t-side tuples
|
528
507
|
# in this tick.
|
529
508
|
class PushNotIn < PushStatefulElement
|
530
|
-
def initialize(rellist, bud_instance, preds
|
509
|
+
def initialize(rellist, bud_instance, preds, &blk) # :nodoc: all
|
531
510
|
@lhs, @rhs = rellist
|
532
511
|
@lhs_keycols = nil
|
533
512
|
@rhs_keycols = nil
|
534
|
-
name_in = "#{@lhs.
|
535
|
-
super(name_in, bud_instance)
|
513
|
+
name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}".to_sym
|
514
|
+
super(name_in, bud_instance, nil, @lhs.schema)
|
536
515
|
setup_preds(preds) unless preds.empty?
|
537
516
|
@rhs_rcvd = false
|
538
517
|
@hash_tables = [{},{}]
|
539
518
|
if @lhs_keycols.nil? and blk.nil?
|
540
|
-
#
|
519
|
+
# Pointwise comparison. Could use zip, but it creates an array for each
|
520
|
+
# field pair.
|
541
521
|
blk = lambda {|lhs, rhs|
|
542
522
|
lhs.to_a == rhs.to_a
|
543
523
|
}
|
@@ -547,7 +527,7 @@ module Bud
|
|
547
527
|
|
548
528
|
def setup_preds(preds)
|
549
529
|
# This is simpler than PushSHJoin's setup_preds, because notin is a binary
|
550
|
-
# operator where both lhs and rhs are collections.
|
530
|
+
# operator where both lhs and rhs are collections. preds is an array of
|
551
531
|
# hash_pairs. For now assume that the attributes are in the same order as
|
552
532
|
# the tables.
|
553
533
|
@lhs_keycols, @rhs_keycols = preds.reduce([[], []]) do |memo, item|
|
@@ -559,21 +539,25 @@ module Bud
|
|
559
539
|
memo
|
560
540
|
end
|
561
541
|
end
|
542
|
+
|
562
543
|
def find_col(colspec, rel)
|
563
|
-
|
544
|
+
case colspec
|
545
|
+
when Symbol
|
546
|
+
unless rel.respond_to? colspec
|
547
|
+
raise Bud::Error, "attribute :#{colspec} not found in #{rel.qualified_tabname}"
|
548
|
+
end
|
564
549
|
col_desc = rel.send(colspec)
|
565
|
-
|
566
|
-
elsif colspec.is_a? Array
|
550
|
+
when Array
|
567
551
|
col_desc = colspec
|
568
552
|
else
|
569
553
|
raise Bud::Error, "symbol or column spec expected. Got #{colspec}"
|
570
554
|
end
|
571
|
-
col_desc[1] # col_desc is of the form [tabname, colnum, colname]
|
555
|
+
col_desc[1] # col_desc is of the form [tabname, colnum, colname, seqno]
|
572
556
|
end
|
573
557
|
|
574
558
|
def get_key(item, offset)
|
575
|
-
keycols = offset == 0 ? @lhs_keycols : @rhs_keycols
|
576
|
-
keycols.nil? ?
|
559
|
+
keycols = (offset == 0 ? @lhs_keycols : @rhs_keycols)
|
560
|
+
keycols.nil? ? [] : item.values_at(*keycols)
|
577
561
|
end
|
578
562
|
|
579
563
|
public
|
@@ -582,11 +566,21 @@ module Bud
|
|
582
566
|
end
|
583
567
|
|
584
568
|
def insert(item, source)
|
585
|
-
|
569
|
+
if source == @lhs && source == @rhs # Self join
|
570
|
+
do_insert(item, 0)
|
571
|
+
do_insert(item, 1)
|
572
|
+
else
|
573
|
+
offset = source == @lhs ? 0 : 1
|
574
|
+
do_insert(item, offset)
|
575
|
+
end
|
576
|
+
end
|
577
|
+
|
578
|
+
def do_insert(item, offset)
|
586
579
|
key = get_key(item, offset)
|
587
580
|
(@hash_tables[offset][key] ||= Set.new).add item
|
588
581
|
if @rhs_rcvd and offset == 0
|
589
|
-
|
582
|
+
rhs_values = @hash_tables[1][key]
|
583
|
+
process_match(item, rhs_values)
|
590
584
|
end
|
591
585
|
end
|
592
586
|
|
@@ -596,19 +590,15 @@ module Bud
|
|
596
590
|
# growing any more, until the next tick.
|
597
591
|
unless @rhs_rcvd
|
598
592
|
@rhs_rcvd = true
|
593
|
+
rhs_hash = @hash_tables[1]
|
599
594
|
@hash_tables[0].each do |key,values|
|
600
|
-
|
595
|
+
rhs_values = rhs_hash[key]
|
596
|
+
values.each {|item| process_match(item, rhs_values)}
|
601
597
|
end
|
602
598
|
end
|
603
599
|
end
|
604
600
|
|
605
|
-
def push_lhs(key, lhs_item)
|
606
|
-
rhs_values = @hash_tables[1][key]
|
607
|
-
process_match(lhs_item, rhs_values)
|
608
|
-
end
|
609
|
-
|
610
601
|
def process_match(lhs_item, rhs_values)
|
611
|
-
exclude = true
|
612
602
|
if rhs_values.nil?
|
613
603
|
# no corresponding rhs. Include in output
|
614
604
|
exclude = false
|
@@ -616,33 +606,24 @@ module Bud
|
|
616
606
|
# for any lhs * rhs pair, if block returns true, do not push lhs. lhs is pushed
|
617
607
|
# only if there is no match (anti-join)
|
618
608
|
exclude = rhs_values.any?{|rhs_item| @blk.call(lhs_item, rhs_item)}
|
609
|
+
else
|
610
|
+
exclude = true
|
619
611
|
end
|
620
|
-
unless exclude
|
621
|
-
push_out(lhs_item)
|
622
|
-
end
|
623
|
-
end
|
624
612
|
|
625
|
-
|
626
|
-
def push_out(item)
|
627
|
-
@outputs.each do |ou|
|
628
|
-
if ou.class <= Bud::PushElement
|
629
|
-
ou.insert(item, self)
|
630
|
-
elsif ou.class <= Bud::BudCollection
|
631
|
-
ou.do_insert(item, ou.new_delta)
|
632
|
-
else
|
633
|
-
raise Bud::Error, "expected either a PushElement or a BudCollection"
|
634
|
-
end
|
635
|
-
end
|
636
|
-
# for all the following, o is a BudCollection
|
637
|
-
@deletes.each{|o| o.pending_delete([item])}
|
638
|
-
@delete_keys.each{|o| o.pending_delete_keys([item])}
|
639
|
-
@pendings.each{|o| o.pending_merge([item])}
|
613
|
+
push_out(lhs_item, false) unless exclude
|
640
614
|
end
|
641
615
|
|
642
616
|
def invalidate_cache
|
643
|
-
|
644
|
-
|
645
|
-
@
|
617
|
+
raise Bud::Error if @rhs_rcvd # sanity check; should already be reset
|
618
|
+
|
619
|
+
if @lhs.rescan
|
620
|
+
puts "#{tabname} rel:#{@lhs.qualified_tabname} invalidated" if $BUD_DEBUG
|
621
|
+
@hash_tables[0] = {}
|
622
|
+
end
|
623
|
+
if @rhs.rescan
|
624
|
+
puts "#{tabname} rel:#{@rhs.qualified_tabname} invalidated" if $BUD_DEBUG
|
625
|
+
@hash_tables[1] = {}
|
626
|
+
end
|
646
627
|
end
|
647
628
|
|
648
629
|
def stratum_end
|