bud 0.0.8 → 0.1.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +4 -10
- data/bin/budplot +1 -2
- data/docs/cheat.md +2 -15
- data/examples/basics/paths.rb +7 -7
- data/lib/bud/aggs.rb +15 -19
- data/lib/bud/bud_meta.rb +165 -77
- data/lib/bud/bust/bust.rb +11 -4
- data/lib/bud/collections.rb +643 -280
- data/lib/bud/depanalysis.rb +50 -25
- data/lib/bud/executor/elements.rb +592 -0
- data/lib/bud/executor/group.rb +104 -0
- data/lib/bud/executor/join.rb +638 -0
- data/lib/bud/graphs.rb +12 -11
- data/lib/bud/joins.rb +2 -1
- data/lib/bud/meta_algebra.rb +5 -4
- data/lib/bud/metrics.rb +9 -3
- data/lib/bud/monkeypatch.rb +131 -23
- data/lib/bud/rebl.rb +41 -28
- data/lib/bud/rewrite.rb +112 -440
- data/lib/bud/server.rb +3 -2
- data/lib/bud/source.rb +109 -0
- data/lib/bud/state.rb +16 -9
- data/lib/bud/storage/dbm.rb +62 -16
- data/lib/bud/storage/zookeeper.rb +2 -2
- data/lib/bud/viz.rb +8 -4
- data/lib/bud/viz_util.rb +10 -9
- data/lib/bud.rb +413 -199
- metadata +40 -55
- data/examples/deploy/tokenring-ec2.rb +0 -26
- data/examples/deploy/tokenring-fork.rb +0 -15
- data/examples/deploy/tokenring-thread.rb +0 -15
- data/examples/deploy/tokenring.rb +0 -47
- data/lib/bud/deploy/deployer.rb +0 -67
- data/lib/bud/deploy/ec2deploy.rb +0 -199
- data/lib/bud/deploy/forkdeploy.rb +0 -90
- data/lib/bud/deploy/threaddeploy.rb +0 -38
- data/lib/bud/storage/tokyocabinet.rb +0 -190
- data/lib/bud/stratify.rb +0 -85
@@ -0,0 +1,638 @@
|
|
1
|
+
require 'bud/executor/elements'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
$EMPTY = []
|
5
|
+
module Bud
|
6
|
+
class PushSHJoin < PushStatefulElement
|
7
|
+
attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
|
8
|
+
|
9
|
+
# Build a join over the elements in +rellist+. Any element that is itself a
# PushSHJoin contributes the relations below it and its original predicates,
# so @all_rels_below lists every joined input in order.
#
# rellist::      the input elements of this join
# bud_instance:: the owning Bud instance
# preds::        join predicates; nil (the declared default) means "none"
#
# Raises Bud::CompileError if one collection appears more than twice.
def initialize(rellist, bud_instance, preds=nil) # :nodoc: all
  # The nil default used to crash below on preds.empty? / preds += ...
  preds = [] if preds.nil?

  @rels = rellist
  @relnames = @rels.map { |r| r.elem_name }
  @bud_instance = bud_instance
  @origpreds = preds
  @localpreds = nil
  @selfjoins = []
  @input_bufs = [[], []]
  @missing_keys = Set.new

  # if any elements on rellist are PushSHJoins, suck up their contents
  @all_rels_below = []
  rellist.each do |r|
    if r.class <= PushSHJoin
      @all_rels_below += r.all_rels_below
      preds += (r.origpreds || [])
    else
      @all_rels_below << r
    end
  end

  # check for self-joins: we currently only handle 2 instances of the same table per rule
  counts = Hash.new(0)
  @all_rels_below.each { |r| counts[r.elem_name] += 1 }
  counts.each do |name, cnt|
    raise Bud::CompileError, "#{cnt} instances of #{name} in rule; only one self-join currently allowed per rule" if cnt > 2
    @selfjoins << name if cnt == 2
  end

  # derive schema: one column for each table.
  # duplicated inputs get distinguishing numeral
  seen = Hash.new(0)
  @cols = @all_rels_below.map do |r|
    base = r.tabname.to_s
    nth = seen[base]
    seen[base] += 1
    (nth > 0 ? base + "_" + nth.to_s : base).to_sym
  end

  setup_preds(preds) unless preds.empty?
  setup_state

  super(@tabname,@bud_instance,nil,@cols)
end
|
62
|
+
|
63
|
+
public
|
64
|
+
# Give up one reference to this join and hand back a fresh PushSHJoin over
# the same underlying relations, with no predicates set yet.
def copy_on_write
  @refcount -= 1
  Bud::PushSHJoin.new(@all_rels_below, @bud_instance, [])
end
|
68
|
+
|
69
|
+
public
|
70
|
+
# Identifier for this join's state: simply this object's object_id.
def state_id # :nodoc: all
  # Marshal.dump([@rels.map{|r| r.tabname}, @localpreds]).hash
  self.object_id
end
|
74
|
+
|
75
|
+
# initialize the state for this join to be carried across iterations within a fixpoint
|
76
|
+
private
|
77
|
+
# Name this join "(t1*t2*...):<state_id>" after the relations below it and
# reset both hash tables to empty.
def setup_state
  joined_names = @all_rels_below.map { |r| r.tabname }.join('*')
  @tabname = "(#{joined_names}):#{state_id}".to_sym
  @hash_tables = [{}, {}]
end
|
83
|
+
|
84
|
+
# extract predicates on rellist[1] and recurse to left side with remainder
|
85
|
+
protected
|
86
|
+
# Split +preds+ into the predicates this join evaluates itself (@localpreds)
# and the remainder, which is handed to the left input when that input is
# itself a join. Also derives @keys, the hash-key positions for both sides,
# from the first local predicate.
#
# Raises Bud::CompileError for predicates on collections that are not being
# joined, for single-table predicates on non-self-joined tables, and for
# leftover predicates when the left input is not a join.
def setup_preds(preds) # :nodoc: all
  allpreds = canonicalize_localpreds(@rels, disambiguate_preds(preds))

  # check for refs to collections that aren't being joined, Issue 191
  unless @rels[0].class <= Bud::PushSHJoin
    joined = @rels.map { |r| r.tabname }
    allpreds.each do |pred|
      [pred[0][0], pred[1][0]].each do |ref|
        next if joined.include? ref
        raise Bud::CompileError, "illegal predicate: collection #{ref} is not being joined"
      end
    end
  end

  right_name = @rels[1].tabname
  @localpreds = allpreds.reject do |pred|
    lhs_tab, rhs_tab = pred[0][0], pred[1][0]
    # reject if it doesn't match the right (leaf node) of the join
    # or reject if it does match, but it can be evaluated by a lower join
    # i.e. one that also has this table on the right (leaf node)
    rhs_tab != right_name ||
      (lhs_tab != right_name && rhs_tab == right_name && @selfjoins.include?(right_name))
  end

  # only allow preds on the same table name if they're on a self-joined table
  @localpreds.each do |pred|
    next unless pred[0][0] == pred[1][0]
    next if @selfjoins.include? pred[0][0]
    raise Bud::CompileError, "single-table predicate on #{pred[0][0]} disallowed in joins"
  end

  # predicates relating one of this join's inputs to itself are also local
  same_table = allpreds.select do |pred|
    pred[0][0] == pred[1][0] &&
      (pred[1][0] == @rels[0].tabname || pred[1][0] == @rels[1].tabname)
  end
  @localpreds += same_table

  otherpreds = allpreds - @localpreds
  unless otherpreds.empty?
    unless @rels[0].class <= Bud::PushSHJoin
      raise Bud::CompileError, "join predicates don't match tables being joined: #{otherpreds.inspect}"
    end
    @rels[0].setup_preds(otherpreds)
  end

  if @localpreds.empty?
    @keys = []
  else
    key_pred = @localpreds.first
    @right_offset = key_pred[1][1]
    @left_subtuple, @left_offset = join_offset(key_pred[0])
    @keys = [[@left_subtuple, @left_offset], [1, @right_offset]]
  end
end
|
139
|
+
|
140
|
+
public
|
141
|
+
# Drop the cached hash table of every input whose source element reports
# +rescan+. Invalidating the left input (index 0) also clears @missing_keys.
def invalidate_cache
  @rels.each_with_index do |source_elem, i|
    next unless source_elem.rescan

    puts "#{tabname} rel:#{i}(#{source_elem.tabname}) invalidated" if $BUD_DEBUG
    @hash_tables[i] = {}
    # XXX This is not modular. We are doing invalidation work for outer joins, which is part of a
    # separate module PushSHOuterJoin.
    # Only if i == 0 because outer joins in Bloom are left outer joins
    # if i == 1, missing_keys will be corrected when items are populated in the rhs fork
    @missing_keys.clear if i == 0
  end
end
|
156
|
+
|
157
|
+
# calculate the position for a field in the result of a join:
|
158
|
+
# the tuple offset ("subtuple") and the attribute position within it
|
159
|
+
# ("offset")
|
160
|
+
# Locate a [table name, attribute position] reference inside a join result.
# Returns [subtuple, offset]: the index of the first relation below this join
# whose tabname equals the referenced table (0 when none matches), and the
# attribute position taken unchanged from +entry+.
def join_offset(entry)
  wanted, offset = entry[0], entry[1]
  subtuple = all_rels_below.index { |t| t.tabname == wanted } || 0
  return subtuple, offset
end
|
175
|
+
|
176
|
+
protected
|
177
|
+
# Normalize user-supplied predicates into binary predicates via decomp_preds.
# A single Hash argument is expanded pair by pair: Array keys/values are used
# as given, Symbol keys/values are resolved through find_attr_match (left
# symbol against the first relation and right symbol against the second when
# exactly two relations are joined). Anything else is passed to decomp_preds
# unchanged.
#
# Raises Bud::CompileError when a hash pair mixes reference styles or uses a
# style other than Array or Symbol.
def disambiguate_preds(preds) # :nodoc: all
  return decomp_preds(*preds) unless preds.size == 1 && preds[0].class <= Hash

  expanded = preds[0].map do |k, v|
    if k.class != v.class
      raise Bud::CompileError, "inconsistent attribute ref style #{k.inspect} => #{v.inspect}"
    end

    case k
    when Array
      [k, v]
    when Symbol
      if @all_rels_below and @all_rels_below.length == 2
        [find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
      else
        [find_attr_match(k), find_attr_match(v)]
      end
    else
      raise Bud::CompileError, "invalid attribute ref in #{k.inspect} => #{v.inspect}"
    end
  end
  decomp_preds(*expanded)
end
|
199
|
+
|
200
|
+
# find element in @all_rels_below that contains this +aname+ method
|
201
|
+
# if +rel+ is non-nil, only check that collection.
|
202
|
+
# after found, return the result of invoking +aname+ from chosen collection
|
203
|
+
protected
|
204
|
+
# Resolve attribute +aname+ against the joined collections (or against +rel+
# only, when given) and return the result of calling +aname+ on the matching
# collection in bud_instance.tables.
#
# Raises Bud::CompileError if two different relations both respond to
# +aname+, or if none does.
def find_attr_match(aname, rel=nil) # :nodoc: all
  dorels = rel.nil? ? @all_rels_below : [rel]
  match = nil
  dorels.each do |r|
    next unless bud_instance.tables[r.elem_name].respond_to?(aname)
    match ||= r
    if match != r
      raise Bud::CompileError, "ambiguous attribute :#{aname} in both #{match.tabname} and #{r.tabname}"
    end
  end
  if match.nil?
    raise Bud::CompileError, "attribute :#{aname} not found in any of #{dorels.map{|t| t.tabname}.inspect}"
  end
  bud_instance.tables[match.elem_name].send(aname)
end
|
218
|
+
|
219
|
+
protected
|
220
|
+
# Decompose each pred into binary preds: every element is paired with its
# successor, skipping pairs whose successor is nil.
# Returns nil when called with no preds or with a single nil.
def decomp_preds(*preds) # :nodoc:all
  return nil if preds.empty? or preds == [nil]

  binary = []
  preds.each do |pred|
    pred.each_with_index do |_elem, idx|
      successor = pred[idx + 1]
      next if successor.nil?
      binary << [pred[idx], successor]
    end
  end
  binary
end
|
231
|
+
|
232
|
+
protected
|
233
|
+
# Return +preds+ with every predicate whose lhs names rel_list[1] flipped so
# that table ends up on the rhs. Predicates with the same table on both
# sides (self-joins) are left as they are.
def canonicalize_localpreds(rel_list, preds) # :nodoc:all
  preds.map do |pred|
    lhs_tab = pred[0][0]
    if lhs_tab == rel_list[1].tabname and lhs_tab != pred[1][0]
      pred.reverse
    else
      pred
    end
  end
end
|
239
|
+
|
240
|
+
private
|
241
|
+
# right is a tuple
|
242
|
+
# left is a tuple or an array (combo) of joined tuples.
|
243
|
+
# Check the local predicates not listed in +skips+ against a candidate pair.
# right is a tuple
# left is a tuple or an array (combo) of joined tuples.
# Returns true when every remaining predicate holds (or none remain),
# false at the first mismatch.
def test_locals(left, left_is_array, right, *skips)
  return true unless skips and @localpreds.length > skips.length

  # check remainder of the predicates
  @localpreds.each do |pred|
    next if skips.include? pred

    # assumption of left-deep joins here
    if pred[1][0] != @rels[1].tabname
      raise "Expected rhs table to be #{@rels[1].tabname}, not #{pred[1][0]}"
    end

    rfield = right[pred[1][1]]
    lfield =
      if left_is_array
        ix, off = join_offset(pred[0])
        left[ix][off]
      else
        left[pred[0][1]]
      end
    return false if lfield != rfield
  end
  true
end
|
269
|
+
|
270
|
+
# given another element, build a new join with this element as the left
# input and +elem2+ as the right input, wire both inputs to it, and
# return the new join.
|
273
|
+
public
|
274
|
+
# Join this element with +elem2+ (converted with to_push_elem unless it is
# already a PushElement). The new PushSHJoin is registered in the Bud
# instance's push_joins for the current stratum, both inputs are wired to
# it, and it is returned.
def join(elem2, &blk)
  elem2 = elem2.to_push_elem unless elem2.class <= PushElement
  # This constructs a left-deep tree!
  joined = Bud::PushSHJoin.new([self, elem2], @bud_instance, [])
  @bud_instance.push_joins[@bud_instance.this_stratum] << joined
  elem2.wire_to(joined)
  wire_to(joined)
  joined
end
|
283
|
+
|
284
|
+
undef do_insert
|
285
|
+
|
286
|
+
public
|
287
|
+
# Buffer +item+ arriving from +source+ on the matching input(s) of this join,
# flushing an input buffer once it reaches ELEMENT_BUFSIZE. A pending rescan
# is served first by replaying the join. A self-joined source feeds every
# input carrying its name.
#
# Raises RuntimeError if +source+ is not one of this join's inputs.
def insert(item, source)
  #puts "JOIN: #{source.tabname} --> #{self.tabname} : #{item}/#{item.class}"
  if @rescan
    replay_join
    @rescan = false
  end

  src_name = source.elem_name
  offsets =
    if @selfjoins.include? src_name
      @relnames.each_index.select { |i| @relnames[i] == src_name }
    else
      # index returns nil for an unknown name; drop it so the check below
      # fires instead of failing later on @input_bufs[nil]
      [@relnames.index(src_name)].compact
    end
  if offsets.empty?
    raise "item #{item.inspect} inserted into join from unknown source #{src_name}"
  end

  offsets.each do |offset|
    buf = @input_bufs[offset]
    buf << item
    flush_buf(buf, offset) if buf.length >= ELEMENT_BUFSIZE
  end
end
|
308
|
+
|
309
|
+
protected
|
310
|
+
# Build-and-probe step of the symmetric hash join: add +item+ to the hash
# table of input +offset+ under its join key and, if it was not already
# present, probe the other input's table and process any matches.
def insert_item(item, offset)
  the_key =
    if @keys.nil? or @keys.empty?
      nil
    elsif all_rels_below.length > 2 and offset == 0
      # assumes left-deep trees
      item[@keys[0][0]][@keys[0][1]]
    else
      item[@keys[offset][1]]
    end

  # build
  bucket = (@hash_tables[offset][the_key] ||= Set.new)
  return unless bucket.add? item

  @found_delta = true
  # and probe
  the_matches = @hash_tables[1 - offset][the_key]
  process_matches(item, the_matches, offset) unless the_matches.nil?
end
|
331
|
+
|
332
|
+
public
|
333
|
+
# Always false for a plain join.
def rescan_at_tick
  false
end
|
336
|
+
|
337
|
+
public
|
338
|
+
# Add this join to both +rescan+ and +invalidate+ when any of its
# non-temporal predecessors is already in +rescan+, then delegate to
# invalidate_tables.
def add_rescan_invalidate(rescan, invalidate)
  predecessor_rescans = non_temporal_predecessors.any? { |e| rescan.member? e }
  if predecessor_rescans
    rescan << self
    invalidate << self
  end

  # The distinction between a join node and other stateful elements is that when a join node needs a rescan
  # it doesn't tell all its sources to rescan. In fact, it doesn't have to pass a rescan request up to a source,
  # because if a target needs a rescan, the join node has all the state necessary to feed the downstream node. And
  # if a source node is in rescan, then at run-time only the state associated with that particular source node
  # @hash_tables[offset] will be cleared, and will get filled up again because that source will rescan anyway.
  invalidate_tables(rescan, invalidate)
end
|
353
|
+
|
354
|
+
# Re-emit every joined pair from the two cached hash tables. Does nothing
# when either table is empty. The table with fewer keys drives the loop.
#
# The third argument to process_matches is the input side +item+ came from
# (0 = left, 1 = right), the same convention insert_item uses; the two
# values were previously swapped, so replayed pairs came out as
# [right, left].
def replay_join
  left_table = @hash_tables[0]
  right_table = @hash_tables[1]
  return if left_table.empty? or right_table.empty?

  if left_table.size < right_table.size
    left_table.each_pair do |key, items|
      the_matches = right_table[key]
      next if the_matches.nil?
      items.each { |item| process_matches(item, the_matches, 0) }
    end
  else
    right_table.each_pair do |key, items|
      the_matches = left_table[key]
      next if the_matches.nil?
      items.each { |item| process_matches(item, the_matches, 1) }
    end
  end
end
|
380
|
+
|
381
|
+
private
|
382
|
+
# Pair +item+ with every tuple in +the_matches+ and push out each pair that
# passes the local predicates beyond the first. +offset+ is the input side
# +item+ came from: 0 puts +item+ on the left, anything else on the right.
# When more than two relations sit below this join, the left side is itself
# an array of tuples and the right tuple is appended to it.
def process_matches(item, the_matches, offset)
  the_matches.each do |m|
    left, right = (offset == 0) ? [item, m] : [m, item]
    left_is_array = all_rels_below.length > 2

    accepted = @localpreds.nil? || @localpreds.length == 1 ||
               test_locals(left, left_is_array, right, @localpreds.first)
    next unless accepted

    # FIX: reduce arrays being created.
    push_out(left_is_array ? left + [right] : [left, right])
  end
end
|
398
|
+
|
399
|
+
# Run every buffered item of +buf+ through insert_item for input +offset+,
# then replace that input's buffer with a fresh empty array.
def flush_buf(buf, offset)
  buf.each { |item| insert_item(item, offset) }
  @input_bufs[offset] = []
end
|
405
|
+
|
406
|
+
public
|
407
|
+
# Flush every non-empty input buffer.
def flush
  @input_bufs.each_with_index do |buf, offset|
    next if buf.empty?
    flush_buf(buf, offset)
  end
end
|
412
|
+
|
413
|
+
public
|
414
|
+
# At the end of a stratum, push any still-buffered input through the join.
def stratum_end
  flush
end
|
417
|
+
|
418
|
+
####
|
419
|
+
# and now, the Bloom-facing methods
|
420
|
+
# given a * expression over n collections, form all combinations of items
|
421
|
+
# subject to an array of predicates, pred
|
422
|
+
# currently supports two options for equijoin predicates:
|
423
|
+
# general form: an array of arrays capturing a conjunction of equiv. classes
|
424
|
+
# [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
|
425
|
+
# common form: a hash capturing equality of a column on left with one on right.
|
426
|
+
# :col1 => :col2 (same as lefttable.col1 => righttable.col2)
|
427
|
+
public
|
428
|
+
# Set the join predicates (and optional block) for this join and return the
# join that carries them. When this join is referenced more than once
# (@refcount != 1), the predicates are applied to a copy obtained from
# copy_on_write instead, and that copy is returned.
def pairs(*preds, &blk)
  ## XXX Need to do this for all the join modifiers
  unless @refcount == 1
    # Forward with splat and block-pass: the copy must see the same
    # predicate list and block the caller supplied, not one nested array
    # plus a positional proc.
    return copy_on_write.pairs(*preds, &blk)
  end
  @origpreds = preds
  setup_preds(preds) unless preds.empty?
  # given new preds, the state for the join will be different. set it up again.
  setup_state if self.class <= Bud::PushSHJoin
  set_block(&blk) if blk
  self
end
|
440
|
+
|
441
|
+
# given a * expression over 2 collections, form all combos of items that
|
442
|
+
# satisfy +preds+, and for any item from the 1st collection that has no
|
443
|
+
# matches in the 2nd, nil-pad it and include it in the output.
|
444
|
+
public
|
445
|
+
# Like pairs, but the resulting join is extended with Bud::PushSHOuterJoin.
# The extension is applied to whatever pairs returns, because pairs may hand
# back a copy of this join rather than this join itself; extending and
# returning +self+ in that case would alter the shared join and drop the
# predicates that were just set on the copy.
def outer(*preds, &blk)
  pairs(*preds, &blk).extend(Bud::PushSHOuterJoin)
end
|
449
|
+
|
450
|
+
|
451
|
+
public
|
452
|
+
# Join on +preds+ but emit only the right-hand tuple of each pair, passed
# through +blk+ when one is given. Without a block the output columns are
# those of the right input's table; with a block @cols is cleared.
def rights(*preds, &blk)
  if blk.nil?
    @cols = @bud_instance.tables[@rels[1].tabname].cols
    setup_accessors
    pairs(*preds) { |_x, y| y }
  else
    @cols = nil
    pairs(*preds) { |_x, y| blk.call(y) }
  end
end
|
459
|
+
|
460
|
+
public
|
461
|
+
# Join on +preds+ but emit only the left-hand tuple of each pair, passed
# through +blk+ when one is given. Without a block the output columns are
# those of the left input's table; with a block @cols is cleared.
def lefts(*preds, &blk)
  if blk.nil?
    @cols = @bud_instance.tables[@rels[0].tabname].cols
    setup_accessors
    pairs(*preds) { |x, _y| x }
  else
    @cols = nil
    pairs(*preds) { |x, _y| blk.call(x) }
  end
end
|
468
|
+
|
469
|
+
private
|
470
|
+
# Return a version of +flat_schema+ (a list of column names) in which no
# name occurs twice. A trailing "_<digits>" is stripped from each name
# first; the first occurrence of a name is kept as is and each later
# occurrence gets "_1", "_2", ... appended. The pass is repeated until the
# result has no collisions.
#
# An empty schema is returned unchanged (the retry loop below would
# otherwise never terminate on it).
def dupfree_schema(flat_schema)
  return flat_schema if flat_schema.empty?

  # loop here (inefficiently) ensures no collisions
  loop do
    counts = {}
    renamed = flat_schema.map do |col|
      text = col.to_s
      if text.include?("_") and ((text.rpartition("_")[2] =~ /^\d+$/) == 0)
        col = text.rpartition("_")[0].to_sym
      end
      seen = counts[col] || 0
      counts[col] = seen + 1
      seen == 0 ? col.to_s.to_sym : (col.to_s + "_" + seen.to_s).to_sym
    end
    flat_schema = renamed
    break if renamed.uniq.length == renamed.length
  end
  flat_schema
end
|
492
|
+
|
493
|
+
public
|
494
|
+
# Join on +preds+ and emit each pair as one flat array (left fields followed
# by right fields), passed through +blk+ when one is given. Without a block
# the output columns are the duplicate-free concatenation of the columns of
# the tables named by @cols[0] and @cols[1]; with a block @cols is emptied.
def flatten(*preds, &blk)
  @cols =
    if blk.nil?
      tables = @bud_instance.tables
      dupfree_schema(tables[@cols[0]].cols + tables[@cols[1]].cols)
    else
      []
    end
  setup_accessors
  pairs(*preds) do |x, y|
    flat = x.to_a + y.to_a
    blk.nil? ? flat : blk.call(flat)
  end
end
|
505
|
+
|
506
|
+
private_class_method
|
507
|
+
# Build natural-join predicates: for every pair of relations (earlier one
# first) and every column name they share, a predicate pairing the results
# of calling that column on each relation. Duplicates are removed.
# +bud_instance+ is not used.
def self.natural_preds(bud_instance, rels)
  preds = []
  rels.each_with_index do |r, i|
    rels[(i + 1)..-1].each do |s|
      (r.cols & s.cols).each { |c| preds << [r.send(c), s.send(c)] }
    end
  end
  preds.uniq
end
|
521
|
+
|
522
|
+
public
|
523
|
+
# Join using the predicates natural_preds derives for the relations below
# this join.
def matches(&blk)
  natural = self.class.natural_preds(@bud_instance, @all_rels_below)
  pairs(*natural, &blk)
end
|
527
|
+
|
528
|
+
alias combos pairs
|
529
|
+
end
|
530
|
+
|
531
|
+
# Mixed into a PushSHJoin instance (see PushSHJoin#outer) to turn it into a
# left outer join: left tuples whose key never found a partner are pushed
# out at the end of the stratum paired with an empty right side.
module PushSHOuterJoin

  private
  # Same build-and-probe step as PushSHJoin#insert_item, plus bookkeeping in
  # @missing_keys of left-side keys that have no right-side partner yet.
  def insert_item(item, offset)
    if (@keys.nil? or @keys.empty?)
      the_key = nil
    elsif all_rels_below.length > 2 and offset == 0
      # Left input of a nested join is a combo of tuples, so its key sits at
      # [subtuple][offset] as recorded in @keys[0] -- the same rule
      # PushSHJoin#insert_item uses. (This branch previously tested
      # offset == 1 and indexed the flat right tuple with @keys[1].)
      the_key = item[@keys[0][0]][@keys[0][1]]
    else
      the_key = item[@keys[offset][1]]
    end

    #build
    # puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
    if (@hash_tables[offset][the_key] ||= Set.new).add? item
      @found_delta = true
      #and probe
      # puts "probing #{item.inspect} into @source[#{1-offset}] on key #{the_key.inspect}"
      the_matches = @hash_tables[1-offset][the_key]
      if the_matches.nil? and offset == 0 # only doing Left Outer Join right now
        @missing_keys << the_key
      else
        @missing_keys.delete(the_key) # no longer missing no matter which side this tuple is
        process_matches(item, the_matches, offset) unless the_matches.nil?
      end
    end
  end

  public
  # Flush buffered input, then emit the unmatched left tuples.
  def stratum_end
    flush
    push_missing
  end

  private
  # Push out [left_tuple, []] for every left tuple stored under a key that is
  # still recorded as missing.
  def push_missing
    if @missing_keys
      @missing_keys.each do |key|
        @hash_tables[0][key].each do |t|
          push_out([t, []])
        end
      end
    end
  end
end
|
577
|
+
|
578
|
+
# Join variant that emits, at the end of each stratum, the left-input tuples
# that were NOT excluded. Every joined pair is offered to push_out, which
# records the left tuple in @exclude (always, or only when the block returns
# a truthy value if a block was supplied).
class PushNotIn < PushSHJoin
  # When no predicates are given, the inputs are matched column by column
  # (see positionwise_preds).
  def initialize(rellist, bud_instance, preds=nil, &blk) # :nodoc: all
    preds = positionwise_preds(bud_instance, rellist) if preds.nil? or preds.empty?
    super(rellist, bud_instance, preds)
    set_block(&blk)
    @cols = rellist[0].cols
    @exclude = Set.new
  end

  # Pair up the columns of the two inputs by position. Returns [] when the
  # inputs have different numbers of columns; otherwise a one-element array
  # holding a hash from each left column to the right column at the same
  # position, as expected by setup_pred.
  def positionwise_preds(bud_instance, rels)
    left_cols = rels[0].cols
    right_cols = rels[1].cols
    return [] if left_cols.length != right_cols.length
    [Hash[left_cols.zip(right_cols)]]
  end

  public
  def rescan_at_tick
    true
  end

  # item is a two element array, a tuple from rels[0] and rels[1]
  # called from PushSHJoin::process_matches, but we don't push the item downstream until stratum end
  def push_out(item)
    #puts "#{item} ===> #{do_exclude}"
    @exclude << item[0] if @blk.nil? || @blk.call(item)
  end

  public
  def invalidate_cache
    @exclude.clear
  end

  # Flush buffered input, then scan through all the cached left rel values
  # and push out those that are not in exclude.
  def stratum_end
    flush
    @hash_tables[0].each_value do |bucket|
      bucket.each do |item|
        next if @exclude.member? item

        @outputs.each do |ou|
          case ou
          when Bud::PushElement
            ou.insert(item, self)
          when Bud::BudCollection
            ou.do_insert(item, ou.new_delta)
          else
            raise "Expected either a PushElement or a BudCollection"
          end
        end

        next if item.nil?
        # for all the following, o is a BudCollection
        @deletes.each { |o| o.pending_delete([item]) }
        @delete_keys.each { |o| o.pending_delete_keys([item]) }
        @pendings.each { |o| o.pending_merge([item]) }
      end
    end
  end
end
|
638
|
+
end
|
data/lib/bud/graphs.rb
CHANGED
@@ -74,8 +74,8 @@ class GraphGen #:nodoc: all
|
|
74
74
|
# bottom if the predicate is not in a NEG/+ cycle. otherwise,
|
75
75
|
# its name is "CYC" + concat(sort(predicate names))
|
76
76
|
depends.each do |d|
|
77
|
-
head = d
|
78
|
-
body = d
|
77
|
+
head = d.lhs
|
78
|
+
body = d.body
|
79
79
|
|
80
80
|
if @builtin_tables.has_key?(head.to_sym) or @builtin_tables.has_key?(body.to_sym)
|
81
81
|
next
|
@@ -83,9 +83,9 @@ class GraphGen #:nodoc: all
|
|
83
83
|
|
84
84
|
head = name_of(head)
|
85
85
|
body = name_of(body)
|
86
|
-
addonce(head, (head != d
|
87
|
-
addonce(body, (body != d
|
88
|
-
addedge(body, head, d
|
86
|
+
addonce(head, (head != d.lhs), true)
|
87
|
+
addonce(body, (body != d.body))
|
88
|
+
addedge(body, head, d.op, d.nm, (head != d.lhs), d.rule_id)
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
@@ -98,7 +98,6 @@ class GraphGen #:nodoc: all
|
|
98
98
|
when :D, :G
|
99
99
|
"red"
|
100
100
|
else
|
101
|
-
puts "UNKNOWN tag #{paths[0][:val]} class #{paths[0][:val].class}"
|
102
101
|
"black"
|
103
102
|
end
|
104
103
|
end
|
@@ -219,8 +218,8 @@ class GraphGen #:nodoc: all
|
|
219
218
|
end
|
220
219
|
|
221
220
|
unless depanalysis.nil?
|
222
|
-
depanalysis.source.each {|s| addedge("S", s.pred, false, false, false)}
|
223
|
-
depanalysis.sink.each {|s| addedge(s.pred, "T", false, false, false)}
|
221
|
+
depanalysis.source.to_a.each {|s| addedge("S", s.pred, false, false, false)}
|
222
|
+
depanalysis.sink.to_a.each {|s| addedge(s.pred, "T", false, false, false)}
|
224
223
|
|
225
224
|
unless depanalysis.underspecified.empty?
|
226
225
|
addonce("??", false)
|
@@ -229,7 +228,7 @@ class GraphGen #:nodoc: all
|
|
229
228
|
@nodes["??"].penwidth = 2
|
230
229
|
end
|
231
230
|
|
232
|
-
depanalysis.underspecified.each do |u|
|
231
|
+
depanalysis.underspecified.to_a.each do |u|
|
233
232
|
if u.input
|
234
233
|
addedge(u.pred, "??", false, false, false)
|
235
234
|
else
|
@@ -297,10 +296,11 @@ class SpaceTime
|
|
297
296
|
squeues.each do |k, v|
|
298
297
|
v.each_with_index do |item, i|
|
299
298
|
label = "#{k}-#{item}"
|
299
|
+
params = {:label => item.to_s, :width => 0.1, :height => 0.1, :fontsize => 6, :group => k}
|
300
300
|
if @links
|
301
|
-
|
301
|
+
params[:URL] = "DBM_#{k}/tm_#{item}.svg"
|
302
302
|
end
|
303
|
-
snd = @subs[k].add_nodes(label,
|
303
|
+
snd = @subs[k].add_nodes(label, params)
|
304
304
|
unless @head[k].id == snd.id
|
305
305
|
@subs[k].add_edges(@head[k], snd, :weight => 2)
|
306
306
|
@head[k] = snd
|
@@ -322,6 +322,7 @@ class SpaceTime
|
|
322
322
|
def finish(file, fmt=nil)
|
323
323
|
@edges.each_pair do |k, v|
|
324
324
|
lbl = v[3] > 1 ? "#{v[2]}(#{v[3]})" : v[2]
|
325
|
+
lbl ||= ""
|
325
326
|
@g.add_edges(v[0], v[1], :label => lbl, :color => "red", :weight => 1)
|
326
327
|
end
|
327
328
|
if fmt.nil?
|