bud 0.0.8 → 0.1.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +4 -10
- data/bin/budplot +1 -2
- data/docs/cheat.md +2 -15
- data/examples/basics/paths.rb +7 -7
- data/lib/bud/aggs.rb +15 -19
- data/lib/bud/bud_meta.rb +165 -77
- data/lib/bud/bust/bust.rb +11 -4
- data/lib/bud/collections.rb +643 -280
- data/lib/bud/depanalysis.rb +50 -25
- data/lib/bud/executor/elements.rb +592 -0
- data/lib/bud/executor/group.rb +104 -0
- data/lib/bud/executor/join.rb +638 -0
- data/lib/bud/graphs.rb +12 -11
- data/lib/bud/joins.rb +2 -1
- data/lib/bud/meta_algebra.rb +5 -4
- data/lib/bud/metrics.rb +9 -3
- data/lib/bud/monkeypatch.rb +131 -23
- data/lib/bud/rebl.rb +41 -28
- data/lib/bud/rewrite.rb +112 -440
- data/lib/bud/server.rb +3 -2
- data/lib/bud/source.rb +109 -0
- data/lib/bud/state.rb +16 -9
- data/lib/bud/storage/dbm.rb +62 -16
- data/lib/bud/storage/zookeeper.rb +2 -2
- data/lib/bud/viz.rb +8 -4
- data/lib/bud/viz_util.rb +10 -9
- data/lib/bud.rb +413 -199
- metadata +40 -55
- data/examples/deploy/tokenring-ec2.rb +0 -26
- data/examples/deploy/tokenring-fork.rb +0 -15
- data/examples/deploy/tokenring-thread.rb +0 -15
- data/examples/deploy/tokenring.rb +0 -47
- data/lib/bud/deploy/deployer.rb +0 -67
- data/lib/bud/deploy/ec2deploy.rb +0 -199
- data/lib/bud/deploy/forkdeploy.rb +0 -90
- data/lib/bud/deploy/threaddeploy.rb +0 -38
- data/lib/bud/storage/tokyocabinet.rb +0 -190
- data/lib/bud/stratify.rb +0 -85
@@ -0,0 +1,638 @@
|
|
1
|
+
require 'bud/executor/elements'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
$EMPTY = []
|
5
|
+
module Bud
|
6
|
+
class PushSHJoin < PushStatefulElement
|
7
|
+
attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
|
8
|
+
|
9
|
+
# Build a join node over the elements in +rellist+.
#
# rellist::      the inputs being joined; an input that is itself a
#                PushSHJoin contributes its own inputs and predicates
# bud_instance:: the Bud instance this join belongs to
# preds::        join predicates; nil (the default) means "no predicates"
#
# Raises Bud::CompileError if the same collection appears more than twice.
def initialize(rellist, bud_instance, preds=nil) # :nodoc: all
  # normalize the nil default so the += and empty? calls below cannot fail
  preds ||= []
  @rels = rellist
  @relnames = @rels.map { |r| r.elem_name }
  @bud_instance = bud_instance
  @origpreds = preds
  @localpreds = nil
  @selfjoins = []
  @input_bufs = [[], []]
  @missing_keys = Set.new

  # if any elements on rellist are PushSHJoins, suck up their contents
  @all_rels_below = []
  rellist.each do |r|
    if r.class <= PushSHJoin
      @all_rels_below += r.all_rels_below
      preds += r.origpreds
    else
      @all_rels_below << r
    end
  end

  # check for self-joins: we currently only handle 2 instances of the same table per rule
  counts = Hash.new(0)
  @all_rels_below.each { |r| counts[r.elem_name] += 1 }
  counts.each do |name, cnt|
    raise Bud::CompileError, "#{cnt} instances of #{name} in rule; only one self-join currently allowed per rule" if cnt > 2
    @selfjoins << name if cnt == 2
  end

  # derive schema: one column for each table.
  # duplicated inputs get distinguishing numeral
  @cols = []
  seen = Hash.new(0)
  @all_rels_below.each do |r|
    name = r.tabname.to_s
    @cols << (seen[name] > 0 ? "#{name}_#{seen[name]}" : name).to_sym
    seen[name] += 1
  end

  setup_preds(preds) unless preds.empty?
  setup_state

  super(@tabname, @bud_instance, nil, @cols)
end
|
62
|
+
|
63
|
+
public
|
64
|
+
# Decrement this join's reference count and return a new PushSHJoin built
# over the same underlying inputs, with an empty predicate list.
def copy_on_write
  @refcount -= 1
  Bud::PushSHJoin.new(@all_rels_below, @bud_instance, [])
end
|
68
|
+
|
69
|
+
public
|
70
|
+
# Identifier used when naming this join's state: currently just the Ruby
# object id of this element.
def state_id # :nodoc: all
  # alternative kept from the original source, not in use:
  # Marshal.dump([@rels.map{|r| r.tabname}, @localpreds]).hash
  return self.object_id
end
|
74
|
+
|
75
|
+
# initialize the state for this join to be carried across iterations within a fixpoint
|
76
|
+
private
|
77
|
+
# Name this join after its inputs and state id, and reset both hash tables
# (one per input side) to empty.
def setup_state
  below = @all_rels_below.map { |rel| rel.tabname }.join('*')
  @tabname = "(#{below}):#{state_id}".to_sym
  @hash_tables = [{}, {}]
end
|
83
|
+
|
84
|
+
# extract predicates on rellist[1] and recurse to left side with remainder
|
85
|
+
protected
|
86
|
+
# Split +preds+ into the predicates this join evaluates itself
# (@localpreds) and the remainder, which is handed to the left input when
# that input is itself a PushSHJoin. Finally derive @keys, the hash-key
# positions for both sides, from the first local predicate.
#
# Raises Bud::CompileError for predicates that reference collections not
# being joined, for single-table predicates on non-self-joined tables, and
# for leftover predicates when the left input is not a join.
def setup_preds(preds) # :nodoc: all
  # print "setting up preds for #{@relnames.inspect}(#{self.object_id}): "
  allpreds = canonicalize_localpreds(@rels, disambiguate_preds(preds))
  left_is_join = @rels[0].class <= Bud::PushSHJoin

  # check for refs to collections that aren't being joined, Issue 191
  unless left_is_join
    tabnames = @rels.map { |r| r.tabname }
    allpreds.each do |p|
      [0, 1].each do |side|
        ref = p[side][0]
        next if tabnames.include? ref
        raise Bud::CompileError, "illegal predicate: collection #{ref} is not being joined"
      end
    end
  end

  # keep a predicate only if its rhs is the right (leaf node) of this join,
  # and it cannot be evaluated by a lower join, i.e. one that also has this
  # table on the right (leaf node)
  right_name = @rels[1].tabname
  @localpreds = allpreds.select do |p|
    p[1][0] == right_name &&
      (p[0][0] == right_name || !@selfjoins.include?(right_name))
  end

  # only allow preds on the same table name if they're on a self-joined table
  @localpreds.each do |p|
    next unless p[0][0] == p[1][0]
    next if @selfjoins.include? p[0][0]
    raise Bud::CompileError, "single-table predicate on #{p[0][0]} disallowed in joins"
  end

  same_table = allpreds.select do |p|
    p[0][0] == p[1][0] && (p[1][0] == @rels[0].tabname || p[1][0] == @rels[1].tabname)
  end
  @localpreds += same_table

  otherpreds = allpreds - @localpreds
  unless otherpreds.empty?
    unless left_is_join
      raise Bud::CompileError, "join predicates don't match tables being joined: #{otherpreds.inspect}"
    end
    @rels[0].setup_preds(otherpreds)
  end

  if @localpreds.empty?
    @keys = []
  else
    first_pred = @localpreds.first
    @right_offset = first_pred[1][1]
    @left_subtuple, @left_offset = join_offset(first_pred[0])
    @keys = [[@left_subtuple, @left_offset], [1, @right_offset]]
  end
  # puts "@keys = #{@keys.inspect}"
end
|
139
|
+
|
140
|
+
public
|
141
|
+
# For every input that reports +rescan+, discard the hash table cached for
# that side of the join.
def invalidate_cache
  @rels.each_with_index do |source_elem, i|
    next unless source_elem.rescan

    puts "#{tabname} rel:#{i}(#{source_elem.tabname}) invalidated" if $BUD_DEBUG
    @hash_tables[i] = {}
    # XXX This is not modular. We are doing invalidation work for outer joins, which is part of a
    # separate module PushSHOuterJoin.
    # Only if i == 0 because outer joins in Bloom are left outer joins;
    # if i == 1, missing_keys will be corrected when items are populated in the rhs fork
    @missing_keys.clear if i == 0
  end
end
|
156
|
+
|
157
|
+
# calculate the position for a field in the result of a join:
|
158
|
+
# the tuple offset ("subtuple") and the attribute position within it
|
159
|
+
# ("offset")
|
160
|
+
# Locate a field in the result of this join.
#
# entry:: a [table name, attribute position] pair
#
# Returns [subtuple, offset]: the index of the first input in
# all_rels_below whose tabname equals the table name (0 when none
# matches), and the attribute position unchanged.
def join_offset(entry)
  name, offset = entry[0], entry[1]
  subtuple = all_rels_below.index { |t| t.tabname == name } || 0
  return subtuple, offset
end
|
175
|
+
|
176
|
+
protected
|
177
|
+
# Turn the user-supplied predicate list into binary predicates.
#
# A single Hash argument is expanded entry by entry: Array => Array pairs
# are used as they are, Symbol => Symbol pairs are resolved to attribute
# references via find_attr_match (left key against the first input and
# right value against the second when exactly two inputs are joined).
# Anything else is passed straight to decomp_preds.
#
# Raises Bud::CompileError when a key and value differ in class or are
# neither Arrays nor Symbols.
def disambiguate_preds(preds) # :nodoc: all
  return decomp_preds(*preds) unless preds.size == 1 && preds[0].is_a?(Hash)

  predarray = preds[0].map do |k, v|
    if k.class != v.class
      raise Bud::CompileError, "inconsistent attribute ref style #{k.inspect} => #{v.inspect}"
    end

    case k
    when Array
      [k, v]
    when Symbol
      if @all_rels_below && @all_rels_below.length == 2
        [find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
      else
        [find_attr_match(k), find_attr_match(v)]
      end
    else
      raise Bud::CompileError, "invalid attribute ref in #{k.inspect} => #{v.inspect}"
    end
  end
  decomp_preds(*predarray)
end
|
199
|
+
|
200
|
+
# find element in @all_rels_below that contains this +aname+ method
|
201
|
+
# if +rel+ is non-nil, only check that collection.
|
202
|
+
# after found, return the result of invoking +aname+ from chosen collection
|
203
|
+
protected
|
204
|
+
# Resolve the attribute +aname+ against the joined collections.
#
# aname:: attribute (method) name to look up
# rel::   when given, only this input is considered; otherwise every input
#         in @all_rels_below is
#
# Returns the result of sending +aname+ to the matching collection in
# bud_instance.tables. Raises Bud::CompileError when more than one input
# responds to +aname+ or when none does.
def find_attr_match(aname, rel=nil) # :nodoc: all
  dorels = rel.nil? ? @all_rels_below : [rel]
  match = nil
  dorels.each do |r|
    next unless bud_instance.tables[r.elem_name].respond_to?(aname)
    match ||= r
    if match != r
      raise Bud::CompileError, "ambiguous attribute :#{aname} in both #{match.tabname} and #{r.tabname}"
    end
  end
  if match.nil?
    raise Bud::CompileError, "attribute :#{aname} not found in any of #{dorels.map{|t| t.tabname}.inspect}"
  end
  bud_instance.tables[match.elem_name].send(aname)
end
|
218
|
+
|
219
|
+
protected
|
220
|
+
# Decompose each predicate chain into binary predicates: every pair of
# neighbouring entries [x, y] in a chain becomes one predicate, except
# pairs whose second entry is nil.
#
# Returns nil when called with no predicates or with a single nil,
# otherwise the flat list of binary predicates.
def decomp_preds(*preds) # :nodoc:all
  return nil if preds.empty? || preds == [nil]

  newpreds = []
  preds.each do |chain|
    chain.each_cons(2) do |lhs, rhs|
      newpreds << [lhs, rhs] unless rhs.nil?
    end
  end
  newpreds
end
|
231
|
+
|
232
|
+
protected
|
233
|
+
# Return +preds+ with each predicate oriented so that a reference to
# rel_list[1] sits on the right-hand side. Predicates whose two sides name
# the same table (self-joins) are left as they are.
def canonicalize_localpreds(rel_list, preds) # :nodoc:all
  preds.map do |p|
    # reverse if lhs is rel_list[1], *unless* it's a self-join!
    if p[0][0] == rel_list[1].tabname && p[0][0] != p[1][0]
      p.reverse
    else
      p
    end
  end
end
|
239
|
+
|
240
|
+
private
|
241
|
+
# right is a tuple
|
242
|
+
# left is a tuple or an array (combo) of joined tuples.
|
243
|
+
# Check the local predicates, other than those listed in +skips+, against a
# candidate pair.
#
# left::          a tuple, or an array (combo) of joined tuples
# left_is_array:: true when +left+ is such a combo
# right::         a tuple
# skips::         predicates to leave unchecked
#
# Returns true when every checked predicate holds (or when @localpreds has
# no more entries than +skips+), false at the first mismatch. Raises when a
# checked predicate's rhs table is not @rels[1].
def test_locals(left, left_is_array, right, *skips)
  return true unless skips && @localpreds.length > skips.length

  # check remainder of the predicates
  @localpreds.each do |pred|
    next if skips.include? pred
    # assumption of left-deep joins here
    if pred[1][0] != @rels[1].tabname
      raise "Expected rhs table to be #{@rels[1].tabname}, not #{pred[1][0]}"
    end
    rfield = right[pred[1][1]]
    lfield =
      if left_is_array
        ix, off = join_offset(pred[0])
        left[ix][off]
      else
        left[pred[0][1]]
      end
    return false if lfield != rfield
  end
  true
end
|
269
|
+
|
270
|
+
# wire this element and +elem2+ into a new PushSHJoin over [self, elem2]
|
271
|
+
# (a left-deep join tree), register the new join with the Bud instance
|
272
|
+
# under its current stratum, and return it.
|
273
|
+
public
|
274
|
+
# Join this element with +elem2+ (converted with to_push_elem unless it is
# already a PushElement). The new PushSHJoin is recorded in the Bud
# instance's push_joins for the current stratum and both inputs are wired
# to it. The block argument is accepted but not used here.
def join(elem2, &blk)
  rhs = (elem2.class <= PushElement) ? elem2 : elem2.to_push_elem
  # This constructs a left-deep tree!
  new_join = Bud::PushSHJoin.new([self, rhs], @bud_instance, [])
  @bud_instance.push_joins[@bud_instance.this_stratum] << new_join
  rhs.wire_to(new_join)
  wire_to(new_join)
  new_join
end
|
283
|
+
|
284
|
+
undef do_insert
|
285
|
+
|
286
|
+
public
|
287
|
+
# Buffer +item+, arriving from +source+, on the matching input side(s) of
# this join; a side's buffer is flushed once it reaches ELEMENT_BUFSIZE.
# A self-joined source feeds every side that carries its name.
#
# Raises RuntimeError when +source+ is not one of this join's inputs.
def insert(item, source)
  #puts "JOIN: #{source.tabname} --> #{self.tabname} : #{item}/#{item.class}"
  if @rescan
    replay_join
    @rescan = false
  end

  name = source.elem_name
  if @selfjoins.include? name
    offsets = @relnames.each_index.select { |i| @relnames[i] == name }
  else
    # index returns nil for an unknown name; drop it so the check below
    # reports the problem instead of indexing @input_bufs with nil
    offsets = [@relnames.index(name)].compact
  end
  raise "item #{item.inspect} inserted into join from unknown source #{name}" if offsets.empty?

  offsets.each do |offset|
    buf = @input_bufs[offset]
    buf << item
    flush_buf(buf, offset) if buf.length >= ELEMENT_BUFSIZE
  end
end
|
308
|
+
|
309
|
+
protected
|
310
|
+
# Add +item+ to the hash table for input side +offset+ and, if it was not
# already there, probe the other side's table with the same key and hand
# any matches to process_matches.
def insert_item(item, offset)
  the_key =
    if @keys.nil? || @keys.empty?
      nil
    elsif all_rels_below.length > 2 && offset == 0
      # assumes left-deep trees
      item[@keys[0][0]][@keys[0][1]]
    else
      item[@keys[offset][1]]
    end

  # build
  # puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
  bucket = (@hash_tables[offset][the_key] ||= Set.new)
  return unless bucket.add? item

  @found_delta = true
  # and probe
  # puts "probing #{item.inspect} into @source[#{1-offset}] on key #{the_key.inspect}"
  the_matches = @hash_tables[1-offset][the_key]
  process_matches(item, the_matches, offset) unless the_matches.nil?
end
|
331
|
+
|
332
|
+
public
|
333
|
+
# Always false for this join type.
def rescan_at_tick
  return false
end
|
336
|
+
|
337
|
+
public
|
338
|
+
# If any non-temporal predecessor of this join is already in +rescan+, add
# this join to both +rescan+ and +invalidate+; then call invalidate_tables
# with the two sets.
def add_rescan_invalidate(rescan, invalidate)
  upstream_rescans = non_temporal_predecessors.any? { |e| rescan.member? e }
  [rescan, invalidate].each { |set| set << self } if upstream_rescans

  # The distinction between a join node and other stateful elements is that when a join node needs a rescan
  # it doesn't tell all its sources to rescan. In fact, it doesn't have to pass a rescan request up to a source,
  # because if a target needs a rescan, the join node has all the state necessary to feed the downstream node. And
  # if a source node is in rescan, then at run-time only the state associated with that particular source node
  # @hash_tables[offset] will be cleared, and will get filled up again because that source will rescan anyway.
  invalidate_tables(rescan, invalidate)
end
|
353
|
+
|
354
|
+
# Re-emit the join results from the two cached hash tables. The smaller
# table is scanned and the other is probed by key.
#
# process_matches takes the input side of +item+ as its third argument
# (0 means item is the left tuple), so items taken from @hash_tables[0] are
# passed with offset 0 and items from @hash_tables[1] with offset 1.
# Nothing is emitted when either table is empty.
def replay_join
  left_table = @hash_tables[0]
  right_table = @hash_tables[1]
  return if left_table.empty? || right_table.empty?

  if left_table.size < right_table.size
    left_table.each_pair do |key, items|
      the_matches = right_table[key]
      next if the_matches.nil?
      items.each { |item| process_matches(item, the_matches, 0) }
    end
  else
    right_table.each_pair do |key, items|
      the_matches = left_table[key]
      next if the_matches.nil?
      items.each { |item| process_matches(item, the_matches, 1) }
    end
  end
end
|
380
|
+
|
381
|
+
private
|
382
|
+
# Pair +item+ with each tuple in +the_matches+ and push out the pairs that
# pass the local predicates.
#
# offset:: the input side +item+ came from; 0 makes +item+ the left
#          element of each pair, anything else makes it the right one
def process_matches(item, the_matches, offset)
  left_is_array = all_rels_below.length > 2
  the_matches.each do |m|
    left, right = (offset == 0) ? [item, m] : [m, item]
    # the first local predicate is passed to test_locals as a skip; only
    # the remaining ones are tested there
    passes = @localpreds.nil? || @localpreds.length == 1 ||
             test_locals(left, left_is_array, right, @localpreds.first)
    next unless passes
    push_out(left_is_array ? left + [right] : [left, right]) # FIX: reduce arrays being created.
  end
end
|
398
|
+
|
399
|
+
# Insert every buffered item of +buf+ on side +offset+, then replace that
# side's input buffer with a fresh empty array.
def flush_buf(buf, offset)
  buf.each { |item| insert_item(item, offset) }
  @input_bufs[offset] = []
end
|
405
|
+
|
406
|
+
public
|
407
|
+
# Flush every input buffer that currently holds items.
def flush
  @input_bufs.each_with_index do |buf, offset|
    next unless buf.length > 0
    flush_buf(buf, offset)
  end
end
|
412
|
+
|
413
|
+
public
|
414
|
+
# At the end of a stratum, flush whatever is still buffered.
def stratum_end
  return flush
end
|
417
|
+
|
418
|
+
####
|
419
|
+
# and now, the Bloom-facing methods
|
420
|
+
# given a * expression over n collections, form all combinations of items
|
421
|
+
# subject to an array of predicates, pred
|
422
|
+
# currently supports two options for equijoin predicates:
|
423
|
+
# general form: an array of arrays capturing a conjunction of equiv. classes
|
424
|
+
# [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
|
425
|
+
# common form: a hash capturing equality of a column on left with one on right.
|
426
|
+
# :col1 => :col2 (same as lefttable.col1 => righttable.col2)
|
427
|
+
public
|
428
|
+
# Install the join predicates +preds+ and an optional block on this join.
#
# When this join's reference count is not 1, the call is delegated to the
# join returned by copy_on_write and that join is returned; otherwise the
# predicates and state are set up on this join and self is returned.
def pairs(*preds, &blk)
  ## XXX Need to do this for all the join modifiers
  unless @refcount == 1
    # re-splat preds and pass blk as a block, so the copy receives exactly
    # the arguments this call received
    return copy_on_write.pairs(*preds, &blk)
  end
  @origpreds = preds
  setup_preds(preds) unless preds.empty?
  # given new preds, the state for the join will be different. set it up again.
  setup_state if self.class <= Bud::PushSHJoin
  set_block(&blk) if blk
  self
end
|
440
|
+
|
441
|
+
# given a * expression over 2 collections, form all combos of items that
|
442
|
+
# satisfy +preds+, and for any item from the 1st collection that has no
|
443
|
+
# matches in the 2nd, nil-pad it and include it in the output.
|
444
|
+
public
|
445
|
+
# Set up the join with +preds+ and +blk+ via pairs, then mix
# Bud::PushSHOuterJoin into the join that pairs returned.
#
# pairs may return a different join than self (it delegates to a copy when
# this join is shared), so the module is applied to its return value, and
# that join is returned.
def outer(*preds, &blk)
  pairs(*preds, &blk).extend(Bud::PushSHOuterJoin)
end
|
449
|
+
|
450
|
+
|
451
|
+
public
|
452
|
+
# Join on +preds+ but emit only the right-hand element of each pair,
# passed through +blk+ when one is given. Without a block, this join's
# columns are set to those of the right input's table and accessors are
# set up; with a block the columns are cleared.
def rights(*preds, &blk)
  if blk
    @cols = nil
    pairs(*preds) { |_left, right| blk.call(right) }
  else
    @cols = @bud_instance.tables[@rels[1].tabname].cols
    setup_accessors
    pairs(*preds) { |_left, right| right }
  end
end
|
459
|
+
|
460
|
+
public
|
461
|
+
# Join on +preds+ but emit only the left-hand element of each pair, passed
# through +blk+ when one is given. Without a block, this join's columns
# are set to those of the left input's table and accessors are set up;
# with a block the columns are cleared.
def lefts(*preds, &blk)
  if blk
    @cols = nil
    pairs(*preds) { |left, _right| blk.call(left) }
  else
    @cols = @bud_instance.tables[@rels[0].tabname].cols
    setup_accessors
    pairs(*preds) { |left, _right| left }
  end
end
|
468
|
+
|
469
|
+
private
|
470
|
+
# Rename the columns in +flat_schema+ so that no name occurs twice.
#
# A trailing "_<digits>" is stripped from each name first; the first
# occurrence of a base name keeps it, later occurrences get "_1", "_2", ...
# appended. The pass is repeated until the result has no collisions.
#
# Returns the list of symbols. An empty schema is returned unchanged.
def dupfree_schema(flat_schema)
  # nothing to rename; without this guard the loop below never terminates
  return flat_schema if flat_schema.empty?

  loop do
    counts = Hash.new(0)
    renamed = flat_schema.map do |col|
      stem, sep, suffix = col.to_s.rpartition("_")
      col = stem.to_sym if sep == "_" && (suffix =~ /^\d+$/) == 0
      seen = counts[col]
      counts[col] += 1
      seen.zero? ? col.to_s.to_sym : "#{col}_#{seen}".to_sym
    end
    flat_schema = renamed
    # renaming can itself create a collision, so re-check and go again
    return flat_schema if renamed.uniq.length == renamed.length
  end
end
|
492
|
+
|
493
|
+
public
|
494
|
+
# Join on +preds+ and emit each matching pair as one flat array
# (x.to_a + y.to_a), passed through +blk+ when one is given. Without a
# block the output columns become the duplicate-free concatenation of the
# column lists of the tables named by @cols[0] and @cols[1]; with a block
# they are reset to an empty list.
def flatten(*preds, &blk)
  @cols =
    if blk.nil?
      dupfree_schema(@bud_instance.tables[@cols[0]].cols + @bud_instance.tables[@cols[1]].cols)
    else
      []
    end
  setup_accessors
  pairs(*preds) do |x, y|
    combined = x.to_a + y.to_a
    blk.nil? ? combined : blk.call(combined)
  end
end
|
505
|
+
|
506
|
+
private_class_method
|
507
|
+
# Build natural-join predicates for +rels+: for every pair of inputs
# (earlier one first) and every column name they share, one predicate
# [r.<col>, s.<col>]. Duplicate predicates are removed. +bud_instance+ is
# accepted but not used.
def self.natural_preds(bud_instance, rels)
  preds = []
  rels.to_a.combination(2) do |r, s|
    (r.cols & s.cols).each do |c|
      preds << [r.send(c), s.send(c)]
    end
  end
  preds.uniq
end
|
521
|
+
|
522
|
+
public
|
523
|
+
# Join using the natural-join predicates computed over all underlying
# inputs, applying +blk+ if given.
def matches(&blk)
  natural = self.class.natural_preds(@bud_instance, @all_rels_below)
  pairs(*natural, &blk)
end
|
527
|
+
|
528
|
+
alias combos pairs
|
529
|
+
end
|
530
|
+
|
531
|
+
module PushSHOuterJoin
|
532
|
+
|
533
|
+
private
|
534
|
+
# Outer-join variant of insert_item: builds +item+ into the hash table for
# side +offset+ and probes the other side. A newly added left item
# (offset 0) with no matches has its key recorded in @missing_keys;
# otherwise the key is removed from @missing_keys and any matches are
# processed.
def insert_item(item, offset)
  the_key =
    if @keys.nil? || @keys.empty?
      nil
    elsif all_rels_below.length > 2 && offset == 1
      # NOTE(review): PushSHJoin#insert_item applies the nested lookup to
      # offset 0 with @keys[0]; confirm offset 1 / @keys[1] is intended here
      item[@keys[1][0]][@keys[1][1]]
    else
      item[@keys[offset][1]]
    end

  # build
  # puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
  bucket = (@hash_tables[offset][the_key] ||= Set.new)
  return unless bucket.add? item

  @found_delta = true
  # and probe
  # puts "probing #{item.inspect} into @source[#{1-offset}] on key #{the_key.inspect}"
  the_matches = @hash_tables[1-offset][the_key]
  if the_matches.nil? && offset == 0 # only doing Left Outer Join right now
    @missing_keys << the_key
  else
    @missing_keys.delete(the_key) # no longer missing no matter which side this tuple is
    process_matches(item, the_matches, offset) unless the_matches.nil?
  end
end
|
559
|
+
|
560
|
+
public
|
561
|
+
# At the end of a stratum: flush the buffered input, then emit the left
# tuples whose keys are still recorded as missing.
def stratum_end
  flush
  return push_missing
end
|
565
|
+
|
566
|
+
private
|
567
|
+
# For every key recorded in @missing_keys, push out each left tuple stored
# under that key, paired with an empty array in the right position.
def push_missing
  return unless @missing_keys

  @missing_keys.each do |key|
    @hash_tables[0][key].each { |t| push_out([t, []]) }
  end
end
|
576
|
+
end
|
577
|
+
|
578
|
+
class PushNotIn < PushSHJoin
|
579
|
+
# Build a not-in node over +rellist+. When no predicates are supplied,
# position-wise column predicates are derived from the two inputs. The
# output columns are those of the first input, and the exclusion set
# starts out empty.
def initialize(rellist, bud_instance, preds=nil, &blk) # :nodoc: all
  preds = positionwise_preds(bud_instance, rellist) if preds.nil? || preds.empty?
  super(rellist, bud_instance, preds)
  set_block(&blk)
  @cols = rellist[0].cols
  @exclude = Set.new
end
|
588
|
+
|
589
|
+
|
590
|
+
# Pair up the column names of rels[0] and rels[1] by position.
#
# Returns a one-element array holding a hash that maps each column of
# rels[0] to the column of rels[1] at the same position (the form expected
# by setup_preds), or an empty array when the two inputs have different
# numbers of columns. +bud_instance+ is accepted but not used.
def positionwise_preds(bud_instance, rels)
  left_cols = rels[0].cols
  right_cols = rels[1].cols
  return [] if left_cols.length != right_cols.length

  [Hash[left_cols.zip(right_cols)]]
end
|
597
|
+
|
598
|
+
public
|
599
|
+
# Always true for this element type.
def rescan_at_tick
  return true
end
|
602
|
+
|
603
|
+
# Called from PushSHJoin::process_matches with a two-element array: a tuple
# from rels[0] and a tuple from rels[1]. Nothing is pushed downstream here;
# the left tuple is added to @exclude when there is no block or when the
# block returns a truthy value for the pair. Output happens at stratum end.
def push_out(item) # item is a two element array, a tuple from rels[0] and rels[1]
  #puts "#{item} ===> #{do_exclude}"
  @exclude << item[0] if @blk.nil? || @blk.call(item)
end
|
609
|
+
|
610
|
+
public
|
611
|
+
# Empty the set of excluded left tuples.
def invalidate_cache
  return @exclude.clear
end
|
614
|
+
|
615
|
+
# At the end of a stratum: flush the buffered input, then scan all cached
# left tuples and send every one that is not in @exclude to each output,
# and to the delete, delete-key and pending targets.
#
# Raises RuntimeError when an output is neither a Bud::PushElement nor a
# Bud::BudCollection.
def stratum_end
  flush
  # Scan through all the cached left rel values, and push out those that are not in exclude
  @hash_tables[0].each_value do |bucket|
    bucket.each do |item|
      next if @exclude.member? item

      @outputs.each do |ou|
        case ou
        when Bud::PushElement
          ou.insert(item, self)
        when Bud::BudCollection
          ou.do_insert(item, ou.new_delta)
        else
          raise "Expected either a PushElement or a BudCollection"
        end
      end

      next if item.nil?
      # for all the following, o is a BudCollection
      @deletes.each { |o| o.pending_delete([item]) }
      @delete_keys.each { |o| o.pending_delete_keys([item]) }
      @pendings.each { |o| o.pending_merge([item]) }
    end
  end
end
|
637
|
+
end
|
638
|
+
end
|
data/lib/bud/graphs.rb
CHANGED
@@ -74,8 +74,8 @@ class GraphGen #:nodoc: all
|
|
74
74
|
# bottom if the predicate is not in a NEG/+ cycle. otherwise,
|
75
75
|
# its name is "CYC" + concat(sort(predicate names))
|
76
76
|
depends.each do |d|
|
77
|
-
head = d
|
78
|
-
body = d
|
77
|
+
head = d.lhs
|
78
|
+
body = d.body
|
79
79
|
|
80
80
|
if @builtin_tables.has_key?(head.to_sym) or @builtin_tables.has_key?(body.to_sym)
|
81
81
|
next
|
@@ -83,9 +83,9 @@ class GraphGen #:nodoc: all
|
|
83
83
|
|
84
84
|
head = name_of(head)
|
85
85
|
body = name_of(body)
|
86
|
-
addonce(head, (head != d
|
87
|
-
addonce(body, (body != d
|
88
|
-
addedge(body, head, d
|
86
|
+
addonce(head, (head != d.lhs), true)
|
87
|
+
addonce(body, (body != d.body))
|
88
|
+
addedge(body, head, d.op, d.nm, (head != d.lhs), d.rule_id)
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
@@ -98,7 +98,6 @@ class GraphGen #:nodoc: all
|
|
98
98
|
when :D, :G
|
99
99
|
"red"
|
100
100
|
else
|
101
|
-
puts "UNKNOWN tag #{paths[0][:val]} class #{paths[0][:val].class}"
|
102
101
|
"black"
|
103
102
|
end
|
104
103
|
end
|
@@ -219,8 +218,8 @@ class GraphGen #:nodoc: all
|
|
219
218
|
end
|
220
219
|
|
221
220
|
unless depanalysis.nil?
|
222
|
-
depanalysis.source.each {|s| addedge("S", s.pred, false, false, false)}
|
223
|
-
depanalysis.sink.each {|s| addedge(s.pred, "T", false, false, false)}
|
221
|
+
depanalysis.source.to_a.each {|s| addedge("S", s.pred, false, false, false)}
|
222
|
+
depanalysis.sink.to_a.each {|s| addedge(s.pred, "T", false, false, false)}
|
224
223
|
|
225
224
|
unless depanalysis.underspecified.empty?
|
226
225
|
addonce("??", false)
|
@@ -229,7 +228,7 @@ class GraphGen #:nodoc: all
|
|
229
228
|
@nodes["??"].penwidth = 2
|
230
229
|
end
|
231
230
|
|
232
|
-
depanalysis.underspecified.each do |u|
|
231
|
+
depanalysis.underspecified.to_a.each do |u|
|
233
232
|
if u.input
|
234
233
|
addedge(u.pred, "??", false, false, false)
|
235
234
|
else
|
@@ -297,10 +296,11 @@ class SpaceTime
|
|
297
296
|
squeues.each do |k, v|
|
298
297
|
v.each_with_index do |item, i|
|
299
298
|
label = "#{k}-#{item}"
|
299
|
+
params = {:label => item.to_s, :width => 0.1, :height => 0.1, :fontsize => 6, :group => k}
|
300
300
|
if @links
|
301
|
-
|
301
|
+
params[:URL] = "DBM_#{k}/tm_#{item}.svg"
|
302
302
|
end
|
303
|
-
snd = @subs[k].add_nodes(label,
|
303
|
+
snd = @subs[k].add_nodes(label, params)
|
304
304
|
unless @head[k].id == snd.id
|
305
305
|
@subs[k].add_edges(@head[k], snd, :weight => 2)
|
306
306
|
@head[k] = snd
|
@@ -322,6 +322,7 @@ class SpaceTime
|
|
322
322
|
def finish(file, fmt=nil)
|
323
323
|
@edges.each_pair do |k, v|
|
324
324
|
lbl = v[3] > 1 ? "#{v[2]}(#{v[3]})" : v[2]
|
325
|
+
lbl ||= ""
|
325
326
|
@g.add_edges(v[0], v[1], :label => lbl, :color => "red", :weight => 1)
|
326
327
|
end
|
327
328
|
if fmt.nil?
|