bud 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +9 -0
- data/README +30 -0
- data/bin/budplot +134 -0
- data/bin/budvis +201 -0
- data/bin/rebl +4 -0
- data/docs/README.md +13 -0
- data/docs/bfs.md +379 -0
- data/docs/bfs.raw +251 -0
- data/docs/bfs_arch.png +0 -0
- data/docs/bloom-loop.png +0 -0
- data/docs/bust.md +83 -0
- data/docs/cheat.md +291 -0
- data/docs/deploy.md +96 -0
- data/docs/diffs +181 -0
- data/docs/getstarted.md +296 -0
- data/docs/intro.md +36 -0
- data/docs/modules.md +112 -0
- data/docs/operational.md +96 -0
- data/docs/rebl.md +99 -0
- data/docs/ruby_hooks.md +19 -0
- data/docs/visualizations.md +75 -0
- data/examples/README +1 -0
- data/examples/basics/hello.rb +12 -0
- data/examples/basics/out +1103 -0
- data/examples/basics/out.new +856 -0
- data/examples/basics/paths.rb +51 -0
- data/examples/bust/README.md +9 -0
- data/examples/bust/bustclient-example.rb +23 -0
- data/examples/bust/bustinspector.html +135 -0
- data/examples/bust/bustserver-example.rb +18 -0
- data/examples/chat/README.md +9 -0
- data/examples/chat/chat.rb +45 -0
- data/examples/chat/chat_protocol.rb +8 -0
- data/examples/chat/chat_server.rb +29 -0
- data/examples/deploy/tokenring-ec2.rb +26 -0
- data/examples/deploy/tokenring-local.rb +17 -0
- data/examples/deploy/tokenring.rb +39 -0
- data/lib/bud/aggs.rb +126 -0
- data/lib/bud/bud_meta.rb +185 -0
- data/lib/bud/bust/bust.rb +126 -0
- data/lib/bud/bust/client/idempotence.rb +10 -0
- data/lib/bud/bust/client/restclient.rb +49 -0
- data/lib/bud/collections.rb +937 -0
- data/lib/bud/depanalysis.rb +44 -0
- data/lib/bud/deploy/countatomicdelivery.rb +50 -0
- data/lib/bud/deploy/deployer.rb +67 -0
- data/lib/bud/deploy/ec2deploy.rb +200 -0
- data/lib/bud/deploy/localdeploy.rb +41 -0
- data/lib/bud/errors.rb +15 -0
- data/lib/bud/graphs.rb +405 -0
- data/lib/bud/joins.rb +300 -0
- data/lib/bud/rebl.rb +314 -0
- data/lib/bud/rewrite.rb +523 -0
- data/lib/bud/rtrace.rb +27 -0
- data/lib/bud/server.rb +43 -0
- data/lib/bud/state.rb +108 -0
- data/lib/bud/storage/tokyocabinet.rb +170 -0
- data/lib/bud/storage/zookeeper.rb +178 -0
- data/lib/bud/stratify.rb +83 -0
- data/lib/bud/viz.rb +65 -0
- data/lib/bud.rb +797 -0
- metadata +330 -0
data/lib/bud/graphs.rb
ADDED
@@ -0,0 +1,405 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'graphviz'
|
4
|
+
|
5
|
+
class GraphGen #:nodoc: all
|
6
|
+
|
7
|
+
# Set up an empty GraphViz digraph plus the lookup tables used while drawing.
#
# mapping       - rows where [0] is a table name and [1] its stratum
#                 (converted with to_i).
# tableinfo     - rows where [0] is a table name (stringified) and [1] its
#                 collection type; other methods compare it against strings
#                 such as "Bud::BudTable".
# cycle         - rows describing cycle edges; an edge [0] -> [1] is recorded
#                 only when both [2] and [3] are truthy (presumably the
#                 "negated"/"temporal" flags -- confirm against the caller).
# name          - base path of the output files.
# budtime       - current timestep, embedded in generated URLs.
# vizlevel      - verbosity level; >= 3 switches URLs/output dir.
# pics_dir      - output directory used when vizlevel >= 3.
# collapse      - when true, cycle members are merged into a single node.
# depanalysis   - optional object providing source/sink/underspecified.
# cardinalities - optional map: node name -> value appended to its label.
def initialize(mapping, tableinfo, cycle, name, budtime, vizlevel, pics_dir, collapse=false, depanalysis=nil, cardinalities={})
  @graph = GraphViz.new(:G, :type => :digraph, :label => "")
  @graph.node[:fontname] = "Times-Roman"
  @graph.node[:fontsize] = 18
  @graph.edge[:fontname] = "Times-Roman"
  @graph.edge[:fontsize] = 18

  @tiers = []
  @cards = cardinalities
  @name = name
  @collapse = collapse
  @depanalysis = depanalysis
  @budtime = budtime
  @vizlevel = vizlevel
  @pics_dir = pics_dir
  # collections that are never drawn
  @internals = {'count' => 1, 'localtick' => 1, 'stdio' => 1}

  # map: table -> stratum
  @t2s = {}
  mapping.each { |row| @t2s[row[0]] = row[1].to_i }

  # map: table -> type
  @tabinf = {}
  tableinfo.each { |row| @tabinf[row[0].to_s] = row[1] }

  # map: predicate -> list of predicates it reaches through a flagged cycle edge
  @redcycle = {}
  cycle.each do |c|
    puts "CYCLE: #{c.inspect}"
    next unless c[2] && c[3]
    (@redcycle[c[0]] ||= []) << c[1]
  end

  @nodes = {}
  @edges = {}
  @labels = {}
end
|
52
|
+
|
53
|
+
# Look up the stratum of +tab+.
#
# If +tab+ is a known table its stratum is returned directly. Otherwise
# +tab+ is treated as a comma-separated list of table names (the form
# produced by name_of for collapsed cycles, which joins with ", ") and the
# largest stratum among the known members is returned; 0 if none is known.
#
# Whitespace around each member is stripped so that names joined with ", "
# are found in @t2s.
def safe_t2s(tab)
  direct = @t2s[tab]
  return direct if direct

  strata = tab.split(",").map { |word| @t2s[word.strip] }.compact
  # seed with 0 so that unknown (or non-positive) members yield 0, as before
  ([0] + strata).max
end
|
67
|
+
|
68
|
+
# Collect into +bag+ every predicate reachable from +predicate+ by following
# @redcycle links (depth-first). +bag+ is a hash used as a set
# (name => true); it is mutated in place and also returned.
def name_bag(predicate, bag)
  # already visited: stops the recursion on cycles
  return bag if bag[predicate]

  bag[predicate] = true
  successors = @redcycle[predicate]
  return bag if successors.nil?

  successors.reduce(bag) { |acc, rp| name_bag(rp, acc) }
end
|
84
|
+
|
85
|
+
# Return the node name to draw for +predicate+.
#
# When collapsing is enabled and the predicate has an entry in @redcycle,
# the name is the sorted, ", "-joined list of every predicate gathered by
# name_bag; otherwise the predicate itself is returned unchanged.
def name_of(predicate)
  # consider doing this in bud
  return predicate unless @redcycle[predicate] && @collapse

  puts "collapse #{predicate}, redcycle #{@redcycle[predicate].inspect}"
  members = name_bag(predicate, {})
  members.keys.sort.join(", ")
end
|
99
|
+
|
100
|
+
# Add one pair of nodes and one edge to the graph per dependency row.
#
# Each row is read positionally: [0] rule id, [1] head, [2] operator,
# [3] body, [4] nonmonotonic flag. Rows touching an internal collection are
# skipped. Head and body are first mapped through name_of, so members of a
# collapsed cycle share one node; a node/edge is flagged as a "negcluster"
# when name_of changed the name.
def process(depends)
  depends.each do |d|
    raw_head = d[1]
    raw_body = d[3]

    # hack attack: never draw internal collections
    next if @internals[raw_head] || @internals[raw_body]

    head = name_of(raw_head)
    body = name_of(raw_body)
    head_collapsed = (head != raw_head)

    addonce(head, head_collapsed)
    addonce(body, (body != raw_body))
    addedge(body, head, d[2], d[4], head_collapsed, d[0])
  end
end
|
124
|
+
|
125
|
+
# Ensure a graph node named +node+ exists, then style it.
#
# On first sight the node is created, optionally labelled with its
# cardinality from @cards, and given a URL (a javascript openWin call when
# @vizlevel >= 3, otherwise "<node>.html").
#
# On every call: when +negcluster+ is true the node is drawn as a red
# octagon whose label lists the ", "-separated members four per line and
# whose URL points at the expanded SVG; otherwise nodes whose type is
# "Bud::BudTable" are drawn as rectangles.
def addonce(node, negcluster)
  unless @nodes[node]
    fresh = @graph.add_node(node)
    @nodes[node] = fresh
    if @cards && @cards[node]
      fresh.label = node +"\n (#{@cards[node].to_s})"
      puts "IMAGE IS #{@cards[node]}"
    end
    fresh.URL = (@vizlevel >= 3) ? "javascript:openWin(\"#{node}\", #{@budtime})" : "#{node}.html"
  end

  entry = @nodes[node]
  if negcluster
    # cleaning: wrap the member list after every fourth name
    members = node.split(", ")
    wrapped = members.empty? ? node : members.each_slice(4).map { |row| row.join(", ") }.join(",\n")
    entry.label = wrapped
    entry.color = "red"
    entry.shape = "octagon"
    entry.penwidth = 3
    entry.URL = "#{File.basename(@name)}_expanded.svg"
  elsif @tabinf[node] && (@tabinf[node] == "Bud::BudTable")
    entry.shape = "rect"
  end
end
|
164
|
+
|
165
|
+
# Ensure an edge body -> head exists in the graph and decorate it.
#
# body, head - node names (anything responding to to_s); nothing happens
#              when either is nil.
# op         - rule operator: '<+' and '<-' add the ' +/-' label, '<~'
#              draws the edge dashed, '<-' also uses the 'veeodot' arrowhead.
# nm         - truthy for a nonmonotonic dependency ('veeodot' arrowhead,
#              unless the head is "T").
# negcluster - when truthy, self-loops (body == head) are not drawn.
# rule_id    - optional; when given the edge links to "<rule_id>.html".
#
# Edges are keyed by the [body, head] pair rather than by the concatenated
# names, so distinct pairs such as ("ab", "c") and ("a", "bc") no longer
# share one edge.
def addedge(body, head, op, nm, negcluster, rule_id=nil)
  return if body.nil? or head.nil?
  body = body.to_s
  head = head.to_s
  return if negcluster and body == head

  ekey = [body, head]
  unless @edges[ekey]
    edge = @graph.add_edge(@nodes[body], @nodes[head], :penwidth => 5)
    edge.arrowsize = 2
    edge.URL = "#{rule_id}.html" unless rule_id.nil?
    # edges into message collections get a little more room
    edge.minlen = (head =~ /_msg\z/) ? 2 : 1.5
    @edges[ekey] = edge
    @labels[ekey] = {}
  end

  edge = @edges[ekey]
  case op
  when '<+'
    @labels[ekey][' +/-'] = true
  when "<~"
    edge.style = 'dashed'
  when "<-"
    @labels[ekey][' +/-'] = true
    edge.arrowhead = 'veeodot'
  end

  # hm, nonmono
  edge.arrowhead = 'veeodot' if nm and head != "T"
end
|
202
|
+
|
203
|
+
# Finalize the graph and write it to disk.
#
# Steps, in order:
# 1. give every edge its accumulated label;
# 2. add the "S" and "T" diamond nodes, linked to the previous/next
#    timestep through javascript advanceTo URLs;
# 3. add a node for every known table not drawn yet (skipping names that
#    contain "_tbl", internal collections, and "t_"-prefixed tables unless
#    @budtime is 0), and an edge from "S" to every "Bud::BudPeriodic" table;
# 4. if a dependency analysis is available, connect its sources to "S", its
#    sinks to "T", and underspecified predicates to a red "??" node;
# 5. render "<name>_<collapsed|expanded>.svg" (plus ".dot" and ".png"
#    siblings), replacing the SVG's "<title>G</title>" with the script from
#    svg_javascript.
#
# Files are opened in block form so both handles are closed even if the
# copy raises, and the script is built once rather than once per line.
def finish
  @labels.each_key do |k|
    @edges[k].label = @labels[k].keys.join(" ")
  end

  addonce("S", false)
  addonce("T", false)

  @nodes["T"].URL = "javascript:advanceTo(#{@budtime+1})"
  @nodes["S"].URL = "javascript:advanceTo(#{@budtime-1})"

  @nodes["S"].color = "blue"
  @nodes["T"].color = "blue"
  @nodes["S"].shape = "diamond"
  @nodes["T"].shape = "diamond"

  @nodes["S"].penwidth = 3
  @nodes["T"].penwidth = 3

  @tabinf.each_pair do |k, v|
    unless @nodes[name_of(k.to_s)] or k.to_s =~ /_tbl/ or @internals[k.to_s] or (k.to_s =~ /^t_/ and @budtime != 0)
      addonce(k.to_s, false)
    end
    if v == "Bud::BudPeriodic"
      puts "adding edge S -> #{@nodes[k.to_s]}"
      addedge("S", k.to_s, false, false, false)
    end
  end

  unless @depanalysis.nil?
    @depanalysis.source.each {|s| addedge("S", s.pred, false, false, false) }
    @depanalysis.sink.each {|s| addedge(s.pred, "T", false, false, false) }

    unless @depanalysis.underspecified.empty?
      addonce("??", false)
      @nodes["??"].color = "red"
      @nodes["??"].shape = "diamond"
      @nodes["??"].penwidth = 2
    end

    @depanalysis.underspecified.each do |u|
      if u.input
        addedge(u.pred, "??", false, false, false)
      else
        addedge("??", u.pred, false, false, false)
      end
    end
  end

  suffix = @collapse ? "collapsed" : "expanded"
  fn = "#{@name}_#{suffix}.svg"
  puts "fn is #{fn}"
  staging = "#{fn}_staging"
  @graph.output(:svg => staging)
  @graph.output(:dot => "#{fn}.dot")
  @graph.output(:png => "#{fn}.png")

  # copy the staged SVG, splicing the script in place of the title element
  script = svg_javascript()
  File.open(staging, "r") do |fin|
    File.open(fn, "w") do |fout|
      fin.each_line do |line|
        fout.puts line.gsub("<title>G</title>", script)
      end
    end
  end
  File.delete(staging)
end
|
270
|
+
|
271
|
+
# Directory that generated HTML/CSS files are written to: @pics_dir when
# @vizlevel is 3 or higher, otherwise the fixed "bud_doc" directory.
def output_base
  @vizlevel >= 3 ? @pics_dir : "bud_doc"
end
|
278
|
+
|
279
|
+
# Write the HTML companion files for the graph into output_base.
#
# shredded_rules - rows read positionally: [0] rule id, [1] the name the
#                  rule's source is grouped under (presumably the head
#                  table -- confirm against the caller), [3] rule source.
#                  Nothing is written when nil.
#
# Produces style.css, one "<rule id>.html" page per rule with its
# syntax-highlighted source, and one "<node>.html" page per graph node
# containing the highlighted source grouped under each ", "-separated
# member of the node name.
#
# Every file is opened in block form so its handle is closed even when
# highlighting or writing raises.
def dump(shredded_rules)
  return if shredded_rules.nil?

  File.open("#{output_base}/style.css", "w") { |fout| fout.puts css }

  code = {}
  rules = {}
  convertor = Syntax::Convertors::HTML.for_syntax "ruby"
  shredded_rules.each do |s|
    File.open("#{output_base}/#{s[0]}.html", "w+") do |fout|
      fout.puts header
      fout.puts "<h1>Rule #{s[0]}</h1><br>"

      c = convertor.convert(s[3])
      c.sub!(/^<pre>/, "<pre class=\"code\" style='font-size:20px'>\n")
      fout.puts c
    end
    rules[s[0]] = [s[1], s[3]]
  end

  # concatenate the source of all rules that share a group name
  rules.each_pair do |k, v|
    code[v[0]] ||= ""
    code[v[0]] = "\n# RULE #{k}\n " + code[v[0]] + "\n" + v[1]
  end

  @nodes.each_pair do |k, v|
    File.open("#{output_base}/#{k}.html", "w+") do |fout|
      fout.puts header
      k.split(", ").each do |i|
        next if code[i].nil?
        c = convertor.convert(code[i])
        c.sub!(/^<pre>/, "<pre class=\"code\">\n")
        fout.puts c
      end
      fout.puts("</body></html>")
    end
  end
end
|
323
|
+
|
324
|
+
|
325
|
+
# Opening HTML shared by every generated page: the charset meta tag, a link
# to style.css and the opening <body> tag.
def header
  meta = "<html><meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>"
  head = "<head><link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" /></head><body>"
  meta + "\n" + head
end
|
328
|
+
|
329
|
+
# Stylesheet text written to style.css: a bordered, horizontally scrollable
# "pre.code" box plus one colour rule per syntax-highlighting span class.
def css
  <<-'CSS'
pre.code {
padding: 1ex 1ex 1ex 1ex;
border: 4px groove #CC0000;
overflow-x: auto;
}

pre.code span.attribute { color: #009900; }
pre.code span.char { color: #F00; }
pre.code span.class { color: #A020F0; font-weight: bold; }
pre.code span.comment { color: #0000FF; }
pre.code span.constant { color: #008B8B; }
pre.code span.escape { color: #6A5ACD; }
pre.code span.expr { color: #2222CC; }
pre.code span.global { color: #11AA44; }
pre.code span.ident { color: #000000; }
pre.code span.keyword { color: #A52A2A; font-weight: bold; }
pre.code span.method { color: #008B8B; }
pre.code span.module { color: #A020F0; font-weight: bold; }
pre.code span.number { color: #DD00DD; }
pre.code span.punct { color: #6A5ACD; }
pre.code span.regex { color: #DD00DD; }
pre.code span.string { color: #DD00DD; }
pre.code span.symbol { color: #008B8B; }
  CSS
end
|
355
|
+
|
356
|
+
|
357
|
+
end
|
358
|
+
|
359
|
+
# Script element spliced into the generated SVG in place of its <title>.
#
# The embedded JavaScript defines openWin (opens "<target>_<time>.html" in
# a popup and remembers the target), advanceTo (re-opens the windows named
# in the "wins" query parameter for another timestep, then navigates to
# "tm_<time>_expanded.svg") and gup (reads a query parameter from the URL).
def svg_javascript
  <<-"SVG_JS"

<script type='text/javascript'>
<![CDATA[

var windows = new Array()
var info = new Array()

function openWin(target, time) {
win = window.open(target + \"_\" + time + \".html\", target, \"location=no,width=400,height=180,left=0,status=no\");
// hm, an associative array, how strange.
info[target] = 1
}

function advanceTo(time) {
arr = gup(\"wins\").split(\",\");
for (i=0; i < arr.length; i++) {
if (arr[i] != \"\") {
openWin(arr[i], time);
}
}
str = '';
// getting 'key_cols'
for (var i in info) {
str = str + ',' + i;
}
self.window.location.href = 'tm_' + time + '_expanded.svg?wins=' + str;
}

// off the netz
function gup( name )
{
name = name.replace(/[\[]/,\"\\\[\").replace(/[\]]/,\"\\\]\");
var regexS = \"[\\?&]\"+name+\"=([^&#]*)\";
var regex = new RegExp( regexS );
var results = regex.exec( window.location.href );
if( results == null )
return \"\";
else
return results[1];
}

]]>
</script>
  SVG_JS
end
|
405
|
+
|
data/lib/bud/joins.rb
ADDED
@@ -0,0 +1,300 @@
|
|
1
|
+
module Bud
|
2
|
+
class BudJoin < BudCollection
|
3
|
+
attr_accessor :rels, :origrels, :origpreds # :nodoc: all
|
4
|
+
attr_reader :hash_tables # :nodoc: all
|
5
|
+
|
6
|
+
# Build a join over the collections in +rellist+.
#
# rellist      - two or more collections; any element that is itself a
#                BudJoin is replaced by its original inputs, and its
#                original predicates are appended to +preds+.
# bud_instance - the owning Bud instance (holds the shared join state).
# preds        - optional join predicates, handed to setup_preds when
#                non-empty.
#
# Joins over more than two inputs are represented as a right-deep tree of
# binary joins: @rels holds the first input and either the second input or
# a nested BudJoin over the rest. @schema gets one column per input, named
# after the input's table with "_N" appended to repeated names.
#
# Raises BudError when fewer than two collections remain after flattening
# (this previously failed with a NoMethodError deep in the recursion).
def initialize(rellist, bud_instance, preds=nil) # :nodoc: all
  @origpreds = preds
  @bud_instance = bud_instance
  @localpreds = nil

  # if any elements on rellist are BudJoins, suck up their contents
  tmprels = []
  rellist.each do |r|
    if r.class <= BudJoin
      tmprels += r.origrels
      # either side may be nil (preds defaults to nil), so normalize first
      preds = (preds || []) + (r.origpreds || [])
    else
      tmprels << r
    end
  end
  rellist = tmprels
  @origrels = rellist
  raise BudError, "join requires at least two relations" if rellist.length < 2

  # recurse to form a tree of binary BudJoins
  @rels = [rellist[0]]
  @rels << (rellist.length == 2 ? rellist[1] : BudJoin.new(rellist[1..rellist.length-1], @bud_instance, nil))

  # derive schema: one column for each table.
  # duplicated inputs get distinguishing numeral
  @schema = []
  seen = {}
  rellist.each do |r|
    tname = r.tabname.to_s
    seen[tname] ||= 0
    suffix = (seen[tname] > 0) ? ("_" + seen[tname].to_s) : ""
    @schema << (tname + suffix).to_sym
    seen[tname] += 1
  end

  setup_preds(preds) unless preds.nil? or preds.empty?
  setup_state
end
|
45
|
+
|
46
|
+
public
|
47
|
+
# Identifier for this join's cached state: the hash of the marshalled pair
# (table names of @rels, @localpreds).
def state_id # :nodoc: all
  rel_names = @rels.map { |rel| rel.tabname }
  serialized = Marshal.dump([rel_names, @localpreds])
  serialized.hash
end
|
50
|
+
|
51
|
+
# initialize the state for this join to be carried across iterations within a fixpoint
|
52
|
+
private
|
53
|
+
# Name this join "temp_join<state id>" and attach it to the per-instance
# join state for that id, creating a pair of empty
# {:storage, :delta} hash tables on first use.
def setup_state
  sid = state_id
  @tabname = ("temp_join"+sid.to_s).to_sym
  fresh_tables = [{:storage => {}, :delta => {}}, {:storage => {}, :delta => {}}]
  @hash_tables = (@bud_instance.joinstate[sid] ||= fresh_tables)
end
|
59
|
+
|
60
|
+
private_class_method
|
61
|
+
# Build natural-join predicates for +rels+: for every pair of relations
# whose first table name sorts strictly before the second, emit one
# [left column, right column] pair per column name the two schemas share.
# Columns are obtained by sending the table name to +bud_instance+ and the
# column name to the result. Duplicates are removed.
def self.natural_preds(bud_instance, rels)
  found = []
  rels.each do |left|
    rels.each do |right|
      # consider each unordered pair once, and never a table with itself
      next if left.tabname.to_s >= right.tabname.to_s

      (left.schema & right.schema).each do |col|
        found << [bud_instance.send(left.tabname).send(col), bud_instance.send(right.tabname).send(col)]
      end
    end
  end
  found.uniq
end
|
73
|
+
|
74
|
+
# flatten joined items into arrays, with attribute accessors inherited
|
75
|
+
# from the input collections, disambiguated via suffix indexes as needed.
|
76
|
+
# similar to <tt>SELECT * FROM ... WHERE...</tt> block in SQL.
|
77
|
+
public
|
78
|
+
# Flatten joined items into single arrays and return them in a new scratch
# collection whose schema is the concatenation of the input schemas.
#
# preds - optional join predicates, handed to setup_preds when given.
#
# Column names are made unique by stripping any trailing "_<digits>" and
# numbering repeated base names ("x", "x_1", "x_2", ...). The renaming is
# repeated until no duplicates remain. The pass always runs at least once
# and stops as soon as the result is duplicate-free, so an empty combined
# schema no longer loops forever.
def flatten(*preds)
  setup_preds(preds) unless preds.nil? or preds.size == 0
  flat_schema = @rels.map{|r| r.schema}.flatten(1)
  dupfree_schema = []
  loop do
    dupfree_schema = []
    counts = {}
    flat_schema.each do |r|
      # drop an existing numeric suffix so numbering restarts from the base name
      if r.to_s.include?("_") and ((r.to_s.rpartition("_")[2] =~ /^\d+$/) == 0)
        r = r.to_s.rpartition("_")[0].to_sym
      end
      counts[r] ||= 0
      if counts[r] == 0
        dupfree_schema << r.to_s.to_sym
      else
        dupfree_schema << (r.to_s + "_" + (counts[r]).to_s).to_sym
      end
      counts[r] += 1
    end
    break if dupfree_schema.uniq.length == dupfree_schema.length
    # collisions remain (inefficiently) rename again from the new names
    flat_schema = dupfree_schema
  end
  retval = BudScratch.new('temp_flatten', bud_instance, dupfree_schema)
  retval.uniquify_tabname
  retval.merge(self.map{|r,s| r + s}, retval.storage)
end
|
104
|
+
|
105
|
+
undef do_insert
|
106
|
+
|
107
|
+
public
|
108
|
+
# map each (nested) item in the collection into a string, suitable for placement in stdio
|
109
|
+
# Map each joined pair to a one-element array holding the string
# "[ <left.inspect> <right.inspect> ]".
# Raises BudError if @rels holds more than two entries.
def inspected
  if @rels.length > 2
    raise BudError, "join left unconverted to binary"
  end

  self.map do |r1, r2|
    ["\[ #{r1.inspect} #{r2.inspect} \]"]
  end
end
|
113
|
+
|
114
|
+
public
|
115
|
+
# Delegate to pairs, forwarding the given block unchanged.
def pro(&block) # :nodoc: all
  pairs(&block)
end
|
118
|
+
|
119
|
+
public
|
120
|
+
# Enumerate the join result, yielding each combination to the block.
#
# mode - :storage joins stored tuples only; any other value also considers
#        the :delta buffers. :delta additionally skips the
#        storage-with-storage combination. The mode is forced to :storage
#        while @bud_instance.stratum_first_iter is true.
#
# Each buffer combination is evaluated with a nested-loop join when there
# are no local predicates and with the hash join otherwise; afterwards
# tick_hash_deltas is called.
def each(mode=:both, &block) # :nodoc: all
  mode = :storage if @bud_instance.stratum_first_iter
  buffers = (mode == :storage) ? [:storage] : [:delta, :storage]

  buffers.each do |left_rel|
    buffers.each do |right_rel|
      next if mode == :delta && left_rel == :storage && right_rel == :storage

      if @localpreds.nil? || @localpreds.empty?
        nestloop_join(left_rel, right_rel, &block)
      else
        hash_join(left_rel, right_rel, &block)
      end
    end
  end
  tick_hash_deltas
end
|
140
|
+
|
141
|
+
public
|
142
|
+
# Run each once per entry of +buf_syms+, passing the entry as the mode and
# forwarding the block.
def each_from_sym(buf_syms, &block) # :nodoc: all
  buf_syms.each { |buf| each(buf, &block) }
end
|
147
|
+
|
148
|
+
private
|
149
|
+
# r is a tuple
|
150
|
+
# s is an array (combo) of joined tuples
|
151
|
+
# Check the local join predicates for one candidate combination.
#
# r     - a tuple from the left input.
# s     - an array (combo) of joined tuples from the right input.
# skips - predicates that are already known to hold and need no check.
#
# Returns true when every remaining predicate compares equal, and also when
# there is nothing to check (no local predicates, or no more of them than
# there are skips); false otherwise.
def test_locals(r, s, *skips)
  return true unless @localpreds && skips && @localpreds.length > skips.length

  # check remainder of the predicates
  @localpreds.all? do |pred|
    next true if skips.include? pred
    r_offset, s_index, s_offset = join_offsets(pred)
    !(r[r_offset] != s[s_index][s_offset])
  end
end
|
166
|
+
|
167
|
+
private
|
168
|
+
# Nested-loop join: pair every tuple of the left input's +left_rel+ buffer
# with every item of the right input's +right_rel+ buffer and yield
# [left tuple] + right combo for each pair accepted by test_locals.
# For a two-input join the right item is a single tuple and is wrapped in an
# array first.
def nestloop_join(left_rel, right_rel, &block)
  @rels[0].each_from_sym([left_rel]) do |outer|
    @rels[1].each_from_sym([right_rel]) do |inner|
      combo = (origrels.length == 2) ? [inner] : inner
      next unless test_locals(outer, combo)
      yield([outer] + combo)
    end
  end
end
|
176
|
+
|
177
|
+
private
|
178
|
+
# calculate the attribute position for the left table in the join ("left_offset")
|
179
|
+
# the right table may itself be a nested tuple from a join, so calculate
|
180
|
+
# the tuple offset ("right_subtuple") and the attribute position within it
|
181
|
+
# ("right_offset")
|
182
|
+
# Translate a predicate into tuple positions.
#
# pred - a pair [left_entry, right_entry]; each entry is read as
#        [table name, attribute position].
#
# Returns [left_offset, right_subtuple, right_offset]: the attribute
# position in the left tuple, the index of the right-hand table among the
# original inputs after the first (0 when it is not found), and the
# attribute position within that tuple.
def join_offsets(pred)
  left_offset = pred[0][1]
  right_name, right_offset = pred[1][0], pred[1][1]

  # the right collection doesn't contain the first entry of origrels,
  # which is the left collection
  position = origrels[1..origrels.length].index { |t| t.tabname == right_name }

  return left_offset, (position || 0), right_offset
end
|
201
|
+
|
202
|
+
# For hash_join: fold each side's :delta hash table into its :storage hash
# table (concatenating the bucket lists of keys present in both) and reset
# the :delta table. Does nothing when no hash tables are set up.
def tick_hash_deltas
  return if @hash_tables.nil?

  (0..1).each do |side|
    tables = @hash_tables[side]
    tables[:storage].merge!(tables[:delta]) { |_key, stored, fresh| stored + fresh }
    tables[:delta] = {}
  end
end
|
212
|
+
|
213
|
+
# semi-naive symmetric hash join on first predicate
|
214
|
+
private
|
215
|
+
# Semi-naive symmetric hash join on the first local predicate.
#
# left_sym, right_sym - the buffer (:storage or :delta) to read on each side.
#
# Each side in turn acts as the prober: its tuples are read, inserted into
# the prober's own hash table (only when both sides use the same buffer),
# and looked up in the other side's hash table. Matches are yielded as
# left combo + right combo when test_locals accepts the remaining
# predicates. When the two buffers differ, the :storage side never probes.
def hash_join(left_sym, right_sym, &block)
  first_pred = @localpreds.first
  left_offset, right_subtuple, right_offset = join_offsets(first_pred)

  syms = [left_sym, right_sym]

  syms.each_with_index do |probe_sym, probe_ix|
    other_ix = 1 - probe_ix # bit-flip
    other_sym = syms[other_ix]
    symmetric = (probe_sym == other_sym)

    # in a delta/storage join we do traditional one-sided hash join
    # so don't probe from the storage side.
    # the other side should have been built already!
    next if probe_sym == :storage && !symmetric

    # ready to do the symmetric hash join
    rels[probe_ix].each_from_sym([probe_sym]) do |r|
      # right-side items of a multiway join are already arrays of tuples
      r = [r] unless probe_ix == 1 and origrels.length > 2
      attrval = (probe_ix == 0) ? r[0][left_offset] : r[right_subtuple][right_offset]

      # insert into the prober's hashtable only if symmetric ...
      if symmetric
        (@hash_tables[probe_ix][probe_sym][attrval] ||= []) << r
      end

      # ...and probe the other hashtable
      bucket = @hash_tables[other_ix][other_sym][attrval]
      next if bucket.nil?
      bucket.each do |s_tup|
        left, right = (probe_ix == 0) ? [r, s_tup] : [s_tup, r]
        joined = left + right
        yield joined if test_locals(left[0], right, first_pred)
      end
    end
  end
end
|
257
|
+
end
|
258
|
+
|
259
|
+
class BudLeftJoin < BudJoin # :nodoc: all
|
260
|
+
# Build a left outer join over exactly two relations.
#
# rellist      - the two input collections (outer first).
# bud_instance - the owning Bud instance.
# preds        - optional join predicates; entries must be given as
#                :col1 => :col2 pairs, not as arrays.
#
# Raises BudError unless exactly two relations are given, and
# Bud::CompileError when a predicate key is an Array. A nil +preds+ (the
# default) is accepted and simply skips the predicate check.
def initialize(rellist, bud_instance, preds=nil)
  raise(BudError, "Left Join only defined for two relations") unless rellist.length == 2
  super(rellist, bud_instance, preds)
  @origpreds = preds
  unless preds.nil?
    preds.each do |k,v|
      if k.class <= Array
        raise Bud::CompileError, "in leftjoin, attribute refs must have style :col1 => :col2"
      end
    end
  end
end
|
270
|
+
|
271
|
+
public
|
272
|
+
# Enumerate the left outer join.
#
# mode - forwarded to BudJoin#each; defaults to :both, which is what a call
#        without arguments used before. Accepting it keeps the signature
#        compatible with BudJoin#each, which each_from_sym calls with a
#        mode argument.
#
# First yields every matching pair found by the regular join, then yields
# [outer tuple, null tuple] for each outer tuple that has no match among
# the stored tuples of the inner relation.
def each(mode=:both, &block) # :nodoc:all
  super(mode, &block)
  # previous line finds all the matches.
  # now it's time to "preserve" the outer tuples with no matches.
  # this is totally inefficient: we should fold the identification of non-matches
  # into the join algorithms. Another day.
  # our trick: for each tuple of the outer, generate a singleton relation
  # and join with inner. If result is empty, preserve tuple.
  @rels[0].each do |r|
    t = @origrels[0].clone_empty
    # need to uniquify the tablename here to avoid sharing join state with original
    t.uniquify_tabname
    t << r
    j = BudJoin.new([t, @origrels[1]], @bud_instance, @origpreds)

    # the following is "next if j.any?" on storage tuples *only*
    any = false
    j.each(:storage) do |_match|
      any = true
      break
    end
    next if any

    nulltup = @origrels[1].null_tuple
    yield [r, nulltup]
  end
end
|
299
|
+
end
|
300
|
+
end
|