bud 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -3
- data/bin/budvis +0 -66
- data/docs/README.md +27 -15
- data/docs/bust.md +1 -1
- data/docs/cheat.md +79 -30
- data/docs/operational.md +8 -4
- data/examples/basics/paths.rb +5 -3
- data/lib/bud/aggs.rb +1 -1
- data/lib/bud/bud_meta.rb +11 -2
- data/lib/bud/bust/bust.rb +1 -1
- data/lib/bud/collections.rb +78 -20
- data/lib/bud/deploy/threaddeploy.rb +1 -1
- data/lib/bud/errors.rb +3 -0
- data/lib/bud/graphs.rb +25 -26
- data/lib/bud/joins.rb +78 -33
- data/lib/bud/metrics.rb +43 -0
- data/lib/bud/monkeypatch.rb +1 -1
- data/lib/bud/rebl.rb +20 -13
- data/lib/bud/rewrite.rb +217 -39
- data/lib/bud/server.rb +16 -13
- data/lib/bud/state.rb +39 -25
- data/lib/bud/storage/dbm.rb +6 -1
- data/lib/bud/storage/tokyocabinet.rb +6 -0
- data/lib/bud/storage/zookeeper.rb +6 -6
- data/lib/bud/viz.rb +5 -1
- data/lib/bud/viz_util.rb +70 -0
- data/lib/bud.rb +227 -99
- metadata +33 -24
- data/docs/c.html +0 -251
- data/examples/deploy/deploy_ip_port +0 -1
- data/examples/deploy/keys.rb +0 -5
- data/lib/bud.rb.orig +0 -806
data/README
CHANGED
@@ -41,7 +41,6 @@ To run the unit tests:
|
|
41
41
|
|
42
42
|
The bud gem has a handful of mandatory dependencies. It also has two optional
|
43
43
|
dependencies: if you wish to use Bud collections backed by Zookeeper or Tokyo
|
44
|
-
Cabinet
|
45
|
-
|
46
|
-
installing the "tokyocabinet" gem, the Tokyo Cabinet libraries should be
|
44
|
+
Cabinet, the "zookeeper" and/or "tokyocabinet" gems must be installed. Note that
|
45
|
+
before installing the "tokyocabinet" gem, the Tokyo Cabinet libraries should be
|
47
46
|
installed first.
|
data/bin/budvis
CHANGED
@@ -9,72 +9,6 @@ include VizUtil
|
|
9
9
|
|
10
10
|
BUD_DBM_DIR = "#{ARGV[0]}/bud_"
|
11
11
|
|
12
|
-
class VizHelper
|
13
|
-
include Bud
|
14
|
-
include TraceCardinality
|
15
|
-
|
16
|
-
def initialize(tabinf, cycle, depends, rules, dir)
|
17
|
-
@t_tabinf = tabinf
|
18
|
-
@t_cycle = cycle
|
19
|
-
@t_depends = depends
|
20
|
-
@t_rules = rules
|
21
|
-
@dir = dir
|
22
|
-
super()
|
23
|
-
end
|
24
|
-
|
25
|
-
def summarize(dir, schema)
|
26
|
-
table_io = {}
|
27
|
-
cardinalities.sort{|a, b| a[0] <=> b[0]}.each do |card|
|
28
|
-
table_io["#{card.table}_#{card.bud_time}"] = start_table(dir, card.table, card.bud_time, schema[card.table])
|
29
|
-
end
|
30
|
-
|
31
|
-
full_info.each do |info|
|
32
|
-
write_table_content(table_io["#{info.table}_#{info.bud_time}"], info.row)
|
33
|
-
end
|
34
|
-
|
35
|
-
table_io.each_value do |tab|
|
36
|
-
end_table(tab)
|
37
|
-
end
|
38
|
-
|
39
|
-
# fix: nested loops
|
40
|
-
times.sort.each do |time|
|
41
|
-
card_info = {}
|
42
|
-
cardinalities.each do |card|
|
43
|
-
if card.bud_time == time.bud_time
|
44
|
-
card_info[card.table] = card.cnt
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
d = "#{@dir}/tm_#{time.bud_time}"
|
49
|
-
write_graphs(@t_tabinf, @t_cycle, @t_depends, @t_rules, d, @dir, nil, false, nil, time.bud_time, card_info)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
def start_table(dir, tab, time, schema)
|
54
|
-
str = "#{dir}/#{tab}_#{time}.html"
|
55
|
-
fout = File.new(str, "w")
|
56
|
-
|
57
|
-
fout.puts "<html><title>#{tab} @ #{time}</title>"
|
58
|
-
fout.puts "<table border=1>"
|
59
|
-
fout.puts "<tr>" + schema.map{|s| "<th> #{s} </th>"}.join(" ") + "<tr>" unless schema.nil?
|
60
|
-
fout.close
|
61
|
-
return str
|
62
|
-
end
|
63
|
-
|
64
|
-
def end_table(stream)
|
65
|
-
fp = File.open(stream, "a")
|
66
|
-
fp.puts "</table>"
|
67
|
-
fp.close
|
68
|
-
end
|
69
|
-
|
70
|
-
def write_table_content(fn, row)
|
71
|
-
stream = File.open(fn, "a")
|
72
|
-
stream.puts "<tr>"
|
73
|
-
stream.puts row.map{|c| "<td>#{c.to_s}</td>"}.join(" ")
|
74
|
-
stream.puts "</tr>"
|
75
|
-
stream.close
|
76
|
-
end
|
77
|
-
end
|
78
12
|
|
79
13
|
def usage
|
80
14
|
puts "Usage:"
|
data/docs/README.md
CHANGED
@@ -1,26 +1,38 @@
|
|
1
|
-
|
1
|
+
Bud: Bloom under development
|
2
|
+
============================
|
3
|
+
|
2
4
|
Welcome to the documentation for *Bud*, a prototype of Bloom under development.
|
3
5
|
|
4
|
-
The documents here are organized to be read in any order, but you might like to
|
6
|
+
The documents here are organized to be read in any order, but you might like to
|
7
|
+
try the following:
|
5
8
|
|
6
|
-
*
|
7
|
-
*
|
9
|
+
* [intro.md][intro]: A brief introduction to Bud and Bloom.
|
10
|
+
* [getstarted.md][getstarted]: A quickstart to teach you basic Bloom
|
8
11
|
concepts, the use of `rebl` interactive terminal, and the embedding of Bloom
|
9
12
|
code in Ruby via the `Bud` module.
|
10
|
-
*
|
13
|
+
* [operational.md][operational]: An operational view of Bloom, to provide
|
11
14
|
a more detailed model of how Bloom code is evaluated by Bud.
|
12
|
-
*
|
13
|
-
*
|
14
|
-
*
|
15
|
+
* [cheat.md][cheat]: A concise "cheat sheet" to remind you about Bloom syntax.
|
16
|
+
* [modules.md][modules]: An overview of Bloom's modularity features.
|
17
|
+
* [ruby\_hooks.md][ruby_hooks]: Bud module methods that allow you to
|
15
18
|
interact with the Bud evaluator from other Ruby threads.
|
16
|
-
*
|
19
|
+
* [visualizations.md][visualizations]: Overview of the `budvis` and
|
17
20
|
`budplot` tools for visualizing Bloom program analyses.
|
18
|
-
*
|
21
|
+
* [bfs.md][bfs]: A walkthrough of the Bloom distributed filesystem.
|
22
|
+
|
23
|
+
[intro]: /bloom-lang/bud/blob/master/docs/intro.md
|
24
|
+
[getstarted]: /bloom-lang/bud/blob/master/docs/getstarted.md
|
25
|
+
[operational]: /bloom-lang/bud/blob/master/docs/operational.md
|
26
|
+
[cheat]: /bloom-lang/bud/blob/master/docs/cheat.md
|
27
|
+
[modules]: /bloom-lang/bud/blob/master/docs/modules.md
|
28
|
+
[ruby_hooks]: /bloom-lang/bud/blob/master/docs/ruby_hooks.md
|
29
|
+
[visualizations]: /bloom-lang/bud/blob/master/docs/visualizations.md
|
30
|
+
[bfs]: /bloom-lang/bud/blob/master/docs/bfs.md
|
19
31
|
|
20
|
-
In addition, the
|
21
|
-
|
22
|
-
|
32
|
+
In addition, the [bud-sandbox](http://github.com/bloom-lang/bud-sandbox) GitHub
|
33
|
+
repository contains lots of useful libraries and example programs built using
|
34
|
+
Bloom.
|
23
35
|
|
24
36
|
Finally, the Bud gem ships with RubyDoc on the language constructs and runtime
|
25
|
-
hooks provided by the Bud module.
|
26
|
-
line and open [http://0.0.0.0:8808/](http://0.0.0.0:8808/)
|
37
|
+
hooks provided by the Bud module. To see rdoc, run `gem server` from a command
|
38
|
+
line and open [http://0.0.0.0:8808/](http://0.0.0.0:8808/)
|
data/docs/bust.md
CHANGED
@@ -66,7 +66,7 @@ and the include line:
|
|
66
66
|
|
67
67
|
include RestClient
|
68
68
|
|
69
|
-
To make requests, insert into the rest_req interface, whose
|
69
|
+
To make requests, insert into the rest_req interface, whose definition is reproduced below:
|
70
70
|
|
71
71
|
interface input, :rest_req, [:rid, :verb, :form, :url, :params]
|
72
72
|
|
data/docs/cheat.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## General Bloom Syntax Rules ##
|
4
4
|
Bloom programs are unordered sets of statements.<br>
|
5
|
-
Statements are delimited by semicolons (;) or newlines
|
5
|
+
Statements are delimited by semicolons (;) or newlines.<br>
|
6
6
|
As in Ruby, backslash is used to escape a newline.<br>
|
7
7
|
|
8
8
|
## Simple embedding of Bud in a Ruby Class ##
|
@@ -25,7 +25,7 @@ A `state` block contains Bud collection definitions. A Bud collection is a *set*
|
|
25
25
|
of *facts*; each fact is an array of Ruby values. Note that collections do not
|
26
26
|
contain duplicates (inserting a duplicate fact into a collection is ignored).
|
27
27
|
|
28
|
-
Like a table in a relational
|
28
|
+
Like a table in a relational database, a subset of the columns in a collection
|
29
29
|
make up the collection's _key_. Attempting to insert two facts into a collection
|
30
30
|
that agree on the key columns (but are not duplicates) results in a runtime
|
31
31
|
exception.
|
@@ -90,35 +90,51 @@ Statements with stdio on lhs must use async merge (`<~`).<br>
|
|
90
90
|
Using `stdio` on the lhs of an async merge results in writing to the `IO` object specified by the `:stdout` Bud option (`$stdout` by default).<br>
|
91
91
|
To use `stdio` on rhs, instantiate Bud with `:stdin` option set to an `IO` object (e.g., `$stdin`).<br>
|
92
92
|
|
93
|
-
|
94
|
-
|
95
|
-
|
93
|
+
Statements with stdio on lhs must use async merge (`<~`).<br>
|
94
|
+
Using `stdio` on the lhs of an async merge results in writing to the `IO` object specified by the `:stdout` Bud option (`$stdout` by default).<br>
|
95
|
+
To use `stdio` on rhs, instantiate Bud with `:stdin` option set to an `IO` object (e.g., `$stdin`).<br>
|
96
96
|
|
97
|
-
|
98
|
-
|
97
|
+
### signals ###
|
98
|
+
Built-in read-only scratch collection for receiving OS signals.<br>
|
99
|
+
System-provided attributes: `[:key] => []`
|
99
100
|
|
100
|
-
|
101
|
-
|
102
|
-
Default attributes: `[:key] => [:val]`
|
101
|
+
Currently catches only SIGINT ("INT") and SIGTERM ("TERM"). If Bud option `:signal_handling=>:bloom` is set, the signal is trapped and Bloom rules
|
102
|
+
are responsible for dealing with the content of `signals`.
|
103
103
|
|
104
|
-
|
105
|
-
|
104
|
+
### halt ###
|
105
|
+
Built-in scratch collection to be used on the lhs of a rule; permanently halts the Bud instance upon first insertion.
|
106
106
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
107
|
+
If the item `[:kill]` is inserted, the Bud OS process (including all Bud instances) is also halted.
|
108
|
+
|
109
|
+
### sync ###
|
110
|
+
Persistent collection mapped to an external storage engine, with synchronous write-flushing each timestep. Supported storage engines: `:dbm` and `:tokyo`.<br>
|
111
|
+
Default attributes: `[:key] => [:val]`.
|
112
|
+
|
113
|
+
sync :s1, :dbm
|
114
|
+
sync :s2, :tokyo, [:k1, :k2] => [:v1, :v2]
|
115
|
+
|
116
|
+
Further info: [DBM](http://en.wikipedia.org/wiki/Dbm), [Tokyo Cabinet](http://fallabs.com/tokyocabinet/).
|
117
|
+
|
118
|
+
### store ###
|
119
|
+
Persistent collection mapped to an external storage engine, with asynchronous write-flushing. Supported storage engines: `:zookeeper`.<br>
|
120
|
+
Default attributes: `[:key] => [:val]`.
|
121
|
+
|
122
|
+
Statements with a store on lhs must use async merge (`<~`).<br>
|
123
|
+
|
124
|
+
Zookeeper is a special case: it does not take attributes as its trailing arguments. Instead it requires a `:path` and can optionally take an `:addr` specification (default: `:addr => 'localhost:2181'`).
|
111
125
|
|
112
|
-
|
113
|
-
|
126
|
+
store :s3, :zookeeper, :path=>"/foo/bar", :addr => 'localhost:2181'
|
127
|
+
|
128
|
+
Further info: [Apache Zookeeper](http://hadoop.apache.org/zookeeper/).
|
114
129
|
|
115
130
|
|
116
131
|
## Bloom Statements ##
|
117
|
-
|
132
|
+
### Statement Syntax ###
|
133
|
+
*lhs bloom_op rhs*
|
118
134
|
|
119
|
-
Left-hand-side (lhs) is a named `BudCollection` object.<br>
|
120
|
-
Right-hand-side (rhs) is a Ruby expression producing a `BudCollection` or `Array` of `Arrays`.<br>
|
121
|
-
|
135
|
+
Left-hand-side (*lhs*) is a named `BudCollection` object.<br>
|
136
|
+
Right-hand-side (*rhs*) is a Ruby expression producing a `BudCollection` or `Array` of `Arrays`.<br>
|
137
|
+
The operator (*bloom_op*) is one of the 5 operators listed below.
|
122
138
|
|
123
139
|
### Bloom Operators ###
|
124
140
|
merges:
|
@@ -131,13 +147,24 @@ delete:
|
|
131
147
|
|
132
148
|
* `left <- right` (*deferred*)
|
133
149
|
|
150
|
+
update/upsert:
|
151
|
+
|
152
|
+
* `left <+- right` (*deferred*)<br>
|
153
|
+
deferred insert of items on rhs and deferred deletion of items with matching
|
154
|
+
keys on lhs.
|
155
|
+
|
156
|
+
That is, for each fact produced by the rhs, the upsert operator removes any
|
157
|
+
existing tuples that match on the lhs collection's key columns before inserting
|
158
|
+
the corresponding rhs fact. Note that both the removal and insertion operations
|
159
|
+
happen atomically in the next timestep.
|
160
|
+
|
134
161
|
### Collection Methods ###
|
135
162
|
Standard Ruby methods used on a BudCollection `bc`:
|
136
163
|
|
137
164
|
implicit map:
|
138
165
|
|
139
166
|
t1 <= bc {|t| [t.col1 + 4, t.col2.chomp]} # formatting/projection
|
140
|
-
t2 <= bc {|t| t if t.col
|
167
|
+
t2 <= bc {|t| t if t.col == 5} # selection
|
141
168
|
|
142
169
|
`flat_map`:
|
143
170
|
|
@@ -183,15 +210,27 @@ implicit map:
|
|
183
210
|
stdio <~ requests do |r|
|
184
211
|
[r.inspect] if msgs.exists?{|m| r.ident == m.ident}
|
185
212
|
end
|
213
|
+
|
214
|
+
`bc.notin(bc2, `*optional hash pairs*`)` *optional ruby block*:<br>
|
215
|
+
Output each item of `bc` such that (a) it has no match in `bc2` on the hash-pairs attributes, or (b) there is no matching item in `bc2` that leads to a non-nil return value from the block.
|
216
|
+
Hash pairs can be fully qualified (`bc.attr1 => bc2.attr2`)
|
217
|
+
or shorthand (`:attr1 => :attr2`).
|
218
|
+
|
219
|
+
# output items from foo if (a) there is no matching key in bar, or
|
220
|
+
# (b) all matching keys in bar have a smaller value
|
221
|
+
stdio <~ foo.notin(bar, :key=>:key) {|f, b| true if f.val <= b.val}
|
186
222
|
|
223
|
+
|
187
224
|
## SQL-style grouping/aggregation (and then some) ##
|
188
225
|
|
189
|
-
* `bc.group([:col1, :col2], min(:col3))`. *akin to min(col3) GROUP BY
|
226
|
+
* `bc.group([:col1, :col2], min(:col3))`. *akin to min(col3) GROUP BY col1,col2*
|
190
227
|
* exemplary aggs: `min`, `max`, `choose`
|
191
228
|
* summary aggs: `sum`, `avg`, `count`
|
192
229
|
* structural aggs: `accum`
|
193
|
-
* `bc.argmax([:
|
194
|
-
* `bc.argmin([:
|
230
|
+
* `bc.argmax([:attr1], :attr2)` *returns the bc items per attr1 that have highest attr2*
|
231
|
+
* `bc.argmin([:attr1], :attr2)`
|
232
|
+
* `bc.argagg(:exemplary_agg_name, [:attr1], :attr2)`. *generalizes argmin/max: returns the bc items per attr1 that are chosen by the exemplary
|
233
|
+
aggregate named*
|
195
234
|
|
196
235
|
### Built-in Aggregates: ###
|
197
236
|
|
@@ -244,14 +283,14 @@ Like `pairs`, but implicitly includes a block that projects down to the left ite
|
|
244
283
|
Like `pairs`, but implicitly includes a block that projects down to the right item in each pair.
|
245
284
|
|
246
285
|
`flatten`:<br>
|
247
|
-
`flatten` is a bit like SQL's `SELECT *`: it produces a collection of concatenated objects, with a schema that is the concatenation of the schemas in tablelist (with duplicate names disambiguated.
|
286
|
+
`flatten` is a bit like SQL's `SELECT *`: it produces a collection of concatenated objects, with a schema that is the concatenation of the schemas in tablelist (with duplicate names disambiguated). Useful for chaining to operators that expect input collections with schemas, e.g., `group`:
|
248
287
|
|
249
288
|
out <= (r * s).matches.flatten.group([:a], max(:b))
|
250
289
|
|
251
290
|
`outer(`*hash pairs*`)`:<br>
|
252
|
-
Left Outer Join. Like `pairs`, but
|
291
|
+
Left Outer Join. Like `pairs`, but items in the first collection will be produced nil-padded if they have no match in the second collection.
|
253
292
|
|
254
|
-
## Temp Collections ##
|
293
|
+
## Temp Collections and With Blocks ##
|
255
294
|
`temp`<br>
|
256
295
|
Temp collections are scratches defined within a `bloom` block:
|
257
296
|
|
@@ -261,10 +300,20 @@ The schema of a temp collection in inherited from the rhs; if the rhs has no
|
|
261
300
|
schema, a simple one is manufactured to suit the data found in the rhs at
|
262
301
|
runtime: `[c0, c1, ...]`.
|
263
302
|
|
303
|
+
`with`<br>
|
304
|
+
With statements define a temp collection that can be referenced only within the scope of the associated block. They are useful when you "fork" a dataflow into two lhs destinations:
|
305
|
+
|
306
|
+
with :biggies <= request {|r| r if r.quantity > 100}, begin
|
307
|
+
to_process <= (biggies * known_good).lefts(:key=>:key)
|
308
|
+
denied <= (biggies * known_good).nopairs(:key=>:key)
|
309
|
+
end
|
310
|
+
|
311
|
+
The advantage of using `with` over `temp` is modularity: all the rules referencing `biggies` have to be bundled together, making it easier to see that the contents of `request` with quantity > 100 are handled properly.
|
312
|
+
|
264
313
|
## Bud Modules ##
|
265
314
|
A Bud module combines state (collections) and logic (Bloom rules). Using modules allows your program to be decomposed into a collection of smaller units.
|
266
315
|
|
267
|
-
|
316
|
+
Defining a Bud module is identical to defining a Ruby module, except that the module can use the `bloom`, `bootstrap`, and `state` blocks described above.
|
268
317
|
|
269
318
|
There are two ways to use a module *B* in another Bloom module *A*:
|
270
319
|
|
data/docs/operational.md
CHANGED
@@ -19,13 +19,13 @@ Each iteration of this loop is a *timestep* for that node; each timestep is asso
|
|
19
19
|
A Bloom timestep has 3 main phases (from left to right):
|
20
20
|
|
21
21
|
1. *setup*: All scratch collections are set to empty. Network messages and periodic timer events are received from the runtime and placed into their designated `channel` and `periodic` scratches, respectively, to be read in the rhs of statements. Note that a batch of multiple messages/events may be received at once.
|
22
|
-
2. *logic*: All Bloom statements for the program are evaluated. In programs with recursion through instantaneous merges (`<=`), the statements are repeatedly evaluated until a *fixpoint* is reached: i.e
|
23
|
-
3. *transition*: Items derived on the lhs of deferred operators (`<+`,
|
22
|
+
2. *logic*: All Bloom statements for the program are evaluated. In programs with recursion through instantaneous merges (`<=`), the statements are repeatedly evaluated until a *fixpoint* is reached: i.e., no new lhs items are derived from any rhs.
|
23
|
+
3. *transition*: Items derived on the lhs of deferred operators (`<+`, `<-`, `<+-`) are placed into/deleted from their corresponding collections, and items derived on the lhs of asynchronous merge (`<~`) are handed off to external code (i.e., the local operating system) for processing.
|
24
24
|
|
25
25
|
It is important to understand how the Bloom collection operators fit into these timesteps:
|
26
26
|
|
27
27
|
* *Instantaneous* merge (`<=`) occurs within the fixpoint of phase 2.
|
28
|
-
* *Deferred* operations include merge (`<+`) and delete (`<-`), and are handled in phase 3. Their effects become visible atomically to Bloom statements in phase 2 of the next timestep.
|
28
|
+
* *Deferred* operations include merge (`<+`), update (`<+-`), and delete (`<-`), and are handled in phase 3. Their effects become visible atomically to Bloom statements in phase 2 of the next timestep.
|
29
29
|
* *Asynchronous* merge (`<~`) is initiated during phase 3, so it cannot affect the current timestep. When multiple items are on the rhs of an async merge, they may "appear" independently spread across multiple different future local timesteps.
|
30
30
|
|
31
31
|
|
@@ -43,7 +43,11 @@ State "update" is achieved in Bloom via a pair of deferred statements, one posit
|
|
43
43
|
buffer <+ [[1, "newval"]]
|
44
44
|
buffer <- buffer {|b| b if b.key == 1}
|
45
45
|
|
46
|
-
This atomically replaces the entry for key 1 with the value "newval" at the start of the next timestep.
|
46
|
+
This atomically replaces the entry for key 1 with the value "newval" at the start of the next timestep. As syntax sugar for this common pattern, the deferred update operator can be used:
|
47
|
+
|
48
|
+
buffer <+- [[1, "newval"]]
|
49
|
+
|
50
|
+
This update statement removes (from the following timestep) any fact in `buffer` with the key `1`, and inserts (in the following timestep) a fact with the value `[1, "newval"]`. Note that "key" here refers to the key column(s) of the lhs relation: this example assumes `buffer` has a single key column.
|
47
51
|
|
48
52
|
Any reasoning about atomicity in Bloom programs is built on this simple foundation. It's really all you need. In the bud-sandbox we show how to build more powerful atomicity constructs using it, including things like enforcing [ordering of items across timesteps](https://github.com/bloom-lang/bud-sandbox/tree/master/ordering), and protocols for [agreeing on ordering of distributed updates](https://github.com/bloom-lang/bud-sandbox/tree/master/paxos) across all nodes.
|
49
53
|
|
data/examples/basics/paths.rb
CHANGED
@@ -18,9 +18,11 @@ class ShortestPaths
|
|
18
18
|
# base case: every link is a path
|
19
19
|
path <= link {|e| [e.from, e.to, e.to, e.cost]}
|
20
20
|
|
21
|
-
# inductive case: make path of length n+1 by connecting a link to a path of
|
22
|
-
|
23
|
-
path <=
|
21
|
+
# inductive case: make path of length n+1 by connecting a link to a path of
|
22
|
+
# length n
|
23
|
+
path <= (link*path).pairs(:to => :from) do |l,p|
|
24
|
+
[l.from, p.to, p.from, l.cost+p.cost]
|
25
|
+
end
|
24
26
|
end
|
25
27
|
|
26
28
|
# find the shortest path between each connected pair of nodes
|
data/lib/bud/aggs.rb
CHANGED
@@ -120,7 +120,7 @@ module Bud
|
|
120
120
|
# exemplary aggregate method to be used in Bud::BudCollection.group.
|
121
121
|
# randomly chooses among x entries being aggregated.
|
122
122
|
def choose_rand(x=nil)
|
123
|
-
[ChooseRand.new]
|
123
|
+
[ChooseRand.new, x]
|
124
124
|
end
|
125
125
|
|
126
126
|
class Sum < Agg #:nodoc: all
|
data/lib/bud/bud_meta.rb
CHANGED
@@ -36,12 +36,19 @@ class BudMeta #:nodoc: all
|
|
36
36
|
@depanalysis = DepAnalysis.new
|
37
37
|
@bud_instance.t_depends_tc.each {|d| @depanalysis.depends_tc << d}
|
38
38
|
@bud_instance.t_provides.each {|p| @depanalysis.providing << p}
|
39
|
-
3.times { @depanalysis.
|
39
|
+
3.times { @depanalysis.tick_internal }
|
40
40
|
|
41
41
|
@depanalysis.underspecified.each do |u|
|
42
42
|
puts "Warning: underspecified dataflow: #{u.inspect}"
|
43
43
|
@bud_instance.t_underspecified << u
|
44
44
|
end
|
45
|
+
@depanalysis.source.each do |s|
|
46
|
+
@bud_instance.sources[s.first] = true
|
47
|
+
end
|
48
|
+
@depanalysis.sink.each do |s|
|
49
|
+
@bud_instance.sinks[s.first] = true
|
50
|
+
end
|
51
|
+
|
45
52
|
dump_rewrite(rewritten_strata) if @bud_instance.options[:dump_rewrite]
|
46
53
|
|
47
54
|
return rewritten_strata, no_attr_rewrite_strata
|
@@ -128,6 +135,8 @@ class BudMeta #:nodoc: all
|
|
128
135
|
next
|
129
136
|
end
|
130
137
|
|
138
|
+
next if i == 1 and n.sexp_type == :nil # a block got rewritten to an empty block
|
139
|
+
|
131
140
|
# Check for a common case
|
132
141
|
if n.sexp_type == :lasgn
|
133
142
|
return [n, "Illegal operator: '='"]
|
@@ -170,7 +179,7 @@ class BudMeta #:nodoc: all
|
|
170
179
|
def stratify
|
171
180
|
strat = Stratification.new
|
172
181
|
@bud_instance.t_depends.each {|d| strat.depends << d}
|
173
|
-
strat.
|
182
|
+
strat.tick_internal
|
174
183
|
|
175
184
|
# Copy computed data back into Bud runtime
|
176
185
|
strat.stratum.each {|s| @bud_instance.t_stratum << s}
|
data/lib/bud/bust/bust.rb
CHANGED
@@ -84,7 +84,7 @@ module Bust
|
|
84
84
|
tuple_to_insert[index] = v[0]
|
85
85
|
end
|
86
86
|
# actually insert the puppy
|
87
|
-
@bud.async_do { (eval "@bud." + table_name)
|
87
|
+
@bud.async_do { (eval "@bud." + table_name) <+ [tuple_to_insert] }
|
88
88
|
@session.print success
|
89
89
|
end
|
90
90
|
rescue Exception
|
data/lib/bud/collections.rb
CHANGED
@@ -135,7 +135,7 @@ module Bud
|
|
135
135
|
# project the collection to its key attributes
|
136
136
|
public
|
137
137
|
def keys
|
138
|
-
self.map{|t|
|
138
|
+
self.map{|t| @key_colnums.map {|i| t[i]}}
|
139
139
|
end
|
140
140
|
|
141
141
|
# project the collection to its non-key attributes
|
@@ -173,10 +173,23 @@ module Bud
|
|
173
173
|
each_from([@storage, @delta], &block)
|
174
174
|
end
|
175
175
|
|
176
|
+
public
|
177
|
+
def tick_metrics
|
178
|
+
strat_num = bud_instance.this_stratum
|
179
|
+
rule_num = bud_instance.this_rule
|
180
|
+
addr = nil
|
181
|
+
addr = bud_instance.ip_port unless bud_instance.port.nil?
|
182
|
+
rule_txt = nil
|
183
|
+
bud_instance.metrics[:collections] ||= {}
|
184
|
+
bud_instance.metrics[:collections][{:addr=>addr, :tabname=>tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
|
185
|
+
bud_instance.metrics[:collections][{:addr=>addr, :tabname=>tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
|
186
|
+
end
|
187
|
+
|
176
188
|
private
|
177
189
|
def each_from(bufs, &block) # :nodoc: all
|
178
190
|
bufs.each do |b|
|
179
191
|
b.each_value do |v|
|
192
|
+
tick_metrics if bud_instance and bud_instance.options[:metrics]
|
180
193
|
yield v
|
181
194
|
end
|
182
195
|
end
|
@@ -237,7 +250,7 @@ module Bud
|
|
237
250
|
def include?(item)
|
238
251
|
return true if key_cols.nil? or (key_cols.empty? and length > 0)
|
239
252
|
return false if item.nil? or item.empty?
|
240
|
-
key =
|
253
|
+
key = @key_colnums.map{|i| item[i]}
|
241
254
|
return (item == self[key])
|
242
255
|
end
|
243
256
|
|
@@ -255,7 +268,7 @@ module Bud
|
|
255
268
|
|
256
269
|
private
|
257
270
|
def raise_pk_error(new_guy, old)
|
258
|
-
keycols =
|
271
|
+
keycols = @key_colnums.map{|i| old[i]}
|
259
272
|
raise KeyConstraintError, "Key conflict inserting #{new_guy.inspect} into \"#{tabname}\": existing tuple #{old.inspect}, key_cols = #{keycols.inspect}"
|
260
273
|
end
|
261
274
|
|
@@ -399,7 +412,22 @@ module Bud
|
|
399
412
|
superator "<+" do |o|
|
400
413
|
pending_merge o
|
401
414
|
end
|
402
|
-
|
415
|
+
|
416
|
+
public
|
417
|
+
superator "<+-" do |o|
|
418
|
+
self <+ o
|
419
|
+
self <- o.map do |t|
|
420
|
+
unless t.nil?
|
421
|
+
self[@key_colnums.map{|k| t[k]}]
|
422
|
+
end
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
public
|
427
|
+
superator "<-+" do |o|
|
428
|
+
self <+- o
|
429
|
+
end
|
430
|
+
|
403
431
|
# Called at the end of each timestep: prepare the collection for the next
|
404
432
|
# timestep.
|
405
433
|
public
|
@@ -440,8 +468,8 @@ module Bud
|
|
440
468
|
|
441
469
|
|
442
470
|
# a generalization of argmin/argmax to arbitrary exemplary aggregates.
|
443
|
-
# for each distinct value
|
444
|
-
# that
|
471
|
+
# for each distinct value of the grouping key columns, return the items in that group
|
472
|
+
# that have the value of the exemplary aggregate +aggname+
|
445
473
|
public
|
446
474
|
def argagg(aggname, gbkey_cols, collection)
|
447
475
|
agg = bud_instance.send(aggname, nil)[0]
|
@@ -501,15 +529,17 @@ module Bud
|
|
501
529
|
end
|
502
530
|
end
|
503
531
|
|
504
|
-
# for each distinct value
|
505
|
-
# that
|
532
|
+
# for each distinct value of the grouping key columns, return the items in
|
533
|
+
# that group that have the minimum value of the attribute +col+. Note that
|
534
|
+
# multiple tuples might be returned.
|
506
535
|
public
|
507
536
|
def argmin(gbkey_cols, col)
|
508
537
|
argagg(:min, gbkey_cols, col)
|
509
538
|
end
|
510
539
|
|
511
|
-
# for each distinct value
|
512
|
-
# that has the maximum value of the attribute +col
|
540
|
+
# for each distinct value of the grouping key columns, return the items in
|
541
|
+
# that group that have the maximum value of the attribute +col+. Note that
|
542
|
+
# multiple tuples might be returned.
|
513
543
|
public
|
514
544
|
def argmax(gbkey_cols, col)
|
515
545
|
argagg(:max, gbkey_cols, col)
|
@@ -536,6 +566,14 @@ module Bud
|
|
536
566
|
def *(collection)
|
537
567
|
join([self, collection])
|
538
568
|
end
|
569
|
+
|
570
|
+
# AntiJoin
|
571
|
+
public
|
572
|
+
def notin(coll,*preds, &blk)
|
573
|
+
@origpreds = preds
|
574
|
+
@schema = schema
|
575
|
+
return BudJoin.new([self,coll], @bud_instance).anti(*preds,&blk)
|
576
|
+
end
|
539
577
|
|
540
578
|
# SQL-style grouping. first argument is an array of attributes to group by.
|
541
579
|
# Followed by a variable-length list of aggregates over attributes (e.g. +min(:x)+)
|
@@ -626,13 +664,27 @@ module Bud
|
|
626
664
|
@is_loopback = loopback
|
627
665
|
@locspec_idx = nil
|
628
666
|
|
667
|
+
# We're going to mutate the caller's given_schema (to remove the location
|
668
|
+
# specifier), so make a deep copy first. We also save a ref to the
|
669
|
+
# unmodified given_schema.
|
670
|
+
@raw_schema = given_schema
|
671
|
+
given_schema = Marshal.load(Marshal.dump(given_schema))
|
672
|
+
|
629
673
|
unless @is_loopback
|
630
674
|
the_schema, the_key_cols = parse_schema(given_schema)
|
675
|
+
spec_count = the_schema.count {|s| s.to_s.start_with? "@"}
|
676
|
+
if spec_count == 0
|
677
|
+
raise BudError, "Missing location specifier for channel '#{name}'"
|
678
|
+
end
|
679
|
+
if spec_count > 1
|
680
|
+
raise BudError, "Multiple location specifiers for channel '#{name}'"
|
681
|
+
end
|
682
|
+
|
631
683
|
the_val_cols = the_schema - the_key_cols
|
632
684
|
@locspec_idx = remove_at_sign!(the_key_cols)
|
633
|
-
@locspec_idx = remove_at_sign!(the_schema) if @locspec_idx.nil?
|
634
685
|
if @locspec_idx.nil?
|
635
|
-
|
686
|
+
val_idx = remove_at_sign!(the_val_cols)
|
687
|
+
@locspec_idx = val_idx + the_key_cols.length
|
636
688
|
end
|
637
689
|
|
638
690
|
# We mutate the hash key above, so we need to recreate the hash
|
@@ -647,7 +699,7 @@ module Bud
|
|
647
699
|
|
648
700
|
private
|
649
701
|
def remove_at_sign!(cols)
|
650
|
-
i = cols.find_index {|c| c.to_s
|
702
|
+
i = cols.find_index {|c| c.to_s.start_with? "@"}
|
651
703
|
unless i.nil?
|
652
704
|
cols[i] = cols[i].to_s.delete('@').to_sym
|
653
705
|
end
|
@@ -667,7 +719,7 @@ module Bud
|
|
667
719
|
|
668
720
|
public
|
669
721
|
def clone_empty
|
670
|
-
self.class.new(tabname, bud_instance, @
|
722
|
+
self.class.new(tabname, bud_instance, @raw_schema, @is_loopback)
|
671
723
|
end
|
672
724
|
|
673
725
|
public
|
@@ -757,7 +809,7 @@ module Bud
|
|
757
809
|
socket.send_datagram([tabname, tup].to_msgpack, ip, port)
|
758
810
|
end
|
759
811
|
end
|
760
|
-
rescue
|
812
|
+
rescue Exception
|
761
813
|
puts "terminal reader thread failed: #{$!}"
|
762
814
|
print $!.backtrace.join("\n")
|
763
815
|
exit
|
@@ -778,7 +830,7 @@ module Bud
|
|
778
830
|
public
|
779
831
|
def tick #:nodoc: all
|
780
832
|
@storage = {}
|
781
|
-
raise BudError unless @pending.empty?
|
833
|
+
raise BudError, "orphaned pending tuples in terminal" unless @pending.empty?
|
782
834
|
end
|
783
835
|
|
784
836
|
undef merge
|
@@ -796,6 +848,7 @@ module Bud
|
|
796
848
|
def get_out_io
|
797
849
|
rv = @bud_instance.options[:stdout]
|
798
850
|
rv ||= $stdout
|
851
|
+
raise BudError, "attempting to write to terminal #{tabname} that was already closed" if rv.closed?
|
799
852
|
rv
|
800
853
|
end
|
801
854
|
end
|
@@ -816,10 +869,6 @@ module Bud
|
|
816
869
|
superator "<+" do |o|
|
817
870
|
raise BudError, "Illegal use of <+ with periodic '#{tabname}' on left"
|
818
871
|
end
|
819
|
-
|
820
|
-
def add_periodic_tuple(id)
|
821
|
-
pending_merge([[id, Time.now]])
|
822
|
-
end
|
823
872
|
end
|
824
873
|
|
825
874
|
class BudTable < BudCollection # :nodoc: all
|
@@ -890,6 +939,7 @@ module Bud
|
|
890
939
|
while (l = @fd.gets)
|
891
940
|
t = tuple_accessors([@linenum, l.strip])
|
892
941
|
@linenum += 1
|
942
|
+
tick_metrics if bud_instance.options[:metrics]
|
893
943
|
yield t
|
894
944
|
end
|
895
945
|
end
|
@@ -909,4 +959,12 @@ module Enumerable
|
|
909
959
|
scr.merge(self, scr.storage)
|
910
960
|
scr
|
911
961
|
end
|
962
|
+
|
963
|
+
public
|
964
|
+
# We rewrite "map" calls in Bloom blocks to invoke the "pro" method
|
965
|
+
# instead. This is fine when applied to a BudCollection; when applied to a
|
966
|
+
# normal Enumerable, just treat pro as an alias for map.
|
967
|
+
def pro(&blk)
|
968
|
+
map(&blk)
|
969
|
+
end
|
912
970
|
end
|