bud 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +9 -0
- data/README +30 -0
- data/bin/budplot +134 -0
- data/bin/budvis +201 -0
- data/bin/rebl +4 -0
- data/docs/README.md +13 -0
- data/docs/bfs.md +379 -0
- data/docs/bfs.raw +251 -0
- data/docs/bfs_arch.png +0 -0
- data/docs/bloom-loop.png +0 -0
- data/docs/bust.md +83 -0
- data/docs/cheat.md +291 -0
- data/docs/deploy.md +96 -0
- data/docs/diffs +181 -0
- data/docs/getstarted.md +296 -0
- data/docs/intro.md +36 -0
- data/docs/modules.md +112 -0
- data/docs/operational.md +96 -0
- data/docs/rebl.md +99 -0
- data/docs/ruby_hooks.md +19 -0
- data/docs/visualizations.md +75 -0
- data/examples/README +1 -0
- data/examples/basics/hello.rb +12 -0
- data/examples/basics/out +1103 -0
- data/examples/basics/out.new +856 -0
- data/examples/basics/paths.rb +51 -0
- data/examples/bust/README.md +9 -0
- data/examples/bust/bustclient-example.rb +23 -0
- data/examples/bust/bustinspector.html +135 -0
- data/examples/bust/bustserver-example.rb +18 -0
- data/examples/chat/README.md +9 -0
- data/examples/chat/chat.rb +45 -0
- data/examples/chat/chat_protocol.rb +8 -0
- data/examples/chat/chat_server.rb +29 -0
- data/examples/deploy/tokenring-ec2.rb +26 -0
- data/examples/deploy/tokenring-local.rb +17 -0
- data/examples/deploy/tokenring.rb +39 -0
- data/lib/bud/aggs.rb +126 -0
- data/lib/bud/bud_meta.rb +185 -0
- data/lib/bud/bust/bust.rb +126 -0
- data/lib/bud/bust/client/idempotence.rb +10 -0
- data/lib/bud/bust/client/restclient.rb +49 -0
- data/lib/bud/collections.rb +937 -0
- data/lib/bud/depanalysis.rb +44 -0
- data/lib/bud/deploy/countatomicdelivery.rb +50 -0
- data/lib/bud/deploy/deployer.rb +67 -0
- data/lib/bud/deploy/ec2deploy.rb +200 -0
- data/lib/bud/deploy/localdeploy.rb +41 -0
- data/lib/bud/errors.rb +15 -0
- data/lib/bud/graphs.rb +405 -0
- data/lib/bud/joins.rb +300 -0
- data/lib/bud/rebl.rb +314 -0
- data/lib/bud/rewrite.rb +523 -0
- data/lib/bud/rtrace.rb +27 -0
- data/lib/bud/server.rb +43 -0
- data/lib/bud/state.rb +108 -0
- data/lib/bud/storage/tokyocabinet.rb +170 -0
- data/lib/bud/storage/zookeeper.rb +178 -0
- data/lib/bud/stratify.rb +83 -0
- data/lib/bud/viz.rb +65 -0
- data/lib/bud.rb +797 -0
- metadata +330 -0
data/lib/bud/collections.rb
@@ -0,0 +1,937 @@
+require 'msgpack'
+
+module Bud
+  ########
+  #--
+  # the collection types
+  # each collection is partitioned into 4:
+  # - pending holds tuples deferred til the next tick
+  # - storage holds the "normal" tuples
+  # - delta holds the delta for rhs's of rules during semi-naive
+  # - new_delta will hold the lhs tuples currently being produced during s-n
+  #++
+
+  class BudCollection
+    include Enumerable
+
+    attr_accessor :bud_instance, :locspec_idx # :nodoc: all
+    attr_reader :schema, :tabname # :nodoc: all
+    attr_reader :storage, :delta, :new_delta # :nodoc: all
+
+    def initialize(name, bud_instance, given_schema=nil, defer_schema=false) # :nodoc: all
+      @tabname = name
+      @bud_instance = bud_instance
+      init_schema(given_schema) unless given_schema.nil? and defer_schema
+      init_buffers
+    end
+
+    private
+    def init_buffers
+      @sealed = false
+      init_storage
+      init_pending
+      init_deltas
+    end
+
+    private
+    def init_schema(given_schema)
+      given_schema ||= {[:key]=>[:val]}
+      @given_schema = given_schema
+      @schema, @key_cols = parse_schema(given_schema)
+      @key_colnums = key_cols.map {|k| schema.index(k)}
+      setup_accessors
+    end
+
+    # The user-specified schema might come in two forms: a hash of Array =>
+    # Array (key_cols => remaining columns), or simply an Array of columns (if no
+    # key_cols were specified). Return a pair: [list of columns in entire tuple,
+    # list of key columns]
+    private
+    def parse_schema(given_schema)
+      if given_schema.respond_to? :keys
+        raise BudError, "invalid schema for #{tabname}" if given_schema.length != 1
+        key_cols = given_schema.keys.first
+        val_cols = given_schema.values.first
+      else
+        key_cols = given_schema
+        val_cols = []
+      end
+
+      schema = key_cols + val_cols
+      schema.each do |s|
+        if s.class != Symbol
+          raise BudError, "Invalid schema element \"#{s}\", type \"#{s.class}\""
+        end
+      end
+      if schema.uniq.length < schema.length
+        raise BudError, "schema for #{tabname} contains duplicate names"
+      end
+
+      return [schema, key_cols]
+    end
+
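As a rough usage sketch of the two schema forms parse_schema accepts (a one-entry hash mapping key columns to value columns, or a bare array treated as all-key), using hypothetical collection names and driving the class directly rather than through a Bud program:

    require 'bud'

    # hash form: [:host, :id] is the key, [:payload] the remaining column
    kvs = Bud::BudScratch.new('kvs', nil, {[:host, :id] => [:payload]})
    kvs.key_cols   # => [:host, :id]
    kvs.val_cols   # => [:payload]

    # array form: every column is part of the key
    log = Bud::BudScratch.new('log', nil, [:entry, :term])
    log.val_cols   # => []
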
+    public
+    def clone_empty #:nodoc: all
+      self.class.new(tabname, bud_instance, @given_schema)
+    end
+
+    # subset of the schema (i.e. an array of attribute names) that forms the key
+    public
+    def key_cols
+      @key_cols
+    end
+
+    # subset of the schema (i.e. an array of attribute names) that is not in the key
+    public
+    def val_cols # :nodoc: all
+      schema - key_cols
+    end
+
+    # define methods to turn 'table.col' into a [table,col] pair
+    # e.g. to support something like
+    #   j = join link, path, {link.to => path.from}
+    private
+    def setup_accessors
+      s = @schema
+      s.each do |colname|
+        reserved = eval "defined?(#{colname})"
+        unless (reserved.nil? or
+                (reserved == "method" and method(colname).arity == -1 and (eval(colname))[0] == self.tabname))
+          raise BudError, "symbol :#{colname} reserved, cannot be used as column name for #{tabname}"
+        end
+      end
+
+      # set up schema accessors, which are class methods
+      m = Module.new do
+        s.each_with_index do |c, i|
+          define_method c do
+            [@tabname, i, c]
+          end
+        end
+      end
+      self.extend m
+
+      # now set up a Module for tuple accessors, which are instance methods
+      @tupaccess = Module.new do
+        s.each_with_index do |colname, offset|
+          define_method colname do
+            self[offset]
+          end
+        end
+      end
+    end
+
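The class-level accessors built in setup_accessors above are what let a column be mentioned as collection.column: each such call returns a [tabname, index, colname] triple, while the instance-level @tupaccess module lets individual tuples answer to their column names. A small sketch, continuing the hypothetical kvs collection from the previous example:

    kvs = Bud::BudScratch.new('kvs', nil, {[:host, :id] => [:payload]})
    kvs.payload                       # => ['kvs', 2, :payload]
    kvs.insert(['localhost:12345', 1, 'hello'])
    kvs.each {|t| puts t.payload }    # tuples respond to column names => "hello"
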
+    # define methods to access tuple attributes by column name
+    private
+    def tuple_accessors(tup)
+      tup.extend @tupaccess
+    end
+
+    # generate a tuple with the schema of this collection and nil values in each attribute
+    public
+    def null_tuple
+      tuple_accessors(Array.new(@schema.length))
+    end
+
+    # project the collection to its key attributes
+    public
+    def keys
+      self.map{|t| (0..self.key_cols.length-1).map{|i| t[i]}}
+    end
+
+    # project the collection to its non-key attributes
+    public
+    def values
+      self.map{|t| (self.key_cols.length..self.schema.length-1).map{|i| t[i]}}
+    end
+
+    # map each item in the collection into a string, suitable for placement in stdio
+    public
+    def inspected
+      self.map{|t| [t.inspect]}
+    end
+
+    # akin to map, but modified for efficiency in Bloom statements
+    public
+    def pro(&blk)
+      if @bud_instance.stratum_first_iter
+        return map(&blk)
+      else
+        if @delta.empty?
+          return []
+        else
+          retval = []
+          each_from([@delta]) do |t|
+            newitem = blk.call(t)
+            retval << newitem unless newitem.nil?
+          end
+          return retval
+        end
+      end
+    end
+
+    # By default, all tuples in any rhs are in storage or delta. Tuples in
+    # new_delta will get transitioned to delta in the next iteration of the
+    # evaluator (but within the current time tick).
+    public
+    def each(&block) # :nodoc: all
+      each_from([@storage, @delta], &block)
+    end
+
+    private
+    def each_from(bufs, &block) # :nodoc: all
+      bufs.each do |b|
+        b.each_value do |v|
+          yield v
+        end
+      end
+    end
+
+    public
+    def each_from_sym(buf_syms, &block) # :nodoc: all
+      bufs = buf_syms.map do |s|
+        case s
+        when :storage then @storage
+        when :delta then @delta
+        when :new_delta then @new_delta
+        else raise BudError, "bad symbol passed into each_from_sym"
+        end
+      end
+      each_from(bufs, &block)
+    end
+
+    private
+    def init_storage
+      @storage = {}
+    end
+
+    private
+    def init_pending
+      @pending = {}
+    end
+
+    private
+    def init_deltas
+      @delta = {}
+      @new_delta = {}
+    end
+
+    public
+    def close # :nodoc: all
+    end
+
+    # checks for key +k+ in the key columns
+    public
+    def has_key?(k)
+      return false if k.nil? or k.empty? or self[k].nil?
+      return true
+    end
+
+    # return item with key +k+
+    public
+    def [](k)
+      # assumes that key is in storage or delta, but not both
+      # is this enforced in do_insert?
+      return @storage[k].nil? ? @delta[k] : @storage[k]
+    end
+
+    # checks for +item+ in the collection
+    public
+    def include?(item)
+      return true if key_cols.nil? or (key_cols.empty? and length > 0)
+      return false if item.nil? or item.empty?
+      key = key_cols.map{|k| item[schema.index(k)]}
+      return (item == self[key])
+    end
+
+    # checks for an item for which +block+ produces a match
+    public
+    def exists?(&block)
+      if length == 0
+        return false
+      elsif not block_given?
+        return true
+      else
+        return ((detect{|t| yield t}).nil?) ? false : true
+      end
+    end
+
+    private
+    def raise_pk_error(new, old)
+      keycols = key_cols.map{|k| old[schema.index(k)]}
+      raise KeyConstraintError, "Key conflict inserting #{old.inspect} into \"#{tabname}\": existing tuple #{new.inspect}, key_cols = #{keycols.inspect}"
+    end
+
+    private
+    def prep_tuple(o)
+      unless o.respond_to?(:length) and o.respond_to?(:[])
+        raise BudTypeError, "non-indexable type inserted into BudCollection #{self.tabname}: #{o.inspect}"
+      end
+
+      if o.length < schema.length then
+        # if this tuple has too few fields, pad with nil's
+        old = o.clone
+        (o.length..schema.length-1).each{|i| o << nil}
+        # puts "in #{@tabname}, converted #{old.inspect} to #{o.inspect}"
+      elsif o.length > schema.length then
+        # if this tuple has more fields than usual, bundle up the
+        # extras into an array
+        o = (0..(schema.length - 1)).map{|c| o[c]} << (schema.length..(o.length - 1)).map{|c| o[c]}
+      end
+      return o
+    end
+
+    private
+    def do_insert(o, store)
+      return if o.nil? # silently ignore nils resulting from map predicates failing
+      o = prep_tuple(o)
+      keycols = @key_colnums.map{|i| o[i]}
+
+      old = store[keycols]
+      if old.nil?
+        store[keycols] = tuple_accessors(o)
+      else
+        raise_pk_error(o, old) unless old == o
+      end
+    end
+
+    public
+    def insert(o) # :nodoc: all
+      # puts "insert: #{o.inspect} into #{tabname}"
+      do_insert(o, @delta)
+    end
+
+    # instantaneously place an individual item from rhs into collection on lhs
+    def <<(item)
+      insert(item)
+    end
+
+    private
+    def check_enumerable(o)
+      unless (o.nil? or o.class < Enumerable) and o.respond_to? 'each'
+        raise BudTypeError, "Attempt to merge non-enumerable type into BudCollection"
+      end
+    end
+
+    # Assign self a schema, by hook or by crook. If +o+ is schemaless *and*
+    # empty, will leave @schema as is.
+    private
+    def establish_schema(o)
+      # use o's schema if available
+      deduce_schema(o) if @schema.nil?
+      # else use arity of first tuple of o
+      fit_schema(o.first.size) if @schema.nil? and not o.first.nil?
+      return @schema
+    end
+
+    # Copy over the schema from +o+ if available
+    private
+    def deduce_schema(o)
+      if @schema.nil? and o.class <= Bud::BudCollection and not o.schema.nil?
+        # must have been initialized with defer_schema==true. take schema from rhs
+        init_schema(o.schema)
+      end
+      # returns old state of @schema (nil) if nothing available
+      return @schema
+    end
+
+    # manufacture schema of the form [:c0, :c1, ...] with width = +arity+
+    private
+    def fit_schema(arity)
+      # rhs is schemaless. create schema from first tuple merged
+      init_schema((0..arity-1).map{|indx| ("c"+indx.to_s).to_sym})
+      return @schema
+    end
+
+    public
+    def merge(o, buf=@new_delta) # :nodoc: all
+      check_enumerable(o)
+      establish_schema(o) if @schema.nil?
+
+      delta = o.map do |i|
+        next if i.nil? or i == []
+        i = prep_tuple(i)
+        key_vals = @key_colnums.map{|k| i[k]}
+        if (old = self[key_vals])
+          raise_pk_error(i, old) if old != i
+        elsif (oldnew = self.new_delta[key_vals])
+          raise_pk_error(i, oldnew) if oldnew != i
+        else
+          buf[key_vals] = tuple_accessors(i)
+        end
+      end
+      return self
+    end
+
+    public
+    # instantaneously merge items from collection +o+ into +buf+
+    def <=(collection)
+      merge(collection)
+    end
+
+    # buffer items to be merged atomically at end of this timestep
+    public
+    def pending_merge(o) # :nodoc: all
+      check_enumerable(o)
+      deduce_schema(o)
+
+      o.each {|i| do_insert(i, @pending)}
+      return self
+    end
+
+    public
+    superator "<+" do |o|
+      pending_merge o
+    end
+
+    # Called at the end of each timestep: prepare the collection for the next
+    # timestep.
+    public
+    def tick # :nodoc: all
+      @storage = @pending
+      @pending = {}
+      raise BudError, "orphaned tuples in @delta for #{@tabname}" unless @delta.empty?
+      raise BudError, "orphaned tuples in @new_delta for #{@tabname}" unless @new_delta.empty?
+    end
+
+    # move deltas to storage, and new_deltas to deltas.
+    public
+    def tick_deltas # :nodoc: all
+      # assertion: intersect(@storage, @delta) == nil
+      @storage.merge!(@delta)
+      @delta = @new_delta
+      @new_delta = {}
+    end
+
+    private
+    def method_missing(sym, *args, &block)
+      @storage.send sym, *args, &block
+    end
+
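The buffers above give two merge timings: merge/<= makes right-hand tuples visible within the current timestep via the delta buffers, while pending_merge (what the <+ superator calls) parks them in @pending until tick promotes them. A sketch that drives the buffers by hand, outside the evaluator, purely to illustrate:

    require 'bud'

    buf = Bud::BudScratch.new('buf', nil, [:word])
    buf.insert(['now'])             # lands in @delta: visible this timestep
    buf.pending_merge([['later']])  # lands in @pending: deferred
    buf.include?(['now'])           # => true
    buf.include?(['later'])         # => false
    buf.tick_deltas                 # deltas promoted to storage within the timestep
    buf.tick                        # end of timestep: @pending becomes @storage
    buf.include?(['later'])         # => true ('now' is gone: scratches do not persist)
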
+    ######## aggs
+
+    private
+    # we only do grouping during first iteration of stratum. group and argagg should
+    # never deal with deltas. This assumes that stratification is done right, and it will
+    # be sensitive to bugs in the stratification!
+    def agg_in
+      if not respond_to?(:bud_instance) or bud_instance.nil? or bud_instance.stratum_first_iter
+        return self
+      else
+        return []
+      end
+    end
+
+
+    # a generalization of argmin/argmax to arbitrary exemplary aggregates.
+    # for each distinct value in the grouping key columns, return the item in that group
+    # that has the value of the exemplary aggregate +aggname+
+    public
+    def argagg(aggname, gbkey_cols, collection)
+      agg = bud_instance.send(aggname, nil)[0]
+      raise BudError, "#{aggname} not declared exemplary" unless agg.class <= Bud::ArgExemplary
+      keynames = gbkey_cols.map do |k|
+        if k.class == Symbol
+          k.to_s
+        else
+          k[2]
+        end
+      end
+      if collection.class == Symbol
+        colnum = self.send(collection.to_s)[1]
+      else
+        colnum = collection[1]
+      end
+      tups = agg_in.inject({}) do |memo,p|
+        pkey_cols = keynames.map{|n| p.send(n.to_sym)}
+        if memo[pkey_cols].nil?
+          memo[pkey_cols] = {:agg=>agg.send(:init, p[colnum]), :tups => [p]}
+        else
+          newval = agg.send(:trans, memo[pkey_cols][:agg], p[colnum])
+          if memo[pkey_cols][:agg] == newval
+            if agg.send(:tie, memo[pkey_cols][:agg], p[colnum])
+              memo[pkey_cols][:tups] << p
+            end
+          else
+            memo[pkey_cols] = {:agg=>newval, :tups=>[p]}
+          end
+        end
+        memo
+      end
+
+      finals = []
+      outs = tups.each_value do |t|
+        ties = t[:tups].map do |tie|
+          finals << tie
+        end
+      end
+
+      # merge directly into retval.storage, so that the temp tuples get picked up
+      # by the lhs of the rule
+      retval = BudScratch.new('argagg_temp', bud_instance, @given_schema)
+      retval.uniquify_tabname
+      retval.merge(finals, retval.storage)
+    end
+
+    # for each distinct value in the grouping key columns, return the item in that group
+    # that has the minimum value of the attribute +col+
+    public
+    def argmin(gbkey_cols, col)
+      argagg(:min, gbkey_cols, col)
+    end
+
+    # for each distinct value in the grouping key columns, return the item in that group
+    # that has the maximum value of the attribute +col+
+    public
+    def argmax(gbkey_cols, col)
+      argagg(:max, gbkey_cols, col)
+    end
+
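argmin and argmax keep, per group, the tuple(s) carrying the extreme value of one column (ties included, per the :tie handling in argagg). Since argagg looks the aggregate up on bud_instance, it only makes sense inside a Bud program; the fragment below is a hypothetical rule sketched from the signatures above, not taken from the gem's examples:

    # keep, for each (from, to) group, the cheapest path tuple(s)
    shortest <= path.argmin([path.from, path.to], path.cost)

    # highest bid per item
    best_bid <= bid.argmax([bid.item], bid.amount)
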
+    # form a collection containing all pairs of items in +self+ and items in
+    # +collection+
+    public
+    def *(collection)
+      bud_instance.join([self, collection])
+    end
+
+    # SQL-style grouping. first argument is an array of attributes to group by.
+    # Followed by a variable-length list of aggregates over attributes (e.g. +min(:x)+)
+    # Attributes can be referenced as symbols, or as +collection_name.attribute_name+
+    public
+    def group(key_cols, *aggpairs)
+      key_cols = [] if key_cols.nil?
+      keynames = key_cols.map do |k|
+        if k.class == Symbol
+          k
+        elsif k[2] and k[2].class == Symbol
+          k[2]
+        else
+          raise Bud::CompileError, "Invalid grouping key"
+        end
+      end
+      aggcolsdups = aggpairs.map{|ap| ap[0].class.name.split("::").last}
+      aggcols = []
+      aggcolsdups.each_with_index do |n, i|
+        aggcols << "#{n.downcase}_#{i}".to_sym
+      end
+      tups = agg_in.inject({}) do |memo, p|
+        pkey_cols = keynames.map{|n| p.send(n)}
+        memo[pkey_cols] = [] if memo[pkey_cols].nil?
+        aggpairs.each_with_index do |ap, i|
+          agg = ap[0]
+          if ap[1].class == Symbol
+            colnum = ap[1].nil? ? nil : self.send(ap[1].to_s)[1]
+          else
+            colnum = ap[1].nil? ? nil : ap[1][1]
+          end
+          colval = colnum.nil? ? nil : p[colnum]
+          if memo[pkey_cols][i].nil?
+            memo[pkey_cols][i] = agg.send(:init, colval)
+          else
+            memo[pkey_cols][i] = agg.send(:trans, memo[pkey_cols][i], colval)
+          end
+        end
+        memo
+      end
+
+      result = tups.inject([]) do |memo, t|
+        finals = []
+        aggpairs.each_with_index do |ap, i|
+          finals << ap[0].send(:final, t[1][i])
+        end
+        memo << t[0] + finals
+      end
+      if block_given?
+        result.map{|r| yield r}
+      else
+        # merge directly into retval.storage, so that the temp tuples get picked up
+        # by the lhs of the rule
+        if aggcols.empty?
+          schema = keynames
+        else
+          schema = { keynames => aggcols }
+        end
+        retval = BudScratch.new('temp_group', bud_instance, schema)
+        retval.uniquify_tabname
+        retval.merge(result, retval.storage)
+      end
+    end
+
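group mirrors SQL GROUP BY: the first argument lists grouping columns (nil or [] for a global aggregate) and the rest are aggregate expressions such as min(:x), resolved through the Bud instance. Another hypothetical rule fragment with invented collection names:

    # per-source minimum cost; result tuples are [from, min_0]
    min_cost <= path.group([:from], min(:cost))

    # global aggregate: empty grouping key
    cheapest <= path.group([], min(:cost))
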
+    alias reduce inject
+
+    # methods that work on nested collections (resulting from joins)
+
+
+    # given a * expression over n collections, form all combinations of items
+    # subject to an array of predicates, pred
+    # currently supports two options for equijoin predicates:
+    #   general form: an array of arrays capturing a conjunction of equiv. classes
+    #     [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
+    #   common form: a hash capturing equality of a column on left with one on right.
+    #     :col1 => :col2  (same as lefttable.col1 => righttable.col2)
+    public
+    def pairs(*preds, &blk)
+      setup_preds(preds) unless (preds.nil? or preds.empty?)
+      # given new preds, the state for the join will be different. set it up again.
+      setup_state if self.class <= Bud::BudJoin
+      blk.nil? ? self : map(&blk)
+    end
+
+    alias combos pairs
+
+    # the natural join: given a * expression over 2 collections, form all
+    # combinations of items that have the same values in matching fields
+    public
+    def matches(&blk)
+      preds = BudJoin::natural_preds(@bud_instance, @rels)
+      pairs(*preds, &blk)
+    end
+
+    # given a * expression over 2 collections, form all
+    # combinations of items that satisfy the predicates +preds+,
+    # and project only onto the attributes of the first collection
+    public
+    def lefts(*preds)
+      @localpreds = disambiguate_preds(preds)
+      map{ |l,r| l }
+    end
+
+    # given a * expression over 2 collections, form all
+    # combinations of items that satisfy the predicates +preds+,
+    # and project only onto the attributes of the second item
+    public
+    def rights(*preds)
+      @localpreds = disambiguate_preds(preds)
+      map{ |l,r| r }
+    end
+
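These join helpers operate on the result of a * expression: pairs takes equijoin predicates in the hash or array-of-arrays form described above, matches is the natural join, and lefts/rights project one side. A hypothetical rule fragment echoing the link/path example from the setup_accessors comment:

    # hash-form predicate: equate link.to with path.from, then extend the path
    path <= (link * path).pairs(link.to => path.from) do |l, p|
      [l.from, p.to, l.cost + p.cost]
    end

    # same predicate, but keep only the left-hand tuples
    joined_links <= (link * path).lefts(link.to => path.from)
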
+    # extract predicates on rellist[0] and recurse to right side with remainder
+    protected
+    def setup_preds(preds) # :nodoc: all
+      allpreds = disambiguate_preds(preds)
+      allpreds = canonicalize_localpreds(@rels, allpreds)
+      @localpreds = allpreds.reject { |p| p[0][0] != @rels[0].tabname }
+      otherpreds = allpreds - @localpreds
+      otherpreds = nil if otherpreds.empty?
+      unless otherpreds.nil?
+        unless @rels[1].class <= Bud::BudJoin
+          raise BudError, "join predicates don't match tables being joined: #{otherpreds.inspect}"
+        end
+        @rels[1].setup_preds(otherpreds)
+      end
+    end
+
+    protected
+    def disambiguate_preds(preds) # :nodoc: all
+      if preds.size == 1 and preds[0].class <= Hash
+        predarray = preds[0].map do |k,v|
+          if k.class != v.class
+            raise Bud::CompileError, "inconsistent attribute ref style #{k.inspect} => #{v.inspect}"
+          elsif k.class <= Array
+            [k,v]
+          elsif k.class <= Symbol
+            if @origrels and @origrels.length == 2
+              [find_attr_match(k, @origrels[0]), find_attr_match(v, @origrels[1])]
+            else
+              [find_attr_match(k), find_attr_match(v)]
+            end
+          else
+            raise Bud::CompileError, "invalid attribute ref in #{k.inspect} => #{v.inspect}"
+          end
+        end
+        return decomp_preds(*predarray)
+      else
+        return decomp_preds(*preds)
+      end
+    end
+
+    # find element in @origrels that contains this aname method
+    # if 2nd arg is non-nil, only check that collection.
+    # after found, return the result of invoking aname from chosen collection
+    protected
+    def find_attr_match(aname, rel=nil) # :nodoc: all
+      dorels = (rel.nil? ? @origrels : [rel])
+      match = nil
+      dorels.each do |r|
+        match ||= r if r.respond_to?(aname)
+        if r.respond_to?(aname) and match != r
+          raise Bud::CompileError, "ambiguous attribute :#{aname} in both #{match.tabname} and #{r.tabname}"
+        end
+      end
+      if match.nil?
+        raise Bud::CompileError, "attribute :#{aname} not found in any of #{dorels.map{|t| t.tabname}.inspect}"
+      end
+      match.send(aname)
+    end
+
+    protected
+    def decomp_preds(*preds) # :nodoc:all
+      # decompose each pred into a binary pred
+      return nil if preds.nil? or preds.empty? or preds == [nil]
+      newpreds = []
+      preds.each do |p|
+        p.each_with_index do |c, i|
+          newpreds << [p[i], p[i+1]] unless p[i+1].nil?
+        end
+      end
+      newpreds
+    end
+
+    protected
+    def canonicalize_localpreds(rel_list, preds) # :nodoc:all
+      return if preds.nil?
+      retval = preds.map do |p|
+        p[1][0] == rel_list[0].tabname ? p.reverse : p
+      end
+    end
+
+    public
+    def uniquify_tabname # :nodoc: all
+      # just append current number of microseconds
+      @tabname = (@tabname.to_s + Time.new.tv_usec.to_s).to_sym
+    end
+  end
+
+  class BudScratch < BudCollection # :nodoc: all
+  end
+
+  class BudTemp < BudCollection # :nodoc: all
+  end
+
+  class BudChannel < BudCollection
+    attr_reader :locspec_idx # :nodoc: all
+
+    def initialize(name, bud_instance, given_schema=nil) # :nodoc: all
+      given_schema ||= [:@address, :val]
+      the_schema, the_key_cols = parse_schema(given_schema)
+      the_val_cols = the_schema - the_key_cols
+      @locspec_idx = remove_at_sign!(the_key_cols)
+      @locspec_idx = remove_at_sign!(the_schema) if @locspec_idx.nil?
+      # If @locspec_idx is still nil, this is a loopback channel
+
+      # We mutate the hash key above, so we need to recreate the hash
+      # XXX: ugh, hacky
+      if given_schema.respond_to? :keys
+        given_schema = {the_key_cols => the_val_cols}
+      end
+
+      super(name, bud_instance, given_schema)
+    end
+
+    private
+    def remove_at_sign!(cols)
+      i = cols.find_index {|c| c.to_s[0].chr == '@'}
+      unless i.nil?
+        cols[i] = cols[i].to_s.delete('@').to_sym
+      end
+      return i
+    end
+
+    private
+    def split_locspec(l)
+      lsplit = l.split(':')
+      lsplit[1] = lsplit[1].to_i
+      return lsplit
+    end
+
+    public
+    def clone_empty
+      retval = super
+      retval.locspec_idx = @locspec_idx
+      retval
+    end
+
+    public
+    def tick # :nodoc: all
+      @sealed = false
+      @storage = {}
+      # Note that we do not clear @pending here: if the user inserted into the
+      # channel manually (e.g., via <~ from inside a sync_do block), we send the
+      # message at the end of the current tick.
+    end
+
+    public
+    def flush # :nodoc: all
+      ip = @bud_instance.ip
+      port = @bud_instance.port
+      each_from([@pending]) do |t|
+        if @locspec_idx.nil?
+          the_locspec = [ip, port]
+        else
+          the_locspec = split_locspec(t[@locspec_idx])
+          raise BudError, "'#{t[@locspec_idx]}', channel '#{@tabname}'" if the_locspec[0].nil? or the_locspec[1].nil? or the_locspec[0] == '' or the_locspec[1] == ''
+        end
+        @bud_instance.dsock.send_datagram([@tabname, t].to_msgpack, the_locspec[0], the_locspec[1])
+      end
+      @pending.clear
+    end
+
+    public
+    # project to the non-address fields
+    def payloads
+      if schema.size > 2
+        # bundle up each tuple's non-locspec fields into an array
+        retval = case @locspec_idx
+                 when 0 then self.pro{|t| t[1..(t.size-1)]}
+                 when (schema.size - 1) then self.pro{|t| t[0..(t.size-2)]}
+                 else self.pro{|t| t[0..(@locspec_idx-1)] + t[@locspec_idx+1..(t.size-1)]}
+                 end
+      else
+        # just return each tuple's non-locspec field value
+        retval = self.pro{|t| t[(@locspec_idx == 0) ? 1 : 0]}
+      end
+      return retval
+    end
+
+    superator "<~" do |o|
+      pending_merge o
+    end
+
+    superator "<+" do |o|
+      raise BudError, "Illegal use of <+ with channel '#{@tabname}' on left"
+    end
+
+    undef merge
+
+    def <=(o)
+      raise BudError, "Illegal use of <= with channel '#{@tabname}' on left"
+    end
+  end
+
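A channel marks exactly one schema column with a leading @ as its location specifier; flush ships each pending tuple to that 'host:port' address via send_datagram, and payloads projects the address column away on the receiving side. A hypothetical fragment (the channel declaration form is assumed from the Bloom state DSL elsewhere in this gem, and the collection names are invented):

    # state declaration: [:@address, :nick] is the key, :text the payload column
    channel :mcast, [:@address, :nick] => [:text]

    # sending: <~ buffers into @pending; flush sends at the end of the tick
    mcast <~ outbox.map {|o| ['127.0.0.1:12346', o.nick, o.text]}

    # receiving: payloads drops the address column, leaving [nick, text]
    inbox <= mcast.payloads
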
+  class BudTerminal < BudCollection # :nodoc: all
+    def initialize(name, given_schema, bud_instance, prompt=false) # :nodoc: all
+      super(name, bud_instance, given_schema)
+      @prompt = prompt
+    end
+
+    public
+    def start_stdin_reader # :nodoc: all
+      # XXX: Ugly hack. Rather than sending terminal data to EM via UDP,
+      # we should add the terminal file descriptor to the EM event loop.
+      @reader = Thread.new do
+        begin
+          while true
+            $stdout.print("#{tabname} > ") if @prompt
+            s = $stdin.gets
+            break if s.nil? # Hit EOF
+            s = s.chomp if s
+            tup = [s]
+
+            ip = @bud_instance.ip
+            port = @bud_instance.port
+            EventMachine::schedule do
+              socket = EventMachine::open_datagram_socket("127.0.0.1", 0)
+              socket.send_datagram([tabname, tup].to_msgpack, ip, port)
+            end
+          end
+        rescue
+          puts "terminal reader thread failed: #{$!}"
+          print $!.backtrace.join("\n")
+          exit
+        end
+      end
+    end
+
+    public
+    def flush #:nodoc: all
+      @pending.each do |p|
+        $stdout.puts p[0]
+        $stdout.flush
+      end
+      @pending = {}
+    end
+
+    public
+    def tick #:nodoc: all
+      @storage = {}
+      raise BudError unless @pending.empty?
+    end
+
+    undef merge
+
+    public
+    def <=(o) #:nodoc: all
+      raise BudError, "Illegal use of <= with terminal '#{@tabname}' on left"
+    end
+
+    superator "<~" do |o|
+      pending_merge(o)
+    end
+  end
+
+  class BudPeriodic < BudCollection # :nodoc: all
+  end
+
+  class BudTable < BudCollection # :nodoc: all
+    def initialize(name, bud_instance, given_schema) # :nodoc: all
+      super(name, bud_instance, given_schema)
+      @to_delete = []
+    end
+
+    public
+    def tick #:nodoc: all
+      @to_delete.each do |tuple|
+        keycols = @key_colnums.map{|k| tuple[k]}
+        if @storage[keycols] == tuple
+          @storage.delete keycols
+        end
+      end
+      @storage.merge! @pending
+      @to_delete = []
+      @pending = {}
+    end
+
+    superator "<-" do |o|
+      o.each do |tuple|
+        next if tuple.nil?
+        tuple = prep_tuple(tuple)
+        @to_delete << tuple
+      end
+    end
+  end
+
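A table's <- superator only records deletions; they take effect when tick runs at the timestep boundary, and unlike a scratch the surviving storage carries over. Driving a table by hand to illustrate (assuming the superator machinery pulled in by require 'bud'):

    t = Bud::BudTable.new('t', nil, [:name])
    t.insert(['alice'])
    t.insert(['bob'])
    t.tick_deltas             # deltas land in @storage
    t <- [['bob']]            # deferred delete
    t.include?(['bob'])       # => true for the rest of this timestep
    t.tick
    t.include?(['bob'])       # => false; ['alice'] persists
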
+  class BudReadOnly < BudScratch # :nodoc: all
+    superator "<+" do |o|
+      raise CompileError, "Illegal use of <+ with read-only collection '#{@tabname}' on left"
+    end
+    public
+    def merge(o) #:nodoc: all
+      raise CompileError, "Illegal use of <= with read-only collection '#{@tabname}' on left"
+    end
+  end
+
+  class BudFileReader < BudReadOnly # :nodoc: all
+    def initialize(name, filename, delimiter, bud_instance) # :nodoc: all
+      super(name, bud_instance, {[:lineno] => [:text]})
+      @filename = filename
+      @storage = {}
+      # NEEDS A TRY/RESCUE BLOCK
+      @fd = File.open(@filename, "r")
+      @linenum = 0
+    end
+
+    public
+    def each(&block) # :nodoc: all
+      while (l = @fd.gets)
+        t = tuple_accessors([@linenum, l.strip])
+        @linenum += 1
+        yield t
+      end
+    end
+  end
+end
+
+module Enumerable
+  public
+  # monkeypatch to Enumerable to rename collections and their schemas
+  def rename(new_tabname, new_schema=nil)
+    budi = (respond_to?(:bud_instance)) ? bud_instance : nil
+    if new_schema.nil? and respond_to?(:schema)
+      new_schema = schema
+    end
+    scr = Bud::BudScratch.new(new_tabname.to_s, budi, new_schema)
+    scr.uniquify_tabname
+    scr.merge(self, scr.storage)
+    scr
+  end
+end
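The Enumerable#rename monkeypatch turns any plain enumerable (say, the array a map produced) back into a named, schema-carrying scratch so that column accessors work again. A small sketch:

    require 'bud'

    people = [['alice', 30], ['bob', 25]].rename(:people, [:name] => [:age])
    people.schema                  # => [:name, :age]
    people.each {|t| puts t.name }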