bud 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +9 -0
- data/README +30 -0
- data/bin/budplot +134 -0
- data/bin/budvis +201 -0
- data/bin/rebl +4 -0
- data/docs/README.md +13 -0
- data/docs/bfs.md +379 -0
- data/docs/bfs.raw +251 -0
- data/docs/bfs_arch.png +0 -0
- data/docs/bloom-loop.png +0 -0
- data/docs/bust.md +83 -0
- data/docs/cheat.md +291 -0
- data/docs/deploy.md +96 -0
- data/docs/diffs +181 -0
- data/docs/getstarted.md +296 -0
- data/docs/intro.md +36 -0
- data/docs/modules.md +112 -0
- data/docs/operational.md +96 -0
- data/docs/rebl.md +99 -0
- data/docs/ruby_hooks.md +19 -0
- data/docs/visualizations.md +75 -0
- data/examples/README +1 -0
- data/examples/basics/hello.rb +12 -0
- data/examples/basics/out +1103 -0
- data/examples/basics/out.new +856 -0
- data/examples/basics/paths.rb +51 -0
- data/examples/bust/README.md +9 -0
- data/examples/bust/bustclient-example.rb +23 -0
- data/examples/bust/bustinspector.html +135 -0
- data/examples/bust/bustserver-example.rb +18 -0
- data/examples/chat/README.md +9 -0
- data/examples/chat/chat.rb +45 -0
- data/examples/chat/chat_protocol.rb +8 -0
- data/examples/chat/chat_server.rb +29 -0
- data/examples/deploy/tokenring-ec2.rb +26 -0
- data/examples/deploy/tokenring-local.rb +17 -0
- data/examples/deploy/tokenring.rb +39 -0
- data/lib/bud/aggs.rb +126 -0
- data/lib/bud/bud_meta.rb +185 -0
- data/lib/bud/bust/bust.rb +126 -0
- data/lib/bud/bust/client/idempotence.rb +10 -0
- data/lib/bud/bust/client/restclient.rb +49 -0
- data/lib/bud/collections.rb +937 -0
- data/lib/bud/depanalysis.rb +44 -0
- data/lib/bud/deploy/countatomicdelivery.rb +50 -0
- data/lib/bud/deploy/deployer.rb +67 -0
- data/lib/bud/deploy/ec2deploy.rb +200 -0
- data/lib/bud/deploy/localdeploy.rb +41 -0
- data/lib/bud/errors.rb +15 -0
- data/lib/bud/graphs.rb +405 -0
- data/lib/bud/joins.rb +300 -0
- data/lib/bud/rebl.rb +314 -0
- data/lib/bud/rewrite.rb +523 -0
- data/lib/bud/rtrace.rb +27 -0
- data/lib/bud/server.rb +43 -0
- data/lib/bud/state.rb +108 -0
- data/lib/bud/storage/tokyocabinet.rb +170 -0
- data/lib/bud/storage/zookeeper.rb +178 -0
- data/lib/bud/stratify.rb +83 -0
- data/lib/bud/viz.rb +65 -0
- data/lib/bud.rb +797 -0
- metadata +330 -0
data/lib/bud.rb
ADDED
@@ -0,0 +1,797 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'eventmachine'
|
3
|
+
require 'msgpack'
|
4
|
+
require 'socket'
|
5
|
+
require 'superators'
|
6
|
+
require 'thread'
|
7
|
+
|
8
|
+
require 'bud/aggs'
|
9
|
+
require 'bud/bud_meta'
|
10
|
+
require 'bud/collections'
|
11
|
+
require 'bud/errors'
|
12
|
+
require 'bud/joins'
|
13
|
+
require 'bud/rtrace'
|
14
|
+
require 'bud/server'
|
15
|
+
require 'bud/state'
|
16
|
+
require 'bud/storage/tokyocabinet'
|
17
|
+
require 'bud/storage/zookeeper'
|
18
|
+
require 'bud/viz'
|
19
|
+
|
20
|
+
# We monkeypatch Module to add support for Bloom state and code declarations.
|
21
|
+
class Module

  # Import another module and bind it to a local qualifier symbol:
  # <tt>import MyModule => :m</tt>
  #
  # +spec+ must be a single-entry Hash mapping a Module to a Symbol; anything
  # else raises Bud::CompileError, as does re-using a qualifier that has
  # already been bound in this module.
  def import(spec)
    raise Bud::CompileError unless (spec.class <= Hash and spec.length == 1)
    mod, local_name = spec.first
    raise Bud::CompileError unless (mod.class <= Module and local_name.class <= Symbol)

    # To correctly expand qualified references to an imported module, we keep a
    # table with the local bind names of all the modules imported by this
    # module. To handle nested references (a.b.c.d etc.), the import table for
    # module X points to X's own nested import table.
    @bud_import_tbl ||= {}
    child_tbl = mod.bud_import_table
    raise Bud::CompileError if @bud_import_tbl.has_key? local_name
    @bud_import_tbl[local_name] = child_tbl.clone # XXX: clone needed?

    rewritten_mod_name = ModuleRewriter.do_import(self, mod, local_name)
    self.module_eval "include #{rewritten_mod_name}"
  end

  # The block of Bloom collection declarations. The block is installed as an
  # instance method whose name carries an auto-incrementing ID (see
  # Module.make_state_meth_name).
  def state(&block)
    meth_name = Module.make_state_meth_name(self)
    define_method(meth_name, &block)
  end

  # A Ruby block to be run before timestep 1. The block is installed as the
  # instance method <tt>__bootstrap__<ClassName></tt>, so a second bootstrap
  # block in the same module replaces the first.
  def bootstrap(&block)
    meth_name = "__bootstrap__#{Module.get_class_name(self)}".to_sym
    define_method(meth_name, &block)
  end

  # Bloom statements to be registered with the Bud runtime. The optional
  # +block_name+ (a Symbol) allows for multiple bloom blocks per module, and
  # for overriding. Raises Bud::CompileError if +block_name+ is not a Symbol
  # or if this module already defines a bloom block with that name.
  def bloom(block_name=nil, &block)
    # If no block name was specified, generate a unique name
    if block_name.nil?
      @block_id ||= 0
      block_name = "#{Module.get_class_name(self)}__#{@block_id.to_s}"
      @block_id += 1
    else
      unless block_name.class <= Symbol
        raise Bud::CompileError, "Bloom block names must be a symbol: #{block_name}"
      end
    end

    # Note that we don't encode the module name ("self") into the name of the
    # method. This allows named blocks to be overridden (via inheritance or
    # mixin) in the same way as normal Ruby methods.
    meth_name = "__bloom__#{block_name}"

    # Don't allow duplicate named bloom blocks to be defined within a single
    # module; this indicates a likely programmer error. instance_methods
    # returns Strings on Ruby 1.8 but Symbols on 1.9+, so compare by string
    # value rather than relying on either representation.
    if instance_methods(false).any? { |m| m.to_s == meth_name }
      raise Bud::CompileError, "Duplicate named bloom block: '#{block_name}' in #{self}"
    end
    define_method(meth_name.to_sym, &block)
  end

  # Returns this module's import table (local qualifier => nested import
  # table), creating an empty one on first use.
  def bud_import_table() #:nodoc: all
    @bud_import_tbl ||= {}
    @bud_import_tbl
  end

  # NB: "private" has no effect on methods defined with "def self."; the two
  # helpers below are invoked with an explicit receiver (Module.xxx) and are
  # therefore effectively public.
  private
  # Return a string with a version of the class name appropriate for embedding
  # into a method name. Annoyingly, if you define class X nested inside
  # class/module Y, X's class name is the string "Y::X". We don't want to define
  # method names with "::" in them, so just return "X" instead. Anonymous
  # classes and modules (whose name is nil on Ruby 1.9+ and "" on 1.8) yield
  # the empty string.
  def self.get_class_name(klass)
    klass.name.to_s.split("::").last.to_s
  end

  # State method blocks are named using an auto-incrementing counter. This is to
  # ensure that we can rediscover the possible dependencies between these blocks
  # after module import (see Bud#call_state_methods).
  def self.make_state_meth_name(klass)
    @state_meth_id ||= 0
    r = "__state#{@state_meth_id}__#{Module.get_class_name(klass)}".to_sym
    @state_meth_id += 1
    return r
  end
end
|
105
|
+
|
106
|
+
# The root Bud module. To cause an instance of Bud to begin executing, there are
|
107
|
+
# three main options:
|
108
|
+
#
|
109
|
+
# 1. Synchronously. To do this, instantiate your program and then call tick()
|
110
|
+
# one or more times; each call evaluates a single Bud timestep. Note that in
|
111
|
+
# this mode, network communication (channels) and timers cannot be used. This
|
112
|
+
# is mostly intended for "one-shot" programs that compute a single result and
|
113
|
+
# then terminate.
|
114
|
+
# 2. In a separate thread in the foreground. To do this, instantiate your
|
115
|
+
# program and then call run_fg(). The Bud interpreter will then run, handling
|
116
|
+
# network events and evaluating new timesteps as appropriate. The run_fg()
|
117
|
+
# method will not return unless an error occurs.
|
118
|
+
# 3. In a separate thread in the background. To do this, instantiate your
|
119
|
+
# program and then call run_bg(). The Bud interpreter will run
|
120
|
+
# asynchronously. To interact with Bud (e.g., insert additional data or
|
121
|
+
# inspect the state of a Bud collection), use the sync_do and async_do
|
122
|
+
# methods. To shutdown the Bud interpreter, use stop_bg().
|
123
|
+
#
|
124
|
+
# Most programs should use method #3.
|
125
|
+
#
|
126
|
+
# :main: Bud
|
127
|
+
module Bud
|
128
|
+
attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
|
129
|
+
attr_reader :dsock
|
130
|
+
attr_reader :tables, :ip, :port
|
131
|
+
attr_reader :stratum_first_iter
|
132
|
+
attr_accessor :lazy # This can be changed on-the-fly by REBL
|
133
|
+
|
134
|
+
# options to the bud runtime are passed in a hash, with the following keys
|
135
|
+
# * network configuration
|
136
|
+
# * <tt>:ip</tt> IP address string for this instance
|
137
|
+
# * <tt>:port</tt> port number for this instance
|
138
|
+
# * <tt>:ext_ip</tt> IP address at which external nodes can contact this instance
|
139
|
+
# * <tt>:ext_port</tt> port number to go with :ext_ip
|
140
|
+
# * <tt>:bust_port</tt> port number for the restful http messages
|
141
|
+
# * operating system interaction
|
142
|
+
# * <tt>:read_stdin</tt> if true, captures stdin via the stdio collection
|
143
|
+
# * <tt>:no_signal_handlers</tt> if true, runtime ignores SIGINT and SIGTERM
|
144
|
+
# * tracing and output
|
145
|
+
# * <tt>:quiet</tt> if true, suppress certain messages
|
146
|
+
# * <tt>:trace</tt> if true, generate budvis outputs
|
147
|
+
# * <tt>:rtrace</tt> if true, generate budplot outputs
|
148
|
+
# * <tt>:dump_rewrite</tt> if true, dump results of internal rewriting of Bloom code to a file
|
149
|
+
# * controlling execution
|
150
|
+
# * <tt>:lazy</tt> if true, prevents runtime from ticking except on external calls to +tick+
|
151
|
+
# * <tt>:tag</tt> a name for this instance, suitable for display during tracing and visualization
|
152
|
+
# * storage configuration
|
153
|
+
# * <tt>:tc_dir</tt> filesystem directory to hold TokyoCabinet data stores
|
154
|
+
# * <tt>:tc_truncate</tt> if true, TokyoCabinet collections are opened with OTRUNC
|
155
|
+
def initialize(options={})
|
156
|
+
@tables = {}
|
157
|
+
@table_meta = []
|
158
|
+
@rewritten_strata = []
|
159
|
+
@channels = {}
|
160
|
+
@tc_tables = {}
|
161
|
+
@zk_tables = {}
|
162
|
+
@callbacks = {}
|
163
|
+
@callback_id = 0
|
164
|
+
@timers = []
|
165
|
+
@budtime = 0
|
166
|
+
@inbound = []
|
167
|
+
@done_bootstrap = false
|
168
|
+
@em_stopped = Queue.new
|
169
|
+
@joinstate = {} # joins are stateful, their state needs to be kept inside the Bud instance
|
170
|
+
|
171
|
+
# Setup options (named arguments), along with default values
|
172
|
+
@options = options
|
173
|
+
@lazy = @options[:lazy] ||= false
|
174
|
+
@options[:ip] ||= "localhost"
|
175
|
+
@ip = @options[:ip]
|
176
|
+
@options[:port] ||= 0
|
177
|
+
@options[:port] = @options[:port].to_i
|
178
|
+
# NB: If using an ephemeral port (specified by port = 0), the actual port
|
179
|
+
# number won't be known until we start EM
|
180
|
+
|
181
|
+
relatives = self.class.modules + [self.class]
|
182
|
+
relatives.each do |r|
|
183
|
+
Bud.rewrite_local_methods(r)
|
184
|
+
end
|
185
|
+
|
186
|
+
@declarations = ModuleRewriter.get_rule_defs(self.class)
|
187
|
+
|
188
|
+
init_state
|
189
|
+
|
190
|
+
# NB: Somewhat hacky. Dependency analysis and stratification are implemented
|
191
|
+
# by Bud programs, so in order for those programs to parse, we need the
|
192
|
+
# "Bud" class to have been defined first.
|
193
|
+
require 'bud/depanalysis'
|
194
|
+
require 'bud/stratify'
|
195
|
+
|
196
|
+
@viz = VizOnline.new(self) if @options[:trace]
|
197
|
+
@rtracer = RTrace.new(self) if @options[:rtrace]
|
198
|
+
|
199
|
+
# Get dependency info and determine stratification order.
|
200
|
+
unless self.class <= Stratification or self.class <= DepAnalysis
|
201
|
+
do_rewrite
|
202
|
+
end
|
203
|
+
|
204
|
+
# Load the rules as a closure. Each element of @strata is an array of
|
205
|
+
# lambdas, one for each rewritten rule in that stratum. Note that legacy Bud
|
206
|
+
# code (with user-specified stratification) assumes that @strata is a simple
|
207
|
+
# array, so we need to convert it before loading the rewritten strata.
|
208
|
+
@strata = []
|
209
|
+
@rule_src = []
|
210
|
+
declaration
|
211
|
+
@strata.each_with_index do |s,i|
|
212
|
+
raise BudError if s.class <= Array
|
213
|
+
@strata[i] = [s]
|
214
|
+
# Don't try to record source text for old-style rule blocks
|
215
|
+
@rule_src[i] = [""]
|
216
|
+
end
|
217
|
+
|
218
|
+
@rewritten_strata.each_with_index do |src_ary,i|
|
219
|
+
@strata[i] ||= []
|
220
|
+
@rule_src[i] ||= []
|
221
|
+
src_ary.each do |src|
|
222
|
+
@strata[i] << eval("lambda { #{src} }")
|
223
|
+
@rule_src[i] << src
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
private
|
229
|
+
|
230
|
+
# Rewrite methods defined in the given klass to expand module references and
|
231
|
+
# temp collections. Imported modules are rewritten during the import process;
|
232
|
+
# we rewrite the main Bud class and any included modules here. Note that we
|
233
|
+
# only rewrite each distinct Class once.
|
234
|
+
def self.rewrite_local_methods(klass)
  # Rewrites every method defined directly on +klass+ so that nested module
  # references and temp collections are expanded. Each named class is
  # processed at most once; anonymous classes are processed on every call.

  # An anonymous class has no stable name to key the "already done" table on:
  # its name is "" on Ruby 1.8 and nil on 1.9+. Treat both as anonymous so
  # that a nil key is never recorded (which would cause every later anonymous
  # class to be skipped).
  klass_name = klass.name
  anonymous = (klass_name.nil? or klass_name == "")

  @done_rewrite ||= {}
  return if (not anonymous) and @done_rewrite.has_key?(klass_name)

  u = Unifier.new
  ref_expander = NestedRefRewriter.new(klass.bud_import_table)
  tmp_expander = TempExpander.new
  r2r = Ruby2Ruby.new

  klass.instance_methods(false).each do |m|
    ast = ParseTree.translate(klass, m)
    ast = u.process(ast)
    ast = ref_expander.process(ast)
    ast = tmp_expander.process(ast)

    # Only re-evaluate the method if one of the expanders changed it
    if (ref_expander.did_work or tmp_expander.did_work)
      new_source = r2r.process(ast)
      klass.module_eval new_source # Replace previous method def
    end

    # Reset the per-method flags before looking at the next method
    ref_expander.did_work = false
    tmp_expander.did_work = false
  end

  # If we found any temp statements in the klass's rule blocks, add a state
  # block with declarations for the corresponding temp collections.
  s = tmp_expander.get_state_meth(klass)
  if s
    state_src = r2r.process(s)
    klass.module_eval(state_src)
  end

  # Always rewrite anonymous classes
  @done_rewrite[klass_name] = true unless anonymous
end
|
269
|
+
|
270
|
+
# Invoke all the user-defined state blocks and initialize builtin state.
|
271
|
+
def init_state
|
272
|
+
builtin_state
|
273
|
+
call_state_methods
|
274
|
+
end
|
275
|
+
|
276
|
+
# If module Y is a parent module of X, X's state block might reference state
|
277
|
+
# defined in Y. Hence, we want to invoke Y's state block first. However, when
|
278
|
+
# "import" and "include" are combined, we can't use the inheritance hierarchy
|
279
|
+
# to do this. When a module Z is imported, the import process inlines all the
|
280
|
+
# modules Z includes into a single module. Hence, we can no longer rely on the
|
281
|
+
# inheritance hierarchy to respect dependencies between modules. To fix this,
|
282
|
+
# we add an increasing ID to each state block's method name (assigned
|
283
|
+
# according to the order in which the state blocks are defined); we then sort
|
284
|
+
# by this order before invoking the state blocks.
|
285
|
+
def call_state_methods
|
286
|
+
meth_map = {} # map from ID => [Method]
|
287
|
+
self.class.instance_methods.each do |m|
|
288
|
+
next unless m =~ /^__state(\d+)__/
|
289
|
+
id = Regexp.last_match.captures.first.to_i
|
290
|
+
meth_map[id] ||= []
|
291
|
+
meth_map[id] << self.method(m)
|
292
|
+
end
|
293
|
+
|
294
|
+
meth_map.keys.sort.each do |i|
|
295
|
+
meth_map[i].each {|m| m.call}
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
# Evaluate all bootstrap blocks
|
300
|
+
def do_bootstrap
|
301
|
+
self.class.ancestors.reverse.each do |anc|
|
302
|
+
anc.instance_methods(false).each do |m|
|
303
|
+
if /^__bootstrap__/.match m
|
304
|
+
self.method(m.to_sym).call
|
305
|
+
end
|
306
|
+
end
|
307
|
+
end
|
308
|
+
bootstrap
|
309
|
+
|
310
|
+
@done_bootstrap = true
|
311
|
+
end
|
312
|
+
|
313
|
+
def do_rewrite
|
314
|
+
@meta_parser = BudMeta.new(self, @declarations)
|
315
|
+
@rewritten_strata = @meta_parser.meta_rewrite
|
316
|
+
end
|
317
|
+
|
318
|
+
public
|
319
|
+
|
320
|
+
########### give empty defaults for these
|
321
|
+
def declaration # :nodoc: all
|
322
|
+
end
|
323
|
+
def bootstrap # :nodoc: all
|
324
|
+
end
|
325
|
+
|
326
|
+
########### metaprogramming support for ruby and for rule rewriting
|
327
|
+
# helper to define instance methods
|
328
|
+
def singleton_class # :nodoc: all
|
329
|
+
class << self; self; end
|
330
|
+
end
|
331
|
+
|
332
|
+
######## methods for controlling execution
|
333
|
+
|
334
|
+
# Run Bud in the background (in a different thread). This means that the Bud
|
335
|
+
# interpreter will run asynchronously from the caller, so care must be used
|
336
|
+
# when interacting with it. For example, it is not safe to directly examine
|
337
|
+
# Bud collections from the caller's thread (see async_do and sync_do).
|
338
|
+
#
|
339
|
+
# This instance of Bud will continue to execute until stop_bg is called.
|
340
|
+
def run_bg
|
341
|
+
start_reactor
|
342
|
+
# Wait for Bud to start up before returning
|
343
|
+
schedule_and_wait do
|
344
|
+
start_bud
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
348
|
+
# Run Bud in the "foreground" -- the caller's thread will be used to run the
|
349
|
+
# Bud interpreter. This means this method won't return unless an error
|
350
|
+
# occurs. It is often more useful to run Bud asynchronously -- see run_bg.
|
351
|
+
#
|
352
|
+
# Note that run_fg cannot be invoked if run_bg has already been called in the
|
353
|
+
# same Ruby process.
|
354
|
+
#
|
355
|
+
# Execution proceeds in time ticks, a la Dedalus.
|
356
|
+
# * Within each tick there may be multiple strata.
|
357
|
+
# * Within each stratum we do multiple semi-naive iterations.
|
358
|
+
def run_fg
|
359
|
+
raise BudError if EventMachine::reactor_running?
|
360
|
+
|
361
|
+
EventMachine::run {
|
362
|
+
start_bud
|
363
|
+
}
|
364
|
+
end
|
365
|
+
|
366
|
+
# Shutdown a Bud instance that is running asynchronously. This method blocks
|
367
|
+
# until Bud has been shutdown. If +stop_em+ is true, the EventMachine event
|
368
|
+
# loop is also shutdown; this will interfere with the execution of any other
|
369
|
+
# Bud instances in the same process (as well as anything else that happens to
|
370
|
+
# use EventMachine).
|
371
|
+
def stop_bg(stop_em=false)
|
372
|
+
if stop_em
|
373
|
+
schedule_shutdown(true)
|
374
|
+
# Wait until EM has completely shutdown before we return.
|
375
|
+
@em_stopped.pop
|
376
|
+
else
|
377
|
+
schedule_and_wait do
|
378
|
+
do_shutdown(false)
|
379
|
+
end
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
383
|
+
# Given a block, evaluate that block inside the background Ruby thread at some
|
384
|
+
# time in the future. Because the block is evaluated inside the background Ruby
|
385
|
+
# thread, the block can safely examine Bud state. Naturally, this method can
|
386
|
+
# only be used when Bud is running in the background. Note that calling
|
387
|
+
# sync_do blocks the caller until the block has been evaluated; for a
|
388
|
+
# non-blocking version, see async_do.
|
389
|
+
#
|
390
|
+
# Note that the block is invoked after one Bud timestep has ended but before
|
391
|
+
# the next timestep begins. Hence, synchronous accumulation (<=) into a Bud
|
392
|
+
# scratch collection in a callback is typically not a useful thing to do: when
|
393
|
+
# the next tick begins, the content of any scratch collections will be
|
394
|
+
# emptied, which includes anything inserted by a sync_do block using <=. To
|
395
|
+
# avoid this behavior, insert into scratches using <+.
|
396
|
+
def sync_do
|
397
|
+
schedule_and_wait do
|
398
|
+
yield if block_given?
|
399
|
+
# Do another tick, in case the user-supplied block inserted any data
|
400
|
+
tick
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
404
|
+
# Like sync_do, but does not block the caller's thread: the given callback
|
405
|
+
# will be invoked at some future time. Note that calls to async_do respect
|
406
|
+
# FIFO order.
|
407
|
+
def async_do
|
408
|
+
EventMachine::schedule do
|
409
|
+
yield if block_given?
|
410
|
+
# Do another tick, in case the user-supplied block inserted any data
|
411
|
+
tick
|
412
|
+
end
|
413
|
+
end
|
414
|
+
|
415
|
+
# Shutdown any persistent tables used by the current Bud instance. If you are
|
416
|
+
# running Bud via tick() and using `tctable` collections, you should call this
|
417
|
+
# after you're finished using Bud. Programs that use Bud via run_fg() or
|
418
|
+
# run_bg() don't need to call this manually.
|
419
|
+
def close_tables
|
420
|
+
@tables.each_value do |t|
|
421
|
+
t.close
|
422
|
+
end
|
423
|
+
end
|
424
|
+
|
425
|
+
# Register a new callback. Given the name of a Bud collection, this method
|
426
|
+
# arranges for the given block to be invoked at the end of any tick in which
|
427
|
+
# any tuples have been inserted into the specified collection. The code block
|
428
|
+
# is passed the collection as an argument; this provides a convenient way to
|
429
|
+
# examine the tuples inserted during that fixpoint. (Note that because the Bud
|
430
|
+
# runtime is blocked while the callback is invoked, it can also examine any
|
431
|
+
# other Bud state freely.)
|
432
|
+
#
|
433
|
+
# Note that registering callbacks on persistent collections (e.g., tables and
|
434
|
+
# tctables) is probably not a wise thing to do: as long as any tuples are
|
435
|
+
# stored in the collection, the callback will be invoked at the end of every
|
436
|
+
# tick.
|
437
|
+
def register_callback(tbl_name, &block)
|
438
|
+
# We allow callbacks to be added before or after EM has been started. To
|
439
|
+
# simplify matters, we start EM if it hasn't been started yet.
|
440
|
+
start_reactor
|
441
|
+
cb_id = nil
|
442
|
+
schedule_and_wait do
|
443
|
+
unless @tables.has_key? tbl_name
|
444
|
+
raise Bud::BudError, "No such table: #{tbl_name}"
|
445
|
+
end
|
446
|
+
|
447
|
+
raise Bud::BudError if @callbacks.has_key? @callback_id
|
448
|
+
@callbacks[@callback_id] = [tbl_name, block]
|
449
|
+
cb_id = @callback_id
|
450
|
+
@callback_id += 1
|
451
|
+
end
|
452
|
+
return cb_id
|
453
|
+
end
|
454
|
+
|
455
|
+
# Unregister the callback that has the given ID.
|
456
|
+
def unregister_callback(id)
|
457
|
+
schedule_and_wait do
|
458
|
+
raise Bud::BudError unless @callbacks.has_key? id
|
459
|
+
@callbacks.delete(id)
|
460
|
+
end
|
461
|
+
end
|
462
|
+
|
463
|
+
# sync_callback supports synchronous interaction with Bud modules. The caller
|
464
|
+
# supplies the name of an input collection, a set of tuples to insert, and an
|
465
|
+
# output collection on which to 'listen.' The call blocks until tuples are
|
466
|
+
# inserted into the output collection: these are returned to the caller.
|
467
|
+
def sync_callback(in_tbl, tupleset, out_tbl)
  # Inserts +tupleset+ into the collection named +in_tbl+ (skipped when
  # +in_tbl+ is nil), then blocks until the callback registered on +out_tbl+
  # fires, and returns the contents of +out_tbl+ (as an array) that the
  # callback observed.
  q = Queue.new
  cb = register_callback(out_tbl) do |c|
    q.push c.to_a
  end

  begin
    unless in_tbl.nil?
      sync_do {
        t = @tables[in_tbl]
        raise Bud::BudError, "No such table: #{in_tbl}" if t.nil?
        # Channels and ZooKeeper tables only accept asynchronous (<~) inserts;
        # everything else gets a deferred (<+) insert.
        if t.class <= Bud::BudChannel or t.class <= Bud::BudZkTable
          t <~ tupleset
        else
          t <+ tupleset
        end
      }
    end
    q.pop
  ensure
    # Always remove the temporary callback, even if the insert raised;
    # otherwise it would stay registered and keep firing on every tick.
    unregister_callback(cb)
  end
end
|
486
|
+
|
487
|
+
# a common special case for sync_callback: block on a delta to a table.
|
488
|
+
def delta(out_tbl)
|
489
|
+
sync_callback(nil, nil, out_tbl)
|
490
|
+
end
|
491
|
+
|
492
|
+
private
|
493
|
+
|
494
|
+
def invoke_callbacks
|
495
|
+
@callbacks.each_value do |cb|
|
496
|
+
tbl_name, block = cb
|
497
|
+
tbl = @tables[tbl_name]
|
498
|
+
unless tbl.empty?
|
499
|
+
block.call(tbl)
|
500
|
+
end
|
501
|
+
end
|
502
|
+
end
|
503
|
+
|
504
|
+
def start_reactor
|
505
|
+
return if EventMachine::reactor_running?
|
506
|
+
|
507
|
+
EventMachine::error_handler do |e|
|
508
|
+
puts "Unexpected Bud error: #{e.inspect}"
|
509
|
+
puts e.backtrace.join("\n")
|
510
|
+
raise e
|
511
|
+
end
|
512
|
+
|
513
|
+
# Block until EM has successfully started up.
|
514
|
+
q = Queue.new
|
515
|
+
# This thread helps us avoid race conditions on the start and stop of
|
516
|
+
# EventMachine's event loop.
|
517
|
+
Thread.new do
|
518
|
+
EventMachine.run do
|
519
|
+
q << true
|
520
|
+
end
|
521
|
+
# Executed only after EventMachine::stop_event_loop is done
|
522
|
+
@em_stopped << true
|
523
|
+
end
|
524
|
+
# Block waiting for EM's event loop to start up.
|
525
|
+
q.pop
|
526
|
+
end
|
527
|
+
|
528
|
+
# Schedule a block to be evaluated by EventMachine in the future, and
|
529
|
+
# block until this has happened.
|
530
|
+
def schedule_and_wait
|
531
|
+
# Try to defend against error situations in which EM has stopped, but we've
|
532
|
+
# been called nonetheless. This is racy, but better than nothing.
|
533
|
+
raise BudError, "EM not running" unless EventMachine::reactor_running?
|
534
|
+
|
535
|
+
q = Queue.new
|
536
|
+
EventMachine::schedule do
|
537
|
+
ret = false
|
538
|
+
begin
|
539
|
+
yield
|
540
|
+
rescue Exception
|
541
|
+
ret = $!
|
542
|
+
end
|
543
|
+
q.push(ret)
|
544
|
+
end
|
545
|
+
|
546
|
+
resp = q.pop
|
547
|
+
raise resp if resp
|
548
|
+
end
|
549
|
+
|
550
|
+
def do_shutdown(stop_em=false)
  # Cancels all timers, closes every table and the datagram socket, and, if
  # +stop_em+ is true, also stops the EventMachine event loop.
  @timers.each do |t|
    t.cancel
  end
  close_tables
  # @dsock is only assigned by do_start_server, so it is still nil if this
  # instance was never started (e.g. the reactor was started by
  # register_callback only). There is nothing to close in that case.
  @dsock.close_connection unless @dsock.nil?
  # Note that this affects anyone else in the same process who happens to be
  # using EventMachine! This is also a non-blocking call; to block until EM
  # has completely shutdown, we use the @em_stopped queue.
  EventMachine::stop_event_loop if stop_em
end
|
561
|
+
|
562
|
+
# Schedule a "graceful" shutdown for a future EM tick. If EM is not currently
|
563
|
+
# running, shutdown immediately.
|
564
|
+
def schedule_shutdown(stop_em=false)
|
565
|
+
if EventMachine::reactor_running?
|
566
|
+
EventMachine::schedule do
|
567
|
+
do_shutdown(stop_em)
|
568
|
+
end
|
569
|
+
else
|
570
|
+
do_shutdown(stop_em)
|
571
|
+
end
|
572
|
+
end
|
573
|
+
|
574
|
+
def start_bud
  # Starts this Bud instance inside the EventMachine reactor: installs signal
  # handlers, opens the server socket, starts timers, the stdin reader and
  # ZooKeeper watchers, and runs the first tick unless the instance is lazy.
  # Must be called from the reactor thread; raises BudError otherwise.
  raise BudError unless EventMachine::reactor_thread?

  # If we get SIGINT or SIGTERM, shutdown gracefully
  unless @options[:no_signal_handlers]
    ["INT", "TERM"].each do |sig|
      Signal.trap(sig) do
        schedule_shutdown(true)
      end
    end
  end

  do_start_server

  # Initialize periodics
  @periodics.each do |p|
    @timers << set_periodic_timer(p.pername, p.ident, p.period)
  end

  # Arrange for Bud to read from stdin if enabled. Note that we can't do this
  # earlier because we need to wait for EventMachine startup.
  @stdio.start_stdin_reader if @options[:read_stdin]
  @zk_tables.each_value {|t| t.start_watchers}

  # Compute a fixpoint; this will also invoke any bootstrap blocks.
  tick unless @lazy

  @rtracer.sleep if options[:rtrace]
end
|
604
|
+
|
605
|
+
def do_start_server
|
606
|
+
@dsock = EventMachine::open_datagram_socket(@ip, @options[:port],
|
607
|
+
BudServer, self)
|
608
|
+
@port = Socket.unpack_sockaddr_in(@dsock.get_sockname)[0]
|
609
|
+
end
|
610
|
+
|
611
|
+
public
|
612
|
+
|
613
|
+
# Returns the ip and port of the Bud instance. In addition to the local IP
|
614
|
+
# and port, the user may define an external IP and/or port. The external
|
615
|
+
# version of each is returned if available. If not, the local version is
|
616
|
+
# returned. There are use cases for mixing and matching local and external.
|
617
|
+
# local_ip:external_port would be if you have local port forwarding, and
|
618
|
+
# external_ip:local_port would be if you're in a DMZ, for example
|
619
|
+
def ip_port
|
620
|
+
raise BudError, "ip_port called before port defined" if @port.nil? and @options[:port] == 0 and not @options[:ext_port]
|
621
|
+
|
622
|
+
ip = options[:ext_ip] ? "#{@options[:ext_ip]}" : "#{@ip}"
|
623
|
+
port = options[:ext_port] ? "#{@options[:ext_port]}" :
|
624
|
+
(@port.nil? ? "#{@options[:port]}" : "#{@port}")
|
625
|
+
ip + ":" + port
|
626
|
+
end
|
627
|
+
|
628
|
+
# Returns the internal IP and port. See ip_port
|
629
|
+
def int_ip_port
  # Returns "ip:port" for the local (internal) address. The actual port
  # (@port, assigned in do_start_server) is preferred; otherwise the port
  # requested in the options is used. If an ephemeral port (0) was requested
  # and no actual port has been assigned yet, there is nothing meaningful to
  # report, so raise BudError.
  raise BudError, "int_ip_port called before port defined" if @port.nil? and @options[:port] == 0
  @port.nil? ? "#{@ip}:#{@options[:port]}" : "#{@ip}:#{@port}"
end
|
633
|
+
|
634
|
+
# manually trigger one timestep of Bloom execution.
|
635
|
+
def tick
|
636
|
+
@tables.each_value do |t|
|
637
|
+
t.tick
|
638
|
+
end
|
639
|
+
|
640
|
+
@joinstate = {}
|
641
|
+
|
642
|
+
do_bootstrap unless @done_bootstrap
|
643
|
+
receive_inbound
|
644
|
+
|
645
|
+
@strata.each_with_index { |s,i| stratum_fixpoint(s, i) }
|
646
|
+
@viz.do_cards if @options[:trace]
|
647
|
+
do_flush
|
648
|
+
invoke_callbacks
|
649
|
+
@budtime += 1
|
650
|
+
end
|
651
|
+
|
652
|
+
private
|
653
|
+
|
654
|
+
# Builtin BUD state (predefined collections). We could define this using the
|
655
|
+
# standard "state" syntax, but we want to ensure that builtin state is
|
656
|
+
# initialized before user-defined state.
|
657
|
+
def builtin_state
|
658
|
+
channel :localtick, [:col1]
|
659
|
+
@stdio = terminal :stdio
|
660
|
+
@periodics = table :periodics_tbl, [:pername] => [:ident, :period]
|
661
|
+
|
662
|
+
# for BUD reflection
|
663
|
+
table :t_rules, [:rule_id] => [:lhs, :op, :src]
|
664
|
+
table :t_depends, [:rule_id, :lhs, :op, :body] => [:nm]
|
665
|
+
table :t_depends_tc, [:head, :body, :via, :neg, :temporal]
|
666
|
+
table :t_provides, [:interface] => [:input]
|
667
|
+
table :t_underspecified, t_provides.schema
|
668
|
+
table :t_stratum, [:predicate] => [:stratum]
|
669
|
+
table :t_cycle, [:predicate, :via, :neg, :temporal]
|
670
|
+
end
|
671
|
+
|
672
|
+
# Handle any inbound tuples off the wire and then clear. Received messages are
|
673
|
+
# placed directly into the storage of the appropriate local channel.
|
674
|
+
def receive_inbound
|
675
|
+
@inbound.each do |msg|
|
676
|
+
# puts "dequeueing tuple #{msg[1].inspect} into #{msg[0]} @ #{ip_port}"
|
677
|
+
tables[msg[0].to_sym] << msg[1]
|
678
|
+
end
|
679
|
+
@inbound = []
|
680
|
+
end
|
681
|
+
|
682
|
+
# "Flush" any tuples that need to be flushed. This does two things:
|
683
|
+
# 1. Emit outgoing tuples in channels and ZK tables.
|
684
|
+
# 2. Commit to disk any changes made to on-disk tables.
|
685
|
+
def do_flush
|
686
|
+
@channels.each { |c| @tables[c[0]].flush }
|
687
|
+
@zk_tables.each_value { |t| t.flush }
|
688
|
+
@tc_tables.each_value { |t| t.flush }
|
689
|
+
end
|
690
|
+
|
691
|
+
def stratum_fixpoint(strat, strat_num)
  # This routine uses semi-naive evaluation to compute
  # a fixpoint of the rules in strat.
  #
  # +strat+ is the array of rule lambdas for one stratum and +strat_num+ is
  # that stratum's index (used to look up rule source text for error
  # messages).
  #
  # As described in lib/collections.rb, each collection has three
  # sub-collections of note here:
  #   @storage: the "main" storage of tuples
  #   @delta: tuples that should be used to drive derivation of new facts
  #   @new_delta: a place to store newly-derived facts
  #
  # The first time through this loop we mark @stratum_first_iter=true,
  # which tells the Join::each code to join up all its @storage subcollections
  # to start. In subsequent iterations the join code uses some table's @delta
  # to ensure that only new tuples are derived.
  #
  # Note that calling "each" on a non-Join collection will iterate through both
  # storage and delta.
  #
  # At the end of each iteration of this loop we transition:
  # - @delta tuples are merged into @storage
  # - @new_delta tuples are moved into @delta
  # - @new_delta is set to empty
  #
  # XXX as a performance optimization, it would be nice to bypass the delta
  # tables for any preds that don't participate in a rhs Join -- in that
  # case there's pointless extra tuple movement letting tuples "graduate"
  # through @new_delta and @delta.

  # In semi-naive, the first iteration should join up tables
  # on their storage fields; subsequent iterations do the
  # delta-joins only. The stratum_first_iter field here distinguishes
  # these cases.
  @stratum_first_iter = true
  begin
    strat.each_with_index do |r,i|
      begin
        r.call
      rescue SignalException, SystemExit
        # Process-control exceptions (Ctrl-C, exit) are not rule failures;
        # let them propagate untouched instead of disguising them as BudError.
        raise
      rescue Exception => e
        # Don't report source text for certain rules (old-style rule blocks)
        rule_src = @rule_src[strat_num][i]
        src_msg = ""
        unless rule_src == ""
          src_msg = "\nRule: #{rule_src}"
        end

        # Preserve the class of BudError (sub)class exceptions; wrap
        # everything else in a plain BudError.
        new_e = e
        unless new_e.class <= BudError
          new_e = BudError
        end
        raise new_e, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
      end
    end
    @stratum_first_iter = false
    # XXX this next line is inefficient.
    # we could call tick_deltas only on predicates in this stratum.
    # but it's not easy right now (??) to pull out tables in a given stratum
    @tables.each{|name,coll| coll.tick_deltas}
  end while not @tables.all?{|name,coll| coll.new_delta.empty? and coll.delta.empty?}
end
|
750
|
+
|
751
|
+
####### Joins
|
752
|
+
def wrap_map(j, &blk)
|
753
|
+
if blk.nil?
|
754
|
+
return j
|
755
|
+
else
|
756
|
+
return j.map(&blk)
|
757
|
+
end
|
758
|
+
end
|
759
|
+
|
760
|
+
public
|
761
|
+
def joinstate # :nodoc: all
|
762
|
+
@joinstate
|
763
|
+
end
|
764
|
+
|
765
|
+
public
|
766
|
+
def join(collections, *preds, &blk) # :nodoc: all
|
767
|
+
# since joins are stateful, we want to allocate them once and store in this Bud instance
|
768
|
+
# we ID them on their tablenames, preds, and block
|
769
|
+
return wrap_map(BudJoin.new(collections, self, preds), &blk)
|
770
|
+
end
|
771
|
+
|
772
|
+
def natjoin(collections, &blk) # :nodoc: all
|
773
|
+
# for all pairs of relations, add predicates on matching column names
|
774
|
+
preds = BudJoin::natural_preds(self, collections)
|
775
|
+
join(collections, *preds, &blk)
|
776
|
+
end
|
777
|
+
|
778
|
+
# left-outer-join syntax to be used in rhs of Bloom statements.
|
779
|
+
# first argument an array of 2 collections, second argument an array of predicates (as in Bud::BudCollection.pairs)
|
780
|
+
def leftjoin(collections, *preds, &blk)
|
781
|
+
return wrap_map(BudLeftJoin.new(collections, self, preds), &blk)
|
782
|
+
end
|
783
|
+
|
784
|
+
private
|
785
|
+
|
786
|
+
######## ids and timers
|
787
|
+
def gen_id
|
788
|
+
Time.new.to_i.to_s << rand.to_s
|
789
|
+
end
|
790
|
+
|
791
|
+
def set_periodic_timer(name, id, period)
|
792
|
+
EventMachine::PeriodicTimer.new(period) do
|
793
|
+
@tables[name] <+ [[id, Time.new.to_s]]
|
794
|
+
tick
|
795
|
+
end
|
796
|
+
end
|
797
|
+
end
|