bud 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +33 -16
- data/bin/budplot +42 -65
- data/bin/budtimelines +235 -0
- data/bin/budvis +24 -122
- data/bin/rebl +1 -0
- data/docs/README.md +21 -10
- data/docs/bfs.md +4 -6
- data/docs/c.html +251 -0
- data/docs/cheat.md +45 -30
- data/docs/deploy.md +26 -26
- data/docs/getstarted.md +6 -4
- data/docs/visualizations.md +43 -31
- data/examples/chat/chat.rb +4 -9
- data/examples/chat/chat_server.rb +1 -8
- data/examples/deploy/deploy_ip_port +1 -0
- data/examples/deploy/keys.rb +5 -0
- data/examples/deploy/tokenring-ec2.rb +9 -9
- data/examples/deploy/{tokenring-local.rb → tokenring-fork.rb} +3 -5
- data/examples/deploy/tokenring-thread.rb +15 -0
- data/examples/deploy/tokenring.rb +25 -17
- data/lib/bud/aggs.rb +87 -25
- data/lib/bud/bud_meta.rb +48 -31
- data/lib/bud/bust/bust.rb +16 -15
- data/lib/bud/collections.rb +207 -232
- data/lib/bud/depanalysis.rb +1 -0
- data/lib/bud/deploy/countatomicdelivery.rb +8 -20
- data/lib/bud/deploy/deployer.rb +16 -16
- data/lib/bud/deploy/ec2deploy.rb +34 -35
- data/lib/bud/deploy/forkdeploy.rb +90 -0
- data/lib/bud/deploy/threaddeploy.rb +38 -0
- data/lib/bud/graphs.rb +103 -199
- data/lib/bud/joins.rb +190 -41
- data/lib/bud/monkeypatch.rb +84 -0
- data/lib/bud/rebl.rb +8 -1
- data/lib/bud/rewrite.rb +152 -49
- data/lib/bud/server.rb +1 -0
- data/lib/bud/state.rb +24 -10
- data/lib/bud/storage/dbm.rb +170 -0
- data/lib/bud/storage/tokyocabinet.rb +5 -1
- data/lib/bud/stratify.rb +6 -7
- data/lib/bud/viz.rb +31 -17
- data/lib/bud/viz_util.rb +204 -0
- data/lib/bud.rb +271 -244
- data/lib/bud.rb.orig +806 -0
- metadata +43 -22
- data/docs/bfs.raw +0 -251
- data/docs/diffs +0 -181
- data/examples/basics/out +0 -1103
- data/examples/basics/out.new +0 -856
- data/lib/bud/deploy/localdeploy.rb +0 -53
data/lib/bud.rb.orig
ADDED
@@ -0,0 +1,806 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'eventmachine'
|
3
|
+
require 'msgpack'
|
4
|
+
require 'socket'
|
5
|
+
require 'superators'
|
6
|
+
require 'thread'
|
7
|
+
|
8
|
+
require 'bud/monkeypatch'
|
9
|
+
|
10
|
+
require 'bud/aggs'
|
11
|
+
require 'bud/bud_meta'
|
12
|
+
require 'bud/collections'
|
13
|
+
require 'bud/depanalysis'
|
14
|
+
require 'bud/deploy/forkdeploy'
|
15
|
+
require 'bud/deploy/threaddeploy'
|
16
|
+
require 'bud/errors'
|
17
|
+
require 'bud/joins'
|
18
|
+
require 'bud/rtrace'
|
19
|
+
require 'bud/server'
|
20
|
+
require 'bud/state'
|
21
|
+
require 'bud/storage/dbm'
|
22
|
+
require 'bud/storage/tokyocabinet'
|
23
|
+
require 'bud/storage/zookeeper'
|
24
|
+
require 'bud/stratify'
|
25
|
+
require 'bud/viz'
|
26
|
+
|
27
|
+
# Sentinel stored in @instance_id while a Bud instance is not running.
ILLEGAL_INSTANCE_ID = -1
# Period (in seconds) of the EventMachine timer that polls $got_shutdown_signal.
SIGNAL_CHECK_PERIOD = 0.2

# Process-wide state shared by all Bud instances.
$signal_lock = Mutex.new        # held while touching $bud_instances / $instance_id
$got_shutdown_signal = false    # set to true by the INT/TERM trap handlers
$signal_handler_setup = false   # consulted before installing the signal handlers
$instance_id = 0                # most recently assigned instance id
$bud_instances = {}             # Map from instance id => Bud instance
|
35
|
+
|
36
|
+
# The root Bud module. To cause an instance of Bud to begin executing, there are
|
37
|
+
# three main options:
|
38
|
+
#
|
39
|
+
# 1. Synchronously. To do this, instantiate your program and then call tick()
|
40
|
+
# one or more times; each call evaluates a single Bud timestep. Note that in
|
41
|
+
# this mode, network communication (channels) and timers cannot be used. This
|
42
|
+
# is mostly intended for "one-shot" programs that compute a single result and
|
43
|
+
# then terminate.
|
44
|
+
# 2. In a separate thread in the foreground. To do this, instantiate your
|
45
|
+
# program and then call run_fg(). The Bud interpreter will then run, handling
|
46
|
+
# network events and evaluating new timesteps as appropriate. The run_fg()
|
47
|
+
# method will not return unless an error occurs.
|
48
|
+
# 3. In a separate thread in the background. To do this, instantiate your
|
49
|
+
# program and then call run_bg(). The Bud interpreter will run
|
50
|
+
# asynchronously. To interact with Bud (e.g., insert additional data or
|
51
|
+
# inspect the state of a Bud collection), use the sync_do and async_do
|
52
|
+
# methods. To shutdown the Bud interpreter, use stop_bg().
|
53
|
+
#
|
54
|
+
# Most programs should use method #3.
|
55
|
+
#
|
56
|
+
# :main: Bud
|
57
|
+
module Bud
|
58
|
+
# Read-only views of runtime state.
attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz,
            :rtracer, :dsock, :tables, :ip, :port, :stratum_first_iter,
            :joinstate
# Writable settings.
attr_accessor :lazy # This can be changed on-the-fly by REBL
attr_accessor :stratum_collection_map
|
64
|
+
|
65
|
+
# options to the Bud runtime are passed in a hash, with the following keys
|
66
|
+
# * network configuration
|
67
|
+
# * <tt>:ip</tt> IP address string for this instance
|
68
|
+
# * <tt>:port</tt> port number for this instance
|
69
|
+
# * <tt>:ext_ip</tt> IP address at which external nodes can contact this instance
|
70
|
+
# * <tt>:ext_port</tt> port number to go with <tt>:ext_ip</tt>
|
71
|
+
# * <tt>:bust_port</tt> port number for the restful HTTP messages
|
72
|
+
# * operating system interaction
|
73
|
+
# * <tt>:stdin</tt> if non-nil, reading from the +stdio+ collection results in reading from this +IO+ handle
|
74
|
+
# * <tt>:stdout</tt> writing to the +stdio+ collection results in writing to this +IO+ handle; defaults to <tt>$stdout</tt>
|
75
|
+
# * <tt>:no_signal_handlers</tt> if true, runtime ignores +SIGINT+ and +SIGTERM+
|
76
|
+
# * tracing and output
|
77
|
+
# * <tt>:quiet</tt> if true, suppress certain messages
|
78
|
+
# * <tt>:trace</tt> if true, generate +budvis+ outputs
|
79
|
+
# * <tt>:rtrace</tt> if true, generate +budplot+ outputs
|
80
|
+
# * <tt>:dump_rewrite</tt> if true, dump results of internal rewriting of Bloom code to a file
|
81
|
+
# * controlling execution
|
82
|
+
# * <tt>:lazy</tt> if true, prevents runtime from ticking except on external calls to +tick+
|
83
|
+
# * <tt>:tag</tt> a name for this instance, suitable for display during tracing and visualization
|
84
|
+
# * storage configuration
|
85
|
+
# * <tt>:tc_dir</tt> filesystem directory to hold TokyoCabinet data stores
|
86
|
+
# * <tt>:tc_truncate</tt> if true, TokyoCabinet collections are opened with +OTRUNC+
|
87
|
+
# * deployment
|
88
|
+
# * <tt>:deploy</tt> enable deployment
|
89
|
+
# * <tt>:deploy_child_opts</tt> option hash to pass to deployed instances
|
90
|
+
def initialize(options={})
  # NOTE: the rule source below is compiled with eval inside this method, so
  # this method's local variable names are visible to that code; the local
  # names are deliberately left as they were.

  # Collection bookkeeping.
  @tables = {}
  @table_meta = []
  @rewritten_strata = []
  @channels = {}
  @tc_tables = {}
  @dbm_tables = {}
  @zk_tables = {}

  # Callback, timer and clock state.
  @callbacks = {}
  @callback_id = 0
  @shutdown_callbacks = []
  @post_shutdown_callbacks = []
  @timers = []
  @budtime = 0
  @inbound = []
  @done_bootstrap = false
  @joinstate = {} # joins are stateful, their state needs to be kept inside the Bud instance
  @instance_id = ILLEGAL_INSTANCE_ID # Assigned when we start running

  # Setup options (named arguments), along with default values. We work on a
  # copy so the caller's hash is not modified.
  @options = options.clone
  @lazy = @options[:lazy] ||= false
  @ip = @options[:ip] ||= "127.0.0.1"
  # NB: If using an ephemeral port (specified by port = 0), the actual port
  # number won't be known until we start EM
  @options[:port] = (@options[:port] || 0).to_i

  relatives = self.class.modules + [self.class]
  relatives.each { |r| Bud.rewrite_local_methods(r) }

  @declarations = ModuleRewriter.get_rule_defs(self.class)

  init_state

  @viz = VizOnline.new(self) if @options[:trace]
  @rtracer = RTrace.new(self) if @options[:rtrace]

  # Get dependency info and determine stratification order.
  do_rewrite unless (self.class <= Stratification || self.class <= DepAnalysis)

  # Load the rules as a closure. Each element of @strata is an array of
  # lambdas, one for each rewritten rule in that strata. Note that legacy Bud
  # code (with user-specified stratification) assumes that @strata is a simple
  # array, so we need to convert it before loading the rewritten strata.
  @strata = []
  @rule_src = []
  @rule_orig_src = []
  declaration
  @strata.each_with_index do |s,i|
    raise BudError if s.class <= Array
    @strata[i] = [s]
    # Don't try to record source text for old-style rule blocks
    @rule_src[i] = [""]
  end

  @rewritten_strata.each_with_index do |src_ary,i|
    @strata[i] ||= []
    @rule_src[i] ||= []
    @rule_orig_src[i] ||= []
    src_ary.each_with_index do |src, j|
      # Compile the rewritten rule text into a lambda bound to this instance.
      @strata[i] << eval("lambda { #{src} }")
      @rule_src[i] << src
      @rule_orig_src[i] << @no_attr_rewrite_strata[i][j]
    end
  end
end
|
162
|
+
|
163
|
+
private
|
164
|
+
|
165
|
+
# Rewrite methods defined in the given klass to expand module references and
|
166
|
+
# temp collections. Imported modules are rewritten during the import process;
|
167
|
+
# we rewrite the main Bud class and any included modules here. Note that we
|
168
|
+
# only rewrite each distinct Class once.
|
169
|
+
def self.rewrite_local_methods(klass)
  # klass - the class or module whose own instance methods are rewritten.
  #
  # Named classes are rewritten at most once; anonymous classes are rewritten
  # on every call. Module#name is "" for anonymous classes on Ruby 1.8 but nil
  # on 1.9 and later, so both spellings are treated as anonymous.
  @done_rewrite ||= {}
  klass_name = klass.name
  anonymous = (klass_name.nil? or klass_name == "")
  return if !anonymous and @done_rewrite.has_key?(klass_name)

  u = Unifier.new
  ref_expander = NestedRefRewriter.new(klass.bud_import_table)
  tmp_expander = TempExpander.new
  r2r = Ruby2Ruby.new

  klass.instance_methods(false).each do |m|
    ast = ParseTree.translate(klass, m)
    ast = u.process(ast)
    ast = ref_expander.process(ast)
    ast = tmp_expander.process(ast)

    # Only redefine the method when one of the expanders changed the AST.
    if (ref_expander.did_work or tmp_expander.did_work)
      new_source = r2r.process(ast)
      klass.module_eval new_source # Replace previous method def
    end

    # Reset the flags so the next method is judged on its own.
    ref_expander.did_work = false
    tmp_expander.did_work = false
  end

  # If we found any temp statements in the klass's rule blocks, add a state
  # block with declarations for the corresponding temp collections.
  s = tmp_expander.get_state_meth(klass)
  if s
    state_src = r2r.process(s)
    klass.module_eval(state_src)
  end

  # Always rewrite anonymous classes
  @done_rewrite[klass_name] = true unless anonymous
end
|
204
|
+
|
205
|
+
# Invoke all the user-defined state blocks and initialize builtin state.
|
206
|
+
def init_state
  # Builtin collections are created before any user-defined state block runs.
  builtin_state
  call_state_methods
end
|
210
|
+
|
211
|
+
# If module Y is a parent module of X, X's state block might reference state
|
212
|
+
# defined in Y. Hence, we want to invoke Y's state block first. However, when
|
213
|
+
# "import" and "include" are combined, we can't use the inheritance hierarchy
|
214
|
+
# to do this. When a module Z is imported, the import process inlines all the
|
215
|
+
# modules Z includes into a single module. Hence, we can no longer rely on the
|
216
|
+
# inheritance hierarchy to respect dependencies between modules. To fix this,
|
217
|
+
# we add an increasing ID to each state block's method name (assigned
|
218
|
+
# according to the order in which the state blocks are defined); we then sort
|
219
|
+
# by this order before invoking the state blocks.
|
220
|
+
def call_state_methods
  # Bucket the bound __state<ID>__ methods by their numeric ID, then invoke
  # the buckets in ascending ID order.
  by_id = Hash.new { |h, k| h[k] = [] } # map from ID => [Method]
  self.class.instance_methods.each do |meth_name|
    found = /^__state(\d+)__/.match(meth_name.to_s)
    next if found.nil?
    by_id[found[1].to_i] << self.method(meth_name)
  end

  by_id.keys.sort.each do |id|
    by_id[id].each { |meth| meth.call }
  end
end
|
233
|
+
|
234
|
+
# Evaluate all bootstrap blocks
|
235
|
+
def do_bootstrap
  # Walk the ancestor chain from the most distant ancestor down to our own
  # class, calling every method whose name starts with __bootstrap__.
  self.class.ancestors.reverse.each do |anc|
    anc.instance_methods(false).each do |m|
      next unless /^__bootstrap__/.match(m)
      self.method(m.to_sym).call
    end
  end
  # Finally run the plain bootstrap method.
  bootstrap

  @done_bootstrap = true
end
|
247
|
+
|
248
|
+
# Run the meta rewriter over this instance's declarations and remember both
# results it returns.
def do_rewrite
  @meta_parser = BudMeta.new(self, @declarations)
  rewritten, no_attr = @meta_parser.meta_rewrite
  @rewritten_strata = rewritten
  @no_attr_rewrite_strata = no_attr
end
|
252
|
+
|
253
|
+
public
|
254
|
+
|
255
|
+
########### give empty defaults for these
|
256
|
+
def declaration # :nodoc: all
  # Intentionally empty default.
end
|
258
|
+
def bootstrap # :nodoc: all
  # Intentionally empty default.
end
|
260
|
+
|
261
|
+
########### metaprogramming support for ruby and for rule rewriting
|
262
|
+
# helper to define instance methods
|
263
|
+
def singleton_class # :nodoc: all
  # Open this object's singleton class and hand it back.
  class << self
    self
  end
end
|
266
|
+
|
267
|
+
######## methods for controlling execution
|
268
|
+
|
269
|
+
# Run Bud in the background (in a different thread). This means that the Bud
|
270
|
+
# interpreter will run asynchronously from the caller, so care must be used
|
271
|
+
# when interacting with it. For example, it is not safe to directly examine
|
272
|
+
# Bud collections from the caller's thread (see async_do and sync_do).
|
273
|
+
#
|
274
|
+
# This instance of Bud will continue to execute until stop_bg is called.
|
275
|
+
def run_bg
  start_reactor
  # Wait for Bud to start up before returning
  schedule_and_wait { start_bud }
end
|
282
|
+
|
283
|
+
# Run Bud in the "foreground" -- the caller's thread will be used to run the
|
284
|
+
# Bud interpreter. This means this method won't return unless an error
|
285
|
+
# occurs. It is often more useful to run Bud asynchronously -- see run_bg.
|
286
|
+
def run_fg
  # If we're called from the EventMachine thread (and EM is running), blocking
  # the current thread would imply deadlocking ourselves.
  inside_em = (Thread.current == EventMachine::reactor_thread and EventMachine::reactor_running?)
  raise BudError, "Cannot invoke run_fg from inside EventMachine" if inside_em

  done = Queue.new
  # Note that this must be a post-shutdown callback: if this is the only
  # thread, then the program might exit after run_fg() returns. If run_fg()
  # blocked on a normal shutdown callback, the program might exit before the
  # other shutdown callbacks have a chance to run.
  post_shutdown { done.push(true) }

  run_bg
  # Block caller's thread until Bud has shutdown
  done.pop
end
|
306
|
+
|
307
|
+
# Shutdown a Bud instance that is running asynchronously. This method blocks
|
308
|
+
# until Bud has been shutdown. If +stop_em+ is true, the EventMachine event
|
309
|
+
# loop is also shutdown; this will interfere with the execution of any other
|
310
|
+
# Bud instances in the same process (as well as anything else that happens to
|
311
|
+
# use EventMachine).
|
312
|
+
def stop_bg(stop_em=false, do_shutdown_cb=true)
  # stop_em        - also stop the EventMachine loop and wait for its thread
  # do_shutdown_cb - forwarded to do_shutdown
  schedule_and_wait { do_shutdown(do_shutdown_cb) }
  return unless stop_em

  Bud.stop_em_loop
  EventMachine::reactor_thread.join
end
|
322
|
+
|
323
|
+
# Register a callback that will be invoked when this instance of Bud is
|
324
|
+
# shutting down.
|
325
|
+
def on_shutdown(&blk)
  # Start EM if not yet started
  start_reactor
  schedule_and_wait { @shutdown_callbacks << blk }
end
|
332
|
+
|
333
|
+
# Register a callback that will be invoked *after* this instance of Bud
|
334
|
+
# has been shutdown.
|
335
|
+
def post_shutdown(&blk)
  # Start EM if not yet started
  start_reactor
  schedule_and_wait { @post_shutdown_callbacks << blk }
end
|
342
|
+
|
343
|
+
# Given a block, evaluate that block inside the background Ruby thread at some
|
344
|
+
# time in the future. Because the block is evaluated inside the background Ruby
|
345
|
+
# thread, the block can safely examine Bud state. Naturally, this method can
|
346
|
+
# only be used when Bud is running in the background. Note that calling
|
347
|
+
# sync_do blocks the caller until the block has been evaluated; for a
|
348
|
+
# non-blocking version, see async_do.
|
349
|
+
#
|
350
|
+
# Note that the block is invoked after one Bud timestep has ended but before
|
351
|
+
# the next timestep begins. Hence, synchronous accumulation (<=) into a Bud
|
352
|
+
# scratch collection in a callback is typically not a useful thing to do: when
|
353
|
+
# the next tick begins, the content of any scratch collections will be
|
354
|
+
# emptied, which includes anything inserted by a sync_do block using <=. To
|
355
|
+
# avoid this behavior, insert into scratches using <+.
|
356
|
+
def sync_do
  schedule_and_wait do
    # The caller's block is optional.
    yield if block_given?
    # Do another tick, in case the user-supplied block inserted any data
    tick
  end
end
|
363
|
+
|
364
|
+
# Like sync_do, but does not block the caller's thread: the given callback
|
365
|
+
# will be invoked at some future time. Note that calls to async_do respect
|
366
|
+
# FIFO order.
|
367
|
+
def async_do
  # Hand the work to EventMachine and return without waiting for it.
  EventMachine::schedule do
    yield if block_given?
    # Do another tick, in case the user-supplied block inserted any data
    tick
  end
end
|
374
|
+
|
375
|
+
# Shutdown any persistent tables used by the current Bud instance. If you are
|
376
|
+
# running Bud via tick() and using +tctable+ collections, you should call this
|
377
|
+
# after you're finished using Bud. Programs that use Bud via run_fg() or
|
378
|
+
# run_bg() don't need to call this manually.
|
379
|
+
def close_tables
  # Every registered collection gets a close call.
  @tables.each_value { |tbl| tbl.close }
end
|
384
|
+
|
385
|
+
# Register a new callback. Given the name of a Bud collection, this method
|
386
|
+
# arranges for the given block to be invoked at the end of any tick in which
|
387
|
+
# any tuples have been inserted into the specified collection. The code block
|
388
|
+
# is passed the collection as an argument; this provides a convenient way to
|
389
|
+
# examine the tuples inserted during that fixpoint. (Note that because the Bud
|
390
|
+
# runtime is blocked while the callback is invoked, it can also examine any
|
391
|
+
# other Bud state freely.)
|
392
|
+
#
|
393
|
+
# Note that registering callbacks on persistent collections (e.g., tables and
|
394
|
+
# tctables) is probably not a wise thing to do: as long as any tuples are
|
395
|
+
# stored in the collection, the callback will be invoked at the end of every
|
396
|
+
# tick.
|
397
|
+
def register_callback(tbl_name, &block)
  # Returns the ID of the new callback, for use with unregister_callback.
  #
  # We allow callbacks to be added before or after EM has been started. To
  # simplify matters, we start EM if it hasn't been started yet.
  start_reactor
  new_id = nil
  schedule_and_wait do
    raise Bud::BudError, "No such table: #{tbl_name}" unless @tables.has_key? tbl_name
    # The next ID must not already be in use.
    raise Bud::BudError if @callbacks.has_key? @callback_id

    new_id = @callback_id
    @callbacks[new_id] = [tbl_name, block]
    @callback_id += 1
  end
  return new_id
end
|
414
|
+
|
415
|
+
# Unregister the callback that has the given ID.
|
416
|
+
def unregister_callback(id)
  # Raises Bud::BudError when no callback with this ID is registered.
  schedule_and_wait do
    known = @callbacks.has_key? id
    raise Bud::BudError unless known
    @callbacks.delete(id)
  end
end
|
422
|
+
|
423
|
+
# sync_callback supports synchronous interaction with Bud modules. The caller
|
424
|
+
# supplies the name of an input collection, a set of tuples to insert, and an
|
425
|
+
# output collection on which to 'listen.' The call blocks until tuples are
|
426
|
+
# inserted into the output collection: these are returned to the caller.
|
427
|
+
def sync_callback(in_tbl, tupleset, out_tbl)
  # in_tbl   - name of the collection to insert into, or nil to only listen
  # tupleset - tuples to insert into in_tbl
  # out_tbl  - name of the collection to listen on
  #
  # Returns the content of out_tbl (as an array) from the first callback
  # invocation. The temporary callback is always unregistered, even when the
  # insertion raises, so a failed call does not leave a listener behind.
  q = Queue.new
  cb = register_callback(out_tbl) do |c|
    q.push c.to_a
  end
  begin
    unless in_tbl.nil?
      sync_do {
        t = @tables[in_tbl]
        # Channels and ZooKeeper tables take <~; everything else takes <+.
        if t.class <= Bud::BudChannel or t.class <= Bud::BudZkTable
          t <~ tupleset
        else
          t <+ tupleset
        end
      }
    end
    # Block until the callback has delivered a result.
    result = q.pop
  ensure
    unregister_callback(cb)
  end
  return result
end
|
446
|
+
|
447
|
+
# A common special case for sync_callback: block on a delta to a table.
|
448
|
+
def delta(out_tbl)
  # No input collection and no tuples: just listen on out_tbl.
  in_tbl = nil
  tupleset = nil
  sync_callback(in_tbl, tupleset, out_tbl)
end
|
451
|
+
|
452
|
+
private
|
453
|
+
|
454
|
+
# Call every registered callback whose collection is currently non-empty,
# passing it that collection.
def invoke_callbacks
  @callbacks.each_value do |tbl_name, block|
    tbl = @tables[tbl_name]
    next if tbl.empty?
    block.call(tbl)
  end
end
|
463
|
+
|
464
|
+
# Start the EventMachine reactor in a new thread unless it is already
# running, and return once its event loop is up.
def start_reactor
  return if EventMachine::reactor_running?

  # On an unexpected error: report it, shut down every Bud instance, re-raise.
  EventMachine::error_handler do |e|
    puts "Unexpected Bud error: #{e.inspect}"
    puts e.backtrace.join("\n")
    Bud.shutdown_all_instances
    raise e
  end

  # This thread helps us avoid race conditions on the start and stop of
  # EventMachine's event loop.
  started = Queue.new
  Thread.new { EventMachine.run { started.push(true) } }
  # Block waiting for EM's event loop to start up.
  started.pop
end
|
486
|
+
|
487
|
+
# Schedule a block to be evaluated by EventMachine in the future, and
|
488
|
+
# block until this has happened.
|
489
|
+
def schedule_and_wait
  # If EM isn't running, just run the user's block immediately
  # XXX: not clear that this is the right behavior
  unless EventMachine::reactor_running?
    yield
    return
  end

  reply = Queue.new
  EventMachine::schedule do
    # Deliver false on success, or the exception the block raised.
    outcome = begin
      yield
      false
    rescue Exception => err
      err
    end
    reply.push(outcome)
  end

  # Wait for the scheduled block; re-raise its exception in the caller.
  failure = reply.pop
  raise failure if failure
end
|
511
|
+
|
512
|
+
# Shut this instance down: deregister it, cancel timers, close tables and the
# datagram socket. Shutdown and post-shutdown callbacks run only when
# do_shutdown_cb is true.
def do_shutdown(do_shutdown_cb=true)
  # Silently ignore duplicate shutdown requests or attempts to shutdown an
  # instance that hasn't been started yet.
  return if @instance_id == ILLEGAL_INSTANCE_ID

  $signal_lock.synchronize do
    raise unless $bud_instances.has_key? @instance_id
    $bud_instances.delete @instance_id
    @instance_id = ILLEGAL_INSTANCE_ID
  end

  @shutdown_callbacks.each { |cb| cb.call } if do_shutdown_cb
  @timers.each { |timer| timer.cancel }
  close_tables
  @dsock.close_connection if EventMachine::reactor_running?
  @post_shutdown_callbacks.each { |cb| cb.call } if do_shutdown_cb
end
|
533
|
+
|
534
|
+
private
|
535
|
+
# Bring this instance up. Must be called on the EventMachine reactor thread;
# raises BudError otherwise.
def start_bud
  raise BudError unless EventMachine::reactor_thread?

  @instance_id = Bud.init_signal_handlers(self)
  do_start_server

  # Initialize periodics
  @periodics.each do |per|
    @timers << set_periodic_timer(per.pername, per.ident, per.period)
  end

  # Arrange for Bud to read from stdin if enabled. Note that we can't do this
  # earlier because we need to wait for EventMachine startup.
  @stdio.start_stdin_reader if @options[:stdin]
  @zk_tables.each_value { |zk| zk.start_watchers }

  # Compute a fixpoint; this will also invoke any bootstrap blocks.
  tick unless @lazy

  @rtracer.sleep if options[:rtrace]
end
|
556
|
+
|
557
|
+
# Open the datagram socket for this instance and record the port it is
# actually bound to.
def do_start_server
  requested_port = @options[:port]
  @dsock = EventMachine::open_datagram_socket(@ip, requested_port, BudServer, self)
  sockaddr = @dsock.get_sockname
  @port = Socket.unpack_sockaddr_in(sockaddr)[0]
end
|
562
|
+
|
563
|
+
public
|
564
|
+
|
565
|
+
# Returns the IP and port of the Bud instance as a string. In addition to the
|
566
|
+
# local IP and port, the user may define an external IP and/or port. The
|
567
|
+
# external version of each is returned if available. If not, the local
|
568
|
+
# version is returned. There are use cases for mixing and matching local and
|
569
|
+
# external. local_ip:external_port would be if you have local port
|
570
|
+
# forwarding, and external_ip:local_port would be if you're in a DMZ, for
|
571
|
+
# example.
|
572
|
+
def ip_port
  # Raises BudError while no port is known: none bound yet, port option 0,
  # and no external port configured.
  port_unknown = (@port.nil? and @options[:port] == 0 and not @options[:ext_port])
  raise BudError, "ip_port called before port defined" if port_unknown

  # External settings win over local ones.
  host = options[:ext_ip] ? @options[:ext_ip].to_s : @ip.to_s
  port_str = if options[:ext_port]
    @options[:ext_port].to_s
  elsif @port.nil?
    @options[:port].to_s
  else
    @port.to_s
  end
  host + ":" + port_str
end
|
580
|
+
|
581
|
+
# Returns the internal IP and port. See ip_port.
|
582
|
+
def int_ip_port
  # Returns "ip:port" using only the local address. The bound port is used
  # when known, otherwise the configured port.
  #
  # Raises BudError when no port is known yet (nothing bound and the
  # configured port is 0). The message names this method rather than ip_port.
  if @port.nil? and @options[:port] == 0
    raise BudError, "int_ip_port called before port defined"
  end
  @port.nil? ? "#{@ip}:#{@options[:port]}" : "#{@ip}:#{@port}"
end
|
586
|
+
|
587
|
+
# Manually trigger one timestep of Bloom execution.
|
588
|
+
def tick
  # Start-of-tick housekeeping on every collection, and fresh join state.
  @tables.each_value { |tbl| tbl.tick }
  @joinstate = {}

  # Bootstrap blocks run once, on the first tick.
  do_bootstrap unless @done_bootstrap
  receive_inbound

  # Evaluate each stratum to fixpoint, in order.
  @strata.each_with_index do |stratum, idx|
    stratum_fixpoint(stratum, idx)
  end
  @viz.do_cards if @options[:trace]
  do_flush
  invoke_callbacks
  @budtime += 1
end
|
604
|
+
|
605
|
+
private
|
606
|
+
|
607
|
+
# Builtin BUD state (predefined collections). We could define this using the
|
608
|
+
# standard "state" syntax, but we want to ensure that builtin state is
|
609
|
+
# initialized before user-defined state.
|
610
|
+
def builtin_state
  # Runtime plumbing: local tick channel, terminal I/O, periodic timers.
  channel :localtick, [:col1]
  @stdio = terminal :stdio
  @periodics = table :periodics_tbl, [:pername] => [:ident, :period]

  # for BUD reflection
  table :t_rules,          [:rule_id] => [:lhs, :op, :src, :orig_src]
  table :t_depends,        [:rule_id, :lhs, :op, :body] => [:nm]
  table :t_depends_tc,     [:head, :body, :via, :neg, :temporal]
  table :t_provides,       [:interface] => [:input]
  # Same schema as t_provides.
  table :t_underspecified, t_provides.schema
  table :t_stratum,        [:predicate] => [:stratum]
  table :t_cycle,          [:predicate, :via, :neg, :temporal]
  table :t_table_info,     [:tab_name, :tab_type]
  table :t_table_schema,   [:tab_name, :col_name, :ord, :loc]
end
|
626
|
+
|
627
|
+
# Handle any inbound tuples off the wire and then clear. Received messages are
|
628
|
+
# placed directly into the storage of the appropriate local channel.
|
629
|
+
def receive_inbound
  # Each message is indexed as [collection name, tuple].
  @inbound.each do |msg|
    # puts "dequeueing tuple #{msg[1].inspect} into #{msg[0]} @ #{ip_port}"
    target = tables[msg[0].to_sym]
    target << msg[1]
  end
  # Start over with an empty inbound buffer.
  @inbound = []
end
|
636
|
+
|
637
|
+
# "Flush" any tuples that need to be flushed. This does two things:
|
638
|
+
# 1. Emit outgoing tuples in channels and ZK tables.
|
639
|
+
# 2. Commit to disk any changes made to on-disk tables.
|
640
|
+
def do_flush
  # Channels are flushed through their entry in @tables, looked up by name.
  @channels.each_key { |chan_name| @tables[chan_name].flush }
  # The ZooKeeper, TokyoCabinet and DBM registries hold the collections directly.
  @zk_tables.each_value { |tbl| tbl.flush }
  @tc_tables.each_value { |tbl| tbl.flush }
  @dbm_tables.each_value { |tbl| tbl.flush }
end
|
646
|
+
|
647
|
+
def stratum_fixpoint(strat, strat_num)
  # This routine uses semi-naive evaluation to compute a fixpoint of the rules
  # in strat.
  #
  # strat     - the rules of this stratum; each one is invoked with +call+
  # strat_num - index of the stratum, used to look up rule source text and
  #             the collections belonging to the stratum
  #
  # Raises BudError when a rule raises. An exception that already is a
  # BudError keeps its class. The message includes the original exception
  # and, when one was recorded, the rule's source text.
  #
  # As described in lib/collections.rb, each collection has three
  # sub-collections of note here:
  #   @storage: the "main" storage of tuples
  #   @delta: tuples that should be used to drive derivation of new facts
  #   @new_delta: a place to store newly-derived facts
  #
  # The first time through this loop we mark @stratum_first_iter=true, which
  # tells the Join::each code to join up all its @storage subcollections to
  # start. In subsequent iterations the join code uses some table's @delta to
  # ensure that only new tuples are derived.
  #
  # Note that calling "each" on a non-Join collection will iterate through
  # both storage and delta.
  #
  # At the end of each iteration of this loop we transition:
  # - @delta tuples are merged into @storage
  # - @new_delta tuples are moved into @delta
  # - @new_delta is set to empty
  #
  # XXX as a performance optimization, it would be nice to bypass the delta
  # tables for any preds that don't participate in a rhs Join -- in that case
  # there's pointless extra tuple movement letting tuples "graduate" through
  # @new_delta and @delta.

  # In semi-naive, the first iteration should join up tables on their storage
  # fields; subsequent iterations do the delta-joins only. The
  # stratum_first_iter field here distinguishes these cases.
  @stratum_first_iter = true
  begin
    strat.each_with_index do |r,i|
      begin
        r.call
      rescue Exception => e
        # Don't report source text for rules that have none recorded (e.g.
        # old-style rule blocks): the text may be missing (nil) or empty.
        stratum_srcs = @rule_orig_src[strat_num]
        rule_src = stratum_srcs.nil? ? nil : stratum_srcs[i]
        src_msg = (rule_src.nil? or rule_src == "") ? "" : "\nRule: #{rule_src}"

        # Keep BudErrors as they are; wrap anything else in a BudError.
        new_e = (e.class <= BudError) ? e : BudError
        raise new_e, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
      end
    end
    @stratum_first_iter = false
    fixpoint = true
    # tick collections in this stratum; if we don't have info on that, tick all collections
    colls = @stratum_collection_map[strat_num] if @stratum_collection_map
    colls ||= @tables.keys
    colls.each do |name|
      begin
        coll = self.send(name)
        # Pending deltas mean another round of evaluation is needed.
        unless coll.delta.empty? and coll.new_delta.empty?
          coll.tick_deltas
          fixpoint = false
        end
      rescue
        # ignore missing tables; rebl for example deletes them mid-stream
        # NOTE(review): this also hides any other StandardError raised while
        # ticking deltas.
      end
    end
  end while not fixpoint
end
|
717
|
+
|
718
|
+
private
|
719
|
+
|
720
|
+
######## ids and timers
|
721
|
+
# Build an id string from the current epoch seconds followed by a random float.
def gen_id
  "#{Time.new.to_i}#{rand}"
end
|
724
|
+
|
725
|
+
# Create an EventMachine periodic timer that, every +period+ seconds, inserts
# [id, current time] into the collection called +name+ and runs a tick.
# Returns the timer.
def set_periodic_timer(name, id, period)
  EventMachine::PeriodicTimer.new(period) do
    firing = [[id, Time.new]]
    @tables[name] <+ firing
    tick
  end
end
|
731
|
+
|
732
|
+
# Fork a new process. This is identical to Kernel#fork, except that it also
|
733
|
+
# cleans up Bud and EventMachine-related state. As with Kernel#fork, the
|
734
|
+
# caller supplies a code block that is run in the child process; the PID of
|
735
|
+
# the child is returned by this method.
|
736
|
+
def self.do_fork
  # Yields to the caller's block in the child process, after the state
  # inherited from the parent has been cleaned up. Returns what Kernel.fork
  # returns.
  Kernel.fork do
    # Re-seed the child's random number generator.
    srand
    # This is somewhat grotty: we basically clone what EM::fork_reactor does,
    # except that we don't want the user-supplied block to be invoked by the
    # reactor thread.
    if EventMachine::reactor_running?
      EventMachine::stop_event_loop
      EventMachine::release_machine
      EventMachine::instance_variable_set('@reactor_running', false)
    end
    # Shutdown all the Bud instances inherited from the parent process, but
    # don't invoke their shutdown callbacks
    Bud.shutdown_all_instances(false)

    # Reset the signal state so the child installs its own handlers. This is
    # the same $signal_handler_setup flag that init_signal_handlers checks.
    $got_shutdown_signal = false
    $signal_handler_setup = false

    yield
  end
end
|
757
|
+
|
758
|
+
# Note that this affects anyone else in the same process who happens to be
|
759
|
+
# using EventMachine! This is also a non-blocking call; to block until EM
|
760
|
+
# has completely shutdown, join on EM::reactor_thread.
|
761
|
+
def self.stop_em_loop
  # Ask EventMachine to stop; this does not wait for it to finish.
  EventMachine::stop_event_loop

  # If another instance of Bud is started later, we'll need to reinitialize
  # the signal handlers (since they depend on EM).
  $signal_handler_setup = false
end
|
768
|
+
|
769
|
+
# Signal handling. If multiple Bud instances are running inside a single
|
770
|
+
# process, we want a SIGINT or SIGTERM signal to cleanly shutdown all of them.
|
771
|
+
def self.init_signal_handlers(b)
  # b - the Bud instance being started; it is registered in $bud_instances.
  #
  # Returns the instance id assigned to b.
  #
  # The handlers are installed at most once per process: after installing
  # them we set $signal_handler_setup and remember the installing pid.
  $signal_lock.synchronize {
    # If we setup signal handlers and then fork a new process, we want to
    # reinitialize the signal handler in the child process. Comparing pids
    # detects a flag inherited from a parent process.
    already_setup = ($signal_handler_setup and $signal_handler_pid == Process.pid)
    unless b.options[:no_signal_handlers] or already_setup
      # The trap handlers only set a flag; this timer, which runs inside
      # EventMachine, polls the flag and does the actual shutdown.
      EventMachine::PeriodicTimer.new(SIGNAL_CHECK_PERIOD) do
        if $got_shutdown_signal
          Bud.shutdown_all_instances
          Bud.stop_em_loop
          $got_shutdown_signal = false
        end
      end

      ["INT", "TERM"].each do |signal|
        Signal.trap(signal) {
          $got_shutdown_signal = true
        }
      end
      $signal_handler_setup = true
      $signal_handler_pid = Process.pid
    end

    $instance_id += 1
    $bud_instances[$instance_id] = b
    return $instance_id
  }
end
|
797
|
+
|
798
|
+
# Stop every registered Bud instance without stopping EventMachine.
# do_shutdown_cb is passed through to each instance's stop_bg.
def self.shutdown_all_instances(do_shutdown_cb=true)
  # Copy the registry under the lock, then stop the instances without it.
  snapshot = $signal_lock.synchronize { $bud_instances.clone }
  snapshot.each_value { |inst| inst.stop_bg(false, do_shutdown_cb) }
end
|
806
|
+
end
|