bud 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -3
- data/bin/budvis +0 -66
- data/docs/README.md +27 -15
- data/docs/bust.md +1 -1
- data/docs/cheat.md +79 -30
- data/docs/operational.md +8 -4
- data/examples/basics/paths.rb +5 -3
- data/lib/bud/aggs.rb +1 -1
- data/lib/bud/bud_meta.rb +11 -2
- data/lib/bud/bust/bust.rb +1 -1
- data/lib/bud/collections.rb +78 -20
- data/lib/bud/deploy/threaddeploy.rb +1 -1
- data/lib/bud/errors.rb +3 -0
- data/lib/bud/graphs.rb +25 -26
- data/lib/bud/joins.rb +78 -33
- data/lib/bud/metrics.rb +43 -0
- data/lib/bud/monkeypatch.rb +1 -1
- data/lib/bud/rebl.rb +20 -13
- data/lib/bud/rewrite.rb +217 -39
- data/lib/bud/server.rb +16 -13
- data/lib/bud/state.rb +39 -25
- data/lib/bud/storage/dbm.rb +6 -1
- data/lib/bud/storage/tokyocabinet.rb +6 -0
- data/lib/bud/storage/zookeeper.rb +6 -6
- data/lib/bud/viz.rb +5 -1
- data/lib/bud/viz_util.rb +70 -0
- data/lib/bud.rb +227 -99
- metadata +33 -24
- data/docs/c.html +0 -251
- data/examples/deploy/deploy_ip_port +0 -1
- data/examples/deploy/keys.rb +0 -5
- data/lib/bud.rb.orig +0 -806
data/lib/bud.rb.orig
DELETED
@@ -1,806 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'eventmachine'
|
3
|
-
require 'msgpack'
|
4
|
-
require 'socket'
|
5
|
-
require 'superators'
|
6
|
-
require 'thread'
|
7
|
-
|
8
|
-
require 'bud/monkeypatch'
|
9
|
-
|
10
|
-
require 'bud/aggs'
|
11
|
-
require 'bud/bud_meta'
|
12
|
-
require 'bud/collections'
|
13
|
-
require 'bud/depanalysis'
|
14
|
-
require 'bud/deploy/forkdeploy'
|
15
|
-
require 'bud/deploy/threaddeploy'
|
16
|
-
require 'bud/errors'
|
17
|
-
require 'bud/joins'
|
18
|
-
require 'bud/rtrace'
|
19
|
-
require 'bud/server'
|
20
|
-
require 'bud/state'
|
21
|
-
require 'bud/storage/dbm'
|
22
|
-
require 'bud/storage/tokyocabinet'
|
23
|
-
require 'bud/storage/zookeeper'
|
24
|
-
require 'bud/stratify'
|
25
|
-
require 'bud/viz'
|
26
|
-
|
27
|
-
ILLEGAL_INSTANCE_ID = -1
|
28
|
-
SIGNAL_CHECK_PERIOD = 0.2
|
29
|
-
|
30
|
-
$signal_lock = Mutex.new
|
31
|
-
$got_shutdown_signal = false
|
32
|
-
$signal_handler_setup = false
|
33
|
-
$instance_id = 0
|
34
|
-
$bud_instances = {} # Map from instance id => Bud instance
|
35
|
-
|
36
|
-
# The root Bud module. To cause an instance of Bud to begin executing, there are
|
37
|
-
# three main options:
|
38
|
-
#
|
39
|
-
# 1. Synchronously. To do this, instantiate your program and then call tick()
|
40
|
-
# one or more times; each call evaluates a single Bud timestep. Note that in
|
41
|
-
# this mode, network communication (channels) and timers cannot be used. This
|
42
|
-
# is mostly intended for "one-shot" programs that compute a single result and
|
43
|
-
# then terminate.
|
44
|
-
# 2. In a separate thread in the foreground. To do this, instantiate your
|
45
|
-
# program and then call run_fg(). The Bud interpreter will then run, handling
|
46
|
-
# network events and evaluating new timesteps as appropriate. The run_fg()
|
47
|
-
# method will not return unless an error occurs.
|
48
|
-
# 3. In a separate thread in the background. To do this, instantiate your
|
49
|
-
# program and then call run_bg(). The Bud interpreter will run
|
50
|
-
# asynchronously. To interact with Bud (e.g., insert additional data or
|
51
|
-
# inspect the state of a Bud collection), use the sync_do and async_do
|
52
|
-
# methods. To shutdown the Bud interpreter, use stop_bg().
|
53
|
-
#
|
54
|
-
# Most programs should use method #3.
|
55
|
-
#
|
56
|
-
# :main: Bud
|
57
|
-
module Bud
|
58
|
-
attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
|
59
|
-
attr_reader :dsock
|
60
|
-
attr_reader :tables, :ip, :port
|
61
|
-
attr_reader :stratum_first_iter, :joinstate
|
62
|
-
attr_accessor :lazy # This can be changed on-the-fly by REBL
|
63
|
-
attr_accessor :stratum_collection_map
|
64
|
-
|
65
|
-
# options to the Bud runtime are passed in a hash, with the following keys
|
66
|
-
# * network configuration
|
67
|
-
# * <tt>:ip</tt> IP address string for this instance
|
68
|
-
# * <tt>:port</tt> port number for this instance
|
69
|
-
# * <tt>:ext_ip</tt> IP address at which external nodes can contact this instance
|
70
|
-
# * <tt>:ext_port</tt> port number to go with <tt>:ext_ip</tt>
|
71
|
-
# * <tt>:bust_port</tt> port number for the restful HTTP messages
|
72
|
-
# * operating system interaction
|
73
|
-
# * <tt>:stdin</tt> if non-nil, reading from the +stdio+ collection results in reading from this +IO+ handle
|
74
|
-
# * <tt>:stdout</tt> writing to the +stdio+ collection results in writing to this +IO+ handle; defaults to <tt>$stdout</tt>
|
75
|
-
# * <tt>:no_signal_handlers</tt> if true, runtime ignores +SIGINT+ and +SIGTERM+
|
76
|
-
# * tracing and output
|
77
|
-
# * <tt>:quiet</tt> if true, suppress certain messages
|
78
|
-
# * <tt>:trace</tt> if true, generate +budvis+ outputs
|
79
|
-
# * <tt>:rtrace</tt> if true, generate +budplot+ outputs
|
80
|
-
# * <tt>:dump_rewrite</tt> if true, dump results of internal rewriting of Bloom code to a file
|
81
|
-
# * controlling execution
|
82
|
-
# * <tt>:lazy</tt> if true, prevents runtime from ticking except on external calls to +tick+
|
83
|
-
# * <tt>:tag</tt> a name for this instance, suitable for display during tracing and visualization
|
84
|
-
# * storage configuration
|
85
|
-
# * <tt>:tc_dir</tt> filesystem directory to hold TokyoCabinet data stores
|
86
|
-
# * <tt>:tc_truncate</tt> if true, TokyoCabinet collections are opened with +OTRUNC+
|
87
|
-
# * deployment
|
88
|
-
# * <tt>:deploy</tt> enable deployment
|
89
|
-
# * <tt>:deploy_child_opts</tt> option hash to pass to deployed instances
|
90
|
-
def initialize(options={})
|
91
|
-
@tables = {}
|
92
|
-
@table_meta = []
|
93
|
-
@rewritten_strata = []
|
94
|
-
@channels = {}
|
95
|
-
@tc_tables = {}
|
96
|
-
@dbm_tables = {}
|
97
|
-
@zk_tables = {}
|
98
|
-
@callbacks = {}
|
99
|
-
@callback_id = 0
|
100
|
-
@shutdown_callbacks = []
|
101
|
-
@post_shutdown_callbacks = []
|
102
|
-
@timers = []
|
103
|
-
@budtime = 0
|
104
|
-
@inbound = []
|
105
|
-
@done_bootstrap = false
|
106
|
-
@joinstate = {} # joins are stateful, their state needs to be kept inside the Bud instance
|
107
|
-
@instance_id = ILLEGAL_INSTANCE_ID # Assigned when we start running
|
108
|
-
|
109
|
-
# Setup options (named arguments), along with default values
|
110
|
-
@options = options.clone
|
111
|
-
@lazy = @options[:lazy] ||= false
|
112
|
-
@options[:ip] ||= "127.0.0.1"
|
113
|
-
@ip = @options[:ip]
|
114
|
-
@options[:port] ||= 0
|
115
|
-
@options[:port] = @options[:port].to_i
|
116
|
-
# NB: If using an ephemeral port (specified by port = 0), the actual port
|
117
|
-
# number won't be known until we start EM
|
118
|
-
|
119
|
-
relatives = self.class.modules + [self.class]
|
120
|
-
relatives.each do |r|
|
121
|
-
Bud.rewrite_local_methods(r)
|
122
|
-
end
|
123
|
-
|
124
|
-
@declarations = ModuleRewriter.get_rule_defs(self.class)
|
125
|
-
|
126
|
-
init_state
|
127
|
-
|
128
|
-
@viz = VizOnline.new(self) if @options[:trace]
|
129
|
-
@rtracer = RTrace.new(self) if @options[:rtrace]
|
130
|
-
|
131
|
-
# Get dependency info and determine stratification order.
|
132
|
-
unless self.class <= Stratification or self.class <= DepAnalysis
|
133
|
-
do_rewrite
|
134
|
-
end
|
135
|
-
|
136
|
-
# Load the rules as a closure. Each element of @strata is an array of
|
137
|
-
# lambdas, one for each rewritten rule in that strata. Note that legacy Bud
|
138
|
-
# code (with user-specified stratification) assumes that @strata is a simple
|
139
|
-
# array, so we need to convert it before loading the rewritten strata.
|
140
|
-
@strata = []
|
141
|
-
@rule_src = []
|
142
|
-
@rule_orig_src = []
|
143
|
-
declaration
|
144
|
-
@strata.each_with_index do |s,i|
|
145
|
-
raise BudError if s.class <= Array
|
146
|
-
@strata[i] = [s]
|
147
|
-
# Don't try to record source text for old-style rule blocks
|
148
|
-
@rule_src[i] = [""]
|
149
|
-
end
|
150
|
-
|
151
|
-
@rewritten_strata.each_with_index do |src_ary,i|
|
152
|
-
@strata[i] ||= []
|
153
|
-
@rule_src[i] ||= []
|
154
|
-
@rule_orig_src[i] ||= []
|
155
|
-
src_ary.each_with_index do |src, j|
|
156
|
-
@strata[i] << eval("lambda { #{src} }")
|
157
|
-
@rule_src[i] << src
|
158
|
-
@rule_orig_src[i] << @no_attr_rewrite_strata[i][j]
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
private
|
164
|
-
|
165
|
-
# Rewrite methods defined in the given klass to expand module references and
|
166
|
-
# temp collections. Imported modules are rewritten during the import process;
|
167
|
-
# we rewrite the main Bud class and any included modules here. Note that we
|
168
|
-
# only rewrite each distinct Class once.
|
169
|
-
def self.rewrite_local_methods(klass)
|
170
|
-
@done_rewrite ||= {}
|
171
|
-
return if @done_rewrite.has_key? klass.name
|
172
|
-
|
173
|
-
u = Unifier.new
|
174
|
-
ref_expander = NestedRefRewriter.new(klass.bud_import_table)
|
175
|
-
tmp_expander = TempExpander.new
|
176
|
-
r2r = Ruby2Ruby.new
|
177
|
-
|
178
|
-
klass.instance_methods(false).each do |m|
|
179
|
-
ast = ParseTree.translate(klass, m)
|
180
|
-
ast = u.process(ast)
|
181
|
-
ast = ref_expander.process(ast)
|
182
|
-
ast = tmp_expander.process(ast)
|
183
|
-
|
184
|
-
if (ref_expander.did_work or tmp_expander.did_work)
|
185
|
-
new_source = r2r.process(ast)
|
186
|
-
klass.module_eval new_source # Replace previous method def
|
187
|
-
end
|
188
|
-
|
189
|
-
ref_expander.did_work = false
|
190
|
-
tmp_expander.did_work = false
|
191
|
-
end
|
192
|
-
|
193
|
-
# If we found any temp statements in the klass's rule blocks, add a state
|
194
|
-
# block with declarations for the corresponding temp collections.
|
195
|
-
s = tmp_expander.get_state_meth(klass)
|
196
|
-
if s
|
197
|
-
state_src = r2r.process(s)
|
198
|
-
klass.module_eval(state_src)
|
199
|
-
end
|
200
|
-
|
201
|
-
# Always rewrite anonymous classes
|
202
|
-
@done_rewrite[klass.name] = true unless klass.name == ""
|
203
|
-
end
|
204
|
-
|
205
|
-
# Invoke all the user-defined state blocks and initialize builtin state.
|
206
|
-
def init_state
|
207
|
-
builtin_state
|
208
|
-
call_state_methods
|
209
|
-
end
|
210
|
-
|
211
|
-
# If module Y is a parent module of X, X's state block might reference state
|
212
|
-
# defined in Y. Hence, we want to invoke Y's state block first. However, when
|
213
|
-
# "import" and "include" are combined, we can't use the inheritance hierarchy
|
214
|
-
# to do this. When a module Z is imported, the import process inlines all the
|
215
|
-
# modules Z includes into a single module. Hence, we can no longer rely on the
|
216
|
-
# inheritance hierarchy to respect dependencies between modules. To fix this,
|
217
|
-
# we add an increasing ID to each state block's method name (assigned
|
218
|
-
# according to the order in which the state blocks are defined); we then sort
|
219
|
-
# by this order before invoking the state blocks.
|
220
|
-
def call_state_methods
|
221
|
-
meth_map = {} # map from ID => [Method]
|
222
|
-
self.class.instance_methods.each do |m|
|
223
|
-
next unless m =~ /^__state(\d+)__/
|
224
|
-
id = Regexp.last_match.captures.first.to_i
|
225
|
-
meth_map[id] ||= []
|
226
|
-
meth_map[id] << self.method(m)
|
227
|
-
end
|
228
|
-
|
229
|
-
meth_map.keys.sort.each do |i|
|
230
|
-
meth_map[i].each {|m| m.call}
|
231
|
-
end
|
232
|
-
end
|
233
|
-
|
234
|
-
# Evaluate all bootstrap blocks
|
235
|
-
def do_bootstrap
|
236
|
-
self.class.ancestors.reverse.each do |anc|
|
237
|
-
anc.instance_methods(false).each do |m|
|
238
|
-
if /^__bootstrap__/.match m
|
239
|
-
self.method(m.to_sym).call
|
240
|
-
end
|
241
|
-
end
|
242
|
-
end
|
243
|
-
bootstrap
|
244
|
-
|
245
|
-
@done_bootstrap = true
|
246
|
-
end
|
247
|
-
|
248
|
-
def do_rewrite
|
249
|
-
@meta_parser = BudMeta.new(self, @declarations)
|
250
|
-
@rewritten_strata, @no_attr_rewrite_strata = @meta_parser.meta_rewrite
|
251
|
-
end
|
252
|
-
|
253
|
-
public
|
254
|
-
|
255
|
-
########### give empty defaults for these
|
256
|
-
def declaration # :nodoc: all
|
257
|
-
end
|
258
|
-
def bootstrap # :nodoc: all
|
259
|
-
end
|
260
|
-
|
261
|
-
########### metaprogramming support for ruby and for rule rewriting
|
262
|
-
# helper to define instance methods
|
263
|
-
def singleton_class # :nodoc: all
|
264
|
-
class << self; self; end
|
265
|
-
end
|
266
|
-
|
267
|
-
######## methods for controlling execution
|
268
|
-
|
269
|
-
# Run Bud in the background (in a different thread). This means that the Bud
|
270
|
-
# interpreter will run asynchronously from the caller, so care must be used
|
271
|
-
# when interacting with it. For example, it is not safe to directly examine
|
272
|
-
# Bud collections from the caller's thread (see async_do and sync_do).
|
273
|
-
#
|
274
|
-
# This instance of Bud will continue to execute until stop_bg is called.
|
275
|
-
def run_bg
|
276
|
-
start_reactor
|
277
|
-
# Wait for Bud to start up before returning
|
278
|
-
schedule_and_wait do
|
279
|
-
start_bud
|
280
|
-
end
|
281
|
-
end
|
282
|
-
|
283
|
-
# Run Bud in the "foreground" -- the caller's thread will be used to run the
|
284
|
-
# Bud interpreter. This means this method won't return unless an error
|
285
|
-
# occurs. It is often more useful to run Bud asynchronously -- see run_bg.
|
286
|
-
def run_fg
|
287
|
-
# If we're called from the EventMachine thread (and EM is running), blocking
|
288
|
-
# the current thread would imply deadlocking ourselves.
|
289
|
-
if Thread.current == EventMachine::reactor_thread and EventMachine::reactor_running?
|
290
|
-
raise BudError, "Cannot invoke run_fg from inside EventMachine"
|
291
|
-
end
|
292
|
-
|
293
|
-
q = Queue.new
|
294
|
-
# Note that this must be a post-shutdown callback: if this is the only
|
295
|
-
# thread, then the program might exit after run_fg() returns. If run_fg()
|
296
|
-
# blocked on a normal shutdown callback, the program might exit before the
|
297
|
-
# other shutdown callbacks have a chance to run.
|
298
|
-
post_shutdown do
|
299
|
-
q.push(true)
|
300
|
-
end
|
301
|
-
|
302
|
-
run_bg
|
303
|
-
# Block caller's thread until Bud has shutdown
|
304
|
-
q.pop
|
305
|
-
end
|
306
|
-
|
307
|
-
# Shutdown a Bud instance that is running asynchronously. This method blocks
|
308
|
-
# until Bud has been shutdown. If +stop_em+ is true, the EventMachine event
|
309
|
-
# loop is also shutdown; this will interfere with the execution of any other
|
310
|
-
# Bud instances in the same process (as well as anything else that happens to
|
311
|
-
# use EventMachine).
|
312
|
-
def stop_bg(stop_em=false, do_shutdown_cb=true)
|
313
|
-
schedule_and_wait do
|
314
|
-
do_shutdown(do_shutdown_cb)
|
315
|
-
end
|
316
|
-
|
317
|
-
if stop_em
|
318
|
-
Bud.stop_em_loop
|
319
|
-
EventMachine::reactor_thread.join
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
# Register a callback that will be invoked when this instance of Bud is
|
324
|
-
# shutting down.
|
325
|
-
def on_shutdown(&blk)
|
326
|
-
# Start EM if not yet started
|
327
|
-
start_reactor
|
328
|
-
schedule_and_wait do
|
329
|
-
@shutdown_callbacks << blk
|
330
|
-
end
|
331
|
-
end
|
332
|
-
|
333
|
-
# Register a callback that will be invoked when *after* this instance of Bud
|
334
|
-
# has been shutdown.
|
335
|
-
def post_shutdown(&blk)
|
336
|
-
# Start EM if not yet started
|
337
|
-
start_reactor
|
338
|
-
schedule_and_wait do
|
339
|
-
@post_shutdown_callbacks << blk
|
340
|
-
end
|
341
|
-
end
|
342
|
-
|
343
|
-
# Given a block, evaluate that block inside the background Ruby thread at some
|
344
|
-
# time in the future. Because the block is evaluate inside the background Ruby
|
345
|
-
# thread, the block can safely examine Bud state. Naturally, this method can
|
346
|
-
# only be used when Bud is running in the background. Note that calling
|
347
|
-
# sync_do blocks the caller until the block has been evaluated; for a
|
348
|
-
# non-blocking version, see async_do.
|
349
|
-
#
|
350
|
-
# Note that the block is invoked after one Bud timestep has ended but before
|
351
|
-
# the next timestep begins. Hence, synchronous accumulation (<=) into a Bud
|
352
|
-
# scratch collection in a callback is typically not a useful thing to do: when
|
353
|
-
# the next tick begins, the content of any scratch collections will be
|
354
|
-
# emptied, which includes anything inserted by a sync_do block using <=. To
|
355
|
-
# avoid this behavior, insert into scratches using <+.
|
356
|
-
def sync_do
|
357
|
-
schedule_and_wait do
|
358
|
-
yield if block_given?
|
359
|
-
# Do another tick, in case the user-supplied block inserted any data
|
360
|
-
tick
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
# Like sync_do, but does not block the caller's thread: the given callback
|
365
|
-
# will be invoked at some future time. Note that calls to async_do respect
|
366
|
-
# FIFO order.
|
367
|
-
def async_do
|
368
|
-
EventMachine::schedule do
|
369
|
-
yield if block_given?
|
370
|
-
# Do another tick, in case the user-supplied block inserted any data
|
371
|
-
tick
|
372
|
-
end
|
373
|
-
end
|
374
|
-
|
375
|
-
# Shutdown any persistent tables used by the current Bud instance. If you are
|
376
|
-
# running Bud via tick() and using +tctable+ collections, you should call this
|
377
|
-
# after you're finished using Bud. Programs that use Bud via run_fg() or
|
378
|
-
# run_bg() don't need to call this manually.
|
379
|
-
def close_tables
|
380
|
-
@tables.each_value do |t|
|
381
|
-
t.close
|
382
|
-
end
|
383
|
-
end
|
384
|
-
|
385
|
-
# Register a new callback. Given the name of a Bud collection, this method
|
386
|
-
# arranges for the given block to be invoked at the end of any tick in which
|
387
|
-
# any tuples have been inserted into the specified collection. The code block
|
388
|
-
# is passed the collection as an argument; this provides a convenient way to
|
389
|
-
# examine the tuples inserted during that fixpoint. (Note that because the Bud
|
390
|
-
# runtime is blocked while the callback is invoked, it can also examine any
|
391
|
-
# other Bud state freely.)
|
392
|
-
#
|
393
|
-
# Note that registering callbacks on persistent collections (e.g., tables and
|
394
|
-
# tctables) is probably not a wise thing to do: as long as any tuples are
|
395
|
-
# stored in the collection, the callback will be invoked at the end of every
|
396
|
-
# tick.
|
397
|
-
def register_callback(tbl_name, &block)
|
398
|
-
# We allow callbacks to be added before or after EM has been started. To
|
399
|
-
# simplify matters, we start EM if it hasn't been started yet.
|
400
|
-
start_reactor
|
401
|
-
cb_id = nil
|
402
|
-
schedule_and_wait do
|
403
|
-
unless @tables.has_key? tbl_name
|
404
|
-
raise Bud::BudError, "No such table: #{tbl_name}"
|
405
|
-
end
|
406
|
-
|
407
|
-
raise Bud::BudError if @callbacks.has_key? @callback_id
|
408
|
-
@callbacks[@callback_id] = [tbl_name, block]
|
409
|
-
cb_id = @callback_id
|
410
|
-
@callback_id += 1
|
411
|
-
end
|
412
|
-
return cb_id
|
413
|
-
end
|
414
|
-
|
415
|
-
# Unregister the callback that has the given ID.
|
416
|
-
def unregister_callback(id)
|
417
|
-
schedule_and_wait do
|
418
|
-
raise Bud::BudError unless @callbacks.has_key? id
|
419
|
-
@callbacks.delete(id)
|
420
|
-
end
|
421
|
-
end
|
422
|
-
|
423
|
-
# sync_callback supports synchronous interaction with Bud modules. The caller
|
424
|
-
# supplies the name of an input collection, a set of tuples to insert, and an
|
425
|
-
# output collection on which to 'listen.' The call blocks until tuples are
|
426
|
-
# inserted into the output collection: these are returned to the caller.
|
427
|
-
def sync_callback(in_tbl, tupleset, out_tbl)
|
428
|
-
q = Queue.new
|
429
|
-
cb = register_callback(out_tbl) do |c|
|
430
|
-
q.push c.to_a
|
431
|
-
end
|
432
|
-
unless in_tbl.nil?
|
433
|
-
sync_do {
|
434
|
-
t = @tables[in_tbl]
|
435
|
-
if t.class <= Bud::BudChannel or t.class <= Bud::BudZkTable
|
436
|
-
t <~ tupleset
|
437
|
-
else
|
438
|
-
t <+ tupleset
|
439
|
-
end
|
440
|
-
}
|
441
|
-
end
|
442
|
-
result = q.pop
|
443
|
-
unregister_callback(cb)
|
444
|
-
return result
|
445
|
-
end
|
446
|
-
|
447
|
-
# A common special case for sync_callback: block on a delta to a table.
|
448
|
-
def delta(out_tbl)
|
449
|
-
sync_callback(nil, nil, out_tbl)
|
450
|
-
end
|
451
|
-
|
452
|
-
private
|
453
|
-
|
454
|
-
def invoke_callbacks
|
455
|
-
@callbacks.each_value do |cb|
|
456
|
-
tbl_name, block = cb
|
457
|
-
tbl = @tables[tbl_name]
|
458
|
-
unless tbl.empty?
|
459
|
-
block.call(tbl)
|
460
|
-
end
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
|
-
def start_reactor
|
465
|
-
return if EventMachine::reactor_running?
|
466
|
-
|
467
|
-
EventMachine::error_handler do |e|
|
468
|
-
puts "Unexpected Bud error: #{e.inspect}"
|
469
|
-
puts e.backtrace.join("\n")
|
470
|
-
Bud.shutdown_all_instances
|
471
|
-
raise e
|
472
|
-
end
|
473
|
-
|
474
|
-
# Block until EM has successfully started up.
|
475
|
-
q = Queue.new
|
476
|
-
# This thread helps us avoid race conditions on the start and stop of
|
477
|
-
# EventMachine's event loop.
|
478
|
-
Thread.new do
|
479
|
-
EventMachine.run do
|
480
|
-
q.push(true)
|
481
|
-
end
|
482
|
-
end
|
483
|
-
# Block waiting for EM's event loop to start up.
|
484
|
-
q.pop
|
485
|
-
end
|
486
|
-
|
487
|
-
# Schedule a block to be evaluated by EventMachine in the future, and
|
488
|
-
# block until this has happened.
|
489
|
-
def schedule_and_wait
|
490
|
-
# If EM isn't running, just run the user's block immediately
|
491
|
-
# XXX: not clear that this is the right behavior
|
492
|
-
unless EventMachine::reactor_running?
|
493
|
-
yield
|
494
|
-
return
|
495
|
-
end
|
496
|
-
|
497
|
-
q = Queue.new
|
498
|
-
EventMachine::schedule do
|
499
|
-
ret = false
|
500
|
-
begin
|
501
|
-
yield
|
502
|
-
rescue Exception
|
503
|
-
ret = $!
|
504
|
-
end
|
505
|
-
q.push(ret)
|
506
|
-
end
|
507
|
-
|
508
|
-
resp = q.pop
|
509
|
-
raise resp if resp
|
510
|
-
end
|
511
|
-
|
512
|
-
def do_shutdown(do_shutdown_cb=true)
|
513
|
-
# Silently ignore duplicate shutdown requests or attempts to shutdown an
|
514
|
-
# instance that hasn't been started yet.
|
515
|
-
return if @instance_id == ILLEGAL_INSTANCE_ID
|
516
|
-
|
517
|
-
$signal_lock.synchronize {
|
518
|
-
raise unless $bud_instances.has_key? @instance_id
|
519
|
-
$bud_instances.delete @instance_id
|
520
|
-
@instance_id = ILLEGAL_INSTANCE_ID
|
521
|
-
}
|
522
|
-
|
523
|
-
if do_shutdown_cb
|
524
|
-
@shutdown_callbacks.each {|cb| cb.call}
|
525
|
-
end
|
526
|
-
@timers.each {|t| t.cancel}
|
527
|
-
close_tables
|
528
|
-
@dsock.close_connection if EventMachine::reactor_running?
|
529
|
-
if do_shutdown_cb
|
530
|
-
@post_shutdown_callbacks.each {|cb| cb.call}
|
531
|
-
end
|
532
|
-
end
|
533
|
-
|
534
|
-
private
|
535
|
-
def start_bud
|
536
|
-
raise BudError unless EventMachine::reactor_thread?
|
537
|
-
|
538
|
-
@instance_id = Bud.init_signal_handlers(self)
|
539
|
-
do_start_server
|
540
|
-
|
541
|
-
# Initialize periodics
|
542
|
-
@periodics.each do |p|
|
543
|
-
@timers << set_periodic_timer(p.pername, p.ident, p.period)
|
544
|
-
end
|
545
|
-
|
546
|
-
# Arrange for Bud to read from stdin if enabled. Note that we can't do this
|
547
|
-
# earlier because we need to wait for EventMachine startup.
|
548
|
-
@stdio.start_stdin_reader if @options[:stdin]
|
549
|
-
@zk_tables.each_value {|t| t.start_watchers}
|
550
|
-
|
551
|
-
# Compute a fixpoint; this will also invoke any bootstrap blocks.
|
552
|
-
tick unless @lazy
|
553
|
-
|
554
|
-
@rtracer.sleep if options[:rtrace]
|
555
|
-
end
|
556
|
-
|
557
|
-
def do_start_server
|
558
|
-
@dsock = EventMachine::open_datagram_socket(@ip, @options[:port],
|
559
|
-
BudServer, self)
|
560
|
-
@port = Socket.unpack_sockaddr_in(@dsock.get_sockname)[0]
|
561
|
-
end
|
562
|
-
|
563
|
-
public
|
564
|
-
|
565
|
-
# Returns the IP and port of the Bud instance as a string. In addition to the
|
566
|
-
# local IP and port, the user may define an external IP and/or port. The
|
567
|
-
# external version of each is returned if available. If not, the local
|
568
|
-
# version is returned. There are use cases for mixing and matching local and
|
569
|
-
# external. local_ip:external_port would be if you have local port
|
570
|
-
# forwarding, and external_ip:local_port would be if you're in a DMZ, for
|
571
|
-
# example.
|
572
|
-
def ip_port
|
573
|
-
raise BudError, "ip_port called before port defined" if @port.nil? and @options[:port] == 0 and not @options[:ext_port]
|
574
|
-
|
575
|
-
ip = options[:ext_ip] ? "#{@options[:ext_ip]}" : "#{@ip}"
|
576
|
-
port = options[:ext_port] ? "#{@options[:ext_port]}" :
|
577
|
-
(@port.nil? ? "#{@options[:port]}" : "#{@port}")
|
578
|
-
ip + ":" + port
|
579
|
-
end
|
580
|
-
|
581
|
-
# Returns the internal IP and port. See ip_port.
|
582
|
-
def int_ip_port
|
583
|
-
raise BudError, "ip_port called before port defined" if @port.nil? and @options[:port] == 0
|
584
|
-
@port.nil? ? "#{@ip}:#{@options[:port]}" : "#{@ip}:#{@port}"
|
585
|
-
end
|
586
|
-
|
587
|
-
# Manually trigger one timestep of Bloom execution.
|
588
|
-
def tick
|
589
|
-
@tables.each_value do |t|
|
590
|
-
t.tick
|
591
|
-
end
|
592
|
-
|
593
|
-
@joinstate = {}
|
594
|
-
|
595
|
-
do_bootstrap unless @done_bootstrap
|
596
|
-
receive_inbound
|
597
|
-
|
598
|
-
@strata.each_with_index { |s,i| stratum_fixpoint(s, i) }
|
599
|
-
@viz.do_cards if @options[:trace]
|
600
|
-
do_flush
|
601
|
-
invoke_callbacks
|
602
|
-
@budtime += 1
|
603
|
-
end
|
604
|
-
|
605
|
-
private
|
606
|
-
|
607
|
-
# Builtin BUD state (predefined collections). We could define this using the
|
608
|
-
# standard "state" syntax, but we want to ensure that builtin state is
|
609
|
-
# initialized before user-defined state.
|
610
|
-
def builtin_state
|
611
|
-
channel :localtick, [:col1]
|
612
|
-
@stdio = terminal :stdio
|
613
|
-
@periodics = table :periodics_tbl, [:pername] => [:ident, :period]
|
614
|
-
|
615
|
-
# for BUD reflection
|
616
|
-
table :t_rules, [:rule_id] => [:lhs, :op, :src, :orig_src]
|
617
|
-
table :t_depends, [:rule_id, :lhs, :op, :body] => [:nm]
|
618
|
-
table :t_depends_tc, [:head, :body, :via, :neg, :temporal]
|
619
|
-
table :t_provides, [:interface] => [:input]
|
620
|
-
table :t_underspecified, t_provides.schema
|
621
|
-
table :t_stratum, [:predicate] => [:stratum]
|
622
|
-
table :t_cycle, [:predicate, :via, :neg, :temporal]
|
623
|
-
table :t_table_info, [:tab_name, :tab_type]
|
624
|
-
table :t_table_schema, [:tab_name, :col_name, :ord, :loc]
|
625
|
-
end
|
626
|
-
|
627
|
-
# Handle any inbound tuples off the wire and then clear. Received messages are
|
628
|
-
# placed directly into the storage of the appropriate local channel.
|
629
|
-
def receive_inbound
|
630
|
-
@inbound.each do |msg|
|
631
|
-
# puts "dequeueing tuple #{msg[1].inspect} into #{msg[0]} @ #{ip_port}"
|
632
|
-
tables[msg[0].to_sym] << msg[1]
|
633
|
-
end
|
634
|
-
@inbound = []
|
635
|
-
end
|
636
|
-
|
637
|
-
# "Flush" any tuples that need to be flushed. This does two things:
|
638
|
-
# 1. Emit outgoing tuples in channels and ZK tables.
|
639
|
-
# 2. Commit to disk any changes made to on-disk tables.
|
640
|
-
def do_flush
|
641
|
-
@channels.each { |c| @tables[c[0]].flush }
|
642
|
-
@zk_tables.each_value { |t| t.flush }
|
643
|
-
@tc_tables.each_value { |t| t.flush }
|
644
|
-
@dbm_tables.each_value { |t| t.flush }
|
645
|
-
end
|
646
|
-
|
647
|
-
def stratum_fixpoint(strat, strat_num)
|
648
|
-
# This routine uses semi-naive evaluation to compute a fixpoint of the rules
|
649
|
-
# in strat.
|
650
|
-
#
|
651
|
-
# As described in lib/collections.rb, each collection has three
|
652
|
-
# sub-collections of note here:
|
653
|
-
# @storage: the "main" storage of tuples
|
654
|
-
# @delta: tuples that should be used to drive derivation of new facts
|
655
|
-
# @new_delta: a place to store newly-derived facts
|
656
|
-
#
|
657
|
-
# The first time through this loop we mark @stratum_first_iter=true, which
|
658
|
-
# tells the Join::each code to join up all its @storage subcollections to
|
659
|
-
# start. In subsequent iterations the join code uses some table's @delta to
|
660
|
-
# ensure that only new tuples are derived.
|
661
|
-
#
|
662
|
-
# Note that calling "each" on a non-Join collection will iterate through
|
663
|
-
# both storage and delta.
|
664
|
-
#
|
665
|
-
# At the end of each iteration of this loop we transition:
|
666
|
-
# - @delta tuples are merged into @storage
|
667
|
-
# - @new_delta tuples are moved into @delta
|
668
|
-
# - @new_delta is set to empty
|
669
|
-
#
|
670
|
-
# XXX as a performance optimization, it would be nice to bypass the delta
|
671
|
-
# tables for any preds that don't participate in a rhs Join -- in that case
|
672
|
-
# there's pointless extra tuple movement letting tuples "graduate" through
|
673
|
-
# @new_delta and @delta.
|
674
|
-
|
675
|
-
# In semi-naive, the first iteration should join up tables on their storage
|
676
|
-
# fields; subsequent iterations do the delta-joins only. The
|
677
|
-
# stratum_first_iter field here distinguishes these cases.
|
678
|
-
@stratum_first_iter = true
|
679
|
-
begin
|
680
|
-
strat.each_with_index do |r,i|
|
681
|
-
fixpoint = false
|
682
|
-
begin
|
683
|
-
r.call
|
684
|
-
rescue Exception => e
|
685
|
-
# Don't report source text for certain rules (old-style rule blocks)
|
686
|
-
rule_src = @rule_orig_src[strat_num][i] unless @rule_orig_src[strat_num].nil?
|
687
|
-
src_msg = ""
|
688
|
-
unless rule_src == ""
|
689
|
-
src_msg = "\nRule: #{rule_src}"
|
690
|
-
end
|
691
|
-
|
692
|
-
new_e = e
|
693
|
-
unless new_e.class <= BudError
|
694
|
-
new_e = BudError
|
695
|
-
end
|
696
|
-
raise new_e, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
|
697
|
-
end
|
698
|
-
end
|
699
|
-
@stratum_first_iter = false
|
700
|
-
fixpoint = true
|
701
|
-
# tick collections in this stratum; if we don't have info on that, tick all collections
|
702
|
-
colls = @stratum_collection_map[strat_num] if @stratum_collection_map
|
703
|
-
colls ||= @tables.keys
|
704
|
-
colls.each do |name|
|
705
|
-
begin
|
706
|
-
coll = self.send(name)
|
707
|
-
unless coll.delta.empty? and coll.new_delta.empty?
|
708
|
-
coll.tick_deltas
|
709
|
-
fixpoint = false
|
710
|
-
end
|
711
|
-
rescue
|
712
|
-
# ignore missing tables; rebl for example deletes them mid-stream
|
713
|
-
end
|
714
|
-
end
|
715
|
-
end while not fixpoint
|
716
|
-
end
|
717
|
-
|
718
|
-
private
|
719
|
-
|
720
|
-
######## ids and timers
|
721
|
-
def gen_id
|
722
|
-
Time.new.to_i.to_s << rand.to_s
|
723
|
-
end
|
724
|
-
|
725
|
-
def set_periodic_timer(name, id, period)
|
726
|
-
EventMachine::PeriodicTimer.new(period) do
|
727
|
-
@tables[name] <+ [[id, Time.new]]
|
728
|
-
tick
|
729
|
-
end
|
730
|
-
end
|
731
|
-
|
732
|
-
# Fork a new process. This is identical to Kernel#fork, except that it also
|
733
|
-
# cleans up Bud and EventMachine-related state. As with Kernel#fork, the
|
734
|
-
# caller supplies a code block that is run in the child process; the PID of
|
735
|
-
# the child is returned by this method.
|
736
|
-
def self.do_fork
|
737
|
-
Kernel.fork do
|
738
|
-
srand
|
739
|
-
# This is somewhat grotty: we basically clone what EM::fork_reactor does,
|
740
|
-
# except that we don't want the user-supplied block to be invoked by the
|
741
|
-
# reactor thread.
|
742
|
-
if EventMachine::reactor_running?
|
743
|
-
EventMachine::stop_event_loop
|
744
|
-
EventMachine::release_machine
|
745
|
-
EventMachine::instance_variable_set('@reactor_running', false)
|
746
|
-
end
|
747
|
-
# Shutdown all the Bud instances inherited from the parent process, but
|
748
|
-
# don't invoke their shutdown callbacks
|
749
|
-
Bud.shutdown_all_instances(false)
|
750
|
-
|
751
|
-
$got_shutdown_signal = false
|
752
|
-
$setup_signal_handler = false
|
753
|
-
|
754
|
-
yield
|
755
|
-
end
|
756
|
-
end
|
757
|
-
|
758
|
-
# Note that this affects anyone else in the same process who happens to be
|
759
|
-
# using EventMachine! This is also a non-blocking call; to block until EM
|
760
|
-
# has completely shutdown, join on EM::reactor_thread.
|
761
|
-
def self.stop_em_loop
|
762
|
-
EventMachine::stop_event_loop
|
763
|
-
|
764
|
-
# If another instance of Bud is started later, we'll need to reinitialize
|
765
|
-
# the signal handlers (since they depend on EM).
|
766
|
-
$signal_handler_setup = false
|
767
|
-
end
|
768
|
-
|
769
|
-
# Signal handling. If multiple Bud instances are running inside a single
|
770
|
-
# process, we want a SIGINT or SIGTERM signal to cleanly shutdown all of them.
|
771
|
-
def self.init_signal_handlers(b)
|
772
|
-
$signal_lock.synchronize {
|
773
|
-
# If we setup signal handlers and then fork a new process, we want to
|
774
|
-
# reinitialize the signal handler in the child process.
|
775
|
-
unless b.options[:no_signal_handlers] or $signal_handler_setup
|
776
|
-
EventMachine::PeriodicTimer.new(SIGNAL_CHECK_PERIOD) do
|
777
|
-
if $got_shutdown_signal
|
778
|
-
Bud.shutdown_all_instances
|
779
|
-
Bud.stop_em_loop
|
780
|
-
$got_shutdown_signal = false
|
781
|
-
end
|
782
|
-
end
|
783
|
-
|
784
|
-
["INT", "TERM"].each do |signal|
|
785
|
-
Signal.trap(signal) {
|
786
|
-
$got_shutdown_signal = true
|
787
|
-
}
|
788
|
-
end
|
789
|
-
$setup_signal_handler_pid = true
|
790
|
-
end
|
791
|
-
|
792
|
-
$instance_id += 1
|
793
|
-
$bud_instances[$instance_id] = b
|
794
|
-
return $instance_id
|
795
|
-
}
|
796
|
-
end
|
797
|
-
|
798
|
-
def self.shutdown_all_instances(do_shutdown_cb=true)
|
799
|
-
instances = nil
|
800
|
-
$signal_lock.synchronize {
|
801
|
-
instances = $bud_instances.clone
|
802
|
-
}
|
803
|
-
|
804
|
-
instances.each_value {|b| b.stop_bg(false, do_shutdown_cb) }
|
805
|
-
end
|
806
|
-
end
|