bud 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/README +33 -16
  2. data/bin/budplot +42 -65
  3. data/bin/budtimelines +235 -0
  4. data/bin/budvis +24 -122
  5. data/bin/rebl +1 -0
  6. data/docs/README.md +21 -10
  7. data/docs/bfs.md +4 -6
  8. data/docs/c.html +251 -0
  9. data/docs/cheat.md +45 -30
  10. data/docs/deploy.md +26 -26
  11. data/docs/getstarted.md +6 -4
  12. data/docs/visualizations.md +43 -31
  13. data/examples/chat/chat.rb +4 -9
  14. data/examples/chat/chat_server.rb +1 -8
  15. data/examples/deploy/deploy_ip_port +1 -0
  16. data/examples/deploy/keys.rb +5 -0
  17. data/examples/deploy/tokenring-ec2.rb +9 -9
  18. data/examples/deploy/{tokenring-local.rb → tokenring-fork.rb} +3 -5
  19. data/examples/deploy/tokenring-thread.rb +15 -0
  20. data/examples/deploy/tokenring.rb +25 -17
  21. data/lib/bud/aggs.rb +87 -25
  22. data/lib/bud/bud_meta.rb +48 -31
  23. data/lib/bud/bust/bust.rb +16 -15
  24. data/lib/bud/collections.rb +207 -232
  25. data/lib/bud/depanalysis.rb +1 -0
  26. data/lib/bud/deploy/countatomicdelivery.rb +8 -20
  27. data/lib/bud/deploy/deployer.rb +16 -16
  28. data/lib/bud/deploy/ec2deploy.rb +34 -35
  29. data/lib/bud/deploy/forkdeploy.rb +90 -0
  30. data/lib/bud/deploy/threaddeploy.rb +38 -0
  31. data/lib/bud/graphs.rb +103 -199
  32. data/lib/bud/joins.rb +190 -41
  33. data/lib/bud/monkeypatch.rb +84 -0
  34. data/lib/bud/rebl.rb +8 -1
  35. data/lib/bud/rewrite.rb +152 -49
  36. data/lib/bud/server.rb +1 -0
  37. data/lib/bud/state.rb +24 -10
  38. data/lib/bud/storage/dbm.rb +170 -0
  39. data/lib/bud/storage/tokyocabinet.rb +5 -1
  40. data/lib/bud/stratify.rb +6 -7
  41. data/lib/bud/viz.rb +31 -17
  42. data/lib/bud/viz_util.rb +204 -0
  43. data/lib/bud.rb +271 -244
  44. data/lib/bud.rb.orig +806 -0
  45. metadata +43 -22
  46. data/docs/bfs.raw +0 -251
  47. data/docs/diffs +0 -181
  48. data/examples/basics/out +0 -1103
  49. data/examples/basics/out.new +0 -856
  50. data/lib/bud/deploy/localdeploy.rb +0 -53
data/lib/bud.rb.orig ADDED
@@ -0,0 +1,806 @@
1
+ require 'rubygems'
2
+ require 'eventmachine'
3
+ require 'msgpack'
4
+ require 'socket'
5
+ require 'superators'
6
+ require 'thread'
7
+
8
+ require 'bud/monkeypatch'
9
+
10
+ require 'bud/aggs'
11
+ require 'bud/bud_meta'
12
+ require 'bud/collections'
13
+ require 'bud/depanalysis'
14
+ require 'bud/deploy/forkdeploy'
15
+ require 'bud/deploy/threaddeploy'
16
+ require 'bud/errors'
17
+ require 'bud/joins'
18
+ require 'bud/rtrace'
19
+ require 'bud/server'
20
+ require 'bud/state'
21
+ require 'bud/storage/dbm'
22
+ require 'bud/storage/tokyocabinet'
23
+ require 'bud/storage/zookeeper'
24
+ require 'bud/stratify'
25
+ require 'bud/viz'
26
+
27
+ ILLEGAL_INSTANCE_ID = -1
28
+ SIGNAL_CHECK_PERIOD = 0.2
29
+
30
+ $signal_lock = Mutex.new
31
+ $got_shutdown_signal = false
32
+ $signal_handler_setup = false
33
+ $instance_id = 0
34
+ $bud_instances = {} # Map from instance id => Bud instance
35
+
36
+ # The root Bud module. To cause an instance of Bud to begin executing, there are
37
+ # three main options:
38
+ #
39
+ # 1. Synchronously. To do this, instantiate your program and then call tick()
40
+ # one or more times; each call evaluates a single Bud timestep. Note that in
41
+ # this mode, network communication (channels) and timers cannot be used. This
42
+ # is mostly intended for "one-shot" programs that compute a single result and
43
+ # then terminate.
44
+ # 2. In a separate thread in the foreground. To do this, instantiate your
45
+ # program and then call run_fg(). The Bud interpreter will then run, handling
46
+ # network events and evaluating new timesteps as appropriate. The run_fg()
47
+ # method will not return unless an error occurs.
48
+ # 3. In a separate thread in the background. To do this, instantiate your
49
+ # program and then call run_bg(). The Bud interpreter will run
50
+ # asynchronously. To interact with Bud (e.g., insert additional data or
51
+ # inspect the state of a Bud collection), use the sync_do and async_do
52
+ # methods. To shutdown the Bud interpreter, use stop_bg().
53
+ #
54
+ # Most programs should use method #3.
55
+ #
56
+ # :main: Bud
57
+ module Bud
58
+ attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
59
+ attr_reader :dsock
60
+ attr_reader :tables, :ip, :port
61
+ attr_reader :stratum_first_iter, :joinstate
62
+ attr_accessor :lazy # This can be changed on-the-fly by REBL
63
+ attr_accessor :stratum_collection_map
64
+
65
+ # options to the Bud runtime are passed in a hash, with the following keys
66
+ # * network configuration
67
+ # * <tt>:ip</tt> IP address string for this instance
68
+ # * <tt>:port</tt> port number for this instance
69
+ # * <tt>:ext_ip</tt> IP address at which external nodes can contact this instance
70
+ # * <tt>:ext_port</tt> port number to go with <tt>:ext_ip</tt>
71
+ # * <tt>:bust_port</tt> port number for the restful HTTP messages
72
+ # * operating system interaction
73
+ # * <tt>:stdin</tt> if non-nil, reading from the +stdio+ collection results in reading from this +IO+ handle
74
+ # * <tt>:stdout</tt> writing to the +stdio+ collection results in writing to this +IO+ handle; defaults to <tt>$stdout</tt>
75
+ # * <tt>:no_signal_handlers</tt> if true, runtime ignores +SIGINT+ and +SIGTERM+
76
+ # * tracing and output
77
+ # * <tt>:quiet</tt> if true, suppress certain messages
78
+ # * <tt>:trace</tt> if true, generate +budvis+ outputs
79
+ # * <tt>:rtrace</tt> if true, generate +budplot+ outputs
80
+ # * <tt>:dump_rewrite</tt> if true, dump results of internal rewriting of Bloom code to a file
81
+ # * controlling execution
82
+ # * <tt>:lazy</tt> if true, prevents runtime from ticking except on external calls to +tick+
83
+ # * <tt>:tag</tt> a name for this instance, suitable for display during tracing and visualization
84
+ # * storage configuration
85
+ # * <tt>:tc_dir</tt> filesystem directory to hold TokyoCabinet data stores
86
+ # * <tt>:tc_truncate</tt> if true, TokyoCabinet collections are opened with +OTRUNC+
87
+ # * deployment
88
+ # * <tt>:deploy</tt> enable deployment
89
+ # * <tt>:deploy_child_opts</tt> option hash to pass to deployed instances
90
+ def initialize(options={})
91
+ @tables = {}
92
+ @table_meta = []
93
+ @rewritten_strata = []
94
+ @channels = {}
95
+ @tc_tables = {}
96
+ @dbm_tables = {}
97
+ @zk_tables = {}
98
+ @callbacks = {}
99
+ @callback_id = 0
100
+ @shutdown_callbacks = []
101
+ @post_shutdown_callbacks = []
102
+ @timers = []
103
+ @budtime = 0
104
+ @inbound = []
105
+ @done_bootstrap = false
106
+ @joinstate = {} # joins are stateful, their state needs to be kept inside the Bud instance
107
+ @instance_id = ILLEGAL_INSTANCE_ID # Assigned when we start running
108
+
109
+ # Setup options (named arguments), along with default values
110
+ @options = options.clone
111
+ @lazy = @options[:lazy] ||= false
112
+ @options[:ip] ||= "127.0.0.1"
113
+ @ip = @options[:ip]
114
+ @options[:port] ||= 0
115
+ @options[:port] = @options[:port].to_i
116
+ # NB: If using an ephemeral port (specified by port = 0), the actual port
117
+ # number won't be known until we start EM
118
+
119
+ relatives = self.class.modules + [self.class]
120
+ relatives.each do |r|
121
+ Bud.rewrite_local_methods(r)
122
+ end
123
+
124
+ @declarations = ModuleRewriter.get_rule_defs(self.class)
125
+
126
+ init_state
127
+
128
+ @viz = VizOnline.new(self) if @options[:trace]
129
+ @rtracer = RTrace.new(self) if @options[:rtrace]
130
+
131
+ # Get dependency info and determine stratification order.
132
+ unless self.class <= Stratification or self.class <= DepAnalysis
133
+ do_rewrite
134
+ end
135
+
136
+ # Load the rules as a closure. Each element of @strata is an array of
137
+ # lambdas, one for each rewritten rule in that stratum. Note that legacy Bud
138
+ # code (with user-specified stratification) assumes that @strata is a simple
139
+ # array, so we need to convert it before loading the rewritten strata.
140
+ @strata = []
141
+ @rule_src = []
142
+ @rule_orig_src = []
143
+ declaration
144
+ @strata.each_with_index do |s,i|
145
+ raise BudError if s.class <= Array
146
+ @strata[i] = [s]
147
+ # Don't try to record source text for old-style rule blocks
148
+ @rule_src[i] = [""]
149
+ end
150
+
151
+ @rewritten_strata.each_with_index do |src_ary,i|
152
+ @strata[i] ||= []
153
+ @rule_src[i] ||= []
154
+ @rule_orig_src[i] ||= []
155
+ src_ary.each_with_index do |src, j|
156
+ @strata[i] << eval("lambda { #{src} }")
157
+ @rule_src[i] << src
158
+ @rule_orig_src[i] << @no_attr_rewrite_strata[i][j]
159
+ end
160
+ end
161
+ end
162
+
163
+ private
164
+
165
+ # Rewrite methods defined in the given klass to expand module references and
166
+ # temp collections. Imported modules are rewritten during the import process;
167
+ # we rewrite the main Bud class and any included modules here. Note that we
168
+ # only rewrite each distinct Class once.
169
+ def self.rewrite_local_methods(klass)
170
+ @done_rewrite ||= {}
171
+ return if @done_rewrite.has_key? klass.name
172
+
173
+ u = Unifier.new
174
+ ref_expander = NestedRefRewriter.new(klass.bud_import_table)
175
+ tmp_expander = TempExpander.new
176
+ r2r = Ruby2Ruby.new
177
+
178
+ klass.instance_methods(false).each do |m|
179
+ ast = ParseTree.translate(klass, m)
180
+ ast = u.process(ast)
181
+ ast = ref_expander.process(ast)
182
+ ast = tmp_expander.process(ast)
183
+
184
+ if (ref_expander.did_work or tmp_expander.did_work)
185
+ new_source = r2r.process(ast)
186
+ klass.module_eval new_source # Replace previous method def
187
+ end
188
+
189
+ ref_expander.did_work = false
190
+ tmp_expander.did_work = false
191
+ end
192
+
193
+ # If we found any temp statements in the klass's rule blocks, add a state
194
+ # block with declarations for the corresponding temp collections.
195
+ s = tmp_expander.get_state_meth(klass)
196
+ if s
197
+ state_src = r2r.process(s)
198
+ klass.module_eval(state_src)
199
+ end
200
+
201
+ # Always rewrite anonymous classes
202
+ @done_rewrite[klass.name] = true unless klass.name == ""
203
+ end
204
+
205
+ # Invoke all the user-defined state blocks and initialize builtin state.
206
+ def init_state
207
+ builtin_state
208
+ call_state_methods
209
+ end
210
+
211
+ # If module Y is a parent module of X, X's state block might reference state
212
+ # defined in Y. Hence, we want to invoke Y's state block first. However, when
213
+ # "import" and "include" are combined, we can't use the inheritance hierarchy
214
+ # to do this. When a module Z is imported, the import process inlines all the
215
+ # modules Z includes into a single module. Hence, we can no longer rely on the
216
+ # inheritance hierarchy to respect dependencies between modules. To fix this,
217
+ # we add an increasing ID to each state block's method name (assigned
218
+ # according to the order in which the state blocks are defined); we then sort
219
+ # by this order before invoking the state blocks.
220
+ def call_state_methods
221
+ meth_map = {} # map from ID => [Method]
222
+ self.class.instance_methods.each do |m|
223
+ next unless m =~ /^__state(\d+)__/
224
+ id = Regexp.last_match.captures.first.to_i
225
+ meth_map[id] ||= []
226
+ meth_map[id] << self.method(m)
227
+ end
228
+
229
+ meth_map.keys.sort.each do |i|
230
+ meth_map[i].each {|m| m.call}
231
+ end
232
+ end
233
+
234
+ # Evaluate all bootstrap blocks
235
+ def do_bootstrap
236
+ self.class.ancestors.reverse.each do |anc|
237
+ anc.instance_methods(false).each do |m|
238
+ if /^__bootstrap__/.match m
239
+ self.method(m.to_sym).call
240
+ end
241
+ end
242
+ end
243
+ bootstrap
244
+
245
+ @done_bootstrap = true
246
+ end
247
+
248
+ def do_rewrite
249
+ @meta_parser = BudMeta.new(self, @declarations)
250
+ @rewritten_strata, @no_attr_rewrite_strata = @meta_parser.meta_rewrite
251
+ end
252
+
253
+ public
254
+
255
+ ########### give empty defaults for these
256
+ def declaration # :nodoc: all
257
+ end
258
+ def bootstrap # :nodoc: all
259
+ end
260
+
261
+ ########### metaprogramming support for ruby and for rule rewriting
262
+ # helper to define instance methods
263
+ def singleton_class # :nodoc: all
264
+ class << self; self; end
265
+ end
266
+
267
+ ######## methods for controlling execution
268
+
269
+ # Run Bud in the background (in a different thread). This means that the Bud
270
+ # interpreter will run asynchronously from the caller, so care must be used
271
+ # when interacting with it. For example, it is not safe to directly examine
272
+ # Bud collections from the caller's thread (see async_do and sync_do).
273
+ #
274
+ # This instance of Bud will continue to execute until stop_bg is called.
275
+ def run_bg
276
+ start_reactor
277
+ # Wait for Bud to start up before returning
278
+ schedule_and_wait do
279
+ start_bud
280
+ end
281
+ end
282
+
283
+ # Run Bud in the "foreground" -- the caller's thread will be used to run the
284
+ # Bud interpreter. This means this method won't return unless an error
285
+ # occurs. It is often more useful to run Bud asynchronously -- see run_bg.
286
+ def run_fg
287
+ # If we're called from the EventMachine thread (and EM is running), blocking
288
+ # the current thread would imply deadlocking ourselves.
289
+ if Thread.current == EventMachine::reactor_thread and EventMachine::reactor_running?
290
+ raise BudError, "Cannot invoke run_fg from inside EventMachine"
291
+ end
292
+
293
+ q = Queue.new
294
+ # Note that this must be a post-shutdown callback: if this is the only
295
+ # thread, then the program might exit after run_fg() returns. If run_fg()
296
+ # blocked on a normal shutdown callback, the program might exit before the
297
+ # other shutdown callbacks have a chance to run.
298
+ post_shutdown do
299
+ q.push(true)
300
+ end
301
+
302
+ run_bg
303
+ # Block caller's thread until Bud has shutdown
304
+ q.pop
305
+ end
306
+
307
+ # Shutdown a Bud instance that is running asynchronously. This method blocks
308
+ # until Bud has been shutdown. If +stop_em+ is true, the EventMachine event
309
+ # loop is also shutdown; this will interfere with the execution of any other
310
+ # Bud instances in the same process (as well as anything else that happens to
311
+ # use EventMachine).
312
+ def stop_bg(stop_em=false, do_shutdown_cb=true)
313
+ schedule_and_wait do
314
+ do_shutdown(do_shutdown_cb)
315
+ end
316
+
317
+ if stop_em
318
+ Bud.stop_em_loop
319
+ EventMachine::reactor_thread.join
320
+ end
321
+ end
322
+
323
+ # Register a callback that will be invoked when this instance of Bud is
324
+ # shutting down.
325
+ def on_shutdown(&blk)
326
+ # Start EM if not yet started
327
+ start_reactor
328
+ schedule_and_wait do
329
+ @shutdown_callbacks << blk
330
+ end
331
+ end
332
+
333
+ # Register a callback that will be invoked *after* this instance of Bud
334
+ # has been shutdown.
335
+ def post_shutdown(&blk)
336
+ # Start EM if not yet started
337
+ start_reactor
338
+ schedule_and_wait do
339
+ @post_shutdown_callbacks << blk
340
+ end
341
+ end
342
+
343
+ # Given a block, evaluate that block inside the background Ruby thread at some
344
+ # time in the future. Because the block is evaluated inside the background Ruby
345
+ # thread, the block can safely examine Bud state. Naturally, this method can
346
+ # only be used when Bud is running in the background. Note that calling
347
+ # sync_do blocks the caller until the block has been evaluated; for a
348
+ # non-blocking version, see async_do.
349
+ #
350
+ # Note that the block is invoked after one Bud timestep has ended but before
351
+ # the next timestep begins. Hence, synchronous accumulation (<=) into a Bud
352
+ # scratch collection in a callback is typically not a useful thing to do: when
353
+ # the next tick begins, the content of any scratch collections will be
354
+ # emptied, which includes anything inserted by a sync_do block using <=. To
355
+ # avoid this behavior, insert into scratches using <+.
356
+ def sync_do
357
+ schedule_and_wait do
358
+ yield if block_given?
359
+ # Do another tick, in case the user-supplied block inserted any data
360
+ tick
361
+ end
362
+ end
363
+
364
+ # Like sync_do, but does not block the caller's thread: the given callback
365
+ # will be invoked at some future time. Note that calls to async_do respect
366
+ # FIFO order.
367
+ def async_do
368
+ EventMachine::schedule do
369
+ yield if block_given?
370
+ # Do another tick, in case the user-supplied block inserted any data
371
+ tick
372
+ end
373
+ end
374
+
375
+ # Shutdown any persistent tables used by the current Bud instance. If you are
376
+ # running Bud via tick() and using +tctable+ collections, you should call this
377
+ # after you're finished using Bud. Programs that use Bud via run_fg() or
378
+ # run_bg() don't need to call this manually.
379
+ def close_tables
380
+ @tables.each_value do |t|
381
+ t.close
382
+ end
383
+ end
384
+
385
+ # Register a new callback. Given the name of a Bud collection, this method
386
+ # arranges for the given block to be invoked at the end of any tick in which
387
+ # any tuples have been inserted into the specified collection. The code block
388
+ # is passed the collection as an argument; this provides a convenient way to
389
+ # examine the tuples inserted during that fixpoint. (Note that because the Bud
390
+ # runtime is blocked while the callback is invoked, it can also examine any
391
+ # other Bud state freely.)
392
+ #
393
+ # Note that registering callbacks on persistent collections (e.g., tables and
394
+ # tctables) is probably not a wise thing to do: as long as any tuples are
395
+ # stored in the collection, the callback will be invoked at the end of every
396
+ # tick.
397
+ def register_callback(tbl_name, &block)
398
+ # We allow callbacks to be added before or after EM has been started. To
399
+ # simplify matters, we start EM if it hasn't been started yet.
400
+ start_reactor
401
+ cb_id = nil
402
+ schedule_and_wait do
403
+ unless @tables.has_key? tbl_name
404
+ raise Bud::BudError, "No such table: #{tbl_name}"
405
+ end
406
+
407
+ raise Bud::BudError if @callbacks.has_key? @callback_id
408
+ @callbacks[@callback_id] = [tbl_name, block]
409
+ cb_id = @callback_id
410
+ @callback_id += 1
411
+ end
412
+ return cb_id
413
+ end
414
+
415
+ # Unregister the callback that has the given ID.
416
+ def unregister_callback(id)
417
+ schedule_and_wait do
418
+ raise Bud::BudError unless @callbacks.has_key? id
419
+ @callbacks.delete(id)
420
+ end
421
+ end
422
+
423
+ # sync_callback supports synchronous interaction with Bud modules. The caller
424
+ # supplies the name of an input collection, a set of tuples to insert, and an
425
+ # output collection on which to 'listen.' The call blocks until tuples are
426
+ # inserted into the output collection: these are returned to the caller.
427
+ def sync_callback(in_tbl, tupleset, out_tbl)
428
+ q = Queue.new
429
+ cb = register_callback(out_tbl) do |c|
430
+ q.push c.to_a
431
+ end
432
+ unless in_tbl.nil?
433
+ sync_do {
434
+ t = @tables[in_tbl]
435
+ if t.class <= Bud::BudChannel or t.class <= Bud::BudZkTable
436
+ t <~ tupleset
437
+ else
438
+ t <+ tupleset
439
+ end
440
+ }
441
+ end
442
+ result = q.pop
443
+ unregister_callback(cb)
444
+ return result
445
+ end
446
+
447
+ # A common special case for sync_callback: block on a delta to a table.
448
+ def delta(out_tbl)
449
+ sync_callback(nil, nil, out_tbl)
450
+ end
451
+
452
+ private
453
+
454
+ def invoke_callbacks
455
+ @callbacks.each_value do |cb|
456
+ tbl_name, block = cb
457
+ tbl = @tables[tbl_name]
458
+ unless tbl.empty?
459
+ block.call(tbl)
460
+ end
461
+ end
462
+ end
463
+
464
+ def start_reactor
465
+ return if EventMachine::reactor_running?
466
+
467
+ EventMachine::error_handler do |e|
468
+ puts "Unexpected Bud error: #{e.inspect}"
469
+ puts e.backtrace.join("\n")
470
+ Bud.shutdown_all_instances
471
+ raise e
472
+ end
473
+
474
+ # Block until EM has successfully started up.
475
+ q = Queue.new
476
+ # This thread helps us avoid race conditions on the start and stop of
477
+ # EventMachine's event loop.
478
+ Thread.new do
479
+ EventMachine.run do
480
+ q.push(true)
481
+ end
482
+ end
483
+ # Block waiting for EM's event loop to start up.
484
+ q.pop
485
+ end
486
+
487
+ # Schedule a block to be evaluated by EventMachine in the future, and
488
+ # block until this has happened.
489
+ def schedule_and_wait
490
+ # If EM isn't running, just run the user's block immediately
491
+ # XXX: not clear that this is the right behavior
492
+ unless EventMachine::reactor_running?
493
+ yield
494
+ return
495
+ end
496
+
497
+ q = Queue.new
498
+ EventMachine::schedule do
499
+ ret = false
500
+ begin
501
+ yield
502
+ rescue Exception
503
+ ret = $!
504
+ end
505
+ q.push(ret)
506
+ end
507
+
508
+ resp = q.pop
509
+ raise resp if resp
510
+ end
511
+
512
+ def do_shutdown(do_shutdown_cb=true)
513
+ # Silently ignore duplicate shutdown requests or attempts to shutdown an
514
+ # instance that hasn't been started yet.
515
+ return if @instance_id == ILLEGAL_INSTANCE_ID
516
+
517
+ $signal_lock.synchronize {
518
+ raise unless $bud_instances.has_key? @instance_id
519
+ $bud_instances.delete @instance_id
520
+ @instance_id = ILLEGAL_INSTANCE_ID
521
+ }
522
+
523
+ if do_shutdown_cb
524
+ @shutdown_callbacks.each {|cb| cb.call}
525
+ end
526
+ @timers.each {|t| t.cancel}
527
+ close_tables
528
+ @dsock.close_connection if EventMachine::reactor_running?
529
+ if do_shutdown_cb
530
+ @post_shutdown_callbacks.each {|cb| cb.call}
531
+ end
532
+ end
533
+
534
+ private
535
+ def start_bud
536
+ raise BudError unless EventMachine::reactor_thread?
537
+
538
+ @instance_id = Bud.init_signal_handlers(self)
539
+ do_start_server
540
+
541
+ # Initialize periodics
542
+ @periodics.each do |p|
543
+ @timers << set_periodic_timer(p.pername, p.ident, p.period)
544
+ end
545
+
546
+ # Arrange for Bud to read from stdin if enabled. Note that we can't do this
547
+ # earlier because we need to wait for EventMachine startup.
548
+ @stdio.start_stdin_reader if @options[:stdin]
549
+ @zk_tables.each_value {|t| t.start_watchers}
550
+
551
+ # Compute a fixpoint; this will also invoke any bootstrap blocks.
552
+ tick unless @lazy
553
+
554
+ @rtracer.sleep if options[:rtrace]
555
+ end
556
+
557
+ def do_start_server
558
+ @dsock = EventMachine::open_datagram_socket(@ip, @options[:port],
559
+ BudServer, self)
560
+ @port = Socket.unpack_sockaddr_in(@dsock.get_sockname)[0]
561
+ end
562
+
563
+ public
564
+
565
+ # Returns the IP and port of the Bud instance as a string. In addition to the
566
+ # local IP and port, the user may define an external IP and/or port. The
567
+ # external version of each is returned if available. If not, the local
568
+ # version is returned. There are use cases for mixing and matching local and
569
+ # external. local_ip:external_port would be if you have local port
570
+ # forwarding, and external_ip:local_port would be if you're in a DMZ, for
571
+ # example.
572
+ def ip_port
573
+ raise BudError, "ip_port called before port defined" if @port.nil? and @options[:port] == 0 and not @options[:ext_port]
574
+
575
+ ip = options[:ext_ip] ? "#{@options[:ext_ip]}" : "#{@ip}"
576
+ port = options[:ext_port] ? "#{@options[:ext_port]}" :
577
+ (@port.nil? ? "#{@options[:port]}" : "#{@port}")
578
+ ip + ":" + port
579
+ end
580
+
581
+ # Returns the internal IP and port. See ip_port.
582
+ def int_ip_port
583
+ raise BudError, "ip_port called before port defined" if @port.nil? and @options[:port] == 0
584
+ @port.nil? ? "#{@ip}:#{@options[:port]}" : "#{@ip}:#{@port}"
585
+ end
586
+
587
+ # Manually trigger one timestep of Bloom execution.
588
+ def tick
589
+ @tables.each_value do |t|
590
+ t.tick
591
+ end
592
+
593
+ @joinstate = {}
594
+
595
+ do_bootstrap unless @done_bootstrap
596
+ receive_inbound
597
+
598
+ @strata.each_with_index { |s,i| stratum_fixpoint(s, i) }
599
+ @viz.do_cards if @options[:trace]
600
+ do_flush
601
+ invoke_callbacks
602
+ @budtime += 1
603
+ end
604
+
605
+ private
606
+
607
+ # Builtin BUD state (predefined collections). We could define this using the
608
+ # standard "state" syntax, but we want to ensure that builtin state is
609
+ # initialized before user-defined state.
610
+ def builtin_state
611
+ channel :localtick, [:col1]
612
+ @stdio = terminal :stdio
613
+ @periodics = table :periodics_tbl, [:pername] => [:ident, :period]
614
+
615
+ # for BUD reflection
616
+ table :t_rules, [:rule_id] => [:lhs, :op, :src, :orig_src]
617
+ table :t_depends, [:rule_id, :lhs, :op, :body] => [:nm]
618
+ table :t_depends_tc, [:head, :body, :via, :neg, :temporal]
619
+ table :t_provides, [:interface] => [:input]
620
+ table :t_underspecified, t_provides.schema
621
+ table :t_stratum, [:predicate] => [:stratum]
622
+ table :t_cycle, [:predicate, :via, :neg, :temporal]
623
+ table :t_table_info, [:tab_name, :tab_type]
624
+ table :t_table_schema, [:tab_name, :col_name, :ord, :loc]
625
+ end
626
+
627
+ # Handle any inbound tuples off the wire and then clear. Received messages are
628
+ # placed directly into the storage of the appropriate local channel.
629
+ def receive_inbound
630
+ @inbound.each do |msg|
631
+ # puts "dequeueing tuple #{msg[1].inspect} into #{msg[0]} @ #{ip_port}"
632
+ tables[msg[0].to_sym] << msg[1]
633
+ end
634
+ @inbound = []
635
+ end
636
+
637
+ # "Flush" any tuples that need to be flushed. This does two things:
638
+ # 1. Emit outgoing tuples in channels and ZK tables.
639
+ # 2. Commit to disk any changes made to on-disk tables.
640
+ def do_flush
641
+ @channels.each { |c| @tables[c[0]].flush }
642
+ @zk_tables.each_value { |t| t.flush }
643
+ @tc_tables.each_value { |t| t.flush }
644
+ @dbm_tables.each_value { |t| t.flush }
645
+ end
646
+
647
+ def stratum_fixpoint(strat, strat_num)
648
+ # This routine uses semi-naive evaluation to compute a fixpoint of the rules
649
+ # in strat.
650
+ #
651
+ # As described in lib/collections.rb, each collection has three
652
+ # sub-collections of note here:
653
+ # @storage: the "main" storage of tuples
654
+ # @delta: tuples that should be used to drive derivation of new facts
655
+ # @new_delta: a place to store newly-derived facts
656
+ #
657
+ # The first time through this loop we mark @stratum_first_iter=true, which
658
+ # tells the Join::each code to join up all its @storage subcollections to
659
+ # start. In subsequent iterations the join code uses some table's @delta to
660
+ # ensure that only new tuples are derived.
661
+ #
662
+ # Note that calling "each" on a non-Join collection will iterate through
663
+ # both storage and delta.
664
+ #
665
+ # At the end of each iteration of this loop we transition:
666
+ # - @delta tuples are merged into @storage
667
+ # - @new_delta tuples are moved into @delta
668
+ # - @new_delta is set to empty
669
+ #
670
+ # XXX as a performance optimization, it would be nice to bypass the delta
671
+ # tables for any preds that don't participate in a rhs Join -- in that case
672
+ # there's pointless extra tuple movement letting tuples "graduate" through
673
+ # @new_delta and @delta.
674
+
675
+ # In semi-naive, the first iteration should join up tables on their storage
676
+ # fields; subsequent iterations do the delta-joins only. The
677
+ # stratum_first_iter field here distinguishes these cases.
678
+ @stratum_first_iter = true
679
+ begin
680
+ strat.each_with_index do |r,i|
681
+ fixpoint = false
682
+ begin
683
+ r.call
684
+ rescue Exception => e
685
+ # Don't report source text for certain rules (old-style rule blocks)
686
+ rule_src = @rule_orig_src[strat_num][i] unless @rule_orig_src[strat_num].nil?
687
+ src_msg = ""
688
+ unless rule_src == ""
689
+ src_msg = "\nRule: #{rule_src}"
690
+ end
691
+
692
+ new_e = e
693
+ unless new_e.class <= BudError
694
+ new_e = BudError
695
+ end
696
+ raise new_e, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
697
+ end
698
+ end
699
+ @stratum_first_iter = false
700
+ fixpoint = true
701
+ # tick collections in this stratum; if we don't have info on that, tick all collections
702
+ colls = @stratum_collection_map[strat_num] if @stratum_collection_map
703
+ colls ||= @tables.keys
704
+ colls.each do |name|
705
+ begin
706
+ coll = self.send(name)
707
+ unless coll.delta.empty? and coll.new_delta.empty?
708
+ coll.tick_deltas
709
+ fixpoint = false
710
+ end
711
+ rescue
712
+ # ignore missing tables; rebl for example deletes them mid-stream
713
+ end
714
+ end
715
+ end while not fixpoint
716
+ end
717
+
718
+ private
719
+
720
+ ######## ids and timers
721
+ def gen_id
722
+ Time.new.to_i.to_s << rand.to_s
723
+ end
724
+
725
+ def set_periodic_timer(name, id, period)
726
+ EventMachine::PeriodicTimer.new(period) do
727
+ @tables[name] <+ [[id, Time.new]]
728
+ tick
729
+ end
730
+ end
731
+
732
+ # Fork a new process. This is identical to Kernel#fork, except that it also
733
+ # cleans up Bud and EventMachine-related state. As with Kernel#fork, the
734
+ # caller supplies a code block that is run in the child process; the PID of
735
+ # the child is returned by this method.
736
+ def self.do_fork
737
+ Kernel.fork do
738
+ srand
739
+ # This is somewhat grotty: we basically clone what EM::fork_reactor does,
740
+ # except that we don't want the user-supplied block to be invoked by the
741
+ # reactor thread.
742
+ if EventMachine::reactor_running?
743
+ EventMachine::stop_event_loop
744
+ EventMachine::release_machine
745
+ EventMachine::instance_variable_set('@reactor_running', false)
746
+ end
747
+ # Shutdown all the Bud instances inherited from the parent process, but
748
+ # don't invoke their shutdown callbacks
749
+ Bud.shutdown_all_instances(false)
750
+
751
+ $got_shutdown_signal = false
752
+ $setup_signal_handler = false
753
+
754
+ yield
755
+ end
756
+ end
757
+
758
+ # Note that this affects anyone else in the same process who happens to be
759
+ # using EventMachine! This is also a non-blocking call; to block until EM
760
+ # has completely shutdown, join on EM::reactor_thread.
761
+ def self.stop_em_loop
762
+ EventMachine::stop_event_loop
763
+
764
+ # If another instance of Bud is started later, we'll need to reinitialize
765
+ # the signal handlers (since they depend on EM).
766
+ $signal_handler_setup = false
767
+ end
768
+
769
+ # Signal handling. If multiple Bud instances are running inside a single
770
+ # process, we want a SIGINT or SIGTERM signal to cleanly shutdown all of them.
771
+ def self.init_signal_handlers(b)
772
+ $signal_lock.synchronize {
773
+ # If we setup signal handlers and then fork a new process, we want to
774
+ # reinitialize the signal handler in the child process.
775
+ unless b.options[:no_signal_handlers] or $signal_handler_setup
776
+ EventMachine::PeriodicTimer.new(SIGNAL_CHECK_PERIOD) do
777
+ if $got_shutdown_signal
778
+ Bud.shutdown_all_instances
779
+ Bud.stop_em_loop
780
+ $got_shutdown_signal = false
781
+ end
782
+ end
783
+
784
+ ["INT", "TERM"].each do |signal|
785
+ Signal.trap(signal) {
786
+ $got_shutdown_signal = true
787
+ }
788
+ end
789
+ $setup_signal_handler_pid = true
790
+ end
791
+
792
+ $instance_id += 1
793
+ $bud_instances[$instance_id] = b
794
+ return $instance_id
795
+ }
796
+ end
797
+
798
+ def self.shutdown_all_instances(do_shutdown_cb=true)
799
+ instances = nil
800
+ $signal_lock.synchronize {
801
+ instances = $bud_instances.clone
802
+ }
803
+
804
+ instances.each_value {|b| b.stop_bg(false, do_shutdown_cb) }
805
+ end
806
+ end