bud 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/LICENSE +9 -0
  2. data/README +30 -0
  3. data/bin/budplot +134 -0
  4. data/bin/budvis +201 -0
  5. data/bin/rebl +4 -0
  6. data/docs/README.md +13 -0
  7. data/docs/bfs.md +379 -0
  8. data/docs/bfs.raw +251 -0
  9. data/docs/bfs_arch.png +0 -0
  10. data/docs/bloom-loop.png +0 -0
  11. data/docs/bust.md +83 -0
  12. data/docs/cheat.md +291 -0
  13. data/docs/deploy.md +96 -0
  14. data/docs/diffs +181 -0
  15. data/docs/getstarted.md +296 -0
  16. data/docs/intro.md +36 -0
  17. data/docs/modules.md +112 -0
  18. data/docs/operational.md +96 -0
  19. data/docs/rebl.md +99 -0
  20. data/docs/ruby_hooks.md +19 -0
  21. data/docs/visualizations.md +75 -0
  22. data/examples/README +1 -0
  23. data/examples/basics/hello.rb +12 -0
  24. data/examples/basics/out +1103 -0
  25. data/examples/basics/out.new +856 -0
  26. data/examples/basics/paths.rb +51 -0
  27. data/examples/bust/README.md +9 -0
  28. data/examples/bust/bustclient-example.rb +23 -0
  29. data/examples/bust/bustinspector.html +135 -0
  30. data/examples/bust/bustserver-example.rb +18 -0
  31. data/examples/chat/README.md +9 -0
  32. data/examples/chat/chat.rb +45 -0
  33. data/examples/chat/chat_protocol.rb +8 -0
  34. data/examples/chat/chat_server.rb +29 -0
  35. data/examples/deploy/tokenring-ec2.rb +26 -0
  36. data/examples/deploy/tokenring-local.rb +17 -0
  37. data/examples/deploy/tokenring.rb +39 -0
  38. data/lib/bud/aggs.rb +126 -0
  39. data/lib/bud/bud_meta.rb +185 -0
  40. data/lib/bud/bust/bust.rb +126 -0
  41. data/lib/bud/bust/client/idempotence.rb +10 -0
  42. data/lib/bud/bust/client/restclient.rb +49 -0
  43. data/lib/bud/collections.rb +937 -0
  44. data/lib/bud/depanalysis.rb +44 -0
  45. data/lib/bud/deploy/countatomicdelivery.rb +50 -0
  46. data/lib/bud/deploy/deployer.rb +67 -0
  47. data/lib/bud/deploy/ec2deploy.rb +200 -0
  48. data/lib/bud/deploy/localdeploy.rb +41 -0
  49. data/lib/bud/errors.rb +15 -0
  50. data/lib/bud/graphs.rb +405 -0
  51. data/lib/bud/joins.rb +300 -0
  52. data/lib/bud/rebl.rb +314 -0
  53. data/lib/bud/rewrite.rb +523 -0
  54. data/lib/bud/rtrace.rb +27 -0
  55. data/lib/bud/server.rb +43 -0
  56. data/lib/bud/state.rb +108 -0
  57. data/lib/bud/storage/tokyocabinet.rb +170 -0
  58. data/lib/bud/storage/zookeeper.rb +178 -0
  59. data/lib/bud/stratify.rb +83 -0
  60. data/lib/bud/viz.rb +65 -0
  61. data/lib/bud.rb +797 -0
  62. metadata +330 -0
data/lib/bud.rb ADDED
@@ -0,0 +1,797 @@
1
+ require 'rubygems'
2
+ require 'eventmachine'
3
+ require 'msgpack'
4
+ require 'socket'
5
+ require 'superators'
6
+ require 'thread'
7
+
8
+ require 'bud/aggs'
9
+ require 'bud/bud_meta'
10
+ require 'bud/collections'
11
+ require 'bud/errors'
12
+ require 'bud/joins'
13
+ require 'bud/rtrace'
14
+ require 'bud/server'
15
+ require 'bud/state'
16
+ require 'bud/storage/tokyocabinet'
17
+ require 'bud/storage/zookeeper'
18
+ require 'bud/viz'
19
+
20
+ # We monkeypatch Module to add support for Bloom state and code declarations.
21
+ class Module
22
+
23
# Import another module under a local qualifier: <tt>import MyModule => :m</tt>.
# +spec+ must be a one-entry Hash mapping a Module to a Symbol. A malformed
# spec, or a qualifier that is already in use, raises Bud::CompileError.
def import(spec)
  well_formed = spec.is_a?(Hash) && spec.length == 1
  raise Bud::CompileError unless well_formed
  mod, local_name = spec.first
  raise Bud::CompileError unless mod.is_a?(Module) && local_name.is_a?(Symbol)

  # Qualified references to imported modules are expanded through a table
  # keyed by local bind name. Each entry holds (a copy of) the imported
  # module's own import table, so nested references (a.b.c.d etc.) can be
  # followed one level at a time.
  @bud_import_tbl ||= {}
  nested_tbl = mod.bud_import_table
  raise Bud::CompileError if @bud_import_tbl.has_key?(local_name)
  @bud_import_tbl[local_name] = nested_tbl.clone # XXX: clone needed?

  rewritten_mod_name = ModuleRewriter.do_import(self, mod, local_name)
  module_eval("include #{rewritten_mod_name}")
end
41
+
42
# Declare this module's Bloom collections (one state block per module). The
# block becomes an instance method whose name is generated by
# Module.make_state_meth_name.
def state(&block)
  define_method(Module.make_state_meth_name(self), &block)
end
47
+
48
# Register a Ruby block to be run before timestep 1 (one per module). The
# block is stored as an instance method named "__bootstrap__<ClassName>".
def bootstrap(&block)
  define_method("__bootstrap__#{Module.get_class_name(self)}".to_sym, &block)
end
53
+
54
# Register a block of Bloom statements with the Bud runtime. The optional
# +block_name+ (a Symbol) allows several bloom blocks per module and lets a
# named block be overridden. Without a name, a unique one is generated from
# the class name and a per-module counter.
#
# Raises Bud::CompileError if +block_name+ is not a Symbol, or if a block with
# the same name was already defined directly in this module.
def bloom(block_name=nil, &block)
  if block_name.nil?
    # No name given: generate a unique one
    @block_id ||= 0
    block_name = "#{Module.get_class_name(self)}__#{@block_id}"
    @block_id += 1
  elsif !block_name.is_a?(Symbol)
    raise Bud::CompileError, "Bloom block names must be a symbol: #{block_name}"
  end

  # The module name ("self") is deliberately not encoded into the method name,
  # so named blocks can be overridden (via inheritance or mixin) in the same
  # way as normal Ruby methods.
  meth_name = "__bloom__#{block_name}"

  # Duplicate named blocks within a single module indicate a likely programmer
  # error. instance_methods returns Strings on Ruby 1.8 but Symbols on 1.9+,
  # so normalize to Strings before comparing against the String method name.
  if instance_methods(false).map { |m| m.to_s }.include?(meth_name)
    raise Bud::CompileError, "Duplicate named bloom block: '#{block_name}' in #{self}"
  end
  define_method(meth_name.to_sym, &block)
end
80
+
81
# Return this module's import table, creating an empty one on first use.
def bud_import_table #:nodoc: all
  @bud_import_tbl ||= {}
end
85
+
86
+ private
87
# Return a version of the class name appropriate for embedding into a method
# name. If class X is nested inside class/module Y, X's name is the string
# "Y::X"; method names must not contain colons, so only the last component
# ("X") is returned. For an anonymous class (whose name is "" or nil,
# depending on the Ruby version) the result is nil.
def self.get_class_name(klass)
  klass.name.to_s.split("::").last
end
94
+
95
# Build the method name for a state block. Names embed an auto-incrementing
# counter so that the order in which state blocks were defined can be
# recovered after module import (see Bud#call_state_methods).
def self.make_state_meth_name(klass)
  id = (@state_meth_id ||= 0)
  meth_name = "__state#{id}__#{Module.get_class_name(klass)}".to_sym
  @state_meth_id = id + 1
  meth_name
end
104
+ end
105
+
106
+ # The root Bud module. To cause an instance of Bud to begin executing, there are
107
+ # three main options:
108
+ #
109
+ # 1. Synchronously. To do this, instantiate your program and then call tick()
110
+ # one or more times; each call evaluates a single Bud timestep. Note that in
111
+ # this mode, network communication (channels) and timers cannot be used. This
112
+ # is mostly intended for "one-shot" programs that compute a single result and
113
+ # then terminate.
114
+ # 2. In a separate thread in the foreground. To do this, instantiate your
115
+ # program and then call run_fg(). The Bud interpreter will then run, handling
116
+ # network events and evaluating new timesteps as appropriate. The run_fg()
117
+ # method will not return unless an error occurs.
118
+ # 3. In a separate thread in the background. To do this, instantiate your
119
+ # program and then call run_bg(). The Bud interpreter will run
120
+ # asynchronously. To interact with Bud (e.g., insert additional data or
121
+ # inspect the state of a Bud collection), use the sync_do and async_do
122
+ # methods. To shutdown the Bud interpreter, use stop_bg().
123
+ #
124
+ # Most programs should use method #3.
125
+ #
126
+ # :main: Bud
127
+ module Bud
128
+ attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
129
+ attr_reader :dsock
130
+ attr_reader :tables, :ip, :port
131
+ attr_reader :stratum_first_iter
132
+ attr_accessor :lazy # This can be changed on-the-fly by REBL
133
+
134
# Create a Bud instance. Options to the runtime are passed in a hash, with the
# following keys:
# * network configuration
#   * <tt>:ip</tt>   IP address string for this instance (default "localhost")
#   * <tt>:port</tt>   port number for this instance (default 0)
#   * <tt>:ext_ip</tt>  IP address at which external nodes can contact this instance
#   * <tt>:ext_port</tt>   port number to go with :ext_ip
#   * <tt>:bust_port</tt>  port number for the restful http messages
# * operating system interaction
#   * <tt>:read_stdin</tt>  if true, captures stdin via the stdio collection
#   * <tt>:no_signal_handlers</tt>  if true, runtime ignores SIGINT and SIGTERM
# * tracing and output
#   * <tt>:quiet</tt>  if true, suppress certain messages
#   * <tt>:trace</tt>  if true, generate budvis outputs
#   * <tt>:rtrace</tt>  if true, generate budplot outputs
#   * <tt>:dump_rewrite</tt>  if true, dump results of internal rewriting of Bloom code to a file
# * controlling execution
#   * <tt>:lazy</tt>  if true, prevents runtime from ticking except on external calls to +tick+ (default false)
#   * <tt>:tag</tt>  a name for this instance, suitable for display during tracing and visualization
# * storage configuration
#   * <tt>:tc_dir</tt>  filesystem directory to hold TokyoCabinet data stores
#   * <tt>:tc_truncate</tt>  if true, TokyoCabinet collections are opened with OTRUNC
#
# Note that the defaults are written back into the +options+ hash that was
# passed in.
def initialize(options={})
  # Collections and their bookkeeping
  @tables = {}
  @table_meta = []
  @channels = {}
  @tc_tables = {}
  @zk_tables = {}
  @rewritten_strata = []

  # Callbacks and timers
  @callbacks = {}
  @callback_id = 0
  @timers = []

  # Execution state
  @budtime = 0
  @inbound = []
  @done_bootstrap = false
  @em_stopped = Queue.new
  # Joins are stateful; their state is kept inside the Bud instance.
  @joinstate = {}

  # Set up options (named arguments), along with default values.
  @options = options
  @options[:lazy] ||= false
  @lazy = @options[:lazy]
  @options[:ip] ||= "localhost"
  @ip = @options[:ip]
  # NB: If using an ephemeral port (specified by port = 0), the actual port
  # number won't be known until we start EM.
  @options[:port] = (@options[:port] || 0).to_i

  relatives = self.class.modules + [self.class]
  relatives.each { |r| Bud.rewrite_local_methods(r) }

  @declarations = ModuleRewriter.get_rule_defs(self.class)

  init_state

  # NB: Somewhat hacky. Dependency analysis and stratification are implemented
  # by Bud programs, so in order for those programs to parse, we need the
  # "Bud" class to have been defined first.
  require 'bud/depanalysis'
  require 'bud/stratify'

  @viz = VizOnline.new(self) if @options[:trace]
  @rtracer = RTrace.new(self) if @options[:rtrace]

  # Get dependency info and determine stratification order. The analysis
  # programs themselves are not rewritten.
  do_rewrite unless is_a?(Stratification) || is_a?(DepAnalysis)

  # Load the rules as closures. Each element of @strata is an array of
  # lambdas, one for each rewritten rule in that stratum. Legacy Bud code
  # (with user-specified stratification) assumes that @strata is a simple
  # array, so whatever +declaration+ put there is wrapped first.
  @strata = []
  @rule_src = []
  declaration
  @strata.each_with_index do |s,i|
    raise BudError if s.is_a?(Array)
    @strata[i] = [s]
    # Source text is not recorded for old-style rule blocks
    @rule_src[i] = [""]
  end

  @rewritten_strata.each_with_index do |src_ary,i|
    @strata[i] ||= []
    @rule_src[i] ||= []
    src_ary.each do |src|
      @strata[i] << eval("lambda { #{src} }")
      @rule_src[i] << src
    end
  end
end
227
+
228
+ private
229
+
230
# Rewrite methods defined in the given klass to expand module references and
# temp collections. Imported modules are rewritten during the import process;
# the main Bud class and any included modules are rewritten here. Each
# distinct named class is rewritten only once; anonymous classes are always
# rewritten.
def self.rewrite_local_methods(klass)
  @done_rewrite ||= {}
  # Anonymous classes report "" or nil as their name depending on the Ruby
  # version; neither may be used as a "done" key, or every later anonymous
  # class would be skipped.
  anonymous = klass.name.nil? || klass.name == ""
  return if !anonymous && @done_rewrite.has_key?(klass.name)

  u = Unifier.new
  ref_expander = NestedRefRewriter.new(klass.bud_import_table)
  tmp_expander = TempExpander.new
  r2r = Ruby2Ruby.new

  klass.instance_methods(false).each do |m|
    ast = ParseTree.translate(klass, m)
    ast = u.process(ast)
    ast = ref_expander.process(ast)
    ast = tmp_expander.process(ast)

    # Only regenerate source when one of the expanders changed something
    if ref_expander.did_work or tmp_expander.did_work
      new_source = r2r.process(ast)
      klass.module_eval new_source # Replace previous method def
    end

    ref_expander.did_work = false
    tmp_expander.did_work = false
  end

  # If we found any temp statements in the klass's rule blocks, add a state
  # block with declarations for the corresponding temp collections.
  s = tmp_expander.get_state_meth(klass)
  if s
    state_src = r2r.process(s)
    klass.module_eval(state_src)
  end

  @done_rewrite[klass.name] = true unless anonymous
end
269
+
270
# Set up all collections: builtin state first, then every user-defined state
# block.
def init_state
  builtin_state
  call_state_methods
end
275
+
276
# Invoke every state block in the order in which the blocks were defined.
#
# If module Y is a parent module of X, X's state block might reference state
# defined in Y, so Y's state block must run first. When "import" and "include"
# are combined, the inheritance hierarchy cannot be used for this: importing a
# module Z inlines all the modules Z includes into a single module. Instead,
# each state block's method name carries an increasing ID (assigned in
# definition order), and the blocks are invoked sorted by that ID.
def call_state_methods
  by_id = {} # map from ID => [Method]
  self.class.instance_methods.each do |name|
    md = /^__state(\d+)__/.match(name.to_s)
    next if md.nil?
    (by_id[md[1].to_i] ||= []) << method(name)
  end

  by_id.keys.sort.each do |id|
    by_id[id].each { |meth| meth.call }
  end
end
298
+
299
# Run every bootstrap block, walking the ancestor chain from the most distant
# ancestor down to this class, then the instance-level +bootstrap+ method, and
# finally record that bootstrapping is done.
def do_bootstrap
  self.class.ancestors.reverse_each do |anc|
    anc.instance_methods(false).each do |m|
      method(m.to_sym).call if m.to_s =~ /^__bootstrap__/
    end
  end
  bootstrap

  @done_bootstrap = true
end
312
+
313
# Build the meta-parser from the collected declarations and store the strata
# it produces in @rewritten_strata.
def do_rewrite
  parser = BudMeta.new(self, @declarations)
  @meta_parser = parser
  @rewritten_strata = parser.meta_rewrite
end
317
+
318
+ public
319
+
320
+ ########### give empty defaults for these
321
# Empty default. initialize calls this before loading the rewritten rules.
def declaration # :nodoc: all
  nil
end
323
# Empty default. do_bootstrap calls this after the per-module bootstrap blocks.
def bootstrap # :nodoc: all
  nil
end
325
+
326
+ ########### metaprogramming support for ruby and for rule rewriting
327
# Return this object's singleton class; a helper for defining per-instance
# methods.
def singleton_class # :nodoc: all
  class << self
    self
  end
end
331
+
332
+ ######## methods for controlling execution
333
+
334
# Run Bud in the background (in a different thread). The Bud interpreter then
# runs asynchronously from the caller, so care must be used when interacting
# with it: for example, it is not safe to directly examine Bud collections
# from the caller's thread (see async_do and sync_do).
#
# This instance of Bud will continue to execute until stop_bg is called.
def run_bg
  start_reactor
  # Do not return until Bud has finished starting up
  schedule_and_wait { start_bud }
end
347
+
348
# Run Bud in the "foreground" -- the caller's thread is used to run the Bud
# interpreter, so this method won't return unless an error occurs. It is often
# more useful to run Bud asynchronously -- see run_bg.
#
# run_fg cannot be invoked if run_bg has already been called in the same Ruby
# process; BudError is raised if the EventMachine reactor is already running.
#
# Execution proceeds in time ticks, a la Dedalus.
# * Within each tick there may be multiple strata.
# * Within each stratum we do multiple semi-naive iterations.
def run_fg
  raise BudError if EventMachine.reactor_running?

  EventMachine.run do
    start_bud
  end
end
365
+
366
# Shut down a Bud instance that is running asynchronously. This method blocks
# until Bud has been shut down. If +stop_em+ is true, the EventMachine event
# loop is also shut down; this will interfere with the execution of any other
# Bud instances in the same process (as well as anything else that happens to
# use EventMachine).
def stop_bg(stop_em=false)
  return schedule_and_wait { do_shutdown(false) } unless stop_em

  schedule_shutdown(true)
  # Block until EM has completely shut down.
  @em_stopped.pop
end
382
+
383
# Given a block, evaluate that block inside the background Ruby thread at some
# time in the future. Because the block is evaluated inside the background
# thread, it can safely examine Bud state. Naturally, this method can only be
# used when Bud is running in the background. sync_do blocks the caller until
# the block has been evaluated; for a non-blocking version, see async_do.
#
# The block is invoked after one Bud timestep has ended but before the next
# begins. Hence, synchronous accumulation (<=) into a Bud scratch collection
# from the block is typically not useful: when the next tick begins, scratch
# collections are emptied, including anything the block inserted using <=. To
# avoid this, insert into scratches using <+.
def sync_do
  schedule_and_wait do
    yield if block_given?
    # The user-supplied block may have inserted data, so run another tick
    tick
  end
end
403
+
404
# Like sync_do, but does not block the caller's thread: the given block is
# handed to EventMachine and invoked at some future time. Calls to async_do
# respect FIFO order.
def async_do
  EventMachine.schedule do
    yield if block_given?
    # The user-supplied block may have inserted data, so run another tick
    tick
  end
end
414
+
415
# Close every table of the current Bud instance, shutting down any persistent
# storage behind them. If you are running Bud via tick() and using `tctable`
# collections, call this once you are finished with Bud. Programs that use
# Bud via run_fg() or run_bg() don't need to call this manually.
def close_tables
  @tables.each_value { |tbl| tbl.close }
end
424
+
425
# Register a new callback and return its ID. Given the name of a Bud
# collection, arrange for the given block to be invoked at the end of any tick
# in which the collection is non-empty. The block is passed the collection as
# an argument, which is a convenient way to examine the tuples inserted during
# that fixpoint. (Because the Bud runtime is blocked while the callback runs,
# the block can also examine any other Bud state freely.)
#
# Registering callbacks on persistent collections (e.g., tables and tctables)
# is probably not wise: as long as any tuples are stored in the collection,
# the callback will be invoked at the end of every tick.
#
# Raises Bud::BudError if no collection named +tbl_name+ exists.
def register_callback(tbl_name, &block)
  # Callbacks may be added before or after EM has been started; to simplify
  # matters, EM is started here if it isn't running yet.
  start_reactor
  cb_id = nil
  schedule_and_wait do
    raise Bud::BudError, "No such table: #{tbl_name}" unless @tables.has_key?(tbl_name)
    raise Bud::BudError if @callbacks.has_key?(@callback_id)

    cb_id = @callback_id
    @callbacks[cb_id] = [tbl_name, block]
    @callback_id += 1
  end
  cb_id
end
454
+
455
# Remove the callback with the given ID. Raises Bud::BudError if no callback
# with that ID is registered.
def unregister_callback(id)
  schedule_and_wait do
    raise Bud::BudError unless @callbacks.key?(id)
    @callbacks.delete(id)
  end
end
462
+
463
# sync_callback supports synchronous interaction with Bud modules. The caller
# supplies the name of an input collection, a set of tuples to insert, and an
# output collection on which to 'listen'. The call blocks until tuples appear
# in the output collection; those tuples are returned to the caller. If
# +in_tbl+ is nil, nothing is inserted and the call only waits.
def sync_callback(in_tbl, tupleset, out_tbl)
  results = Queue.new
  cb_id = register_callback(out_tbl) { |coll| results.push(coll.to_a) }

  unless in_tbl.nil?
    sync_do do
      target = @tables[in_tbl]
      # Channels and ZooKeeper tables take <~; everything else takes <+
      if target.is_a?(Bud::BudChannel) || target.is_a?(Bud::BudZkTable)
        target <~ tupleset
      else
        target <+ tupleset
      end
    end
  end

  tuples = results.pop
  unregister_callback(cb_id)
  tuples
end
486
+
487
# A common special case of sync_callback: insert nothing, and block until
# tuples appear in +out_tbl+.
def delta(out_tbl)
  sync_callback(nil, nil, out_tbl)
end
491
+
492
+ private
493
+
494
# Call each registered callback whose collection is currently non-empty,
# passing it that collection.
def invoke_callbacks
  @callbacks.each_value do |tbl_name, block|
    tbl = @tables[tbl_name]
    block.call(tbl) unless tbl.empty?
  end
end
503
+
504
# Start the EventMachine reactor in a new thread unless it is already running,
# and block until its event loop is up.
def start_reactor
  return if EventMachine.reactor_running?

  EventMachine.error_handler do |e|
    puts "Unexpected Bud error: #{e.inspect}"
    puts e.backtrace.join("\n")
    raise e
  end

  started = Queue.new
  # Running EM in a dedicated thread helps avoid race conditions on the start
  # and stop of its event loop.
  Thread.new do
    EventMachine.run { started << true }
    # Reached only after EventMachine::stop_event_loop is done
    @em_stopped << true
  end
  # Block until EM has successfully started up.
  started.pop
end
527
+
528
# Schedule the given block to be evaluated by EventMachine in the future, and
# block until this has happened. Any exception raised by the block is caught
# on the EM side and re-raised in the caller's thread.
def schedule_and_wait
  # Defend against being called after EM has stopped. This check is racy, but
  # better than nothing.
  raise BudError, "EM not running" unless EventMachine.reactor_running?

  outcome = Queue.new
  EventMachine.schedule do
    err = false
    begin
      yield
    rescue Exception
      err = $!
    end
    outcome.push(err)
  end

  resp = outcome.pop
  raise resp if resp
end
549
+
550
# Cancel all timers, close all tables and close the datagram socket. If
# +stop_em+ is true, also stop the EventMachine event loop.
def do_shutdown(stop_em=false)
  @timers.each { |timer| timer.cancel }
  close_tables
  @dsock.close_connection
  # Stopping EM affects anyone else in the same process who happens to be
  # using EventMachine! It is also a non-blocking call; to block until EM has
  # completely shut down, the @em_stopped queue is used.
  EventMachine.stop_event_loop if stop_em
end
561
+
562
# Schedule a "graceful" shutdown for a future EM tick. If EM is not currently
# running, shut down immediately.
def schedule_shutdown(stop_em=false)
  return do_shutdown(stop_em) unless EventMachine.reactor_running?

  EventMachine.schedule { do_shutdown(stop_em) }
end
573
+
574
# Start this Bud instance inside the EventMachine reactor thread: install
# signal handlers, open the network socket, start periodic timers, the stdin
# reader and ZooKeeper watchers, and run the first tick unless lazy.
#
# Raises BudError if called from outside the reactor thread.
def start_bud
  raise BudError unless EventMachine::reactor_thread?

  # If we get SIGINT or SIGTERM, shutdown gracefully
  unless @options[:no_signal_handlers]
    ["INT", "TERM"].each do |sig|
      Signal.trap(sig) do
        schedule_shutdown(true)
      end
    end
  end

  do_start_server

  # Initialize periodics
  @periodics.each do |p|
    @timers << set_periodic_timer(p.pername, p.ident, p.period)
  end

  # Arrange for Bud to read from stdin if enabled. Note that we can't do this
  # earlier because we need to wait for EventMachine startup.
  @stdio.start_stdin_reader if @options[:read_stdin]
  @zk_tables.each_value {|t| t.start_watchers}

  # Compute a fixpoint; this will also invoke any bootstrap blocks.
  tick unless @lazy

  @rtracer.sleep if @options[:rtrace]
end
604
+
605
# Open the datagram socket for this instance and record the port that was
# actually bound.
def do_start_server
  @dsock = EventMachine.open_datagram_socket(@ip, @options[:port], BudServer, self)
  @port = Socket.unpack_sockaddr_in(@dsock.get_sockname).first
end
610
+
611
+ public
612
+
613
# Return "ip:port" for this Bud instance. In addition to the local IP and
# port, the user may define an external IP and/or port; the external version
# of each is used if available, otherwise the local one. Local and external
# may be mixed: local_ip:external_port fits local port forwarding, and
# external_ip:local_port fits being in a DMZ, for example.
#
# Raises BudError if no port is known yet (no bound port, :port is 0 and no
# :ext_port was given).
def ip_port
  if @port.nil? && @options[:port] == 0 && !@options[:ext_port]
    raise BudError, "ip_port called before port defined"
  end

  ip = options[:ext_ip] ? @options[:ext_ip] : @ip
  port = if options[:ext_port]
           @options[:ext_port]
         elsif @port.nil?
           @options[:port]
         else
           @port
         end
  "#{ip}:#{port}"
end
627
+
628
# Return the internal "ip:port" of this instance, ignoring any external
# IP/port options (see ip_port). The bound port is used if known, otherwise
# the configured :port.
#
# Raises BudError if no port is known yet (no bound port and :port is 0).
def int_ip_port
  if @port.nil? && @options[:port] == 0
    raise BudError, "int_ip_port called before port defined"
  end
  "#{@ip}:#{@port.nil? ? @options[:port] : @port}"
end
633
+
634
# Manually trigger one timestep of Bloom execution: tick every table, reset
# join state, bootstrap if not yet done, take in inbound messages, run each
# stratum to fixpoint, flush, invoke callbacks and advance @budtime.
def tick
  @tables.each_value { |t| t.tick }

  @joinstate = {}

  do_bootstrap unless @done_bootstrap
  receive_inbound

  @strata.each_with_index do |stratum, idx|
    stratum_fixpoint(stratum, idx)
  end
  @viz.do_cards if @options[:trace]
  do_flush
  invoke_callbacks
  @budtime += 1
end
651
+
652
+ private
653
+
654
# Builtin Bud state (predefined collections). This could be defined using the
# standard "state" syntax, but builtin state must be initialized before
# user-defined state.
def builtin_state
  channel(:localtick, [:col1])
  @stdio = terminal(:stdio)
  @periodics = table(:periodics_tbl, [:pername] => [:ident, :period])

  # Collections used for Bud reflection
  table(:t_rules, [:rule_id] => [:lhs, :op, :src])
  table(:t_depends, [:rule_id, :lhs, :op, :body] => [:nm])
  table(:t_depends_tc, [:head, :body, :via, :neg, :temporal])
  table(:t_provides, [:interface] => [:input])
  # Shares the schema of t_provides, so it must be declared after it
  table(:t_underspecified, t_provides.schema)
  table(:t_stratum, [:predicate] => [:stratum])
  table(:t_cycle, [:predicate, :via, :neg, :temporal])
end
671
+
672
# Handle any inbound tuples off the wire and then clear the inbound buffer.
# Each message is a [collection name, tuple] pair; the tuple is placed
# directly into the storage of the named local collection.
def receive_inbound
  @inbound.each do |msg|
    target = tables[msg[0].to_sym]
    target << msg[1]
  end
  @inbound = []
end
681
+
682
# "Flush" any tuples that need to be flushed. This does two things:
# 1. Emit outgoing tuples in channels and ZK tables.
# 2. Commit to disk any changes made to on-disk tables.
def do_flush
  @channels.each do |entry|
    @tables[entry.first].flush
  end
  @zk_tables.each_value { |zk| zk.flush }
  @tc_tables.each_value { |tc| tc.flush }
end
690
+
691
# Compute a fixpoint of the rules in +strat+ (stratum number +strat_num+)
# using semi-naive evaluation.
#
# As described in lib/collections.rb, each collection has three
# sub-collections of note here:
#   @storage: the "main" storage of tuples
#   @delta: tuples that should be used to drive derivation of new facts
#   @new_delta: a place to store newly-derived facts
#
# On the first pass @stratum_first_iter is true, which tells the Join::each
# code to join up all its @storage subcollections to start. In subsequent
# iterations the join code uses some table's @delta to ensure that only new
# tuples are derived. Calling "each" on a non-Join collection iterates through
# both storage and delta.
#
# At the end of each iteration the collections transition:
#   - @delta tuples are merged into @storage
#   - @new_delta tuples are moved into @delta
#   - @new_delta is set to empty
#
# Any exception raised by a rule is re-raised with the rule's source text
# appended (when recorded); exceptions that are not BudErrors are replaced by
# a BudError.
#
# XXX as a performance optimization, it would be nice to bypass the delta
#     tables for any preds that don't participate in a rhs Join -- in that
#     case there's pointless extra tuple movement letting tuples "graduate"
#     through @new_delta and @delta.
def stratum_fixpoint(strat, strat_num)
  # First iteration joins on storage; later iterations do delta-joins only.
  @stratum_first_iter = true
  begin
    strat.each_with_index do |rule, idx|
      begin
        rule.call
      rescue Exception => e
        # Old-style rule blocks have "" recorded as their source text
        rule_src = @rule_src[strat_num][idx]
        src_msg = rule_src == "" ? "" : "\nRule: #{rule_src}"
        err = e.is_a?(BudError) ? e : BudError
        raise err, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
      end
    end
    @stratum_first_iter = false
    # XXX this next line is inefficient.
    # we could call tick_deltas only on predicates in this stratum.
    # but it's not easy right now (??) to pull out tables in a given stratum
    @tables.each { |name, coll| coll.tick_deltas }
  end until @tables.all? { |name, coll| coll.new_delta.empty? and coll.delta.empty? }
end
750
+
751
+ ####### Joins
752
# Return +j+ unchanged when no block is given; otherwise return the result of
# mapping the block over +j+.
def wrap_map(j, &blk)
  blk ? j.map(&blk) : j
end
759
+
760
+ public
761
# Accessor for the per-instance join state hash (reset at the start of every
# tick).
def joinstate # :nodoc: all
  return @joinstate
end
764
+
765
+ public
766
# Build a BudJoin over +collections+ with the given predicates. If a block is
# given, the block mapped over the join is returned instead of the join.
def join(collections, *preds, &blk) # :nodoc: all
  # since joins are stateful, we want to allocate them once and store in this Bud instance
  # we ID them on their tablenames, preds, and block
  joined = BudJoin.new(collections, self, preds)
  wrap_map(joined, &blk)
end
771
+
772
# Natural join: for all pairs of relations, add predicates on matching column
# names, then join on those predicates.
def natjoin(collections, &blk) # :nodoc: all
  join(collections, *BudJoin.natural_preds(self, collections), &blk)
end
777
+
778
# Left-outer-join syntax to be used in the rhs of Bloom statements. The first
# argument is an array of 2 collections; the remaining arguments are
# predicates (as in Bud::BudCollection.pairs). If a block is given, it is
# mapped over the join.
def leftjoin(collections, *preds, &blk)
  outer = BudLeftJoin.new(collections, self, preds)
  wrap_map(outer, &blk)
end
783
+
784
+ private
785
+
786
+ ######## ids and timers
787
# Generate an ID string: the current time in whole seconds followed by a
# random float.
def gen_id
  "#{Time.new.to_i}#{rand}"
end
790
+
791
# Create an EventMachine periodic timer that, every +period+, inserts an
# [id, current time string] tuple into the collection called +name+ and then
# runs a tick. Returns the timer.
def set_periodic_timer(name, id, period)
  EventMachine::PeriodicTimer.new(period) do
    stamp = Time.new.to_s
    @tables[name] <+ [[id, stamp]]
    tick
  end
end
797
+ end