bud 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/LICENSE +9 -0
  2. data/README +30 -0
  3. data/bin/budplot +134 -0
  4. data/bin/budvis +201 -0
  5. data/bin/rebl +4 -0
  6. data/docs/README.md +13 -0
  7. data/docs/bfs.md +379 -0
  8. data/docs/bfs.raw +251 -0
  9. data/docs/bfs_arch.png +0 -0
  10. data/docs/bloom-loop.png +0 -0
  11. data/docs/bust.md +83 -0
  12. data/docs/cheat.md +291 -0
  13. data/docs/deploy.md +96 -0
  14. data/docs/diffs +181 -0
  15. data/docs/getstarted.md +296 -0
  16. data/docs/intro.md +36 -0
  17. data/docs/modules.md +112 -0
  18. data/docs/operational.md +96 -0
  19. data/docs/rebl.md +99 -0
  20. data/docs/ruby_hooks.md +19 -0
  21. data/docs/visualizations.md +75 -0
  22. data/examples/README +1 -0
  23. data/examples/basics/hello.rb +12 -0
  24. data/examples/basics/out +1103 -0
  25. data/examples/basics/out.new +856 -0
  26. data/examples/basics/paths.rb +51 -0
  27. data/examples/bust/README.md +9 -0
  28. data/examples/bust/bustclient-example.rb +23 -0
  29. data/examples/bust/bustinspector.html +135 -0
  30. data/examples/bust/bustserver-example.rb +18 -0
  31. data/examples/chat/README.md +9 -0
  32. data/examples/chat/chat.rb +45 -0
  33. data/examples/chat/chat_protocol.rb +8 -0
  34. data/examples/chat/chat_server.rb +29 -0
  35. data/examples/deploy/tokenring-ec2.rb +26 -0
  36. data/examples/deploy/tokenring-local.rb +17 -0
  37. data/examples/deploy/tokenring.rb +39 -0
  38. data/lib/bud/aggs.rb +126 -0
  39. data/lib/bud/bud_meta.rb +185 -0
  40. data/lib/bud/bust/bust.rb +126 -0
  41. data/lib/bud/bust/client/idempotence.rb +10 -0
  42. data/lib/bud/bust/client/restclient.rb +49 -0
  43. data/lib/bud/collections.rb +937 -0
  44. data/lib/bud/depanalysis.rb +44 -0
  45. data/lib/bud/deploy/countatomicdelivery.rb +50 -0
  46. data/lib/bud/deploy/deployer.rb +67 -0
  47. data/lib/bud/deploy/ec2deploy.rb +200 -0
  48. data/lib/bud/deploy/localdeploy.rb +41 -0
  49. data/lib/bud/errors.rb +15 -0
  50. data/lib/bud/graphs.rb +405 -0
  51. data/lib/bud/joins.rb +300 -0
  52. data/lib/bud/rebl.rb +314 -0
  53. data/lib/bud/rewrite.rb +523 -0
  54. data/lib/bud/rtrace.rb +27 -0
  55. data/lib/bud/server.rb +43 -0
  56. data/lib/bud/state.rb +108 -0
  57. data/lib/bud/storage/tokyocabinet.rb +170 -0
  58. data/lib/bud/storage/zookeeper.rb +178 -0
  59. data/lib/bud/stratify.rb +83 -0
  60. data/lib/bud/viz.rb +65 -0
  61. data/lib/bud.rb +797 -0
  62. metadata +330 -0
data/lib/bud.rb ADDED
@@ -0,0 +1,797 @@
1
+ require 'rubygems'
2
+ require 'eventmachine'
3
+ require 'msgpack'
4
+ require 'socket'
5
+ require 'superators'
6
+ require 'thread'
7
+
8
+ require 'bud/aggs'
9
+ require 'bud/bud_meta'
10
+ require 'bud/collections'
11
+ require 'bud/errors'
12
+ require 'bud/joins'
13
+ require 'bud/rtrace'
14
+ require 'bud/server'
15
+ require 'bud/state'
16
+ require 'bud/storage/tokyocabinet'
17
+ require 'bud/storage/zookeeper'
18
+ require 'bud/viz'
19
+
20
+ # We monkeypatch Module to add support for Bloom state and code declarations.
21
+ class Module
22
+
23
+ # import another module and assign to a qualifier symbol: <tt>import MyModule => :m</tt>
24
+ def import(spec)
25
+ raise Bud::CompileError unless (spec.class <= Hash and spec.length == 1)
26
+ mod, local_name = spec.first
27
+ raise Bud::CompileError unless (mod.class <= Module and local_name.class <= Symbol)
28
+
29
+ # To correctly expand qualified references to an imported module, we keep a
30
+ # table with the local bind names of all the modules imported by this
31
+ # module. To handle nested references (a.b.c.d etc.), the import table for
32
+ # module X points to X's own nested import table.
33
+ @bud_import_tbl ||= {}
34
+ child_tbl = mod.bud_import_table
35
+ raise Bud::CompileError if @bud_import_tbl.has_key? local_name
36
+ @bud_import_tbl[local_name] = child_tbl.clone # XXX: clone needed?
37
+
38
+ rewritten_mod_name = ModuleRewriter.do_import(self, mod, local_name)
39
+ self.module_eval "include #{rewritten_mod_name}"
40
+ end
41
+
42
+ # the block of Bloom collection declarations. one per module.
43
+ def state(&block)
44
+ meth_name = Module.make_state_meth_name(self)
45
+ define_method(meth_name, &block)
46
+ end
47
+
48
+ # a ruby block to be run before timestep 1. one per module.
49
+ def bootstrap(&block)
50
+ meth_name = "__bootstrap__#{Module.get_class_name(self)}".to_sym
51
+ define_method(meth_name, &block)
52
+ end
53
+
54
+ # bloom statements to be registered with Bud runtime. optional +block_name+
55
+ # allows for multiple bloom blocks per module, and overriding
56
# Register a block of Bloom statements by defining an instance method named
# <tt>__bloom__<block_name></tt> whose body is the given block.
#
# +block_name+:: optional Symbol naming the block. When omitted, a name is
#                generated from this module's class name and a per-module
#                counter.
#
# Raises Bud::CompileError if +block_name+ is given but is not a Symbol, or
# if a bloom block with the same name was already defined directly in this
# module.
def bloom(block_name=nil, &block)
  if block_name.nil?
    # No name was specified, so generate a unique one.
    @block_id ||= 0
    block_name = "#{Module.get_class_name(self)}__#{@block_id}"
    @block_id += 1
  elsif !block_name.is_a?(Symbol)
    raise Bud::CompileError, "Bloom block names must be a symbol: #{block_name}"
  end

  # Note that we don't encode the module name ("self") into the name of the
  # method. This allows named blocks to be overridden (via inheritance or
  # mixin) in the same way as normal Ruby methods.
  meth_name = "__bloom__#{block_name}"

  # Don't allow duplicate named bloom blocks to be defined within a single
  # module; this indicates a likely programmer error. instance_methods
  # returns Strings on Ruby 1.8 but Symbols on 1.9+, so compare by string
  # form to make the check work on both.
  if instance_methods(false).any? { |m| m.to_s == meth_name }
    raise Bud::CompileError, "Duplicate named bloom block: '#{block_name}' in #{self}"
  end
  define_method(meth_name.to_sym, &block)
end
80
+
81
+ def bud_import_table() #:nodoc: all
82
+ @bud_import_tbl ||= {}
83
+ @bud_import_tbl
84
+ end
85
+
86
+ private
87
+ # Return a string with a version of the class name appropriate for embedding
88
+ # into a method name. Annoyingly, if you define class X nested inside
89
+ # class/module Y, X's class name is the string "Y::X". We don't want to define
90
+ # method names with colons in them, so just return "X" instead.
91
# Return the last "::"-separated component of +klass+'s name, e.g. "X" for a
# class named "Y::X". Returns nil when +klass+ has no name (an anonymous
# class or module).
def self.get_class_name(klass)
  # An anonymous class's name is "" on Ruby 1.8 but nil on 1.9+; to_s makes
  # both cases behave alike instead of raising NoMethodError on nil.
  klass.name.to_s.split("::").last
end
94
+
95
+ # State method blocks are named using an auto-incrementing counter. This is to
96
+ # ensure that we can rediscover the possible dependencies between these blocks
97
+ # after module import (see Bud#call_state_methods).
98
+ def self.make_state_meth_name(klass)
99
+ @state_meth_id ||= 0
100
+ r = "__state#{@state_meth_id}__#{Module.get_class_name(klass)}".to_sym
101
+ @state_meth_id += 1
102
+ return r
103
+ end
104
+ end
105
+
106
+ # The root Bud module. To cause an instance of Bud to begin executing, there are
107
+ # three main options:
108
+ #
109
+ # 1. Synchronously. To do this, instantiate your program and then call tick()
110
+ # one or more times; each call evaluates a single Bud timestep. Note that in
111
+ # this mode, network communication (channels) and timers cannot be used. This
112
+ # is mostly intended for "one-shot" programs that compute a single result and
113
+ # then terminate.
114
+ # 2. In the foreground, on the caller's thread. To do this, instantiate your
115
+ # program and then call run_fg(). The Bud interpreter will then run, handling
116
+ # network events and evaluating new timesteps as appropriate. The run_fg()
117
+ # method will not return unless an error occurs.
118
+ # 3. In a separate thread in the background. To do this, instantiate your
119
+ # program and then call run_bg(). The Bud interpreter will run
120
+ # asynchronously. To interact with Bud (e.g., insert additional data or
121
+ # inspect the state of a Bud collection), use the sync_do and async_do
122
+ # methods. To shutdown the Bud interpreter, use stop_bg().
123
+ #
124
+ # Most programs should use method #3.
125
+ #
126
+ # :main: Bud
127
+ module Bud
128
+ attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
129
+ attr_reader :dsock
130
+ attr_reader :tables, :ip, :port
131
+ attr_reader :stratum_first_iter
132
+ attr_accessor :lazy # This can be changed on-the-fly by REBL
133
+
134
+ # options to the bud runtime are passed in a hash, with the following keys
135
+ # * network configuration
136
+ # * <tt>:ip</tt> IP address string for this instance
137
+ # * <tt>:port</tt> port number for this instance
138
+ # * <tt>:ext_ip</tt> IP address at which external nodes can contact this instance
139
+ # * <tt>:ext_port</tt> port number to go with :ext_ip
140
+ # * <tt>:bust_port</tt> port number for the restful http messages
141
+ # * operating system interaction
142
+ # * <tt>:read_stdin</tt> if true, captures stdin via the stdio collection
143
+ # * <tt>:no_signal_handlers</tt> if true, runtime ignores SIGINT and SIGTERM
144
+ # * tracing and output
145
+ # * <tt>:quiet</tt> if true, suppress certain messages
146
+ # * <tt>:trace</tt> if true, generate budvis outputs
147
+ # * <tt>:rtrace</tt> if true, generate budplot outputs
148
+ # * <tt>:dump_rewrite</tt> if true, dump results of internal rewriting of Bloom code to a file
149
+ # * controlling execution
150
+ # * <tt>:lazy</tt> if true, prevents runtime from ticking except on external calls to +tick+
151
+ # * <tt>:tag</tt> a name for this instance, suitable for display during tracing and visualization
152
+ # * storage configuration
153
+ # * <tt>:tc_dir</tt> filesystem directory to hold TokyoCabinet data stores
154
+ # * <tt>:tc_truncate</tt> if true, TokyoCabinet collections are opened with OTRUNC
155
+ def initialize(options={})
156
+ @tables = {}
157
+ @table_meta = []
158
+ @rewritten_strata = []
159
+ @channels = {}
160
+ @tc_tables = {}
161
+ @zk_tables = {}
162
+ @callbacks = {}
163
+ @callback_id = 0
164
+ @timers = []
165
+ @budtime = 0
166
+ @inbound = []
167
+ @done_bootstrap = false
168
+ @em_stopped = Queue.new
169
+ @joinstate = {} # joins are stateful, their state needs to be kept inside the Bud instance
170
+
171
+ # Setup options (named arguments), along with default values
172
+ @options = options
173
+ @lazy = @options[:lazy] ||= false
174
+ @options[:ip] ||= "localhost"
175
+ @ip = @options[:ip]
176
+ @options[:port] ||= 0
177
+ @options[:port] = @options[:port].to_i
178
+ # NB: If using an ephemeral port (specified by port = 0), the actual port
179
+ # number won't be known until we start EM
180
+
181
+ relatives = self.class.modules + [self.class]
182
+ relatives.each do |r|
183
+ Bud.rewrite_local_methods(r)
184
+ end
185
+
186
+ @declarations = ModuleRewriter.get_rule_defs(self.class)
187
+
188
+ init_state
189
+
190
+ # NB: Somewhat hacky. Dependency analysis and stratification are implemented
191
+ # by Bud programs, so in order for those programs to parse, we need the
192
+ # "Bud" class to have been defined first.
193
+ require 'bud/depanalysis'
194
+ require 'bud/stratify'
195
+
196
+ @viz = VizOnline.new(self) if @options[:trace]
197
+ @rtracer = RTrace.new(self) if @options[:rtrace]
198
+
199
+ # Get dependency info and determine stratification order.
200
+ unless self.class <= Stratification or self.class <= DepAnalysis
201
+ do_rewrite
202
+ end
203
+
204
+ # Load the rules as a closure. Each element of @strata is an array of
205
+ # lambdas, one for each rewritten rule in that stratum. Note that legacy Bud
206
+ # code (with user-specified stratification) assumes that @strata is a simple
207
+ # array, so we need to convert it before loading the rewritten strata.
208
+ @strata = []
209
+ @rule_src = []
210
+ declaration
211
+ @strata.each_with_index do |s,i|
212
+ raise BudError if s.class <= Array
213
+ @strata[i] = [s]
214
+ # Don't try to record source text for old-style rule blocks
215
+ @rule_src[i] = [""]
216
+ end
217
+
218
+ @rewritten_strata.each_with_index do |src_ary,i|
219
+ @strata[i] ||= []
220
+ @rule_src[i] ||= []
221
+ src_ary.each do |src|
222
+ @strata[i] << eval("lambda { #{src} }")
223
+ @rule_src[i] << src
224
+ end
225
+ end
226
+ end
227
+
228
+ private
229
+
230
+ # Rewrite methods defined in the given klass to expand module references and
231
+ # temp collections. Imported modules are rewritten during the import process;
232
+ # we rewrite the main Bud class and any included modules here. Note that we
233
+ # only rewrite each distinct Class once.
234
+ def self.rewrite_local_methods(klass)
235
+ @done_rewrite ||= {}
236
+ return if @done_rewrite.has_key? klass.name
237
+
238
+ u = Unifier.new
239
+ ref_expander = NestedRefRewriter.new(klass.bud_import_table)
240
+ tmp_expander = TempExpander.new
241
+ r2r = Ruby2Ruby.new
242
+
243
+ klass.instance_methods(false).each do |m|
244
+ ast = ParseTree.translate(klass, m)
245
+ ast = u.process(ast)
246
+ ast = ref_expander.process(ast)
247
+ ast = tmp_expander.process(ast)
248
+
249
+ if (ref_expander.did_work or tmp_expander.did_work)
250
+ new_source = r2r.process(ast)
251
+ klass.module_eval new_source # Replace previous method def
252
+ end
253
+
254
+ ref_expander.did_work = false
255
+ tmp_expander.did_work = false
256
+ end
257
+
258
+ # If we found any temp statements in the klass's rule blocks, add a state
259
+ # block with declarations for the corresponding temp collections.
260
+ s = tmp_expander.get_state_meth(klass)
261
+ if s
262
+ state_src = r2r.process(s)
263
+ klass.module_eval(state_src)
264
+ end
265
+
266
+ # Always rewrite anonymous classes
267
+ @done_rewrite[klass.name] = true unless klass.name == ""
268
+ end
269
+
270
+ # Invoke all the user-defined state blocks and initialize builtin state.
271
+ def init_state
272
+ builtin_state
273
+ call_state_methods
274
+ end
275
+
276
+ # If module Y is a parent module of X, X's state block might reference state
277
+ # defined in Y. Hence, we want to invoke Y's state block first. However, when
278
+ # "import" and "include" are combined, we can't use the inheritance hierarchy
279
+ # to do this. When a module Z is imported, the import process inlines all the
280
+ # modules Z includes into a single module. Hence, we can no longer rely on the
281
+ # inheritance hierarchy to respect dependencies between modules. To fix this,
282
+ # we add an increasing ID to each state block's method name (assigned
283
+ # according to the order in which the state blocks are defined); we then sort
284
+ # by this order before invoking the state blocks.
285
+ def call_state_methods
286
+ meth_map = {} # map from ID => [Method]
287
+ self.class.instance_methods.each do |m|
288
+ next unless m =~ /^__state(\d+)__/
289
+ id = Regexp.last_match.captures.first.to_i
290
+ meth_map[id] ||= []
291
+ meth_map[id] << self.method(m)
292
+ end
293
+
294
+ meth_map.keys.sort.each do |i|
295
+ meth_map[i].each {|m| m.call}
296
+ end
297
+ end
298
+
299
+ # Evaluate all bootstrap blocks
300
+ def do_bootstrap
301
+ self.class.ancestors.reverse.each do |anc|
302
+ anc.instance_methods(false).each do |m|
303
+ if /^__bootstrap__/.match m
304
+ self.method(m.to_sym).call
305
+ end
306
+ end
307
+ end
308
+ bootstrap
309
+
310
+ @done_bootstrap = true
311
+ end
312
+
313
+ def do_rewrite
314
+ @meta_parser = BudMeta.new(self, @declarations)
315
+ @rewritten_strata = @meta_parser.meta_rewrite
316
+ end
317
+
318
+ public
319
+
320
+ ########### give empty defaults for these
321
+ def declaration # :nodoc: all
322
+ end
323
+ def bootstrap # :nodoc: all
324
+ end
325
+
326
+ ########### metaprogramming support for ruby and for rule rewriting
327
+ # helper to define instance methods
328
+ def singleton_class # :nodoc: all
329
+ class << self; self; end
330
+ end
331
+
332
+ ######## methods for controlling execution
333
+
334
+ # Run Bud in the background (in a different thread). This means that the Bud
335
+ # interpreter will run asynchronously from the caller, so care must be used
336
+ # when interacting with it. For example, it is not safe to directly examine
337
+ # Bud collections from the caller's thread (see async_do and sync_do).
338
+ #
339
+ # This instance of Bud will continue to execute until stop_bg is called.
340
+ def run_bg
341
+ start_reactor
342
+ # Wait for Bud to start up before returning
343
+ schedule_and_wait do
344
+ start_bud
345
+ end
346
+ end
347
+
348
+ # Run Bud in the "foreground" -- the caller's thread will be used to run the
349
+ # Bud interpreter. This means this method won't return unless an error
350
+ # occurs. It is often more useful to run Bud asynchronously -- see run_bg.
351
+ #
352
+ # Note that run_fg cannot be invoked if run_bg has already been called in the
353
+ # same Ruby process.
354
+ #
355
+ # Execution proceeds in time ticks, a la Dedalus.
356
+ # * Within each tick there may be multiple strata.
357
+ # * Within each stratum we do multiple semi-naive iterations.
358
+ def run_fg
359
+ raise BudError if EventMachine::reactor_running?
360
+
361
+ EventMachine::run {
362
+ start_bud
363
+ }
364
+ end
365
+
366
+ # Shutdown a Bud instance that is running asynchronously. This method blocks
367
+ # until Bud has been shutdown. If +stop_em+ is true, the EventMachine event
368
+ # loop is also shutdown; this will interfere with the execution of any other
369
+ # Bud instances in the same process (as well as anything else that happens to
370
+ # use EventMachine).
371
+ def stop_bg(stop_em=false)
372
+ if stop_em
373
+ schedule_shutdown(true)
374
+ # Wait until EM has completely shutdown before we return.
375
+ @em_stopped.pop
376
+ else
377
+ schedule_and_wait do
378
+ do_shutdown(false)
379
+ end
380
+ end
381
+ end
382
+
383
+ # Given a block, evaluate that block inside the background Ruby thread at some
384
+ # time in the future. Because the block is evaluated inside the background Ruby
385
+ # thread, the block can safely examine Bud state. Naturally, this method can
386
+ # only be used when Bud is running in the background. Note that calling
387
+ # sync_do blocks the caller until the block has been evaluated; for a
388
+ # non-blocking version, see async_do.
389
+ #
390
+ # Note that the block is invoked after one Bud timestep has ended but before
391
+ # the next timestep begins. Hence, synchronous accumulation (<=) into a Bud
392
+ # scratch collection in a callback is typically not a useful thing to do: when
393
+ # the next tick begins, the content of any scratch collections will be
394
+ # emptied, which includes anything inserted by a sync_do block using <=. To
395
+ # avoid this behavior, insert into scratches using <+.
396
+ def sync_do
397
+ schedule_and_wait do
398
+ yield if block_given?
399
+ # Do another tick, in case the user-supplied block inserted any data
400
+ tick
401
+ end
402
+ end
403
+
404
+ # Like sync_do, but does not block the caller's thread: the given callback
405
+ # will be invoked at some future time. Note that calls to async_do respect
406
+ # FIFO order.
407
+ def async_do
408
+ EventMachine::schedule do
409
+ yield if block_given?
410
+ # Do another tick, in case the user-supplied block inserted any data
411
+ tick
412
+ end
413
+ end
414
+
415
+ # Shutdown any persistent tables used by the current Bud instance. If you are
416
+ # running Bud via tick() and using `tctable` collections, you should call this
417
+ # after you're finished using Bud. Programs that use Bud via run_fg() or
418
+ # run_bg() don't need to call this manually.
419
+ def close_tables
420
+ @tables.each_value do |t|
421
+ t.close
422
+ end
423
+ end
424
+
425
+ # Register a new callback. Given the name of a Bud collection, this method
426
+ # arranges for the given block to be invoked at the end of any tick in which
427
+ # any tuples have been inserted into the specified collection. The code block
428
+ # is passed the collection as an argument; this provides a convenient way to
429
+ # examine the tuples inserted during that fixpoint. (Note that because the Bud
430
+ # runtime is blocked while the callback is invoked, it can also examine any
431
+ # other Bud state freely.)
432
+ #
433
+ # Note that registering callbacks on persistent collections (e.g., tables and
434
+ # tctables) is probably not a wise thing to do: as long as any tuples are
435
+ # stored in the collection, the callback will be invoked at the end of every
436
+ # tick.
437
+ def register_callback(tbl_name, &block)
438
+ # We allow callbacks to be added before or after EM has been started. To
439
+ # simplify matters, we start EM if it hasn't been started yet.
440
+ start_reactor
441
+ cb_id = nil
442
+ schedule_and_wait do
443
+ unless @tables.has_key? tbl_name
444
+ raise Bud::BudError, "No such table: #{tbl_name}"
445
+ end
446
+
447
+ raise Bud::BudError if @callbacks.has_key? @callback_id
448
+ @callbacks[@callback_id] = [tbl_name, block]
449
+ cb_id = @callback_id
450
+ @callback_id += 1
451
+ end
452
+ return cb_id
453
+ end
454
+
455
+ # Unregister the callback that has the given ID.
456
+ def unregister_callback(id)
457
+ schedule_and_wait do
458
+ raise Bud::BudError unless @callbacks.has_key? id
459
+ @callbacks.delete(id)
460
+ end
461
+ end
462
+
463
+ # sync_callback supports synchronous interaction with Bud modules. The caller
464
+ # supplies the name of an input collection, a set of tuples to insert, and an
465
+ # output collection on which to 'listen.' The call blocks until tuples are
466
+ # inserted into the output collection: these are returned to the caller.
467
# Insert +tupleset+ into the collection named +in_tbl+ (skipped when +in_tbl+
# is nil), then block until a callback registered on the collection named
# +out_tbl+ fires. Returns the contents of +out_tbl+ (as an array) that were
# passed to that callback.
#
# The callback is always unregistered before this method returns or raises,
# so a failure while inserting or ticking does not leave a stale callback
# behind.
def sync_callback(in_tbl, tupleset, out_tbl)
  q = Queue.new
  cb = register_callback(out_tbl) do |c|
    q.push c.to_a
  end
  begin
    unless in_tbl.nil?
      sync_do {
        t = @tables[in_tbl]
        # Channels and ZooKeeper tables take the <~ operator; every other
        # collection gets a <+ insertion.
        if t.class <= Bud::BudChannel or t.class <= Bud::BudZkTable
          t <~ tupleset
        else
          t <+ tupleset
        end
      }
    end
    q.pop
  ensure
    unregister_callback(cb)
  end
end
486
+
487
+ # a common special case for sync_callback: block on a delta to a table.
488
+ def delta(out_tbl)
489
+ sync_callback(nil, nil, out_tbl)
490
+ end
491
+
492
+ private
493
+
494
+ def invoke_callbacks
495
+ @callbacks.each_value do |cb|
496
+ tbl_name, block = cb
497
+ tbl = @tables[tbl_name]
498
+ unless tbl.empty?
499
+ block.call(tbl)
500
+ end
501
+ end
502
+ end
503
+
504
+ def start_reactor
505
+ return if EventMachine::reactor_running?
506
+
507
+ EventMachine::error_handler do |e|
508
+ puts "Unexpected Bud error: #{e.inspect}"
509
+ puts e.backtrace.join("\n")
510
+ raise e
511
+ end
512
+
513
+ # Block until EM has successfully started up.
514
+ q = Queue.new
515
+ # This thread helps us avoid race conditions on the start and stop of
516
+ # EventMachine's event loop.
517
+ Thread.new do
518
+ EventMachine.run do
519
+ q << true
520
+ end
521
+ # Executed only after EventMachine::stop_event_loop is done
522
+ @em_stopped << true
523
+ end
524
+ # Block waiting for EM's event loop to start up.
525
+ q.pop
526
+ end
527
+
528
+ # Schedule a block to be evaluated by EventMachine in the future, and
529
+ # block until this has happened.
530
+ def schedule_and_wait
531
+ # Try to defend against error situations in which EM has stopped, but we've
532
+ # been called nonetheless. This is racy, but better than nothing.
533
+ raise BudError, "EM not running" unless EventMachine::reactor_running?
534
+
535
+ q = Queue.new
536
+ EventMachine::schedule do
537
+ ret = false
538
+ begin
539
+ yield
540
+ rescue Exception
541
+ ret = $!
542
+ end
543
+ q.push(ret)
544
+ end
545
+
546
+ resp = q.pop
547
+ raise resp if resp
548
+ end
549
+
550
# Tear down this instance: cancel every timer in @timers, close all tables,
# and close the datagram socket if one has been opened.
#
# +stop_em+:: if true, also stop the EventMachine event loop.
def do_shutdown(stop_em=false)
  @timers.each do |t|
    t.cancel
  end
  close_tables
  # @dsock is only assigned by do_start_server, so it is still nil if we are
  # asked to shut down before the server was started.
  @dsock.close_connection unless @dsock.nil?
  # Note that this affects anyone else in the same process who happens to be
  # using EventMachine! This is also a non-blocking call; to block until EM
  # has completely shutdown, we use the @em_stopped queue.
  EventMachine::stop_event_loop if stop_em
end
561
+
562
+ # Schedule a "graceful" shutdown for a future EM tick. If EM is not currently
563
+ # running, shutdown immediately.
564
+ def schedule_shutdown(stop_em=false)
565
+ if EventMachine::reactor_running?
566
+ EventMachine::schedule do
567
+ do_shutdown(stop_em)
568
+ end
569
+ else
570
+ do_shutdown(stop_em)
571
+ end
572
+ end
573
+
574
# Start this Bud instance; must be called on the EventMachine reactor thread
# (raises BudError otherwise). Installs signal handlers unless disabled,
# opens the server socket, creates a timer for each entry in @periodics,
# starts the stdin reader and ZooKeeper watchers, and runs the first tick
# unless the instance is lazy.
def start_bud
  raise BudError unless EventMachine::reactor_thread?

  # If we get SIGINT or SIGTERM, shutdown gracefully
  unless @options[:no_signal_handlers]
    ["INT", "TERM"].each do |sig|
      Signal.trap(sig) do
        schedule_shutdown(true)
      end
    end
  end

  do_start_server

  # Initialize periodics
  @periodics.each do |p|
    @timers << set_periodic_timer(p.pername, p.ident, p.period)
  end

  # Arrange for Bud to read from stdin if enabled. Note that we can't do this
  # earlier because we need to wait for EventMachine startup.
  @stdio.start_stdin_reader if @options[:read_stdin]
  @zk_tables.each_value {|t| t.start_watchers}

  # Compute a fixpoint; this will also invoke any bootstrap blocks.
  tick unless @lazy

  @rtracer.sleep if options[:rtrace]
end
604
+
605
+ def do_start_server
606
+ @dsock = EventMachine::open_datagram_socket(@ip, @options[:port],
607
+ BudServer, self)
608
+ @port = Socket.unpack_sockaddr_in(@dsock.get_sockname)[0]
609
+ end
610
+
611
+ public
612
+
613
+ # Returns the ip and port of the Bud instance. In addition to the local IP
614
+ # and port, the user may define an external IP and/or port. The external
615
+ # version of each is returned if available. If not, the local version is
616
+ # returned. There are use cases for mixing and matching local and external.
617
+ # local_ip:external_port would be if you have local port forwarding, and
618
+ # external_ip:local_port would be if you're in a DMZ, for example
619
# Return "ip:port" for this instance as a String. The <tt>:ext_ip</tt> and
# <tt>:ext_port</tt> options take precedence over the local address and port
# when they are set. The local port is @port when it is set and the
# configured <tt>:port</tt> option otherwise.
#
# Raises BudError if no port can be determined yet: @port is nil, the
# configured port is 0, and no <tt>:ext_port</tt> was given.
def ip_port
  if @port.nil? && @options[:port] == 0 && !@options[:ext_port]
    raise BudError, "ip_port called before port defined"
  end

  ip = (@options[:ext_ip] || @ip).to_s
  port = (@options[:ext_port] || (@port.nil? ? @options[:port] : @port)).to_s
  "#{ip}:#{port}"
end
627
+
628
+ # Returns the internal IP and port. See ip_port
629
# Return the internal "ip:port" String for this instance, ignoring the
# <tt>:ext_ip</tt> and <tt>:ext_port</tt> options. The port is @port when it
# is set and the configured <tt>:port</tt> option otherwise.
#
# Raises BudError if @port is nil and the configured port is 0.
def int_ip_port
  if @port.nil? && @options[:port] == 0
    raise BudError, "int_ip_port called before port defined"
  end
  "#{@ip}:#{@port.nil? ? @options[:port] : @port}"
end
633
+
634
+ # manually trigger one timestep of Bloom execution.
635
+ def tick
636
+ @tables.each_value do |t|
637
+ t.tick
638
+ end
639
+
640
+ @joinstate = {}
641
+
642
+ do_bootstrap unless @done_bootstrap
643
+ receive_inbound
644
+
645
+ @strata.each_with_index { |s,i| stratum_fixpoint(s, i) }
646
+ @viz.do_cards if @options[:trace]
647
+ do_flush
648
+ invoke_callbacks
649
+ @budtime += 1
650
+ end
651
+
652
+ private
653
+
654
+ # Builtin BUD state (predefined collections). We could define this using the
655
+ # standard "state" syntax, but we want to ensure that builtin state is
656
+ # initialized before user-defined state.
657
+ def builtin_state
658
+ channel :localtick, [:col1]
659
+ @stdio = terminal :stdio
660
+ @periodics = table :periodics_tbl, [:pername] => [:ident, :period]
661
+
662
+ # for BUD reflection
663
+ table :t_rules, [:rule_id] => [:lhs, :op, :src]
664
+ table :t_depends, [:rule_id, :lhs, :op, :body] => [:nm]
665
+ table :t_depends_tc, [:head, :body, :via, :neg, :temporal]
666
+ table :t_provides, [:interface] => [:input]
667
+ table :t_underspecified, t_provides.schema
668
+ table :t_stratum, [:predicate] => [:stratum]
669
+ table :t_cycle, [:predicate, :via, :neg, :temporal]
670
+ end
671
+
672
+ # Handle any inbound tuples off the wire and then clear. Received messages are
673
+ # placed directly into the storage of the appropriate local channel.
674
+ def receive_inbound
675
+ @inbound.each do |msg|
676
+ # puts "dequeueing tuple #{msg[1].inspect} into #{msg[0]} @ #{ip_port}"
677
+ tables[msg[0].to_sym] << msg[1]
678
+ end
679
+ @inbound = []
680
+ end
681
+
682
+ # "Flush" any tuples that need to be flushed. This does two things:
683
+ # 1. Emit outgoing tuples in channels and ZK tables.
684
+ # 2. Commit to disk any changes made to on-disk tables.
685
+ def do_flush
686
+ @channels.each { |c| @tables[c[0]].flush }
687
+ @zk_tables.each_value { |t| t.flush }
688
+ @tc_tables.each_value { |t| t.flush }
689
+ end
690
+
691
+ def stratum_fixpoint(strat, strat_num)
692
+ # This routine uses semi-naive evaluation to compute
693
+ # a fixpoint of the rules in strat.
694
+ #
695
+ # As described in lib/collections.rb, each collection has three
696
+ # sub-collections of note here:
697
+ # @storage: the "main" storage of tuples
698
+ # @delta: tuples that should be used to drive derivation of new facts
699
+ # @new_delta: a place to store newly-derived facts
700
+ #
701
+ # The first time through this loop we mark @stratum_first_iter=true,
702
+ # which tells the Join::each code to join up all its @storage subcollections
703
+ # to start. In subsequent iterations the join code uses some table's @delta
704
+ # to ensure that only new tuples are derived.
705
+ #
706
+ # Note that calling "each" on a non-Join collection will iterate through both
707
+ # storage and delta.
708
+ #
709
+ # At the end of each iteration of this loop we transition:
710
+ # - @delta tuples are merged into @storage
711
+ # - @new_delta tuples are moved into @delta
712
+ # - @new_delta is set to empty
713
+ #
714
+ # XXX as a performance optimization, it would be nice to bypass the delta
715
+ # tables for any preds that don't participate in a rhs Join -- in that
716
+ # case there's pointless extra tuple movement letting tuples "graduate"
717
+ # through @new_delta and @delta.
718
+
719
+ # In semi-naive, the first iteration should join up tables
720
+ # on their storage fields; subsequent iterations do the
721
+ # delta-joins only. The stratum_first_iter field here distinguishes
722
+ # these cases.
723
+ @stratum_first_iter = true
724
+ begin
725
+ strat.each_with_index do |r,i|
726
+ begin
727
+ r.call
728
+ rescue Exception => e
729
+ # Don't report source text for certain rules (old-style rule blocks)
730
+ rule_src = @rule_src[strat_num][i]
731
+ src_msg = ""
732
+ unless rule_src == ""
733
+ src_msg = "\nRule: #{rule_src}"
734
+ end
735
+
736
+ new_e = e
737
+ unless new_e.class <= BudError
738
+ new_e = BudError
739
+ end
740
+ raise new_e, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
741
+ end
742
+ end
743
+ @stratum_first_iter = false
744
+ # XXX this next line is inefficient.
745
+ # we could call tick_deltas only on predicates in this stratum.
746
+ # but it's not easy right now (??) to pull out tables in a given stratum
747
+ @tables.each{|name,coll| coll.tick_deltas}
748
+ end while not @tables.all?{|name,coll| coll.new_delta.empty? and coll.delta.empty?}
749
+ end
750
+
751
+ ####### Joins
752
+ def wrap_map(j, &blk)
753
+ if blk.nil?
754
+ return j
755
+ else
756
+ return j.map(&blk)
757
+ end
758
+ end
759
+
760
+ public
761
+ def joinstate # :nodoc: all
762
+ @joinstate
763
+ end
764
+
765
+ public
766
+ def join(collections, *preds, &blk) # :nodoc: all
767
+ # since joins are stateful, we want to allocate them once and store in this Bud instance
768
+ # we ID them on their tablenames, preds, and block
769
+ return wrap_map(BudJoin.new(collections, self, preds), &blk)
770
+ end
771
+
772
+ def natjoin(collections, &blk) # :nodoc: all
773
+ # for all pairs of relations, add predicates on matching column names
774
+ preds = BudJoin::natural_preds(self, collections)
775
+ join(collections, *preds, &blk)
776
+ end
777
+
778
+ # left-outer-join syntax to be used in rhs of Bloom statements.
779
+ # first argument an array of 2 collections, second argument an array of predicates (as in Bud::BudCollection.pairs)
780
+ def leftjoin(collections, *preds, &blk)
781
+ return wrap_map(BudLeftJoin.new(collections, self, preds), &blk)
782
+ end
783
+
784
+ private
785
+
786
+ ######## ids and timers
787
+ def gen_id
788
+ Time.new.to_i.to_s << rand.to_s
789
+ end
790
+
791
+ def set_periodic_timer(name, id, period)
792
+ EventMachine::PeriodicTimer.new(period) do
793
+ @tables[name] <+ [[id, Time.new.to_s]]
794
+ tick
795
+ end
796
+ end
797
+ end