bud 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +9 -0
- data/README +30 -0
- data/bin/budplot +134 -0
- data/bin/budvis +201 -0
- data/bin/rebl +4 -0
- data/docs/README.md +13 -0
- data/docs/bfs.md +379 -0
- data/docs/bfs.raw +251 -0
- data/docs/bfs_arch.png +0 -0
- data/docs/bloom-loop.png +0 -0
- data/docs/bust.md +83 -0
- data/docs/cheat.md +291 -0
- data/docs/deploy.md +96 -0
- data/docs/diffs +181 -0
- data/docs/getstarted.md +296 -0
- data/docs/intro.md +36 -0
- data/docs/modules.md +112 -0
- data/docs/operational.md +96 -0
- data/docs/rebl.md +99 -0
- data/docs/ruby_hooks.md +19 -0
- data/docs/visualizations.md +75 -0
- data/examples/README +1 -0
- data/examples/basics/hello.rb +12 -0
- data/examples/basics/out +1103 -0
- data/examples/basics/out.new +856 -0
- data/examples/basics/paths.rb +51 -0
- data/examples/bust/README.md +9 -0
- data/examples/bust/bustclient-example.rb +23 -0
- data/examples/bust/bustinspector.html +135 -0
- data/examples/bust/bustserver-example.rb +18 -0
- data/examples/chat/README.md +9 -0
- data/examples/chat/chat.rb +45 -0
- data/examples/chat/chat_protocol.rb +8 -0
- data/examples/chat/chat_server.rb +29 -0
- data/examples/deploy/tokenring-ec2.rb +26 -0
- data/examples/deploy/tokenring-local.rb +17 -0
- data/examples/deploy/tokenring.rb +39 -0
- data/lib/bud/aggs.rb +126 -0
- data/lib/bud/bud_meta.rb +185 -0
- data/lib/bud/bust/bust.rb +126 -0
- data/lib/bud/bust/client/idempotence.rb +10 -0
- data/lib/bud/bust/client/restclient.rb +49 -0
- data/lib/bud/collections.rb +937 -0
- data/lib/bud/depanalysis.rb +44 -0
- data/lib/bud/deploy/countatomicdelivery.rb +50 -0
- data/lib/bud/deploy/deployer.rb +67 -0
- data/lib/bud/deploy/ec2deploy.rb +200 -0
- data/lib/bud/deploy/localdeploy.rb +41 -0
- data/lib/bud/errors.rb +15 -0
- data/lib/bud/graphs.rb +405 -0
- data/lib/bud/joins.rb +300 -0
- data/lib/bud/rebl.rb +314 -0
- data/lib/bud/rewrite.rb +523 -0
- data/lib/bud/rtrace.rb +27 -0
- data/lib/bud/server.rb +43 -0
- data/lib/bud/state.rb +108 -0
- data/lib/bud/storage/tokyocabinet.rb +170 -0
- data/lib/bud/storage/zookeeper.rb +178 -0
- data/lib/bud/stratify.rb +83 -0
- data/lib/bud/viz.rb +65 -0
- data/lib/bud.rb +797 -0
- metadata +330 -0
data/lib/bud.rb
ADDED
@@ -0,0 +1,797 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'eventmachine'
|
3
|
+
require 'msgpack'
|
4
|
+
require 'socket'
|
5
|
+
require 'superators'
|
6
|
+
require 'thread'
|
7
|
+
|
8
|
+
require 'bud/aggs'
|
9
|
+
require 'bud/bud_meta'
|
10
|
+
require 'bud/collections'
|
11
|
+
require 'bud/errors'
|
12
|
+
require 'bud/joins'
|
13
|
+
require 'bud/rtrace'
|
14
|
+
require 'bud/server'
|
15
|
+
require 'bud/state'
|
16
|
+
require 'bud/storage/tokyocabinet'
|
17
|
+
require 'bud/storage/zookeeper'
|
18
|
+
require 'bud/viz'
|
19
|
+
|
20
|
+
# We monkeypatch Module to add support for Bloom state and code declarations.
|
21
|
+
class Module
|
22
|
+
|
23
|
+
# import another module and assign to a qualifier symbol: <tt>import MyModule => :m</tt>
|
24
|
+
def import(spec)
  # The spec must be a one-entry hash of the form { SomeModule => :local_name }.
  raise Bud::CompileError unless spec.is_a?(Hash) && spec.length == 1
  mod, local_name = spec.first
  unless mod.is_a?(Module) && local_name.is_a?(Symbol)
    raise Bud::CompileError
  end

  # Each module keeps a table mapping the local bind name of every module it
  # imports to that module's own import table, so that nested qualified
  # references (a.b.c.d) can be expanded later.
  @bud_import_tbl ||= {}
  child_tbl = mod.bud_import_table
  # A local name may only be bound once per importing module.
  raise Bud::CompileError if @bud_import_tbl.key?(local_name)
  @bud_import_tbl[local_name] = child_tbl.clone # XXX: clone needed?

  # Mix the rewritten copy of the imported module into this module.
  rewritten_mod_name = ModuleRewriter.do_import(self, mod, local_name)
  module_eval("include #{rewritten_mod_name}")
end
|
41
|
+
|
42
|
+
# the block of Bloom collection declarations. one per module.
|
43
|
+
def state(&block)
  # The generated method name carries a global sequence number; see
  # Module.make_state_meth_name.
  define_method(Module.make_state_meth_name(self), &block)
end
|
47
|
+
|
48
|
+
# a ruby block to be run before timestep 1. one per module.
|
49
|
+
def bootstrap(&block)
  # One bootstrap method per module, named after the module's unqualified name.
  unqualified = Module.get_class_name(self)
  define_method("__bootstrap__#{unqualified}".to_sym, &block)
end
|
53
|
+
|
54
|
+
# bloom statements to be registered with Bud runtime. optional +block_name+
|
55
|
+
# allows for multiple bloom blocks per module, and overriding
|
56
|
+
def bloom(block_name=nil, &block)
  # block_name: optional Symbol naming this block; when omitted a unique
  #             per-module name is generated.
  # Raises Bud::CompileError if the name is not a Symbol, or if a block with
  # the same name was already defined directly in this module.
  if block_name.nil?
    # If no block name was specified, generate a unique name
    @block_id ||= 0
    block_name = "#{Module.get_class_name(self)}__#{@block_id}"
    @block_id += 1
  elsif !(block_name.class <= Symbol)
    raise Bud::CompileError, "Bloom block names must be a symbol: #{block_name}"
  end

  # Note that we don't encode the module name ("self") into the name of the
  # method. This allows named blocks to be overridden (via inheritance or
  # mixin) in the same way as normal Ruby methods.
  meth_name = "__bloom__#{block_name}"

  # Don't allow duplicate named bloom blocks to be defined within a single
  # module; this indicates a likely programmer error. instance_methods
  # returns Strings on Ruby 1.8 but Symbols on later versions, so compare on
  # the string form to make the check effective on both.
  if instance_methods(false).any? { |m| m.to_s == meth_name }
    raise Bud::CompileError, "Duplicate named bloom block: '#{block_name}' in #{self}"
  end
  define_method(meth_name.to_sym, &block)
end
|
80
|
+
|
81
|
+
def bud_import_table() #:nodoc: all
  # Lazily create the table on first access; later calls return the same Hash.
  @bud_import_tbl = {} unless @bud_import_tbl
  return @bud_import_tbl
end
|
85
|
+
|
86
|
+
private
|
87
|
+
# Return a string with a version of the class name appropriate for embedding
|
88
|
+
# into a method name. Annoyingly, if you define class X nested inside
|
89
|
+
# class/module Y, X's class name is the string "Y::X". We don't want to define
|
90
|
+
# method names with colons in them, so just return "X" instead.
|
91
|
+
def self.get_class_name(klass)
  # Keep only the final component of a "::"-qualified name.
  components = klass.name.split("::")
  components[-1]
end
|
94
|
+
|
95
|
+
# State method blocks are named using an auto-incrementing counter. This is to
|
96
|
+
# ensure that we can rediscover the possible dependencies between these blocks
|
97
|
+
# after module import (see Bud#call_state_methods).
|
98
|
+
def self.make_state_meth_name(klass)
  @state_meth_id ||= 0
  # Build the name before bumping the counter so a failure while computing
  # the class name leaves the counter untouched.
  name = :"__state#{@state_meth_id}__#{Module.get_class_name(klass)}"
  @state_meth_id += 1
  name
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# The root Bud module. To cause an instance of Bud to begin executing, there are
|
107
|
+
# three main options:
|
108
|
+
#
|
109
|
+
# 1. Synchronously. To do this, instantiate your program and then call tick()
|
110
|
+
# one or more times; each call evaluates a single Bud timestep. Note that in
|
111
|
+
# this mode, network communication (channels) and timers cannot be used. This
|
112
|
+
# is mostly intended for "one-shot" programs that compute a single result and
|
113
|
+
# then terminate.
|
114
|
+
# 2. In a separate thread in the foreground. To do this, instantiate your
|
115
|
+
# program and then call run_fg(). The Bud interpreter will then run, handling
|
116
|
+
# network events and evaluating new timesteps as appropriate. The run_fg()
|
117
|
+
# method will not return unless an error occurs.
|
118
|
+
# 3. In a separate thread in the background. To do this, instantiate your
|
119
|
+
# program and then call run_bg(). The Bud interpreter will run
|
120
|
+
# asynchronously. To interact with Bud (e.g., insert additional data or
|
121
|
+
# inspect the state of a Bud collection), use the sync_do and async_do
|
122
|
+
# methods. To shutdown the Bud interpreter, use stop_bg().
|
123
|
+
#
|
124
|
+
# Most programs should use method #3.
|
125
|
+
#
|
126
|
+
# :main: Bud
|
127
|
+
module Bud
|
128
|
+
attr_reader :strata, :budtime, :inbound, :options, :meta_parser, :viz, :rtracer
|
129
|
+
attr_reader :dsock
|
130
|
+
attr_reader :tables, :ip, :port
|
131
|
+
attr_reader :stratum_first_iter
|
132
|
+
attr_accessor :lazy # This can be changed on-the-fly by REBL
|
133
|
+
|
134
|
+
# options to the bud runtime are passed in a hash, with the following keys
|
135
|
+
# * network configuration
|
136
|
+
# * <tt>:ip</tt> IP address string for this instance
|
137
|
+
# * <tt>:port</tt> port number for this instance
|
138
|
+
# * <tt>:ext_ip</tt> IP address at which external nodes can contact this instance
|
139
|
+
# * <tt>:ext_port</tt> port number to go with :ext_ip
|
140
|
+
# * <tt>:bust_port</tt> port number for the restful http messages
|
141
|
+
# * operating system interaction
|
142
|
+
# * <tt>:read_stdin</tt> if true, captures stdin via the stdio collection
|
143
|
+
# * <tt>:no_signal_handlers</tt> if true, runtime ignores SIGINT and SIGTERM
|
144
|
+
# * tracing and output
|
145
|
+
# * <tt>:quiet</tt> if true, suppress certain messages
|
146
|
+
# * <tt>:trace</tt> if true, generate budvis outputs
|
147
|
+
# * <tt>:rtrace</tt> if true, generate budplot outputs
|
148
|
+
# * <tt>:dump_rewrite</tt> if true, dump results of internal rewriting of Bloom code to a file
|
149
|
+
# * controlling execution
|
150
|
+
# * <tt>:lazy</tt> if true, prevents runtime from ticking except on external calls to +tick+
|
151
|
+
# * <tt>:tag</tt> a name for this instance, suitable for display during tracing and visualization
|
152
|
+
# * storage configuration
|
153
|
+
# * <tt>:tc_dir</tt> filesystem directory to hold TokyoCabinet data stores
|
154
|
+
# * <tt>:tc_truncate</tt> if true, TokyoCabinet collections are opened with OTRUNC
|
155
|
+
def initialize(options={})
  # Collections, grouped by kind.
  @tables = {}
  @table_meta = []
  @channels = {}
  @tc_tables = {}
  @zk_tables = {}

  # Runtime bookkeeping.
  @rewritten_strata = []
  @callbacks = {}
  @callback_id = 0
  @timers = []
  @budtime = 0
  @inbound = []
  @done_bootstrap = false
  @em_stopped = Queue.new
  @joinstate = {} # joins are stateful, their state needs to be kept inside the Bud instance

  # Setup options (named arguments), along with default values. Note that the
  # caller's hash is filled in with the defaults in place.
  @options = options
  @options[:lazy] ||= false
  @lazy = @options[:lazy]
  @options[:ip] ||= "localhost"
  @ip = @options[:ip]
  # NB: If using an ephemeral port (specified by port = 0), the actual port
  # number won't be known until we start EM
  @options[:port] = (@options[:port] || 0).to_i

  # Expand module references and temp collections in this class and in every
  # module it mixes in.
  relatives = self.class.modules + [self.class]
  relatives.each { |r| Bud.rewrite_local_methods(r) }

  @declarations = ModuleRewriter.get_rule_defs(self.class)

  init_state

  # NB: Somewhat hacky. Dependency analysis and stratification are implemented
  # by Bud programs, so in order for those programs to parse, we need the
  # "Bud" class to have been defined first.
  require 'bud/depanalysis'
  require 'bud/stratify'

  @viz = VizOnline.new(self) if @options[:trace]
  @rtracer = RTrace.new(self) if @options[:rtrace]

  # Get dependency info and determine stratification order. The analysis
  # programs themselves are not rewritten.
  is_analysis = (self.class <= Stratification) || (self.class <= DepAnalysis)
  do_rewrite unless is_analysis

  # Load the rules as a closure. Each element of @strata is an array of
  # lambdas, one for each rewritten rule in that stratum. Note that legacy Bud
  # code (with user-specified stratification) assumes that @strata is a simple
  # array, so we need to convert it before loading the rewritten strata.
  @strata = []
  @rule_src = []
  declaration
  @strata.each_with_index do |s, i|
    raise BudError if s.class <= Array
    @strata[i] = [s]
    # Don't try to record source text for old-style rule blocks
    @rule_src[i] = [""]
  end

  # The eval below runs in this method's binding, so the local variable names
  # in scope here are visible to the rule source; they are left unchanged.
  @rewritten_strata.each_with_index do |src_ary, i|
    @strata[i] ||= []
    @rule_src[i] ||= []
    src_ary.each do |src|
      @strata[i] << eval("lambda { #{src} }")
      @rule_src[i] << src
    end
  end
end
|
227
|
+
|
228
|
+
private
|
229
|
+
|
230
|
+
# Rewrite methods defined in the given klass to expand module references and
|
231
|
+
# temp collections. Imported modules are rewritten during the import process;
|
232
|
+
# we rewrite the main Bud class and any included modules here. Note that we
|
233
|
+
# only rewrite each distinct Class once.
|
234
|
+
def self.rewrite_local_methods(klass)
  # klass: the Class or Module whose directly-defined instance methods are
  #        rewritten in place (via module_eval) when a rewriter changed them.
  #
  # Anonymous classes have no usable name to key the "already rewritten"
  # cache on: Class#name is "" on Ruby 1.8 and nil on later versions. Treat
  # both as anonymous so that such classes are never cached and are rewritten
  # on every call.
  klass_name = klass.name
  anonymous = klass_name.nil? || klass_name == ""

  @done_rewrite ||= {}
  return if !anonymous && @done_rewrite.has_key?(klass_name)

  u = Unifier.new
  ref_expander = NestedRefRewriter.new(klass.bud_import_table)
  tmp_expander = TempExpander.new
  r2r = Ruby2Ruby.new

  klass.instance_methods(false).each do |m|
    ast = ParseTree.translate(klass, m)
    ast = u.process(ast)
    ast = ref_expander.process(ast)
    ast = tmp_expander.process(ast)

    # Only regenerate source for methods that one of the expanders changed.
    if ref_expander.did_work || tmp_expander.did_work
      new_source = r2r.process(ast)
      klass.module_eval new_source # Replace previous method def
    end

    # Reset the per-method flags before looking at the next method.
    ref_expander.did_work = false
    tmp_expander.did_work = false
  end

  # If we found any temp statements in the klass's rule blocks, add a state
  # block with declarations for the corresponding temp collections.
  s = tmp_expander.get_state_meth(klass)
  if s
    state_src = r2r.process(s)
    klass.module_eval(state_src)
  end

  # Always rewrite anonymous classes
  @done_rewrite[klass_name] = true unless anonymous
end
|
269
|
+
|
270
|
+
# Invoke all the user-defined state blocks and initialize builtin state.
|
271
|
+
def init_state
  # Builtin collections are declared first, then the user-defined state blocks.
  builtin_state()
  call_state_methods()
end
|
275
|
+
|
276
|
+
# If module Y is a parent module of X, X's state block might reference state
|
277
|
+
# defined in Y. Hence, we want to invoke Y's state block first. However, when
|
278
|
+
# "import" and "include" are combined, we can't use the inheritance hierarchy
|
279
|
+
# to do this. When a module Z is imported, the import process inlines all the
|
280
|
+
# modules Z includes into a single module. Hence, we can no longer rely on the
|
281
|
+
# inheritance hierarchy to respect dependencies between modules. To fix this,
|
282
|
+
# we add an increasing ID to each state block's method name (assigned
|
283
|
+
# according to the order in which the state blocks are defined); we then sort
|
284
|
+
# by this order before invoking the state blocks.
|
285
|
+
def call_state_methods
  # Bucket the bound state methods by the numeric ID embedded in their name.
  by_id = {}
  self.class.instance_methods.each do |name|
    md = /^__state(\d+)__/.match(name.to_s)
    next if md.nil?
    (by_id[md[1].to_i] ||= []) << self.method(name)
  end

  # Invoke in ascending (numeric) ID order, i.e. the order of definition.
  by_id.keys.sort.each do |id|
    by_id[id].each { |meth| meth.call }
  end
end
|
298
|
+
|
299
|
+
# Evaluate all bootstrap blocks
|
300
|
+
def do_bootstrap
  # Walk the ancestor chain from the most distant ancestor down to this class,
  # running each ancestor's own __bootstrap__ methods.
  self.class.ancestors.reverse.each do |anc|
    anc.instance_methods(false).each do |m|
      next unless /^__bootstrap__/.match(m)
      self.method(m.to_sym).call
    end
  end
  # Finally run the plain "bootstrap" method.
  bootstrap

  @done_bootstrap = true
end
|
312
|
+
|
313
|
+
def do_rewrite
  # Build the meta-parser from the collected rule definitions and keep the
  # strata it produces.
  parser = BudMeta.new(self, @declarations)
  @meta_parser = parser
  @rewritten_strata = parser.meta_rewrite
end
|
317
|
+
|
318
|
+
public
|
319
|
+
|
320
|
+
########### give empty defaults for these
|
321
|
+
def declaration # :nodoc: all
  # Intentionally empty default.
  nil
end
|
323
|
+
def bootstrap # :nodoc: all
  # Intentionally empty default.
  nil
end
|
325
|
+
|
326
|
+
########### metaprogramming support for ruby and for rule rewriting
|
327
|
+
# helper to define instance methods
|
328
|
+
def singleton_class # :nodoc: all
  # Open this object's singleton class and hand it back.
  class << self
    self
  end
end
|
331
|
+
|
332
|
+
######## methods for controlling execution
|
333
|
+
|
334
|
+
# Run Bud in the background (in a different thread). This means that the Bud
|
335
|
+
# interpreter will run asynchronously from the caller, so care must be used
|
336
|
+
# when interacting with it. For example, it is not safe to directly examine
|
337
|
+
# Bud collections from the caller's thread (see async_do and sync_do).
|
338
|
+
#
|
339
|
+
# This instance of Bud will continue to execute until stop_bg is called.
|
340
|
+
def run_bg
  start_reactor
  # Block the caller until Bud has finished starting up.
  schedule_and_wait { start_bud }
end
|
347
|
+
|
348
|
+
# Run Bud in the "foreground" -- the caller's thread will be used to run the
|
349
|
+
# Bud interpreter. This means this method won't return unless an error
|
350
|
+
# occurs. It is often more useful to run Bud asynchronously -- see run_bg.
|
351
|
+
#
|
352
|
+
# Note that run_fg cannot be invoked if run_bg has already been called in the
|
353
|
+
# same Ruby process.
|
354
|
+
#
|
355
|
+
# Execution proceeds in time ticks, a la Dedalus.
|
356
|
+
# * Within each tick there may be multiple strata.
|
357
|
+
# * Within each stratum we do multiple semi-naive iterations.
|
358
|
+
def run_fg
  # Refuse to start a second event loop in this process.
  if EventMachine::reactor_running?
    raise BudError
  end

  EventMachine::run do
    start_bud
  end
end
|
365
|
+
|
366
|
+
# Shutdown a Bud instance that is running asynchronously. This method blocks
|
367
|
+
# until Bud has been shutdown. If +stop_em+ is true, the EventMachine event
|
368
|
+
# loop is also shutdown; this will interfere with the execution of any other
|
369
|
+
# Bud instances in the same process (as well as anything else that happens to
|
370
|
+
# use EventMachine).
|
371
|
+
def stop_bg(stop_em=false)
  # Without stop_em, shut down only this instance and wait for that to finish.
  return schedule_and_wait { do_shutdown(false) } unless stop_em

  schedule_shutdown(true)
  # Wait until EM has completely shutdown before we return.
  @em_stopped.pop
end
|
382
|
+
|
383
|
+
# Given a block, evaluate that block inside the background Ruby thread at some
|
384
|
+
# time in the future. Because the block is evaluated inside the background Ruby
|
385
|
+
# thread, the block can safely examine Bud state. Naturally, this method can
|
386
|
+
# only be used when Bud is running in the background. Note that calling
|
387
|
+
# sync_do blocks the caller until the block has been evaluated; for a
|
388
|
+
# non-blocking version, see async_do.
|
389
|
+
#
|
390
|
+
# Note that the block is invoked after one Bud timestep has ended but before
|
391
|
+
# the next timestep begins. Hence, synchronous accumulation (<=) into a Bud
|
392
|
+
# scratch collection in a callback is typically not a useful thing to do: when
|
393
|
+
# the next tick begins, the content of any scratch collections will be
|
394
|
+
# emptied, which includes anything inserted by a sync_do block using <=. To
|
395
|
+
# avoid this behavior, insert into scratches using <+.
|
396
|
+
def sync_do
  schedule_and_wait {
    yield if block_given?
    # Do another tick, in case the user-supplied block inserted any data
    tick
  }
end
|
403
|
+
|
404
|
+
# Like sync_do, but does not block the caller's thread: the given callback
|
405
|
+
# will be invoked at some future time. Note that calls to async_do respect
|
406
|
+
# FIFO order.
|
407
|
+
def async_do
  EventMachine::schedule {
    yield if block_given?
    # Do another tick, in case the user-supplied block inserted any data
    tick
  }
end
|
414
|
+
|
415
|
+
# Shutdown any persistent tables used by the current Bud instance. If you are
|
416
|
+
# running Bud via tick() and using `tctable` collections, you should call this
|
417
|
+
# after you're finished using Bud. Programs that use Bud via run_fg() or
|
418
|
+
# run_bg() don't need to call this manually.
|
419
|
+
def close_tables
  # Close every registered collection.
  @tables.each_value { |tbl| tbl.close }
end
|
424
|
+
|
425
|
+
# Register a new callback. Given the name of a Bud collection, this method
|
426
|
+
# arranges for the given block to be invoked at the end of any tick in which
|
427
|
+
# any tuples have been inserted into the specified collection. The code block
|
428
|
+
# is passed the collection as an argument; this provides a convenient way to
|
429
|
+
# examine the tuples inserted during that fixpoint. (Note that because the Bud
|
430
|
+
# runtime is blocked while the callback is invoked, it can also examine any
|
431
|
+
# other Bud state freely.)
|
432
|
+
#
|
433
|
+
# Note that registering callbacks on persistent collections (e.g., tables and
|
434
|
+
# tctables) is probably not a wise thing to do: as long as any tuples are
|
435
|
+
# stored in the collection, the callback will be invoked at the end of every
|
436
|
+
# tick.
|
437
|
+
def register_callback(tbl_name, &block)
  # We allow callbacks to be added before or after EM has been started. To
  # simplify matters, we start EM if it hasn't been started yet.
  start_reactor

  new_id = nil
  schedule_and_wait do
    raise Bud::BudError, "No such table: #{tbl_name}" unless @tables.has_key?(tbl_name)
    # IDs are handed out sequentially, so a collision is never expected.
    raise Bud::BudError if @callbacks.has_key?(@callback_id)

    new_id = @callback_id
    @callbacks[new_id] = [tbl_name, block]
    @callback_id += 1
  end
  new_id
end
|
454
|
+
|
455
|
+
# Unregister the callback that has the given ID.
|
456
|
+
def unregister_callback(id)
  schedule_and_wait {
    # Unknown IDs are an error rather than a silent no-op.
    if @callbacks.has_key?(id)
      @callbacks.delete(id)
    else
      raise Bud::BudError
    end
  }
end
|
462
|
+
|
463
|
+
# sync_callback supports synchronous interaction with Bud modules. The caller
|
464
|
+
# supplies the name of an input collection, a set of tuples to insert, and an
|
465
|
+
# output collection on which to 'listen.' The call blocks until tuples are
|
466
|
+
# inserted into the output collection: these are returned to the caller.
|
467
|
+
def sync_callback(in_tbl, tupleset, out_tbl)
  # in_tbl:   name of the collection to insert into, or nil to insert nothing
  # tupleset: tuples to insert into in_tbl
  # out_tbl:  name of the collection to listen on
  # Returns the contents of out_tbl (as an Array) from the first tick in which
  # the callback fires. Raises Bud::BudError if in_tbl is not a known
  # collection.
  q = Queue.new
  cb = register_callback(out_tbl) do |c|
    q.push c.to_a
  end

  begin
    unless in_tbl.nil?
      sync_do {
        t = @tables[in_tbl]
        raise Bud::BudError, "No such table: #{in_tbl}" if t.nil?
        # Channels and ZK tables are written asynchronously (<~); everything
        # else gets a deferred insert (<+).
        if t.class <= Bud::BudChannel or t.class <= Bud::BudZkTable
          t <~ tupleset
        else
          t <+ tupleset
        end
      }
    end
    q.pop
  ensure
    # Always drop the temporary callback, even if the insert failed; otherwise
    # it would stay registered and keep firing on later ticks.
    unregister_callback(cb)
  end
end
|
486
|
+
|
487
|
+
# a common special case for sync_callback: block on a delta to a table.
|
488
|
+
def delta(out_tbl)
  # No input table and no tuples: just wait for out_tbl to receive something.
  return sync_callback(nil, nil, out_tbl)
end
|
491
|
+
|
492
|
+
private
|
493
|
+
|
494
|
+
def invoke_callbacks
  @callbacks.each_value do |entry|
    tbl_name = entry[0]
    block = entry[1]
    tbl = @tables[tbl_name]
    # Callbacks only fire for collections that currently hold tuples.
    next if tbl.empty?
    block.call(tbl)
  end
end
|
503
|
+
|
504
|
+
def start_reactor
  return if EventMachine::reactor_running?

  # Print and re-raise anything that escapes into the event loop.
  EventMachine::error_handler { |e|
    puts "Unexpected Bud error: #{e.inspect}"
    puts e.backtrace.join("\n")
    raise e
  }

  started = Queue.new
  # This thread helps us avoid race conditions on the start and stop of
  # EventMachine's event loop.
  Thread.new {
    EventMachine.run { started << true }
    # Executed only after EventMachine::stop_event_loop is done
    @em_stopped << true
  }
  # Block waiting for EM's event loop to start up.
  started.pop
end
|
527
|
+
|
528
|
+
# Schedule a block to be evaluated by EventMachine in the future, and
|
529
|
+
# block until this has happened.
|
530
|
+
def schedule_and_wait
  # Try to defend against error situations in which EM has stopped, but we've
  # been called nonetheless. This is racy, but better than nothing.
  unless EventMachine::reactor_running?
    raise BudError, "EM not running"
  end

  done = Queue.new
  EventMachine::schedule do
    # Pass either false (success) or the raised exception back to the caller.
    outcome = begin
      yield
      false
    rescue Exception
      $!
    end
    done.push(outcome)
  end

  failure = done.pop
  raise failure if failure
end
|
549
|
+
|
550
|
+
# Tear down this instance: cancel its timers, close its tables and close its
# datagram socket. If +stop_em+ is true, also ask EventMachine to stop its
# event loop.
def do_shutdown(stop_em=false)
  @timers.each { |t| t.cancel }
  close_tables
  # In this file @dsock is only assigned by do_start_server, so it is still
  # nil if shutdown is requested before the server socket was opened.
  @dsock.close_connection unless @dsock.nil?
  # Note that this affects anyone else in the same process who happens to be
  # using EventMachine! This is also a non-blocking call; to block until EM
  # has completely shutdown, we use the @em_stopped queue.
  EventMachine::stop_event_loop if stop_em
end
|
561
|
+
|
562
|
+
# Schedule a "graceful" shutdown for a future EM tick. If EM is not currently
|
563
|
+
# running, shutdown immediately.
|
564
|
+
def schedule_shutdown(stop_em=false)
  # No reactor: nothing to schedule on, so shut down right away.
  return do_shutdown(stop_em) unless EventMachine::reactor_running?

  EventMachine::schedule { do_shutdown(stop_em) }
end
|
573
|
+
|
574
|
+
# Start this Bud instance. Must be called from the EventMachine reactor
# thread (raises BudError otherwise). Installs signal handlers unless
# disabled, opens the server socket, starts timers and watchers, and runs
# the first tick unless the instance is lazy.
def start_bud
  raise BudError unless EventMachine::reactor_thread?

  # If we get SIGINT or SIGTERM, shutdown gracefully. (This previously
  # trapped "TRAP", i.e. SIGTRAP, leaving SIGTERM unhandled.)
  unless @options[:no_signal_handlers]
    %w[INT TERM].each do |sig|
      Signal.trap(sig) do
        schedule_shutdown(true)
      end
    end
  end

  do_start_server

  # Initialize periodics
  @periodics.each do |p|
    @timers << set_periodic_timer(p.pername, p.ident, p.period)
  end

  # Arrange for Bud to read from stdin if enabled. Note that we can't do this
  # earlier because we need to wait for EventMachine startup.
  @stdio.start_stdin_reader if @options[:read_stdin]
  @zk_tables.each_value {|t| t.start_watchers}

  # Compute a fixpoint; this will also invoke any bootstrap blocks.
  tick unless @lazy

  @rtracer.sleep if options[:rtrace]
end
|
604
|
+
|
605
|
+
def do_start_server
  # Open the UDP socket, then read back the port actually bound (this matters
  # when an ephemeral port, 0, was requested).
  @dsock = EventMachine::open_datagram_socket(@ip, @options[:port], BudServer, self)
  bound_port = Socket.unpack_sockaddr_in(@dsock.get_sockname).first
  @port = bound_port
end
|
610
|
+
|
611
|
+
public
|
612
|
+
|
613
|
+
# Returns the ip and port of the Bud instance. In addition to the local IP
|
614
|
+
# and port, the user may define an external IP and/or port. the external
|
615
|
+
# version of each is returned if available. If not, the local version is
|
616
|
+
# returned. There are use cases for mixing and matching local and external.
|
617
|
+
# local_ip:external_port would be if you have local port forwarding, and
|
618
|
+
# external_ip:local_port would be if you're in a DMZ, for example
|
619
|
+
def ip_port
  # With no bound port, no fixed port and no external port there is nothing
  # meaningful to report yet.
  if @port.nil? && @options[:port] == 0 && !@options[:ext_port]
    raise BudError, "ip_port called before port defined"
  end

  # Prefer the external address/port when configured, else the local ones.
  host = options[:ext_ip] ? @options[:ext_ip] : @ip
  port_src = if options[:ext_port]
    @options[:ext_port]
  elsif @port.nil?
    @options[:port]
  else
    @port
  end
  "#{host}:#{port_src}"
end
|
627
|
+
|
628
|
+
# Returns the internal IP and port. See ip_port
|
629
|
+
def int_ip_port
  # Returns "ip:port" using the bound port when known, else the configured
  # one. Raises BudError if neither is available yet (ephemeral port requested
  # and the socket not opened). The message now names this method; it used to
  # say "ip_port".
  if @port.nil? && @options[:port] == 0
    raise BudError, "int_ip_port called before port defined"
  end
  "#{@ip}:#{@port.nil? ? @options[:port] : @port}"
end
|
633
|
+
|
634
|
+
# manually trigger one timestep of Bloom execution.
|
635
|
+
def tick
  # Let every collection do its per-timestep housekeeping first.
  @tables.each_value { |tbl| tbl.tick }

  # Join state does not carry over between timesteps.
  @joinstate = {}

  do_bootstrap unless @done_bootstrap
  receive_inbound

  # Evaluate each stratum to fixpoint, in order.
  @strata.each_with_index do |stratum, idx|
    stratum_fixpoint(stratum, idx)
  end
  @viz.do_cards if @options[:trace]
  do_flush
  invoke_callbacks
  @budtime += 1
end
|
651
|
+
|
652
|
+
private
|
653
|
+
|
654
|
+
# Builtin BUD state (predefined collections). We could define this using the
|
655
|
+
# standard "state" syntax, but we want to ensure that builtin state is
|
656
|
+
# initialized before user-defined state.
|
657
|
+
def builtin_state
  channel(:localtick, [:col1])
  @stdio = terminal(:stdio)
  @periodics = table(:periodics_tbl, [:pername] => [:ident, :period])

  # for BUD reflection
  table(:t_rules, [:rule_id] => [:lhs, :op, :src])
  table(:t_depends, [:rule_id, :lhs, :op, :body] => [:nm])
  table(:t_depends_tc, [:head, :body, :via, :neg, :temporal])
  table(:t_provides, [:interface] => [:input])
  # t_underspecified reuses the schema of t_provides, declared just above.
  table(:t_underspecified, t_provides.schema)
  table(:t_stratum, [:predicate] => [:stratum])
  table(:t_cycle, [:predicate, :via, :neg, :temporal])
end
|
671
|
+
|
672
|
+
# Handle any inbound tuples off the wire and then clear. Received messages are
|
673
|
+
# placed directly into the storage of the appropriate local channel.
|
674
|
+
def receive_inbound
  @inbound.each do |msg|
    # msg[0] is the destination collection name, msg[1] the tuple.
    dest = tables[msg[0].to_sym]
    dest << msg[1]
  end
  # Everything has been delivered; start the next tick with an empty queue.
  @inbound = []
end
|
681
|
+
|
682
|
+
# "Flush" any tuples that need to be flushed. This does two things:
|
683
|
+
# 1. Emit outgoing tuples in channels and ZK tables.
|
684
|
+
# 2. Commit to disk any changes made to on-disk tables.
|
685
|
+
def do_flush
  # @channels is keyed by collection name; the collection itself lives in @tables.
  @channels.each { |entry| @tables[entry.first].flush }
  @zk_tables.each_value { |zk| zk.flush }
  @tc_tables.each_value { |tc| tc.flush }
end
|
690
|
+
|
691
|
+
def stratum_fixpoint(strat, strat_num)
  # Compute a fixpoint of the rules in strat using semi-naive evaluation.
  #
  # As described in lib/collections.rb, each collection has three
  # sub-collections of note here:
  #   @storage: the "main" storage of tuples
  #   @delta: tuples that should be used to drive derivation of new facts
  #   @new_delta: a place to store newly-derived facts
  #
  # On the first pass through the loop @stratum_first_iter is true, which
  # tells the Join::each code to join up all its @storage subcollections to
  # start. On later passes the join code uses some table's @delta to ensure
  # that only new tuples are derived.
  #
  # Note that calling "each" on a non-Join collection will iterate through
  # both storage and delta.
  #
  # At the end of each pass we transition:
  # - @delta tuples are merged into @storage
  # - @new_delta tuples are moved into @delta
  # - @new_delta is set to empty
  #
  # XXX as a performance optimization, it would be nice to bypass the delta
  # tables for any preds that don't participate in a rhs Join -- in that
  # case there's pointless extra tuple movement letting tuples "graduate"
  # through @new_delta and @delta.
  @stratum_first_iter = true
  converged = false
  until converged
    strat.each_with_index do |rule, idx|
      begin
        rule.call
      rescue Exception => e
        # Don't report source text for certain rules (old-style rule blocks)
        rule_src = @rule_src[strat_num][idx]
        src_msg = (rule_src == "") ? "" : "\nRule: #{rule_src}"

        # Re-raise BudErrors as themselves; wrap anything else in BudError.
        to_raise = (e.class <= BudError) ? e : BudError
        raise to_raise, "Exception during Bud evaluation.\nException: #{e.inspect}.#{src_msg}"
      end
    end
    @stratum_first_iter = false
    # XXX this next line is inefficient.
    # we could call tick_deltas only on predicates in this stratum.
    # but it's not easy right now (??) to pull out tables in a given stratum
    @tables.each_value { |coll| coll.tick_deltas }
    # Stop once no collection has pending delta or new_delta tuples.
    converged = @tables.all? { |name, coll| coll.new_delta.empty? && coll.delta.empty? }
  end
end
|
750
|
+
|
751
|
+
####### Joins
|
752
|
+
def wrap_map(j, &blk)
  # With no block the join is returned untouched; otherwise map the block over it.
  blk.nil? ? j : j.map(&blk)
end
|
759
|
+
|
760
|
+
public
|
761
|
+
def joinstate # :nodoc: all
  # Plain reader for the per-instance join state.
  return @joinstate
end
|
764
|
+
|
765
|
+
public
|
766
|
+
def join(collections, *preds, &blk) # :nodoc: all
  # A new BudJoin is built on every call; the optional block is mapped over it.
  joined = BudJoin.new(collections, self, preds)
  wrap_map(joined, &blk)
end
|
771
|
+
|
772
|
+
def natjoin(collections, &blk) # :nodoc: all
  # Derive the predicates via BudJoin::natural_preds, then delegate to join.
  natural = BudJoin::natural_preds(self, collections)
  join(collections, *natural, &blk)
end
|
777
|
+
|
778
|
+
# left-outer-join syntax to be used in rhs of Bloom statements.
|
779
|
+
# first argument an array of 2 collections, second argument an array of predicates (as in Bud::BudCollection.pairs)
|
780
|
+
def leftjoin(collections, *preds, &blk)
  # Same shape as join, but built on BudLeftJoin.
  outer = BudLeftJoin.new(collections, self, preds)
  wrap_map(outer, &blk)
end
|
783
|
+
|
784
|
+
private
|
785
|
+
|
786
|
+
######## ids and timers
|
787
|
+
def gen_id
  # Current epoch seconds immediately followed by a random float, as one string.
  "#{Time.new.to_i}#{rand}"
end
|
790
|
+
|
791
|
+
def set_periodic_timer(name, id, period)
  # Every +period+ seconds: queue an [id, timestamp] tuple into the named
  # collection, then run a tick.
  EventMachine::PeriodicTimer.new(period) {
    @tables[name] <+ [[id, Time.new.to_s]]
    tick
  }
end
|
797
|
+
end
|