bud 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- $struct_classes = {}
2
1
  module Bud
3
2
  ########
4
3
  #--
@@ -17,7 +16,7 @@ module Bud
17
16
  attr_accessor :bud_instance # :nodoc: all
18
17
  attr_reader :tabname, :cols, :key_cols # :nodoc: all
19
18
  attr_reader :struct
20
- attr_reader :storage, :delta, :new_delta, :pending, :tick_delta # :nodoc: all
19
+ attr_reader :new_delta, :pending # :nodoc: all
21
20
  attr_reader :wired_by, :scanner_cnt
22
21
  attr_accessor :invalidated, :rescan
23
22
  attr_accessor :is_source
@@ -63,7 +62,7 @@ module Bud
63
62
  if @cols.empty?
64
63
  @cols = nil
65
64
  else
66
- @struct = ($struct_classes[@cols] ||= Bud::TupleStruct.new(*@cols))
65
+ @struct = Bud::TupleStruct.new_struct(@cols)
67
66
  @structlen = @struct.members.length
68
67
  end
69
68
  setup_accessors
@@ -250,7 +249,7 @@ module Bud
250
249
  def sort(&blk)
251
250
  if @bud_instance.wiring?
252
251
  pusher = self.pro
253
- pusher.sort("sort#{object_id}", @bud_instance, @cols, &blk)
252
+ pusher.sort("sort#{object_id}".to_sym, @bud_instance, @cols, &blk)
254
253
  else
255
254
  @storage.values.sort(&blk)
256
255
  end
@@ -275,7 +274,12 @@ module Bud
275
274
 
276
275
  public
277
276
  def each_raw(&block)
278
- @storage.each_value(&block)
277
+ each_from([@storage], &block)
278
+ end
279
+
280
+ public
281
+ def each_delta(&block)
282
+ each_from([@delta], &block)
279
283
  end
280
284
 
281
285
  public
@@ -301,37 +305,26 @@ module Bud
301
305
  public
302
306
  def tick_metrics
303
307
  strat_num = bud_instance.this_stratum
304
- rule_num = bud_instance.this_rule
305
- addr = nil
306
308
  addr = bud_instance.ip_port unless bud_instance.port.nil?
309
+ key = { :addr=>addr, :tabname=>qualified_tabname,
310
+ :strat_num=>strat_num}
311
+
307
312
  bud_instance.metrics[:collections] ||= {}
308
- bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
309
- bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
313
+ bud_instance.metrics[:collections][key] ||= 0
314
+ bud_instance.metrics[:collections][key] += 1
310
315
  end
311
316
 
312
317
  private
313
318
  def each_from(bufs, &block) # :nodoc: all
319
+ do_metrics = bud_instance.options[:metrics]
314
320
  bufs.each do |b|
315
321
  b.each_value do |v|
316
- tick_metrics if bud_instance and bud_instance.options[:metrics]
322
+ tick_metrics if do_metrics
317
323
  yield v
318
324
  end
319
325
  end
320
326
  end
321
327
 
322
- public
323
- def each_from_sym(buf_syms, &block) # :nodoc: all
324
- bufs = buf_syms.map do |s|
325
- case s
326
- when :storage then @storage
327
- when :delta then @delta
328
- when :new_delta then @new_delta
329
- else raise Bud::Error, "bad symbol passed into each_from_sym"
330
- end
331
- end
332
- each_from(bufs, &block)
333
- end
334
-
335
328
  private
336
329
  def init_storage
337
330
  @storage = {}
@@ -374,7 +367,7 @@ module Bud
374
367
  # checks for +item+ in the collection
375
368
  public
376
369
  def include?(item)
377
- return true if key_cols.nil? or (key_cols.empty? and length > 0)
370
+ return true if key_cols.nil?
378
371
  return false if item.nil?
379
372
  key = get_key_vals(item)
380
373
  return (item == self[key])
@@ -650,6 +643,11 @@ module Bud
650
643
  end
651
644
  end
652
645
 
646
+ superator "<~" do |o|
647
+ # Overridden when <~ is defined (i.e., channels and terminals)
648
+ raise Bud::CompileError, "#{tabname} cannot appear on the lhs of a <~ operator"
649
+ end
650
+
653
651
  def tick
654
652
  raise Bud::Error, "tick must be overriden in #{self.class}"
655
653
  end
@@ -1111,6 +1109,11 @@ module Bud
1111
1109
  return true
1112
1110
  end
1113
1111
 
1112
+ public
1113
+ def bootstrap
1114
+ # override BudCollection; pending should not be moved into delta.
1115
+ end
1116
+
1114
1117
  public
1115
1118
  def flush #:nodoc: all
1116
1119
  out_io = get_out_io
@@ -1257,7 +1260,7 @@ module Bud
1257
1260
 
1258
1261
  def invalidated=(val)
1259
1262
  # Might be reset to false at end-of-tick, but shouldn't be set to true
1260
- raise Bud::Error, "cannot not set invalidate on table '#{@tabname}'" if val
1263
+ raise Bud::Error, "cannot set invalidate on table '#{@tabname}'" if val
1261
1264
  super
1262
1265
  end
1263
1266
 
@@ -1383,7 +1386,7 @@ module Bud
1383
1386
  end
1384
1387
 
1385
1388
  class BudFileReader < BudReadOnly # :nodoc: all
1386
- def initialize(name, filename, delimiter, bud_instance) # :nodoc: all
1389
+ def initialize(name, filename, bud_instance) # :nodoc: all
1387
1390
  super(name, bud_instance, {[:lineno] => [:text]})
1388
1391
  @filename = filename
1389
1392
  @storage = {}
@@ -199,7 +199,7 @@ module Bud
199
199
  public
200
200
  def pro(the_name=elem_name, the_schema=schema, &blk)
201
201
  toplevel = @bud_instance.toplevel
202
- elem = Bud::PushElement.new("project#{object_id}",
202
+ elem = Bud::PushElement.new("project#{object_id}".to_sym,
203
203
  toplevel.this_rule_context,
204
204
  @collection_name, the_schema)
205
205
  self.wire_to(elem)
@@ -213,7 +213,7 @@ module Bud
213
213
  public
214
214
  def each_with_index(&blk)
215
215
  toplevel = @bud_instance.toplevel
216
- elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}",
216
+ elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}".to_sym,
217
217
  toplevel.this_rule_context,
218
218
  @collection_name)
219
219
  elem.set_block(&blk)
@@ -284,7 +284,7 @@ module Bud
284
284
 
285
285
  aggpairs = prep_aggpairs(aggpairs)
286
286
  toplevel = @bud_instance.toplevel
287
- g = Bud::PushGroup.new('grp'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
287
+ g = Bud::PushGroup.new("grp#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
288
288
  @collection_name, keycols, aggpairs, the_schema, &blk)
289
289
  self.wire_to(g)
290
290
  toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
@@ -302,7 +302,7 @@ module Bud
302
302
  end
303
303
 
304
304
  aggpairs = [[agg, collection]]
305
- aa = Bud::PushArgAgg.new('argagg'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
305
+ aa = Bud::PushArgAgg.new("argagg#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
306
306
  @collection_name, gbkey_cols, aggpairs, schema, &blk)
307
307
  self.wire_to(aa)
308
308
  toplevel.push_elems[[self.object_id, :argagg, gbkey_cols, aggpairs, blk]] = aa
@@ -346,7 +346,7 @@ module Bud
346
346
  end
347
347
 
348
348
  def reduce(initial, &blk)
349
- retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}",
349
+ retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}".to_sym,
350
350
  @bud_instance, @collection_name,
351
351
  schema, initial, &blk)
352
352
  self.wire_to(retval)
@@ -498,7 +498,7 @@ module Bud
498
498
  end
499
499
 
500
500
  # send deltas out in all cases
501
- @collection.delta.each_value {|item| push_out(item)}
501
+ @collection.each_delta {|item| push_out(item)}
502
502
  end
503
503
  end
504
504
 
@@ -1,6 +1,5 @@
1
1
  require 'bud/executor/elements'
2
2
 
3
- $EMPTY = []
4
3
  module Bud
5
4
  class PushSHJoin < PushStatefulElement
6
5
  attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
@@ -41,18 +40,6 @@ module Bud
41
40
  @selfjoins << name if cnt == 2
42
41
  end
43
42
 
44
- # derive schema: one column for each table.
45
- # duplicated inputs get distinguishing numeral
46
- @cols = []
47
- retval = @all_rels_below.reduce({}) do |memo, r|
48
- r_name = r.qualified_tabname.to_s
49
- memo[r_name] ||= 0
50
- newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
51
- @cols << newstr.to_sym
52
- memo[r_name] += 1
53
- memo
54
- end
55
-
56
43
  setup_preds(preds) unless preds.empty?
57
44
  setup_state
58
45
 
@@ -143,14 +130,6 @@ module Bud
143
130
  if source_elem.rescan
144
131
  puts "#{qualified_tabname} rel:#{i}(#{source_elem.qualified_tabname}) invalidated" if $BUD_DEBUG
145
132
  @hash_tables[i] = {}
146
- if i == 0
147
- # Only if i == 0 because outer joins in Bloom are left outer joins.
148
- # If i == 1, missing_keys will be corrected when items are populated
149
- # in the rhs fork.
150
- # XXX This is not modular. We are doing invalidation work for outer
151
- # joins, which is part of a separate module PushSHOuterJoin.
152
- @missing_keys.clear
153
- end
154
133
  end
155
134
  end
156
135
  end
@@ -165,7 +144,7 @@ module Bud
165
144
  # referenced in entry.
166
145
  subtuple = 0
167
146
  all_rels_below[0..all_rels_below.length-1].each_with_index do |t,i|
168
- if t.qualified_tabname == entry[0]
147
+ if t.qualified_tabname == name
169
148
  subtuple = i
170
149
  break
171
150
  end
@@ -183,7 +162,7 @@ module Bud
183
162
  elsif k.class <= Array
184
163
  [k,v]
185
164
  elsif k.class <= Symbol
186
- if @all_rels_below and @all_rels_below.length == 2
165
+ if @all_rels_below.length == 2
187
166
  [find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
188
167
  else
189
168
  [find_attr_match(k), find_attr_match(v)]
@@ -235,9 +214,10 @@ module Bud
235
214
 
236
215
  protected
237
216
  def canonicalize_localpreds(rel_list, preds) # :nodoc:all
238
- retval = preds.map do |p|
239
- # reverse if lhs is rel_list[1], *unless* it's a self-join!
240
- (p[0][0] == rel_list[1].qualified_tabname and p[0][0] != p[1][0]) ? p.reverse : p
217
+ second_rel = rel_list[1].qualified_tabname
218
+ preds.map do |p|
219
+ # reverse if lhs is second_rel *unless* it's a self-join!
220
+ (p[0][0] == second_rel and p[0][0] != p[1][0]) ? p.reverse : p
241
221
  end
242
222
  end
243
223
 
@@ -251,16 +231,15 @@ module Bud
251
231
  # again if we didn't rescan now.
252
232
  replay_join if @rescan
253
233
 
254
- if @selfjoins.include? source.qualified_tabname
234
+ source_tbl = source.qualified_tabname
235
+ if @selfjoins.include? source_tbl
255
236
  offsets = []
256
- @relnames.each_with_index{|r,i| offsets << i if r == source.qualified_tabname}
237
+ @relnames.each_with_index{|r,i| offsets << i if r == source_tbl}
257
238
  else
258
- offsets = [@relnames.index(source.qualified_tabname)]
259
- end
260
- raise Bud::Error, "item #{item.inspect} inserted into join from unknown source #{source.elem_name}" if offsets == $EMPTY
261
- offsets.each do |offset|
262
- insert_item(item, offset)
239
+ offsets = [@relnames.index(source_tbl)]
263
240
  end
241
+
242
+ offsets.each {|offset| insert_item(item, offset)}
264
243
  end
265
244
 
266
245
  protected
@@ -332,14 +311,32 @@ module Bud
332
311
  ####
333
312
  # and now, the Bloom-facing methods
334
313
  # given a * expression over n collections, form all combinations of items
335
- # subject to an array of predicates, pred
336
- # currently supports two options for equijoin predicates:
314
+ # subject to an array of predicates, +preds+.
315
+ # currently supports two syntax options for equijoin predicates:
337
316
  # general form: an array of arrays capturing a conjunction of equiv. classes
338
317
  # [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
339
318
  # common form: a hash capturing equality of a column on left with one on right.
340
319
  # :col1 => :col2 (same as lefttable.col1 => righttable.col2)
341
320
  public
342
321
  def pairs(*preds, &blk)
322
+ if @cols.nil?
323
+ # derive schema if needed: one column for each table. duplicated inputs
324
+ # get distinguishing numeral.
325
+ #
326
+ # XXX: actually, this seems completely bogus. The schema for the output
327
+ # of the join should depend on the join's *targetlist*.
328
+ @cols = []
329
+ retval = @all_rels_below.reduce({}) do |memo, r|
330
+ r_name = r.qualified_tabname.to_s
331
+ memo[r_name] ||= 0
332
+ newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
333
+ @cols << newstr.to_sym
334
+ memo[r_name] += 1
335
+ memo
336
+ end
337
+ setup_accessors
338
+ end
339
+
343
340
  @origpreds = preds
344
341
  setup_preds(preds) unless preds.empty?
345
342
  # given new preds, the state for the join will be different. set it up again.
@@ -361,20 +358,24 @@ module Bud
361
358
  end
362
359
 
363
360
  public
364
- def rights(*preds, &blk)
365
- @cols = blk.nil? ? @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols : nil
366
- setup_accessors if blk.nil?
361
+ def lefts(*preds, &blk)
362
+ if blk.nil?
363
+ @cols = @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols
364
+ setup_accessors
365
+ end
367
366
  pairs(*preds) do |x,y|
368
- blk.nil? ? y : blk.call(y)
367
+ blk.nil? ? x : blk.call(x)
369
368
  end
370
369
  end
371
370
 
372
371
  public
373
- def lefts(*preds, &blk)
374
- @cols = blk.nil? ? @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols : nil
375
- setup_accessors if blk.nil?
372
+ def rights(*preds, &blk)
373
+ if blk.nil?
374
+ @cols = @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols
375
+ setup_accessors
376
+ end
376
377
  pairs(*preds) do |x,y|
377
- blk.nil? ? x : blk.call(x)
378
+ blk.nil? ? y : blk.call(y)
378
379
  end
379
380
  end
380
381
 
@@ -405,13 +406,13 @@ module Bud
405
406
  public
406
407
  def flatten(*preds, &blk)
407
408
  if blk.nil?
408
- @cols = dupfree_schema(@bud_instance.tables[@cols[0]].cols + @bud_instance.tables[@cols[1]].cols)
409
+ @cols = dupfree_schema(@rels[0].cols + @rels[1].cols)
409
410
  else
410
411
  @cols = []
411
412
  end
412
413
  setup_accessors
413
414
  pairs(*preds) do |x,y|
414
- blk.nil? ? x.to_a + y.to_a : blk.call(x.to_a + y.to_a)
415
+ blk.nil? ? x + y : blk.call(x + y)
415
416
  end
416
417
  end
417
418
 
@@ -484,6 +485,14 @@ module Bud
484
485
  end
485
486
  end
486
487
  end
488
+
489
+ public
490
+ def invalidate_cache
491
+ super
492
+ # Only if need to check left join rel because outer joins in Bloom are
493
+ # left outer joins.
494
+ @missing_keys.clear if @rels.first.rescan
495
+ end
487
496
  end
488
497
 
489
498
 
@@ -497,11 +506,11 @@ module Bud
497
506
  # first flush, at which point we are sure to have seen all the t-side tuples
498
507
  # in this tick.
499
508
  class PushNotIn < PushStatefulElement
500
- def initialize(rellist, bud_instance, preds=nil, &blk) # :nodoc: all
509
+ def initialize(rellist, bud_instance, preds, &blk) # :nodoc: all
501
510
  @lhs, @rhs = rellist
502
511
  @lhs_keycols = nil
503
512
  @rhs_keycols = nil
504
- name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}"
513
+ name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}".to_sym
505
514
  super(name_in, bud_instance, nil, @lhs.schema)
506
515
  setup_preds(preds) unless preds.empty?
507
516
  @rhs_rcvd = false
@@ -532,12 +541,13 @@ module Bud
532
541
  end
533
542
 
534
543
  def find_col(colspec, rel)
535
- if colspec.is_a? Symbol
544
+ case colspec
545
+ when Symbol
536
546
  unless rel.respond_to? colspec
537
547
  raise Bud::Error, "attribute :#{colspec} not found in #{rel.qualified_tabname}"
538
548
  end
539
549
  col_desc = rel.send(colspec)
540
- elsif colspec.is_a? Array
550
+ when Array
541
551
  col_desc = colspec
542
552
  else
543
553
  raise Bud::Error, "symbol or column spec expected. Got #{colspec}"
@@ -546,8 +556,8 @@ module Bud
546
556
  end
547
557
 
548
558
  def get_key(item, offset)
549
- keycols = offset == 0 ? @lhs_keycols : @rhs_keycols
550
- keycols.nil? ? $EMPTY : item.values_at(*keycols)
559
+ keycols = (offset == 0 ? @lhs_keycols : @rhs_keycols)
560
+ keycols.nil? ? [] : item.values_at(*keycols)
551
561
  end
552
562
 
553
563
  public
@@ -580,8 +590,9 @@ module Bud
580
590
  # growing any more, until the next tick.
581
591
  unless @rhs_rcvd
582
592
  @rhs_rcvd = true
593
+ rhs_hash = @hash_tables[1]
583
594
  @hash_tables[0].each do |key,values|
584
- rhs_values = @hash_tables[1][key]
595
+ rhs_values = rhs_hash[key]
585
596
  values.each {|item| process_match(item, rhs_values)}
586
597
  end
587
598
  end
@@ -500,6 +500,11 @@ class Bud::LatticeWrapper
500
500
  end
501
501
  end
502
502
 
503
+ superator "<~" do |o|
504
+ # Overridden when <~ is defined (i.e., channels and terminals)
505
+ raise Bud::CompileError, "#{tabname} cannot appear on the lhs of a <~ operator"
506
+ end
507
+
503
508
  # XXX: refactor with BudCollection to avoid duplication of code
504
509
  def add_merge_target
505
510
  toplevel = @bud_instance.toplevel
@@ -10,17 +10,27 @@ class Class
10
10
  end
11
11
  end
12
12
 
13
+ $struct_classes = {}
14
+ $struct_lock = Mutex.new
15
+
13
16
  # FIXME: Should likely override #hash and #eql? as well.
14
17
  class Bud::TupleStruct < Struct
15
18
  include Comparable
16
19
 
20
+ def self.new_struct(cols)
21
+ $struct_lock.synchronize {
22
+ ($struct_classes[cols] ||= Bud::TupleStruct.new(*cols))
23
+ }
24
+ end
25
+
26
+ # XXX: This only considers two TupleStruct instances to be equal if they have
27
+ # the same schema (column names) AND the same contents; unclear if structural
28
+ # equality (consider only values, not column names) would be better.
17
29
  def <=>(o)
18
30
  if o.class == self.class
19
31
  self.each_with_index do |e, i|
20
32
  other = o[i]
21
33
  next if e == other
22
- return nil if e.nil?
23
- return nil if other.nil?
24
34
  return e <=> other
25
35
  end
26
36
  return 0
@@ -35,18 +45,27 @@ class Bud::TupleStruct < Struct
35
45
  if o.class == self.class
36
46
  return super
37
47
  elsif o.class == Array
38
- begin
39
- self.each_with_index do |el, i|
40
- return false if el != o[i]
41
- end
42
- return true
43
- rescue StandardError
44
- return false
48
+ return false if self.length != o.length
49
+ self.each_with_index do |el, i|
50
+ return false if el != o[i]
45
51
  end
52
+ return true
46
53
  end
47
54
  false
48
55
  end
49
56
 
57
+ def hash
58
+ self.values.hash
59
+ end
60
+
61
+ def eql?(o)
62
+ self == o
63
+ end
64
+
65
+ def +(o)
66
+ self.to_ary + o.to_ary
67
+ end
68
+
50
69
  def to_msgpack(out=nil)
51
70
  self.to_a.to_msgpack(out)
52
71
  end
@@ -56,15 +75,23 @@ class Bud::TupleStruct < Struct
56
75
  end
57
76
 
58
77
  alias :to_s :inspect
78
+ alias :to_ary :to_a
59
79
  end
60
80
 
61
81
  # XXX: TEMPORARY/UGLY hack to ensure that arrays and structs compare. This can be
62
82
  # removed once tests are rewritten.
63
83
  class Array
64
- alias :oldeq :==
84
+ alias :old_eq :==
85
+ alias :old_eql? :eql?
86
+
65
87
  def ==(o)
66
88
  o = o.to_a if o.kind_of? Bud::TupleStruct
67
- self.oldeq(o)
89
+ self.old_eq(o)
90
+ end
91
+
92
+ def eql?(o)
93
+ o = o.to_a if o.kind_of? Bud::TupleStruct
94
+ self.old_eql?(o)
68
95
  end
69
96
  end
70
97