bud 0.9.7 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,3 @@
1
- $struct_classes = {}
2
1
  module Bud
3
2
  ########
4
3
  #--
@@ -17,7 +16,7 @@ module Bud
17
16
  attr_accessor :bud_instance # :nodoc: all
18
17
  attr_reader :tabname, :cols, :key_cols # :nodoc: all
19
18
  attr_reader :struct
20
- attr_reader :storage, :delta, :new_delta, :pending, :tick_delta # :nodoc: all
19
+ attr_reader :new_delta, :pending # :nodoc: all
21
20
  attr_reader :wired_by, :scanner_cnt
22
21
  attr_accessor :invalidated, :rescan
23
22
  attr_accessor :is_source
@@ -63,7 +62,7 @@ module Bud
63
62
  if @cols.empty?
64
63
  @cols = nil
65
64
  else
66
- @struct = ($struct_classes[@cols] ||= Bud::TupleStruct.new(*@cols))
65
+ @struct = Bud::TupleStruct.new_struct(@cols)
67
66
  @structlen = @struct.members.length
68
67
  end
69
68
  setup_accessors
@@ -250,7 +249,7 @@ module Bud
250
249
  def sort(&blk)
251
250
  if @bud_instance.wiring?
252
251
  pusher = self.pro
253
- pusher.sort("sort#{object_id}", @bud_instance, @cols, &blk)
252
+ pusher.sort("sort#{object_id}".to_sym, @bud_instance, @cols, &blk)
254
253
  else
255
254
  @storage.values.sort(&blk)
256
255
  end
@@ -275,7 +274,12 @@ module Bud
275
274
 
276
275
  public
277
276
  def each_raw(&block)
278
- @storage.each_value(&block)
277
+ each_from([@storage], &block)
278
+ end
279
+
280
+ public
281
+ def each_delta(&block)
282
+ each_from([@delta], &block)
279
283
  end
280
284
 
281
285
  public
@@ -301,37 +305,26 @@ module Bud
301
305
  public
302
306
  def tick_metrics
303
307
  strat_num = bud_instance.this_stratum
304
- rule_num = bud_instance.this_rule
305
- addr = nil
306
308
  addr = bud_instance.ip_port unless bud_instance.port.nil?
309
+ key = { :addr=>addr, :tabname=>qualified_tabname,
310
+ :strat_num=>strat_num}
311
+
307
312
  bud_instance.metrics[:collections] ||= {}
308
- bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] ||= 0
309
- bud_instance.metrics[:collections][{:addr=>addr, :tabname=>qualified_tabname, :strat_num=>strat_num, :rule_num=>rule_num}] += 1
313
+ bud_instance.metrics[:collections][key] ||= 0
314
+ bud_instance.metrics[:collections][key] += 1
310
315
  end
311
316
 
312
317
  private
313
318
  def each_from(bufs, &block) # :nodoc: all
319
+ do_metrics = bud_instance.options[:metrics]
314
320
  bufs.each do |b|
315
321
  b.each_value do |v|
316
- tick_metrics if bud_instance and bud_instance.options[:metrics]
322
+ tick_metrics if do_metrics
317
323
  yield v
318
324
  end
319
325
  end
320
326
  end
321
327
 
322
- public
323
- def each_from_sym(buf_syms, &block) # :nodoc: all
324
- bufs = buf_syms.map do |s|
325
- case s
326
- when :storage then @storage
327
- when :delta then @delta
328
- when :new_delta then @new_delta
329
- else raise Bud::Error, "bad symbol passed into each_from_sym"
330
- end
331
- end
332
- each_from(bufs, &block)
333
- end
334
-
335
328
  private
336
329
  def init_storage
337
330
  @storage = {}
@@ -374,7 +367,7 @@ module Bud
374
367
  # checks for +item+ in the collection
375
368
  public
376
369
  def include?(item)
377
- return true if key_cols.nil? or (key_cols.empty? and length > 0)
370
+ return true if key_cols.nil?
378
371
  return false if item.nil?
379
372
  key = get_key_vals(item)
380
373
  return (item == self[key])
@@ -650,6 +643,11 @@ module Bud
650
643
  end
651
644
  end
652
645
 
646
+ superator "<~" do |o|
647
+ # Overridden when <~ is defined (i.e., channels and terminals)
648
+ raise Bud::CompileError, "#{tabname} cannot appear on the lhs of a <~ operator"
649
+ end
650
+
653
651
  def tick
654
652
  raise Bud::Error, "tick must be overriden in #{self.class}"
655
653
  end
@@ -1111,6 +1109,11 @@ module Bud
1111
1109
  return true
1112
1110
  end
1113
1111
 
1112
+ public
1113
+ def bootstrap
1114
+ # override BudCollection; pending should not be moved into delta.
1115
+ end
1116
+
1114
1117
  public
1115
1118
  def flush #:nodoc: all
1116
1119
  out_io = get_out_io
@@ -1257,7 +1260,7 @@ module Bud
1257
1260
 
1258
1261
  def invalidated=(val)
1259
1262
  # Might be reset to false at end-of-tick, but shouldn't be set to true
1260
- raise Bud::Error, "cannot not set invalidate on table '#{@tabname}'" if val
1263
+ raise Bud::Error, "cannot set invalidate on table '#{@tabname}'" if val
1261
1264
  super
1262
1265
  end
1263
1266
 
@@ -1383,7 +1386,7 @@ module Bud
1383
1386
  end
1384
1387
 
1385
1388
  class BudFileReader < BudReadOnly # :nodoc: all
1386
- def initialize(name, filename, delimiter, bud_instance) # :nodoc: all
1389
+ def initialize(name, filename, bud_instance) # :nodoc: all
1387
1390
  super(name, bud_instance, {[:lineno] => [:text]})
1388
1391
  @filename = filename
1389
1392
  @storage = {}
@@ -199,7 +199,7 @@ module Bud
199
199
  public
200
200
  def pro(the_name=elem_name, the_schema=schema, &blk)
201
201
  toplevel = @bud_instance.toplevel
202
- elem = Bud::PushElement.new("project#{object_id}",
202
+ elem = Bud::PushElement.new("project#{object_id}".to_sym,
203
203
  toplevel.this_rule_context,
204
204
  @collection_name, the_schema)
205
205
  self.wire_to(elem)
@@ -213,7 +213,7 @@ module Bud
213
213
  public
214
214
  def each_with_index(&blk)
215
215
  toplevel = @bud_instance.toplevel
216
- elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}",
216
+ elem = Bud::PushEachWithIndex.new("each_with_index#{object_id}".to_sym,
217
217
  toplevel.this_rule_context,
218
218
  @collection_name)
219
219
  elem.set_block(&blk)
@@ -284,7 +284,7 @@ module Bud
284
284
 
285
285
  aggpairs = prep_aggpairs(aggpairs)
286
286
  toplevel = @bud_instance.toplevel
287
- g = Bud::PushGroup.new('grp'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
287
+ g = Bud::PushGroup.new("grp#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
288
288
  @collection_name, keycols, aggpairs, the_schema, &blk)
289
289
  self.wire_to(g)
290
290
  toplevel.push_elems[[self.object_id, :group, keycols, aggpairs, blk]] = g
@@ -302,7 +302,7 @@ module Bud
302
302
  end
303
303
 
304
304
  aggpairs = [[agg, collection]]
305
- aa = Bud::PushArgAgg.new('argagg'+Time.new.tv_usec.to_s, toplevel.this_rule_context,
305
+ aa = Bud::PushArgAgg.new("argagg#{Time.new.tv_usec}".to_sym, toplevel.this_rule_context,
306
306
  @collection_name, gbkey_cols, aggpairs, schema, &blk)
307
307
  self.wire_to(aa)
308
308
  toplevel.push_elems[[self.object_id, :argagg, gbkey_cols, aggpairs, blk]] = aa
@@ -346,7 +346,7 @@ module Bud
346
346
  end
347
347
 
348
348
  def reduce(initial, &blk)
349
- retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}",
349
+ retval = Bud::PushReduce.new("reduce#{Time.new.tv_usec}".to_sym,
350
350
  @bud_instance, @collection_name,
351
351
  schema, initial, &blk)
352
352
  self.wire_to(retval)
@@ -498,7 +498,7 @@ module Bud
498
498
  end
499
499
 
500
500
  # send deltas out in all cases
501
- @collection.delta.each_value {|item| push_out(item)}
501
+ @collection.each_delta {|item| push_out(item)}
502
502
  end
503
503
  end
504
504
 
@@ -1,6 +1,5 @@
1
1
  require 'bud/executor/elements'
2
2
 
3
- $EMPTY = []
4
3
  module Bud
5
4
  class PushSHJoin < PushStatefulElement
6
5
  attr_reader :all_rels_below, :origpreds, :relnames, :keys, :localpreds
@@ -41,18 +40,6 @@ module Bud
41
40
  @selfjoins << name if cnt == 2
42
41
  end
43
42
 
44
- # derive schema: one column for each table.
45
- # duplicated inputs get distinguishing numeral
46
- @cols = []
47
- retval = @all_rels_below.reduce({}) do |memo, r|
48
- r_name = r.qualified_tabname.to_s
49
- memo[r_name] ||= 0
50
- newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
51
- @cols << newstr.to_sym
52
- memo[r_name] += 1
53
- memo
54
- end
55
-
56
43
  setup_preds(preds) unless preds.empty?
57
44
  setup_state
58
45
 
@@ -143,14 +130,6 @@ module Bud
143
130
  if source_elem.rescan
144
131
  puts "#{qualified_tabname} rel:#{i}(#{source_elem.qualified_tabname}) invalidated" if $BUD_DEBUG
145
132
  @hash_tables[i] = {}
146
- if i == 0
147
- # Only if i == 0 because outer joins in Bloom are left outer joins.
148
- # If i == 1, missing_keys will be corrected when items are populated
149
- # in the rhs fork.
150
- # XXX This is not modular. We are doing invalidation work for outer
151
- # joins, which is part of a separate module PushSHOuterJoin.
152
- @missing_keys.clear
153
- end
154
133
  end
155
134
  end
156
135
  end
@@ -165,7 +144,7 @@ module Bud
165
144
  # referenced in entry.
166
145
  subtuple = 0
167
146
  all_rels_below[0..all_rels_below.length-1].each_with_index do |t,i|
168
- if t.qualified_tabname == entry[0]
147
+ if t.qualified_tabname == name
169
148
  subtuple = i
170
149
  break
171
150
  end
@@ -183,7 +162,7 @@ module Bud
183
162
  elsif k.class <= Array
184
163
  [k,v]
185
164
  elsif k.class <= Symbol
186
- if @all_rels_below and @all_rels_below.length == 2
165
+ if @all_rels_below.length == 2
187
166
  [find_attr_match(k, @all_rels_below[0]), find_attr_match(v, @all_rels_below[1])]
188
167
  else
189
168
  [find_attr_match(k), find_attr_match(v)]
@@ -235,9 +214,10 @@ module Bud
235
214
 
236
215
  protected
237
216
  def canonicalize_localpreds(rel_list, preds) # :nodoc:all
238
- retval = preds.map do |p|
239
- # reverse if lhs is rel_list[1], *unless* it's a self-join!
240
- (p[0][0] == rel_list[1].qualified_tabname and p[0][0] != p[1][0]) ? p.reverse : p
217
+ second_rel = rel_list[1].qualified_tabname
218
+ preds.map do |p|
219
+ # reverse if lhs is second_rel *unless* it's a self-join!
220
+ (p[0][0] == second_rel and p[0][0] != p[1][0]) ? p.reverse : p
241
221
  end
242
222
  end
243
223
 
@@ -251,16 +231,15 @@ module Bud
251
231
  # again if we didn't rescan now.
252
232
  replay_join if @rescan
253
233
 
254
- if @selfjoins.include? source.qualified_tabname
234
+ source_tbl = source.qualified_tabname
235
+ if @selfjoins.include? source_tbl
255
236
  offsets = []
256
- @relnames.each_with_index{|r,i| offsets << i if r == source.qualified_tabname}
237
+ @relnames.each_with_index{|r,i| offsets << i if r == source_tbl}
257
238
  else
258
- offsets = [@relnames.index(source.qualified_tabname)]
259
- end
260
- raise Bud::Error, "item #{item.inspect} inserted into join from unknown source #{source.elem_name}" if offsets == $EMPTY
261
- offsets.each do |offset|
262
- insert_item(item, offset)
239
+ offsets = [@relnames.index(source_tbl)]
263
240
  end
241
+
242
+ offsets.each {|offset| insert_item(item, offset)}
264
243
  end
265
244
 
266
245
  protected
@@ -332,14 +311,32 @@ module Bud
332
311
  ####
333
312
  # and now, the Bloom-facing methods
334
313
  # given a * expression over n collections, form all combinations of items
335
- # subject to an array of predicates, pred
336
- # currently supports two options for equijoin predicates:
314
+ # subject to an array of predicates, +preds+.
315
+ # currently supports two syntax options for equijoin predicates:
337
316
  # general form: an array of arrays capturing a conjunction of equiv. classes
338
317
  # [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
339
318
  # common form: a hash capturing equality of a column on left with one on right.
340
319
  # :col1 => :col2 (same as lefttable.col1 => righttable.col2)
341
320
  public
342
321
  def pairs(*preds, &blk)
322
+ if @cols.nil?
323
+ # derive schema if needed: one column for each table. duplicated inputs
324
+ # get distinguishing numeral.
325
+ #
326
+ # XXX: actually, this seems completely bogus. The schema for the output
327
+ # of the join should depend on the join's *targetlist*.
328
+ @cols = []
329
+ retval = @all_rels_below.reduce({}) do |memo, r|
330
+ r_name = r.qualified_tabname.to_s
331
+ memo[r_name] ||= 0
332
+ newstr = r_name + (memo[r_name] > 0 ? "_#{memo[r_name]}" : "")
333
+ @cols << newstr.to_sym
334
+ memo[r_name] += 1
335
+ memo
336
+ end
337
+ setup_accessors
338
+ end
339
+
343
340
  @origpreds = preds
344
341
  setup_preds(preds) unless preds.empty?
345
342
  # given new preds, the state for the join will be different. set it up again.
@@ -361,20 +358,24 @@ module Bud
361
358
  end
362
359
 
363
360
  public
364
- def rights(*preds, &blk)
365
- @cols = blk.nil? ? @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols : nil
366
- setup_accessors if blk.nil?
361
+ def lefts(*preds, &blk)
362
+ if blk.nil?
363
+ @cols = @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols
364
+ setup_accessors
365
+ end
367
366
  pairs(*preds) do |x,y|
368
- blk.nil? ? y : blk.call(y)
367
+ blk.nil? ? x : blk.call(x)
369
368
  end
370
369
  end
371
370
 
372
371
  public
373
- def lefts(*preds, &blk)
374
- @cols = blk.nil? ? @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols : nil
375
- setup_accessors if blk.nil?
372
+ def rights(*preds, &blk)
373
+ if blk.nil?
374
+ @cols = @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols
375
+ setup_accessors
376
+ end
376
377
  pairs(*preds) do |x,y|
377
- blk.nil? ? x : blk.call(x)
378
+ blk.nil? ? y : blk.call(y)
378
379
  end
379
380
  end
380
381
 
@@ -405,13 +406,13 @@ module Bud
405
406
  public
406
407
  def flatten(*preds, &blk)
407
408
  if blk.nil?
408
- @cols = dupfree_schema(@bud_instance.tables[@cols[0]].cols + @bud_instance.tables[@cols[1]].cols)
409
+ @cols = dupfree_schema(@rels[0].cols + @rels[1].cols)
409
410
  else
410
411
  @cols = []
411
412
  end
412
413
  setup_accessors
413
414
  pairs(*preds) do |x,y|
414
- blk.nil? ? x.to_a + y.to_a : blk.call(x.to_a + y.to_a)
415
+ blk.nil? ? x + y : blk.call(x + y)
415
416
  end
416
417
  end
417
418
 
@@ -484,6 +485,14 @@ module Bud
484
485
  end
485
486
  end
486
487
  end
488
+
489
+ public
490
+ def invalidate_cache
491
+ super
492
+ # Only need to check the left join rel because outer joins in Bloom are
493
+ # left outer joins.
494
+ @missing_keys.clear if @rels.first.rescan
495
+ end
487
496
  end
488
497
 
489
498
 
@@ -497,11 +506,11 @@ module Bud
497
506
  # first flush, at which point we are sure to have seen all the t-side tuples
498
507
  # in this tick.
499
508
  class PushNotIn < PushStatefulElement
500
- def initialize(rellist, bud_instance, preds=nil, &blk) # :nodoc: all
509
+ def initialize(rellist, bud_instance, preds, &blk) # :nodoc: all
501
510
  @lhs, @rhs = rellist
502
511
  @lhs_keycols = nil
503
512
  @rhs_keycols = nil
504
- name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}"
513
+ name_in = "#{@lhs.qualified_tabname}_notin_#{@rhs.qualified_tabname}".to_sym
505
514
  super(name_in, bud_instance, nil, @lhs.schema)
506
515
  setup_preds(preds) unless preds.empty?
507
516
  @rhs_rcvd = false
@@ -532,12 +541,13 @@ module Bud
532
541
  end
533
542
 
534
543
  def find_col(colspec, rel)
535
- if colspec.is_a? Symbol
544
+ case colspec
545
+ when Symbol
536
546
  unless rel.respond_to? colspec
537
547
  raise Bud::Error, "attribute :#{colspec} not found in #{rel.qualified_tabname}"
538
548
  end
539
549
  col_desc = rel.send(colspec)
540
- elsif colspec.is_a? Array
550
+ when Array
541
551
  col_desc = colspec
542
552
  else
543
553
  raise Bud::Error, "symbol or column spec expected. Got #{colspec}"
@@ -546,8 +556,8 @@ module Bud
546
556
  end
547
557
 
548
558
  def get_key(item, offset)
549
- keycols = offset == 0 ? @lhs_keycols : @rhs_keycols
550
- keycols.nil? ? $EMPTY : item.values_at(*keycols)
559
+ keycols = (offset == 0 ? @lhs_keycols : @rhs_keycols)
560
+ keycols.nil? ? [] : item.values_at(*keycols)
551
561
  end
552
562
 
553
563
  public
@@ -580,8 +590,9 @@ module Bud
580
590
  # growing any more, until the next tick.
581
591
  unless @rhs_rcvd
582
592
  @rhs_rcvd = true
593
+ rhs_hash = @hash_tables[1]
583
594
  @hash_tables[0].each do |key,values|
584
- rhs_values = @hash_tables[1][key]
595
+ rhs_values = rhs_hash[key]
585
596
  values.each {|item| process_match(item, rhs_values)}
586
597
  end
587
598
  end
@@ -500,6 +500,11 @@ class Bud::LatticeWrapper
500
500
  end
501
501
  end
502
502
 
503
+ superator "<~" do |o|
504
+ # Overridden when <~ is defined (i.e., channels and terminals)
505
+ raise Bud::CompileError, "#{tabname} cannot appear on the lhs of a <~ operator"
506
+ end
507
+
503
508
  # XXX: refactor with BudCollection to avoid duplication of code
504
509
  def add_merge_target
505
510
  toplevel = @bud_instance.toplevel
@@ -10,17 +10,27 @@ class Class
10
10
  end
11
11
  end
12
12
 
13
+ $struct_classes = {}
14
+ $struct_lock = Mutex.new
15
+
13
16
  # FIXME: Should likely override #hash and #eql? as well.
14
17
  class Bud::TupleStruct < Struct
15
18
  include Comparable
16
19
 
20
+ def self.new_struct(cols)
21
+ $struct_lock.synchronize {
22
+ ($struct_classes[cols] ||= Bud::TupleStruct.new(*cols))
23
+ }
24
+ end
25
+
26
+ # XXX: This only considers two TupleStruct instances to be equal if they have
27
+ # the same schema (column names) AND the same contents; unclear if structural
28
+ # equality (consider only values, not column names) would be better.
17
29
  def <=>(o)
18
30
  if o.class == self.class
19
31
  self.each_with_index do |e, i|
20
32
  other = o[i]
21
33
  next if e == other
22
- return nil if e.nil?
23
- return nil if other.nil?
24
34
  return e <=> other
25
35
  end
26
36
  return 0
@@ -35,18 +45,27 @@ class Bud::TupleStruct < Struct
35
45
  if o.class == self.class
36
46
  return super
37
47
  elsif o.class == Array
38
- begin
39
- self.each_with_index do |el, i|
40
- return false if el != o[i]
41
- end
42
- return true
43
- rescue StandardError
44
- return false
48
+ return false if self.length != o.length
49
+ self.each_with_index do |el, i|
50
+ return false if el != o[i]
45
51
  end
52
+ return true
46
53
  end
47
54
  false
48
55
  end
49
56
 
57
+ def hash
58
+ self.values.hash
59
+ end
60
+
61
+ def eql?(o)
62
+ self == o
63
+ end
64
+
65
+ def +(o)
66
+ self.to_ary + o.to_ary
67
+ end
68
+
50
69
  def to_msgpack(out=nil)
51
70
  self.to_a.to_msgpack(out)
52
71
  end
@@ -56,15 +75,23 @@ class Bud::TupleStruct < Struct
56
75
  end
57
76
 
58
77
  alias :to_s :inspect
78
+ alias :to_ary :to_a
59
79
  end
60
80
 
61
81
  # XXX: TEMPORARY/UGLY hack to ensure that arrays and structs compare. This can be
62
82
  # removed once tests are rewritten.
63
83
  class Array
64
- alias :oldeq :==
84
+ alias :old_eq :==
85
+ alias :old_eql? :eql?
86
+
65
87
  def ==(o)
66
88
  o = o.to_a if o.kind_of? Bud::TupleStruct
67
- self.oldeq(o)
89
+ self.old_eq(o)
90
+ end
91
+
92
+ def eql?(o)
93
+ o = o.to_a if o.kind_of? Bud::TupleStruct
94
+ self.old_eql?(o)
68
95
  end
69
96
  end
70
97