cascading.jruby 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -0,0 +1,17 @@
1
+ 0.0.5 - Addressing Janino pain
2
+
3
+ This release expands upon the ExprStub class adding composition time compilation
4
+ and validation of Janino expressions. This causes faulty Janino expressions to
5
+ fail early rather than hours into a job.
6
+
7
+ 0.0.4 - Cleanup
8
+
9
+ This release merges in a good deal of work from Etsy and addresses many internal
10
+ structural issues with cascading.jruby node hierarchies. Greatly expands unit
11
+ tests and makes samples easily executable as a form of functional testing.
12
+
13
+ 0.0.3 - Etsy's first version of cascading.jruby
14
+
15
+ First major commit of work from Etsy that introduces scope propagation to
16
+ cascading.jruby scripts in support of field propagation and primary keys.
17
+ Mainly a cleanup and testing release.
data/README.md CHANGED
@@ -4,4 +4,6 @@
4
4
 
5
5
  It requires Hadoop (>= 0.18.3) and Cascading (>=1.0.1) to be set via the environment variables: `HADOOP_HOME` and `CASCADING_HOME`
6
6
 
7
+ It has been tested on JRuby versions 1.2.0, 1.4.0, and 1.5.3.
8
+
7
9
  Copyright 2009, Grégoire Marabout.
@@ -10,7 +10,7 @@ module Cascading
10
10
  class Assembly < Cascading::Node
11
11
  include Operations
12
12
 
13
- attr_accessor :tail_pipe, :head_pipe, :outgoing_scopes
13
+ attr_accessor :head_pipe, :tail_pipe, :incoming_scopes, :outgoing_scopes
14
14
 
15
15
  def initialize(name, parent, outgoing_scopes = {})
16
16
  super(name, parent)
@@ -27,6 +27,15 @@ module Cascading
27
27
  @outgoing_scopes[name] ||= Scope.empty_scope(name)
28
28
  end
29
29
  @tail_pipe = @head_pipe
30
+ @incoming_scopes = [scope]
31
+ end
32
+
33
+ def describe(offset = '')
34
+ incoming_scopes_desc = "#{incoming_scopes.map{ |incoming_scope| incoming_scope.values_fields.to_a.inspect }.join(', ')}"
35
+ incoming_scopes_desc = "(#{incoming_scopes_desc})" unless incoming_scopes.size == 1
36
+ description = "#{offset}#{name}:assembly :: #{incoming_scopes_desc} -> #{scope.values_fields.to_a.inspect}"
37
+ description += "\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}" unless children.empty?
38
+ description
30
39
  end
31
40
 
32
41
  def parent_flow
@@ -90,7 +99,7 @@ module Cascading
90
99
  first_fields = first_fields - scope.grouping_fields.to_a
91
100
  if first_fields.size > 0
92
101
  first *first_fields
93
- puts "Firsting: #{first_fields.inspect} in assembly: #{@name}"
102
+ puts "Firsting: #{first_fields.inspect} in assembly: #{name}"
94
103
  end
95
104
 
96
105
  bind_names scope.grouping_fields.to_a if every_applied?
@@ -102,14 +111,14 @@ module Cascading
102
111
  end
103
112
 
104
113
  def to_s
105
- "#{@name} : head pipe : #{@head_pipe} - tail pipe: #{@tail_pipe}"
114
+ "#{name} : head pipe : #{@head_pipe} - tail pipe: #{@tail_pipe}"
106
115
  end
107
116
 
108
117
  # Builds a join (CoGroup) pipe. Requires a list of assembly names to join.
109
118
  def join(*args, &block)
110
119
  options = args.extract_options!
111
120
 
112
- pipes, incoming_scopes = [], []
121
+ pipes, @incoming_scopes = [], []
113
122
  args.each do |assembly_name|
114
123
  assembly = parent_flow.find_child(assembly_name)
115
124
  raise "Could not find assembly '#{assembly_name}' in join" unless assembly
@@ -129,7 +138,7 @@ module Cascading
129
138
  group_fields << fields(group_fields_args)
130
139
  end
131
140
  elsif group_fields_args.kind_of?(Hash)
132
- pipes, incoming_scopes = [], []
141
+ pipes, @incoming_scopes = [], []
133
142
  keys = group_fields_args.keys.sort
134
143
  keys.each do |assembly_name|
135
144
  v = group_fields_args[assembly_name]
@@ -233,7 +242,7 @@ module Cascading
233
242
  # This actually creates a GroupBy pipe.
234
243
  # It expects a list of assembly names as parameter.
235
244
  def union_pipes(*args)
236
- pipes, incoming_scopes = [], []
245
+ pipes, @incoming_scopes = [], []
237
246
  args[0].each do |assembly_name|
238
247
  assembly = parent_flow.find_child(assembly_name)
239
248
  pipes << assembly.tail_pipe
@@ -281,9 +290,7 @@ module Cascading
281
290
  # Example:
282
291
  # project "field1", "field2"
283
292
  def project(*args)
284
- fields = fields(args)
285
- operation = Java::CascadingOperation::Identity.new
286
- make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
293
+ each fields(args), :function => Java::CascadingOperation::Identity.new
287
294
  end
288
295
 
289
296
  # Removes the specified fields from the current assembly.
@@ -301,9 +308,7 @@ module Cascading
301
308
  # Example:
302
309
  # bind_names "field1", "field2"
303
310
  def bind_names(*new_names)
304
- new_fields = fields(new_names)
305
- operation = Java::CascadingOperation::Identity.new(new_fields)
306
- make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
311
+ each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
307
312
  end
308
313
 
309
314
  # Renames fields according to the mapping provided.
@@ -319,9 +324,7 @@ module Cascading
319
324
  old_key = scope.primary_key_fields.to_a
320
325
  new_key = old_key.map{ |name| name_map[name] || name }
321
326
 
322
- new_fields = fields(new_names)
323
- operation = Java::CascadingOperation::Identity.new(new_fields)
324
- make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
327
+ each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
325
328
  primary(*new_key)
326
329
  end
327
330
 
@@ -330,22 +333,19 @@ module Cascading
330
333
  types = JAVA_TYPE_MAP.values_at(*type_map.values_at(*names))
331
334
  fields = fields(names)
332
335
  types = types.to_java(java.lang.Class)
333
- operation = Java::CascadingOperation::Identity.new(fields, types)
334
- make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
336
+ each fields, :function => Java::CascadingOperation::Identity.new(fields, types)
335
337
  end
336
338
 
337
339
  def copy(*args)
338
340
  options = args.extract_options!
339
341
  from = args[0] || all_fields
340
342
  into = args[1] || options[:into] || all_fields
341
- operation = Java::CascadingOperation::Identity.new(fields(into))
342
- make_each(Java::CascadingPipe::Each, @tail_pipe, fields(from), operation, Java::CascadingTuple::Fields::ALL)
343
+ each fields(from), :function => Java::CascadingOperation::Identity.new(fields(into)), :output => all_fields
343
344
  end
344
345
 
345
346
  # A pipe that does nothing.
346
347
  def pass(*args)
347
- operation = Java::CascadingOperation::Identity.new
348
- make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
348
+ each all_fields, :function => Java::CascadingOperation::Identity.new
349
349
  end
350
350
 
351
351
  def assert(*args)
@@ -593,9 +593,8 @@ module Cascading
593
593
  value = args[field_name]
594
594
 
595
595
  if value.kind_of?(ExprStub)
596
- each all_fields,
597
- :function => expression_function(field_name, :expression => value.expression,
598
- :parameters => value.types), :output => all_fields
596
+ value.validate_scope(scope)
597
+ each all_fields, :function => expression_function(field_name, :expression => value.expression, :parameters => value.types), :output => all_fields
599
598
  else
600
599
  each all_fields, :function => insert_function([field_name], :values => [value]), :output => all_fields
601
600
  end
@@ -612,15 +611,23 @@ module Cascading
612
611
  # * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
613
612
  # same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
614
613
  # expression-based. This is incompatible with the _pattern_ option.
614
+ # * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
615
+ # expression validation. Defaults to true.
616
+ # * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
617
+ # expression validation. Defaults to {}.
615
618
  def filter(*args)
616
619
  options = args.extract_options!
617
620
  from = options.delete(:from) || all_fields
618
621
  expression = options.delete(:expression) || args.shift
619
622
  regex = options.delete(:pattern)
623
+ validate = options.has_key?(:validate) ? options.delete(:validate) : true
624
+ validate_with = options.has_key?(:validate_with) ? options.delete(:validate_with) : {}
625
+
620
626
  if expression
621
- stub = ExprStub.new(expression)
627
+ stub = expr(expression, { :validate => validate, :validate_with => validate_with })
622
628
  types, expression = stub.types, stub.expression
623
629
 
630
+ stub.validate_scope(scope)
624
631
  each from, :filter => expression_filter(
625
632
  :parameters => types,
626
633
  :expression => expression
@@ -650,6 +657,10 @@ module Cascading
650
657
  # * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
651
658
  # same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
652
659
  # expression-based.
660
+ # * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
661
+ # expression validation. Defaults to true.
662
+ # * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
663
+ # expression validation. Defaults to {}.
653
664
  def reject(*args)
654
665
  options = args.extract_options
655
666
  raise "Regex not allowed" if options && options[:pattern]
@@ -665,6 +676,10 @@ module Cascading
665
676
  # * <tt>:expression</tt> a string. Specifies a Janino expression used to select the tuples. This option has the
666
677
  # same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
667
678
  # expression-based.
679
+ # * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
680
+ # expression validation. Defaults to true.
681
+ # * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
682
+ # expression validation. Defaults to {}.
668
683
  def where(*args)
669
684
  options = args.extract_options
670
685
  raise "Regex not allowed" if options && options[:pattern]
@@ -4,21 +4,28 @@
4
4
 
5
5
  module Cascading
6
6
  class Node
7
- attr_accessor :name, :parent, :children, :last_child
7
+ attr_accessor :name, :parent, :children, :child_names, :last_child
8
8
 
9
9
  def initialize(name, parent)
10
10
  @name = name
11
11
  @parent = parent
12
12
  @children = {}
13
+ @child_names = []
13
14
  @last_child = nil
14
15
  end
15
16
 
16
17
  def add_child(node)
17
18
  @children[node.name] = node
19
+ @child_names << node.name
18
20
  @last_child = node
19
21
  node
20
22
  end
21
23
 
24
+ def describe(offset = '')
25
+ "#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
26
+ end
27
+ alias desc describe
28
+
22
29
  def find_child(name)
23
30
  children.each do |child_name, child|
24
31
  return child if child_name == name
@@ -22,6 +22,10 @@ module Cascading
22
22
  flow
23
23
  end
24
24
 
25
+ def describe(offset = '')
26
+ "#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
27
+ end
28
+
25
29
  def draw(dir, properties = nil)
26
30
  @children.each do |name, flow|
27
31
  flow.connect(properties).writeDOT("#{dir}/#{name}.dot")
@@ -25,9 +25,14 @@ module Cascading
25
25
  flow
26
26
  end
27
27
 
28
- def expr(s)
29
- return s if s.kind_of?(ExprStub)
30
- ExprStub.new(s)
28
+ def describe
29
+ Cascade.all.map{ |cascade| cascade.describe }.join("\n")
30
+ end
31
+ alias desc describe
32
+
33
+ # See ExprStub.expr
34
+ def expr(expression, params = {})
35
+ ExprStub.expr(expression, params)
31
36
  end
32
37
 
33
38
  # Creates a cascading.tuple.Fields instance from a string or an array of strings.
@@ -42,7 +47,7 @@ module Cascading
42
47
  end
43
48
  raise "Fields cannot be nil: #{fields.inspect}" if fields.include?(nil)
44
49
  end
45
- return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) && JRUBY_VERSION > '1.2.0' ? f.to_java(:int) : f }.to_java(java.lang.Comparable))
50
+ return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
46
51
  end
47
52
 
48
53
  def all_fields
@@ -1,30 +1,35 @@
1
- # Wrapper meant for NativeExceptions that wrap exceptions from Cascading. The
2
- # trouble is that the combined stack traces are so long, printing them case
3
- # actually omit locations in the cascading.jruby or application code that
4
- # matter, leaving you with no information about the source of the error. This
5
- # class just swallows all the nested exceptions, printing their message, while
6
- # giving you a direct route into JRuby code to the cause of the problem.
7
- class CascadingException < StandardError
8
- def initialize(native_exception, message)
9
- @ne = native_exception
10
- super("#{message}\n#{trace_causes(@ne, 1)}")
11
- end
1
+ # NativeException wrapper that prints the full nested stack trace of the Java
2
+ # exception and all of its causes wrapped by the NativeException.
3
+ # NativeException by default reveals only the first cause, which is
4
+ # insufficient for tracing cascading.jruby errors into JRuby code or revealing
5
+ # underlying Janino expression problems.
6
+ module Cascading
7
+ class CascadingException < StandardError
8
+ def initialize(native_exception, message)
9
+ @ne = native_exception
10
+ super("#{message}\n#{trace_causes(@ne, 1)}")
11
+ end
12
12
 
13
- def cause(depth)
14
- fetch_cause(@ne, depth)
15
- end
13
+ def cause(depth)
14
+ fetch_cause(@ne, depth)
15
+ end
16
16
 
17
- private
17
+ private
18
18
 
19
- def fetch_cause(ne, depth)
20
- return ne if depth <= 1
21
- fetch_cause(ne.cause, depth - 1)
22
- end
19
+ def fetch_cause(ne, depth)
20
+ return ne if depth <= 1
21
+ fetch_cause(ne.cause, depth - 1)
22
+ end
23
23
 
24
- def trace_causes(ne, depth)
25
- return unless ne
26
- trace = "Cause #{depth}: #{ne}\n"
27
- trace += ne.stack_trace.map { |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n") + "\n" if ne.respond_to?(:stack_trace)
28
- trace += "#{trace_causes(ne.cause, depth + 1)}"
24
+ def trace_causes(ne, depth)
25
+ return unless ne
26
+ trace = "Cause #{depth}: #{ne.respond_to?(:java_class) ? ne.java_class : ne.class}: #{ne}\n"
27
+ if ne.respond_to?(:stack_trace)
28
+ trace += "#{ne.stack_trace.map{ |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n")}\n"
29
+ elsif ne.respond_to?(:backtrace)
30
+ trace += " #{ne.backtrace.join("\n ")}\n"
31
+ end
32
+ trace += "#{trace_causes(ne.cause, depth + 1)}"
33
+ end
29
34
  end
30
35
  end
@@ -1,33 +1,164 @@
1
- class ExprStub
2
- attr_accessor :expression, :types
1
+ module Cascading
2
+ class ExprStub
3
+ attr_accessor :expression, :types, :input_expression
3
4
 
4
- def initialize(st)
5
- @expression = st.dup
6
- @types = {}
5
+ # ExprStub requires a Janino expression decorated with field types. For
6
+ # example: '"Found: " + (x:int + y:int) + " " + z:string'. Type names are
7
+ # defined in Cascading::JAVA_TYPE_MAP.
8
+ def initialize(expression)
9
+ @input_expression = expression
10
+ @expression = expression.dup
11
+ @types = {}
7
12
 
8
- # Simple regexp based parser for types
13
+ # Simple regexp based parser for types
9
14
 
10
- JAVA_TYPE_MAP.each do |sym, klass|
11
- @expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
12
- name = match.split(/:/).first.gsub(/\s+/, "")
13
- @types[name] = klass
14
- match.gsub(/:#{sym.to_s}/, "")
15
+ JAVA_TYPE_MAP.each do |sym, klass|
16
+ @expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
17
+ name = match.split(/:/).first.gsub(/\s+/, "")
18
+ @types[name] = klass
19
+ match.gsub(/:#{sym.to_s}/, "")
20
+ end
15
21
  end
16
22
  end
17
- end
18
23
 
19
- def self.split_hash(h)
20
- keys, values = h.keys.sort, []
21
- keys.each do |key|
22
- values << h[key]
24
+ def to_s
25
+ @input_expression
26
+ end
27
+
28
+ # Convenience constructor for an ExprStub that optionally performs
29
+ # validation. Takes a string to use as a Janino expression and an optional
30
+ # params hash. By default, the param :validate is set to true (performs
31
+ # expression validation using default actual argument values) and the param
32
+ # :validate_with is set to {} (which doesn't override any of the default
33
+ # actual argument values used for validation).
34
+ def self.expr(expression, params = {})
35
+ params = { :validate => true, :validate_with => {} }.merge(params)
36
+ expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
37
+ expr_stub.validate(params[:validate_with]) if params[:validate]
38
+ puts "Expression validation is disabled for '#{expression}'" unless params[:validate]
39
+ expr_stub
40
+ end
41
+
42
+ # Scan, parse, and compile expression, then return this ExprStub upon
43
+ # success. Throws a CascadingException upon failure.
44
+ def compile
45
+ evaluator
46
+ self
47
+ end
48
+
49
+ # Evaluates this ExprStub given a hash mapping argument names to argument
50
+ # values. Names may be strings or symbols. Throws a CascadingException
51
+ # upon failure.
52
+ def eval(actual_args)
53
+ actual_args = actual_args.inject({}) do |string_keys, (arg, value)|
54
+ string_keys[arg.to_s] = specific_to_java(value, @types[arg.to_s])
55
+ string_keys
56
+ end
57
+ args, values = split_hash(actual_args)
58
+ unused = validate_fields(args)
59
+ return self.eval(actual_args.reject{ |arg, value| unused.include?(arg) }) unless unused.empty?
60
+ evaluate(values)
61
+ end
62
+
63
+ # Evaluates this ExprStub with default values for each actual argument.
64
+ # Values may be overridden with the optional actual_args argument, which
65
+ # accepts a hash like ExprStub#eval. Throws a CascadingException upon
66
+ # failure.
67
+ def validate(actual_args = {})
68
+ self.eval(test_values.merge(actual_args))
69
+ end
70
+
71
+ def validate_scope(scope)
72
+ validate_fields(scope.values_fields.to_a)
73
+ end
74
+
75
+ # Throws an exception if any arguments required by this ExprStub are
76
+ # missing from fields. Returns those fields which are unused. Throws an
77
+ # ExprArgException upon failure.
78
+ def validate_fields(fields)
79
+ names = @types.keys.sort
80
+ missing = names - fields
81
+ raise ExprArgException.new("Expression '#{@expression}' is missing these fields: #{missing.inspect}\nRequires: #{names.inspect}, found: #{fields.inspect}") unless missing.empty?
82
+ fields - names
83
+ end
84
+
85
+ private
86
+
87
+ def split_hash(h)
88
+ keys, values = h.sort.inject([[], []]) do |(keys, values), (key, value)|
89
+ [keys << key, values << value]
90
+ end
91
+ [keys, values]
92
+ end
93
+
94
+ # Evaluate this ExprStub given an array of actual arguments. Throws an
95
+ # CascadingException upon failure. GOTCHA: requires values to be in order
96
+ # of lexicographically sorted formal arguments.
97
+ def evaluate(values)
98
+ begin
99
+ evaluator.evaluate(values.to_java)
100
+ rescue NativeException => ne
101
+ raise CascadingException.new(ne, "Exception encountered while evaluating '#{@expression}' with arguments: #{values.inspect}")
102
+ end
23
103
  end
24
- [keys, values]
25
- end
26
104
 
27
- def self.split_names_and_types(expr_types)
28
- names, types = split_hash(expr_types)
29
- names = names.to_java(java.lang.String)
30
- types = types.to_java(java.lang.Class)
31
- [names, types]
105
+ # Building an evaluator ensures that the expression scans, parses, and
106
+ # compiles
107
+ def evaluator
108
+ begin
109
+ names, types = names_and_types
110
+ Java::OrgCodehausJanino::ExpressionEvaluator.new(@expression, java.lang.Comparable.java_class, names, types)
111
+ rescue NativeException => ne
112
+ raise CascadingException.new(ne, "Exception encountered while compiling '#{@expression}'")
113
+ end
114
+ end
115
+
116
+ # Extract Java names and types from @types hash
117
+ def names_and_types
118
+ names, types = split_hash(@types)
119
+ [names.to_java(java.lang.String), types.to_java(java.lang.Class)]
120
+ end
121
+
122
+ # Makes best effort to convert Ruby numbers into the Java numeric type
123
+ # expected by a Janino expression. However, if the conversion fails, it
124
+ # returns the original value so that the exception thrown will be from
125
+ # Janino, not this code.
126
+ def specific_to_java(value, type)
127
+ # GOTCHA: Java's Float and Long have constructors that take strings and
128
+ # parse them. If value is a string representation of a number, this code
129
+ # could coerce it to a number whereas invocation of the Janino expression
130
+ # would fail. We therefore punt if value is a String.
131
+ return value if value.kind_of?(::String)
132
+ if type == java.lang.Float.java_class
133
+ return value if value.kind_of?(::Integer)
134
+ java.lang.Float.new(value) rescue value
135
+ elsif type == java.lang.Long.java_class && JRUBY_VERSION <= '1.2.0'
136
+ return value if value.kind_of?(::Float)
137
+ java.lang.Long.new(value) rescue value
138
+ elsif type == java.lang.Integer.java_class && JRUBY_VERSION > '1.2.0'
139
+ return value if value.kind_of?(::Float)
140
+ java.lang.Integer.new(value) rescue value
141
+ else
142
+ value
143
+ end
144
+ end
145
+
146
+ @@defaults = {
147
+ java.lang.Integer.java_class => JRUBY_VERSION > '1.2.0' ? java.lang.Integer.new(0) : 0,
148
+ java.lang.Boolean.java_class => false,
149
+ java.lang.Double.java_class => 0.0,
150
+ java.lang.Float.java_class => java.lang.Float.new(0.0),
151
+ java.lang.Long.java_class => JRUBY_VERSION > '1.2.0' ? 0 : java.lang.Long.new(0),
152
+ java.lang.String.java_class => nil,
153
+ }
154
+
155
+ def test_values
156
+ @types.sort.inject({}) do |test_values, (name, type)|
157
+ test_values[name] = @@defaults[type]
158
+ test_values
159
+ end
160
+ end
32
161
  end
162
+
163
+ class ExprArgException < StandardError; end
33
164
  end
@@ -8,10 +8,10 @@ module Cascading
8
8
  class Flow < Cascading::Node
9
9
  extend Registerable
10
10
 
11
- attr_accessor :properties, :sources, :sinks, :outgoing_scopes, :listeners
11
+ attr_accessor :properties, :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
12
12
 
13
13
  def initialize(name, parent)
14
- @properties, @sources, @sinks, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
14
+ @properties, @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, {}, []
15
15
  super(name, parent)
16
16
  self.class.add(name, self)
17
17
  end
@@ -29,9 +29,9 @@ module Cascading
29
29
  # reference a path.
30
30
  def sink(*args)
31
31
  if (args.size == 2)
32
- @sinks[args[0]] = args[1]
32
+ sinks[args[0]] = args[1]
33
33
  elsif (args.size == 1)
34
- @sinks[@name] = args[0]
34
+ sinks[name] = args[0]
35
35
  end
36
36
  end
37
37
 
@@ -40,14 +40,24 @@ module Cascading
40
40
  # reference a path.
41
41
  def source(*args)
42
42
  if (args.size == 2)
43
- @sources[args[0]] = args[1]
44
- @outgoing_scopes[args[0]] = Scope.tap_scope(args[1], args[0])
43
+ sources[args[0]] = args[1]
44
+ incoming_scopes[args[0]] = Scope.tap_scope(args[1], args[0])
45
+ outgoing_scopes[args[0]] = incoming_scopes[args[0]]
45
46
  elsif (args.size == 1)
46
- @sources[@name] = args[0]
47
- @outgoing_scopes[@name] = Scope.empty_scope(@name)
47
+ sources[name] = args[0]
48
+ incoming_scopes[name] = Scope.empty_scope(name)
49
+ outgoing_scopes[name] = incoming_scopes[name]
48
50
  end
49
51
  end
50
52
 
53
+ def describe(offset = '')
54
+ description = "#{offset}#{name}:flow\n"
55
+ description += "#{sources.keys.map{ |source| "#{offset} #{source}:source :: #{incoming_scopes[source].values_fields.to_a.inspect}" }.join("\n")}\n"
56
+ description += "#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}\n"
57
+ description += "#{sinks.keys.map{ |sink| "#{offset} #{sink}:sink :: #{outgoing_scopes[sink].values_fields.to_a.inspect}" }.join("\n")}"
58
+ description
59
+ end
60
+
51
61
  def scope(name = nil)
52
62
  raise 'Must specify name if no children have been defined yet' unless name || last_child
53
63
  name ||= last_child.name
data/lib/cascading.rb CHANGED
@@ -6,7 +6,7 @@ require 'java'
6
6
 
7
7
  module Cascading
8
8
  # :stopdoc:
9
- VERSION = '0.0.4'
9
+ VERSION = '0.0.5'
10
10
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
11
11
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
12
12
  CASCADING_HOME = ENV['CASCADING_HOME']
data/samples/copy.rb CHANGED
@@ -10,7 +10,8 @@ cascade 'copy' do
10
10
 
11
11
  assembly 'input' do
12
12
  rename 'line' => 'value'
13
- reject 'value:string.indexOf("R") == -1'
13
+ # We override validate_with because we know line will never be null
14
+ reject 'value:string.indexOf("R") == -1', :validate_with => { :value => 'nothinghere' }
14
15
  end
15
16
 
16
17
  sink 'input', tap('output/copy', :sink_mode => :replace)
@@ -1,6 +1,11 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
- describe Cascading do
3
+ context Cascading do
4
+ it 'should handle string and integer field names' do
5
+ f = fields(['a', 1, 'b', 2])
6
+ f.to_a.should == ['a', 1, 'b', 2]
7
+ end
8
+
4
9
  it 'should dedup field names from multiple sources' do
5
10
  left_names = ['a', 'b', 'c', 'd', 'e']
6
11
  mid_names = ['a', 'f']