cascading.jruby 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -0,0 +1,17 @@
1
+ 0.0.5 - Addressing Janino pain
2
+
3
+ This release expands upon the ExprStub class, adding composition-time compilation
4
+ and validation of Janino expressions. This causes faulty Janino expressions to
5
+ fail early rather than hours into a job.
6
+
7
+ 0.0.4 - Cleanup
8
+
9
+ This release merges in a good deal of work from Etsy and addresses many internal
10
+ structural issues with cascading.jruby node hierarchies. Greatly expands unit
11
+ tests and makes samples easily executable as a form of functional testing.
12
+
13
+ 0.0.3 - Etsy's first version of cascading.jruby
14
+
15
+ First major commit of work from Etsy that introduces scope propagation to
16
+ cascading.jruby scripts in support of field propagation and primary keys.
17
+ Mainly a cleanup and testing release.
data/README.md CHANGED
@@ -4,4 +4,6 @@
4
4
 
5
5
  It requires Hadoop (>= 0.18.3) and Cascading (>=1.0.1) to be set via the environment variables: `HADOOP_HOME` and `CASCADING_HOME`
6
6
 
7
+ It has been tested on JRuby versions 1.2.0, 1.4.0, and 1.5.3.
8
+
7
9
  Copyright 2009, Grégoire Marabout.
@@ -10,7 +10,7 @@ module Cascading
10
10
  class Assembly < Cascading::Node
11
11
  include Operations
12
12
 
13
- attr_accessor :tail_pipe, :head_pipe, :outgoing_scopes
13
+ attr_accessor :head_pipe, :tail_pipe, :incoming_scopes, :outgoing_scopes
14
14
 
15
15
  def initialize(name, parent, outgoing_scopes = {})
16
16
  super(name, parent)
@@ -27,6 +27,15 @@ module Cascading
27
27
  @outgoing_scopes[name] ||= Scope.empty_scope(name)
28
28
  end
29
29
  @tail_pipe = @head_pipe
30
+ @incoming_scopes = [scope]
31
+ end
32
+
33
+ def describe(offset = '')
34
+ incoming_scopes_desc = "#{incoming_scopes.map{ |incoming_scope| incoming_scope.values_fields.to_a.inspect }.join(', ')}"
35
+ incoming_scopes_desc = "(#{incoming_scopes_desc})" unless incoming_scopes.size == 1
36
+ description = "#{offset}#{name}:assembly :: #{incoming_scopes_desc} -> #{scope.values_fields.to_a.inspect}"
37
+ description += "\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}" unless children.empty?
38
+ description
30
39
  end
31
40
 
32
41
  def parent_flow
@@ -90,7 +99,7 @@ module Cascading
90
99
  first_fields = first_fields - scope.grouping_fields.to_a
91
100
  if first_fields.size > 0
92
101
  first *first_fields
93
- puts "Firsting: #{first_fields.inspect} in assembly: #{@name}"
102
+ puts "Firsting: #{first_fields.inspect} in assembly: #{name}"
94
103
  end
95
104
 
96
105
  bind_names scope.grouping_fields.to_a if every_applied?
@@ -102,14 +111,14 @@ module Cascading
102
111
  end
103
112
 
104
113
  def to_s
105
- "#{@name} : head pipe : #{@head_pipe} - tail pipe: #{@tail_pipe}"
114
+ "#{name} : head pipe : #{@head_pipe} - tail pipe: #{@tail_pipe}"
106
115
  end
107
116
 
108
117
  # Builds a join (CoGroup) pipe. Requires a list of assembly names to join.
109
118
  def join(*args, &block)
110
119
  options = args.extract_options!
111
120
 
112
- pipes, incoming_scopes = [], []
121
+ pipes, @incoming_scopes = [], []
113
122
  args.each do |assembly_name|
114
123
  assembly = parent_flow.find_child(assembly_name)
115
124
  raise "Could not find assembly '#{assembly_name}' in join" unless assembly
@@ -129,7 +138,7 @@ module Cascading
129
138
  group_fields << fields(group_fields_args)
130
139
  end
131
140
  elsif group_fields_args.kind_of?(Hash)
132
- pipes, incoming_scopes = [], []
141
+ pipes, @incoming_scopes = [], []
133
142
  keys = group_fields_args.keys.sort
134
143
  keys.each do |assembly_name|
135
144
  v = group_fields_args[assembly_name]
@@ -233,7 +242,7 @@ module Cascading
233
242
  # This actually creates a GroupBy pipe.
234
243
  # It expects a list of assembly names as parameter.
235
244
  def union_pipes(*args)
236
- pipes, incoming_scopes = [], []
245
+ pipes, @incoming_scopes = [], []
237
246
  args[0].each do |assembly_name|
238
247
  assembly = parent_flow.find_child(assembly_name)
239
248
  pipes << assembly.tail_pipe
@@ -281,9 +290,7 @@ module Cascading
281
290
  # Example:
282
291
  # project "field1", "field2"
283
292
  def project(*args)
284
- fields = fields(args)
285
- operation = Java::CascadingOperation::Identity.new
286
- make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
293
+ each fields(args), :function => Java::CascadingOperation::Identity.new
287
294
  end
288
295
 
289
296
  # Removes the specified fields from the current assembly.
@@ -301,9 +308,7 @@ module Cascading
301
308
  # Example:
302
309
  # bind_names "field1", "field2"
303
310
  def bind_names(*new_names)
304
- new_fields = fields(new_names)
305
- operation = Java::CascadingOperation::Identity.new(new_fields)
306
- make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
311
+ each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
307
312
  end
308
313
 
309
314
  # Renames fields according to the mapping provided.
@@ -319,9 +324,7 @@ module Cascading
319
324
  old_key = scope.primary_key_fields.to_a
320
325
  new_key = old_key.map{ |name| name_map[name] || name }
321
326
 
322
- new_fields = fields(new_names)
323
- operation = Java::CascadingOperation::Identity.new(new_fields)
324
- make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
327
+ each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
325
328
  primary(*new_key)
326
329
  end
327
330
 
@@ -330,22 +333,19 @@ module Cascading
330
333
  types = JAVA_TYPE_MAP.values_at(*type_map.values_at(*names))
331
334
  fields = fields(names)
332
335
  types = types.to_java(java.lang.Class)
333
- operation = Java::CascadingOperation::Identity.new(fields, types)
334
- make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
336
+ each fields, :function => Java::CascadingOperation::Identity.new(fields, types)
335
337
  end
336
338
 
337
339
  def copy(*args)
338
340
  options = args.extract_options!
339
341
  from = args[0] || all_fields
340
342
  into = args[1] || options[:into] || all_fields
341
- operation = Java::CascadingOperation::Identity.new(fields(into))
342
- make_each(Java::CascadingPipe::Each, @tail_pipe, fields(from), operation, Java::CascadingTuple::Fields::ALL)
343
+ each fields(from), :function => Java::CascadingOperation::Identity.new(fields(into)), :output => all_fields
343
344
  end
344
345
 
345
346
  # A pipe that does nothing.
346
347
  def pass(*args)
347
- operation = Java::CascadingOperation::Identity.new
348
- make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
348
+ each all_fields, :function => Java::CascadingOperation::Identity.new
349
349
  end
350
350
 
351
351
  def assert(*args)
@@ -593,9 +593,8 @@ module Cascading
593
593
  value = args[field_name]
594
594
 
595
595
  if value.kind_of?(ExprStub)
596
- each all_fields,
597
- :function => expression_function(field_name, :expression => value.expression,
598
- :parameters => value.types), :output => all_fields
596
+ value.validate_scope(scope)
597
+ each all_fields, :function => expression_function(field_name, :expression => value.expression, :parameters => value.types), :output => all_fields
599
598
  else
600
599
  each all_fields, :function => insert_function([field_name], :values => [value]), :output => all_fields
601
600
  end
@@ -612,15 +611,23 @@ module Cascading
612
611
  # * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
613
612
  # same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
614
613
  # expression-based. This is incompatible with the _pattern_ option.
614
+ # * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
615
+ # expression validation. Defaults to true.
616
+ # * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
617
+ # expression validation. Defaults to {}.
615
618
  def filter(*args)
616
619
  options = args.extract_options!
617
620
  from = options.delete(:from) || all_fields
618
621
  expression = options.delete(:expression) || args.shift
619
622
  regex = options.delete(:pattern)
623
+ validate = options.has_key?(:validate) ? options.delete(:validate) : true
624
+ validate_with = options.has_key?(:validate_with) ? options.delete(:validate_with) : {}
625
+
620
626
  if expression
621
- stub = ExprStub.new(expression)
627
+ stub = expr(expression, { :validate => validate, :validate_with => validate_with })
622
628
  types, expression = stub.types, stub.expression
623
629
 
630
+ stub.validate_scope(scope)
624
631
  each from, :filter => expression_filter(
625
632
  :parameters => types,
626
633
  :expression => expression
@@ -650,6 +657,10 @@ module Cascading
650
657
  # * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
651
658
  # same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
652
659
  # expression-based.
660
+ # * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
661
+ # expression validation. Defaults to true.
662
+ # * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
663
+ # expression validation. Defaults to {}.
653
664
  def reject(*args)
654
665
  options = args.extract_options
655
666
  raise "Regex not allowed" if options && options[:pattern]
@@ -665,6 +676,10 @@ module Cascading
665
676
  # * <tt>:expression</tt> a string. Specifies a Janino expression used to select the tuples. This option has the
666
677
  # same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
667
678
  # expression-based.
679
+ # * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
680
+ # expression validation. Defaults to true.
681
+ # * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
682
+ # expression validation. Defaults to {}.
668
683
  def where(*args)
669
684
  options = args.extract_options
670
685
  raise "Regex not allowed" if options && options[:pattern]
@@ -4,21 +4,28 @@
4
4
 
5
5
  module Cascading
6
6
  class Node
7
- attr_accessor :name, :parent, :children, :last_child
7
+ attr_accessor :name, :parent, :children, :child_names, :last_child
8
8
 
9
9
  def initialize(name, parent)
10
10
  @name = name
11
11
  @parent = parent
12
12
  @children = {}
13
+ @child_names = []
13
14
  @last_child = nil
14
15
  end
15
16
 
16
17
  def add_child(node)
17
18
  @children[node.name] = node
19
+ @child_names << node.name
18
20
  @last_child = node
19
21
  node
20
22
  end
21
23
 
24
+ def describe(offset = '')
25
+ "#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
26
+ end
27
+ alias desc describe
28
+
22
29
  def find_child(name)
23
30
  children.each do |child_name, child|
24
31
  return child if child_name == name
@@ -22,6 +22,10 @@ module Cascading
22
22
  flow
23
23
  end
24
24
 
25
+ def describe(offset = '')
26
+ "#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
27
+ end
28
+
25
29
  def draw(dir, properties = nil)
26
30
  @children.each do |name, flow|
27
31
  flow.connect(properties).writeDOT("#{dir}/#{name}.dot")
@@ -25,9 +25,14 @@ module Cascading
25
25
  flow
26
26
  end
27
27
 
28
- def expr(s)
29
- return s if s.kind_of?(ExprStub)
30
- ExprStub.new(s)
28
+ def describe
29
+ Cascade.all.map{ |cascade| cascade.describe }.join("\n")
30
+ end
31
+ alias desc describe
32
+
33
+ # See ExprStub.expr
34
+ def expr(expression, params = {})
35
+ ExprStub.expr(expression, params)
31
36
  end
32
37
 
33
38
  # Creates a cascading.tuple.Fields instance from a string or an array of strings.
@@ -42,7 +47,7 @@ module Cascading
42
47
  end
43
48
  raise "Fields cannot be nil: #{fields.inspect}" if fields.include?(nil)
44
49
  end
45
- return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) && JRUBY_VERSION > '1.2.0' ? f.to_java(:int) : f }.to_java(java.lang.Comparable))
50
+ return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
46
51
  end
47
52
 
48
53
  def all_fields
@@ -1,30 +1,35 @@
1
- # Wrapper meant for NativeExceptions that wrap exceptions from Cascading. The
2
- # trouble is that the combined stack traces are so long, printing them case
3
- # actually omit locations in the cascading.jruby or application code that
4
- # matter, leaving you with no information about the source of the error. This
5
- # class just swallows all the nested exceptions, printing their message, while
6
- # giving you a direct route into JRuby code to the cause of the problem.
7
- class CascadingException < StandardError
8
- def initialize(native_exception, message)
9
- @ne = native_exception
10
- super("#{message}\n#{trace_causes(@ne, 1)}")
11
- end
1
+ # NativeException wrapper that prints the full nested stack trace of the Java
2
+ # exception and all of its causes wrapped by the NativeException.
3
+ # NativeException by default reveals only the first cause, which is
4
+ # insufficient for tracing cascading.jruby errors into JRuby code or revealing
5
+ # underlying Janino expression problems.
6
+ module Cascading
7
+ class CascadingException < StandardError
8
+ def initialize(native_exception, message)
9
+ @ne = native_exception
10
+ super("#{message}\n#{trace_causes(@ne, 1)}")
11
+ end
12
12
 
13
- def cause(depth)
14
- fetch_cause(@ne, depth)
15
- end
13
+ def cause(depth)
14
+ fetch_cause(@ne, depth)
15
+ end
16
16
 
17
- private
17
+ private
18
18
 
19
- def fetch_cause(ne, depth)
20
- return ne if depth <= 1
21
- fetch_cause(ne.cause, depth - 1)
22
- end
19
+ def fetch_cause(ne, depth)
20
+ return ne if depth <= 1
21
+ fetch_cause(ne.cause, depth - 1)
22
+ end
23
23
 
24
- def trace_causes(ne, depth)
25
- return unless ne
26
- trace = "Cause #{depth}: #{ne}\n"
27
- trace += ne.stack_trace.map { |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n") + "\n" if ne.respond_to?(:stack_trace)
28
- trace += "#{trace_causes(ne.cause, depth + 1)}"
24
+ def trace_causes(ne, depth)
25
+ return unless ne
26
+ trace = "Cause #{depth}: #{ne.respond_to?(:java_class) ? ne.java_class : ne.class}: #{ne}\n"
27
+ if ne.respond_to?(:stack_trace)
28
+ trace += "#{ne.stack_trace.map{ |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n")}\n"
29
+ elsif ne.respond_to?(:backtrace)
30
+ trace += " #{ne.backtrace.join("\n ")}\n"
31
+ end
32
+ trace += "#{trace_causes(ne.cause, depth + 1)}"
33
+ end
29
34
  end
30
35
  end
@@ -1,33 +1,164 @@
1
- class ExprStub
2
- attr_accessor :expression, :types
1
+ module Cascading
2
+ class ExprStub
3
+ attr_accessor :expression, :types, :input_expression
3
4
 
4
- def initialize(st)
5
- @expression = st.dup
6
- @types = {}
5
+ # ExprStub requires a Janino expression decorated with field types. For
6
+ # example: '"Found: " + (x:int + y:int) + " " + z:string'. Type names are
7
+ # defined in Cascading::JAVA_TYPE_MAP.
8
+ def initialize(expression)
9
+ @input_expression = expression
10
+ @expression = expression.dup
11
+ @types = {}
7
12
 
8
- # Simple regexp based parser for types
13
+ # Simple regexp based parser for types
9
14
 
10
- JAVA_TYPE_MAP.each do |sym, klass|
11
- @expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
12
- name = match.split(/:/).first.gsub(/\s+/, "")
13
- @types[name] = klass
14
- match.gsub(/:#{sym.to_s}/, "")
15
+ JAVA_TYPE_MAP.each do |sym, klass|
16
+ @expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
17
+ name = match.split(/:/).first.gsub(/\s+/, "")
18
+ @types[name] = klass
19
+ match.gsub(/:#{sym.to_s}/, "")
20
+ end
15
21
  end
16
22
  end
17
- end
18
23
 
19
- def self.split_hash(h)
20
- keys, values = h.keys.sort, []
21
- keys.each do |key|
22
- values << h[key]
24
+ def to_s
25
+ @input_expression
26
+ end
27
+
28
+ # Convenience constructor for an ExprStub that optionally performs
29
+ # validation. Takes a string to use as a Janino expression and an optional
30
+ # params hash. By default, the param :validate is set to true (performs
31
+ # expression validation using default actual argument values) and the param
32
+ # :validate_with is set to {} (which doesn't override any of the default
33
+ # actual argument values used for validation).
34
+ def self.expr(expression, params = {})
35
+ params = { :validate => true, :validate_with => {} }.merge(params)
36
+ expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
37
+ expr_stub.validate(params[:validate_with]) if params[:validate]
38
+ puts "Expression validation is disabled for '#{expression}'" unless params[:validate]
39
+ expr_stub
40
+ end
41
+
42
+ # Scan, parse, and compile expression, then return this ExprStub upon
43
+ # success. Throws an CascadingException upon failure.
44
+ def compile
45
+ evaluator
46
+ self
47
+ end
48
+
49
+ # Evaluates this ExprStub given a hash mapping argument names to argument
50
+ # values. Names may be strings or symbols. Throws a CascadingException
51
+ # upon failure.
52
+ def eval(actual_args)
53
+ actual_args = actual_args.inject({}) do |string_keys, (arg, value)|
54
+ string_keys[arg.to_s] = specific_to_java(value, @types[arg.to_s])
55
+ string_keys
56
+ end
57
+ args, values = split_hash(actual_args)
58
+ unused = validate_fields(args)
59
+ return self.eval(actual_args.reject{ |arg, value| unused.include?(arg) }) unless unused.empty?
60
+ evaluate(values)
61
+ end
62
+
63
+ # Evaluates this ExprStub with default values for each actual argument.
64
+ # Values may be overridden with the optional actual_args argument, which
65
+ # accepts a hash like ExprStub#eval. Throws a CascadingException upon
66
+ # failure.
67
+ def validate(actual_args = {})
68
+ self.eval(test_values.merge(actual_args))
69
+ end
70
+
71
+ def validate_scope(scope)
72
+ validate_fields(scope.values_fields.to_a)
73
+ end
74
+
75
+ # Throws an exception if any arguments required by this ExprStub are
76
+ # missing from fields. Returns those fields which are unused. Throws an
77
+ # ExprArgException upon failure.
78
+ def validate_fields(fields)
79
+ names = @types.keys.sort
80
+ missing = names - fields
81
+ raise ExprArgException.new("Expression '#{@expression}' is missing these fields: #{missing.inspect}\nRequires: #{names.inspect}, found: #{fields.inspect}") unless missing.empty?
82
+ fields - names
83
+ end
84
+
85
+ private
86
+
87
+ def split_hash(h)
88
+ keys, values = h.sort.inject([[], []]) do |(keys, values), (key, value)|
89
+ [keys << key, values << value]
90
+ end
91
+ [keys, values]
92
+ end
93
+
94
+ # Evaluate this ExprStub given an array of actual arguments. Throws a
95
+ # CascadingException upon failure. GOTCHA: requires values to be in order
96
+ # of lexicographically sorted formal arguments.
97
+ def evaluate(values)
98
+ begin
99
+ evaluator.evaluate(values.to_java)
100
+ rescue NativeException => ne
101
+ raise CascadingException.new(ne, "Exception encountered while evaluating '#{@expression}' with arguments: #{values.inspect}")
102
+ end
23
103
  end
24
- [keys, values]
25
- end
26
104
 
27
- def self.split_names_and_types(expr_types)
28
- names, types = split_hash(expr_types)
29
- names = names.to_java(java.lang.String)
30
- types = types.to_java(java.lang.Class)
31
- [names, types]
105
+ # Building an evaluator ensures that the expression scans, parses, and
106
+ # compiles
107
+ def evaluator
108
+ begin
109
+ names, types = names_and_types
110
+ Java::OrgCodehausJanino::ExpressionEvaluator.new(@expression, java.lang.Comparable.java_class, names, types)
111
+ rescue NativeException => ne
112
+ raise CascadingException.new(ne, "Exception encountered while compiling '#{@expression}'")
113
+ end
114
+ end
115
+
116
+ # Extract Java names and types from @types hash
117
+ def names_and_types
118
+ names, types = split_hash(@types)
119
+ [names.to_java(java.lang.String), types.to_java(java.lang.Class)]
120
+ end
121
+
122
+ # Makes best effort to convert Ruby numbers into the Java numeric type
123
+ # expected by a Janino expression. However, if the conversion fails, it
124
+ # returns the original value so that the exception thrown will be from
125
+ # Janino, not this code.
126
+ def specific_to_java(value, type)
127
+ # GOTCHA: Java's Float and Long have constructors that take strings and
128
+ # parse them. If value is a string representation of a number, this code
129
+ # could coerce it to a number whereas invocation of the Janino expression
130
+ # would fail. We therefore punt if value is a String.
131
+ return value if value.kind_of?(::String)
132
+ if type == java.lang.Float.java_class
133
+ return value if value.kind_of?(::Integer)
134
+ java.lang.Float.new(value) rescue value
135
+ elsif type == java.lang.Long.java_class && JRUBY_VERSION <= '1.2.0'
136
+ return value if value.kind_of?(::Float)
137
+ java.lang.Long.new(value) rescue value
138
+ elsif type == java.lang.Integer.java_class && JRUBY_VERSION > '1.2.0'
139
+ return value if value.kind_of?(::Float)
140
+ java.lang.Integer.new(value) rescue value
141
+ else
142
+ value
143
+ end
144
+ end
145
+
146
+ @@defaults = {
147
+ java.lang.Integer.java_class => JRUBY_VERSION > '1.2.0' ? java.lang.Integer.new(0) : 0,
148
+ java.lang.Boolean.java_class => false,
149
+ java.lang.Double.java_class => 0.0,
150
+ java.lang.Float.java_class => java.lang.Float.new(0.0),
151
+ java.lang.Long.java_class => JRUBY_VERSION > '1.2.0' ? 0 : java.lang.Long.new(0),
152
+ java.lang.String.java_class => nil,
153
+ }
154
+
155
+ def test_values
156
+ @types.sort.inject({}) do |test_values, (name, type)|
157
+ test_values[name] = @@defaults[type]
158
+ test_values
159
+ end
160
+ end
32
161
  end
162
+
163
+ class ExprArgException < StandardError; end
33
164
  end
@@ -8,10 +8,10 @@ module Cascading
8
8
  class Flow < Cascading::Node
9
9
  extend Registerable
10
10
 
11
- attr_accessor :properties, :sources, :sinks, :outgoing_scopes, :listeners
11
+ attr_accessor :properties, :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
12
12
 
13
13
  def initialize(name, parent)
14
- @properties, @sources, @sinks, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
14
+ @properties, @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, {}, []
15
15
  super(name, parent)
16
16
  self.class.add(name, self)
17
17
  end
@@ -29,9 +29,9 @@ module Cascading
29
29
  # reference a path.
30
30
  def sink(*args)
31
31
  if (args.size == 2)
32
- @sinks[args[0]] = args[1]
32
+ sinks[args[0]] = args[1]
33
33
  elsif (args.size == 1)
34
- @sinks[@name] = args[0]
34
+ sinks[name] = args[0]
35
35
  end
36
36
  end
37
37
 
@@ -40,14 +40,24 @@ module Cascading
40
40
  # reference a path.
41
41
  def source(*args)
42
42
  if (args.size == 2)
43
- @sources[args[0]] = args[1]
44
- @outgoing_scopes[args[0]] = Scope.tap_scope(args[1], args[0])
43
+ sources[args[0]] = args[1]
44
+ incoming_scopes[args[0]] = Scope.tap_scope(args[1], args[0])
45
+ outgoing_scopes[args[0]] = incoming_scopes[args[0]]
45
46
  elsif (args.size == 1)
46
- @sources[@name] = args[0]
47
- @outgoing_scopes[@name] = Scope.empty_scope(@name)
47
+ sources[name] = args[0]
48
+ incoming_scopes[name] = Scope.empty_scope(name)
49
+ outgoing_scopes[name] = incoming_scopes[name]
48
50
  end
49
51
  end
50
52
 
53
+ def describe(offset = '')
54
+ description = "#{offset}#{name}:flow\n"
55
+ description += "#{sources.keys.map{ |source| "#{offset} #{source}:source :: #{incoming_scopes[source].values_fields.to_a.inspect}" }.join("\n")}\n"
56
+ description += "#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}\n"
57
+ description += "#{sinks.keys.map{ |sink| "#{offset} #{sink}:sink :: #{outgoing_scopes[sink].values_fields.to_a.inspect}" }.join("\n")}"
58
+ description
59
+ end
60
+
51
61
  def scope(name = nil)
52
62
  raise 'Must specify name if no children have been defined yet' unless name || last_child
53
63
  name ||= last_child.name
data/lib/cascading.rb CHANGED
@@ -6,7 +6,7 @@ require 'java'
6
6
 
7
7
  module Cascading
8
8
  # :stopdoc:
9
- VERSION = '0.0.4'
9
+ VERSION = '0.0.5'
10
10
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
11
11
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
12
12
  CASCADING_HOME = ENV['CASCADING_HOME']
data/samples/copy.rb CHANGED
@@ -10,7 +10,8 @@ cascade 'copy' do
10
10
 
11
11
  assembly 'input' do
12
12
  rename 'line' => 'value'
13
- reject 'value:string.indexOf("R") == -1'
13
+ # We override validate_with because we know line will never be null
14
+ reject 'value:string.indexOf("R") == -1', :validate_with => { :value => 'nothinghere' }
14
15
  end
15
16
 
16
17
  sink 'input', tap('output/copy', :sink_mode => :replace)
@@ -1,6 +1,11 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
- describe Cascading do
3
+ context Cascading do
4
+ it 'should handle string and integer field names' do
5
+ f = fields(['a', 1, 'b', 2])
6
+ f.to_a.should == ['a', 1, 'b', 2]
7
+ end
8
+
4
9
  it 'should dedup field names from multiple sources' do
5
10
  left_names = ['a', 'b', 'c', 'd', 'e']
6
11
  mid_names = ['a', 'f']