cascading.jruby 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +17 -0
- data/README.md +2 -0
- data/lib/cascading/assembly.rb +40 -25
- data/lib/cascading/base.rb +8 -1
- data/lib/cascading/cascade.rb +4 -0
- data/lib/cascading/cascading.rb +9 -4
- data/lib/cascading/cascading_exception.rb +29 -24
- data/lib/cascading/expr_stub.rb +154 -23
- data/lib/cascading/flow.rb +18 -8
- data/lib/cascading.rb +1 -1
- data/samples/copy.rb +2 -1
- data/spec/cascading_spec.rb +6 -1
- data/spec/expr_spec.rb +221 -1
- data/spec/jruby_version_spec.rb +72 -0
- data/spec/primary_key_spec.rb +1 -1
- data/spec/scope_spec.rb +1 -1
- data/spec/spec_util.rb +1 -1
- data/tasks/spec.rake +1 -1
- metadata +117 -126
- data/tags +0 -238
data/History.txt
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
0.0.5 - Addressing Janino pain
|
2
|
+
|
3
|
+
This release expands upon the ExprStub class, adding composition-time compilation
|
4
|
+
and validation of Janino expressions. This causes faulty Janino expressions to
|
5
|
+
fail early rather than hours into a job.
|
6
|
+
|
7
|
+
0.0.4 - Cleanup
|
8
|
+
|
9
|
+
This release merges in a good deal of work from Etsy and addresses many internal
|
10
|
+
structural issues with cascading.jruby node hierarchies. Greatly expands unit
|
11
|
+
tests and makes samples easily executable as a form of functional testing.
|
12
|
+
|
13
|
+
0.0.3 - Etsy's first version of cascading.jruby
|
14
|
+
|
15
|
+
First major commit of work from Etsy that introduces scope propagation to
|
16
|
+
cascading.jruby scripts in support of field propagation and primary keys.
|
17
|
+
Mainly a cleanup and testing release.
|
data/README.md
CHANGED
data/lib/cascading/assembly.rb
CHANGED
@@ -10,7 +10,7 @@ module Cascading
|
|
10
10
|
class Assembly < Cascading::Node
|
11
11
|
include Operations
|
12
12
|
|
13
|
-
attr_accessor :tail_pipe, :
|
13
|
+
attr_accessor :head_pipe, :tail_pipe, :incoming_scopes, :outgoing_scopes
|
14
14
|
|
15
15
|
def initialize(name, parent, outgoing_scopes = {})
|
16
16
|
super(name, parent)
|
@@ -27,6 +27,15 @@ module Cascading
|
|
27
27
|
@outgoing_scopes[name] ||= Scope.empty_scope(name)
|
28
28
|
end
|
29
29
|
@tail_pipe = @head_pipe
|
30
|
+
@incoming_scopes = [scope]
|
31
|
+
end
|
32
|
+
|
33
|
+
def describe(offset = '')
|
34
|
+
incoming_scopes_desc = "#{incoming_scopes.map{ |incoming_scope| incoming_scope.values_fields.to_a.inspect }.join(', ')}"
|
35
|
+
incoming_scopes_desc = "(#{incoming_scopes_desc})" unless incoming_scopes.size == 1
|
36
|
+
description = "#{offset}#{name}:assembly :: #{incoming_scopes_desc} -> #{scope.values_fields.to_a.inspect}"
|
37
|
+
description += "\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}" unless children.empty?
|
38
|
+
description
|
30
39
|
end
|
31
40
|
|
32
41
|
def parent_flow
|
@@ -90,7 +99,7 @@ module Cascading
|
|
90
99
|
first_fields = first_fields - scope.grouping_fields.to_a
|
91
100
|
if first_fields.size > 0
|
92
101
|
first *first_fields
|
93
|
-
puts "Firsting: #{first_fields.inspect} in assembly: #{
|
102
|
+
puts "Firsting: #{first_fields.inspect} in assembly: #{name}"
|
94
103
|
end
|
95
104
|
|
96
105
|
bind_names scope.grouping_fields.to_a if every_applied?
|
@@ -102,14 +111,14 @@ module Cascading
|
|
102
111
|
end
|
103
112
|
|
104
113
|
def to_s
|
105
|
-
"#{
|
114
|
+
"#{name} : head pipe : #{@head_pipe} - tail pipe: #{@tail_pipe}"
|
106
115
|
end
|
107
116
|
|
108
117
|
# Builds a join (CoGroup) pipe. Requires a list of assembly names to join.
|
109
118
|
def join(*args, &block)
|
110
119
|
options = args.extract_options!
|
111
120
|
|
112
|
-
pipes, incoming_scopes = [], []
|
121
|
+
pipes, @incoming_scopes = [], []
|
113
122
|
args.each do |assembly_name|
|
114
123
|
assembly = parent_flow.find_child(assembly_name)
|
115
124
|
raise "Could not find assembly '#{assembly_name}' in join" unless assembly
|
@@ -129,7 +138,7 @@ module Cascading
|
|
129
138
|
group_fields << fields(group_fields_args)
|
130
139
|
end
|
131
140
|
elsif group_fields_args.kind_of?(Hash)
|
132
|
-
pipes, incoming_scopes = [], []
|
141
|
+
pipes, @incoming_scopes = [], []
|
133
142
|
keys = group_fields_args.keys.sort
|
134
143
|
keys.each do |assembly_name|
|
135
144
|
v = group_fields_args[assembly_name]
|
@@ -233,7 +242,7 @@ module Cascading
|
|
233
242
|
# This actually creates a GroupBy pipe.
|
234
243
|
# It expects a list of assembly names as parameter.
|
235
244
|
def union_pipes(*args)
|
236
|
-
pipes, incoming_scopes = [], []
|
245
|
+
pipes, @incoming_scopes = [], []
|
237
246
|
args[0].each do |assembly_name|
|
238
247
|
assembly = parent_flow.find_child(assembly_name)
|
239
248
|
pipes << assembly.tail_pipe
|
@@ -281,9 +290,7 @@ module Cascading
|
|
281
290
|
# Example:
|
282
291
|
# project "field1", "field2"
|
283
292
|
def project(*args)
|
284
|
-
|
285
|
-
operation = Java::CascadingOperation::Identity.new
|
286
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
|
293
|
+
each fields(args), :function => Java::CascadingOperation::Identity.new
|
287
294
|
end
|
288
295
|
|
289
296
|
# Removes the specified fields from the current assembly.
|
@@ -301,9 +308,7 @@ module Cascading
|
|
301
308
|
# Example:
|
302
309
|
# bind_names "field1", "field2"
|
303
310
|
def bind_names(*new_names)
|
304
|
-
|
305
|
-
operation = Java::CascadingOperation::Identity.new(new_fields)
|
306
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
|
311
|
+
each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
|
307
312
|
end
|
308
313
|
|
309
314
|
# Renames fields according to the mapping provided.
|
@@ -319,9 +324,7 @@ module Cascading
|
|
319
324
|
old_key = scope.primary_key_fields.to_a
|
320
325
|
new_key = old_key.map{ |name| name_map[name] || name }
|
321
326
|
|
322
|
-
|
323
|
-
operation = Java::CascadingOperation::Identity.new(new_fields)
|
324
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
|
327
|
+
each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
|
325
328
|
primary(*new_key)
|
326
329
|
end
|
327
330
|
|
@@ -330,22 +333,19 @@ module Cascading
|
|
330
333
|
types = JAVA_TYPE_MAP.values_at(*type_map.values_at(*names))
|
331
334
|
fields = fields(names)
|
332
335
|
types = types.to_java(java.lang.Class)
|
333
|
-
|
334
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
|
336
|
+
each fields, :function => Java::CascadingOperation::Identity.new(fields, types)
|
335
337
|
end
|
336
338
|
|
337
339
|
def copy(*args)
|
338
340
|
options = args.extract_options!
|
339
341
|
from = args[0] || all_fields
|
340
342
|
into = args[1] || options[:into] || all_fields
|
341
|
-
|
342
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, fields(from), operation, Java::CascadingTuple::Fields::ALL)
|
343
|
+
each fields(from), :function => Java::CascadingOperation::Identity.new(fields(into)), :output => all_fields
|
343
344
|
end
|
344
345
|
|
345
346
|
# A pipe that does nothing.
|
346
347
|
def pass(*args)
|
347
|
-
|
348
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
|
348
|
+
each all_fields, :function => Java::CascadingOperation::Identity.new
|
349
349
|
end
|
350
350
|
|
351
351
|
def assert(*args)
|
@@ -593,9 +593,8 @@ module Cascading
|
|
593
593
|
value = args[field_name]
|
594
594
|
|
595
595
|
if value.kind_of?(ExprStub)
|
596
|
-
|
597
|
-
|
598
|
-
:parameters => value.types), :output => all_fields
|
596
|
+
value.validate_scope(scope)
|
597
|
+
each all_fields, :function => expression_function(field_name, :expression => value.expression, :parameters => value.types), :output => all_fields
|
599
598
|
else
|
600
599
|
each all_fields, :function => insert_function([field_name], :values => [value]), :output => all_fields
|
601
600
|
end
|
@@ -612,15 +611,23 @@ module Cascading
|
|
612
611
|
# * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
|
613
612
|
# same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
|
614
613
|
# expression-based. This is incompatible with the _pattern_ option.
|
614
|
+
# * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
|
615
|
+
# expression validation. Defaults to true.
|
616
|
+
# * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
|
617
|
+
# expression validation. Defaults to {}.
|
615
618
|
def filter(*args)
|
616
619
|
options = args.extract_options!
|
617
620
|
from = options.delete(:from) || all_fields
|
618
621
|
expression = options.delete(:expression) || args.shift
|
619
622
|
regex = options.delete(:pattern)
|
623
|
+
validate = options.has_key?(:validate) ? options.delete(:validate) : true
|
624
|
+
validate_with = options.has_key?(:validate_with) ? options.delete(:validate_with) : {}
|
625
|
+
|
620
626
|
if expression
|
621
|
-
stub =
|
627
|
+
stub = expr(expression, { :validate => validate, :validate_with => validate_with })
|
622
628
|
types, expression = stub.types, stub.expression
|
623
629
|
|
630
|
+
stub.validate_scope(scope)
|
624
631
|
each from, :filter => expression_filter(
|
625
632
|
:parameters => types,
|
626
633
|
:expression => expression
|
@@ -650,6 +657,10 @@ module Cascading
|
|
650
657
|
# * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
|
651
658
|
# same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
|
652
659
|
# expression-based.
|
660
|
+
# * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
|
661
|
+
# expression validation. Defaults to true.
|
662
|
+
# * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
|
663
|
+
# expression validation. Defaults to {}.
|
653
664
|
def reject(*args)
|
654
665
|
options = args.extract_options
|
655
666
|
raise "Regex not allowed" if options && options[:pattern]
|
@@ -665,6 +676,10 @@ module Cascading
|
|
665
676
|
# * <tt>:expression</tt> a string. Specifies a Janino expression used to select the tuples. This option has the
|
666
677
|
# same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
|
667
678
|
# expression-based.
|
679
|
+
# * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
|
680
|
+
# expression validation. Defaults to true.
|
681
|
+
# * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
|
682
|
+
# expression validation. Defaults to {}.
|
668
683
|
def where(*args)
|
669
684
|
options = args.extract_options
|
670
685
|
raise "Regex not allowed" if options && options[:pattern]
|
data/lib/cascading/base.rb
CHANGED
@@ -4,21 +4,28 @@
|
|
4
4
|
|
5
5
|
module Cascading
|
6
6
|
class Node
|
7
|
-
attr_accessor :name, :parent, :children, :last_child
|
7
|
+
attr_accessor :name, :parent, :children, :child_names, :last_child
|
8
8
|
|
9
9
|
def initialize(name, parent)
|
10
10
|
@name = name
|
11
11
|
@parent = parent
|
12
12
|
@children = {}
|
13
|
+
@child_names = []
|
13
14
|
@last_child = nil
|
14
15
|
end
|
15
16
|
|
16
17
|
def add_child(node)
|
17
18
|
@children[node.name] = node
|
19
|
+
@child_names << node.name
|
18
20
|
@last_child = node
|
19
21
|
node
|
20
22
|
end
|
21
23
|
|
24
|
+
def describe(offset = '')
|
25
|
+
"#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
26
|
+
end
|
27
|
+
alias desc describe
|
28
|
+
|
22
29
|
def find_child(name)
|
23
30
|
children.each do |child_name, child|
|
24
31
|
return child if child_name == name
|
data/lib/cascading/cascade.rb
CHANGED
@@ -22,6 +22,10 @@ module Cascading
|
|
22
22
|
flow
|
23
23
|
end
|
24
24
|
|
25
|
+
def describe(offset = '')
|
26
|
+
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
27
|
+
end
|
28
|
+
|
25
29
|
def draw(dir, properties = nil)
|
26
30
|
@children.each do |name, flow|
|
27
31
|
flow.connect(properties).writeDOT("#{dir}/#{name}.dot")
|
data/lib/cascading/cascading.rb
CHANGED
@@ -25,9 +25,14 @@ module Cascading
|
|
25
25
|
flow
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
28
|
+
def describe
|
29
|
+
Cascade.all.map{ |cascade| cascade.describe }.join("\n")
|
30
|
+
end
|
31
|
+
alias desc describe
|
32
|
+
|
33
|
+
# See ExprStub.expr
|
34
|
+
def expr(expression, params = {})
|
35
|
+
ExprStub.expr(expression, params)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Creates a cascading.tuple.Fields instance from a string or an array of strings.
|
@@ -42,7 +47,7 @@ module Cascading
|
|
42
47
|
end
|
43
48
|
raise "Fields cannot be nil: #{fields.inspect}" if fields.include?(nil)
|
44
49
|
end
|
45
|
-
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum)
|
50
|
+
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
|
46
51
|
end
|
47
52
|
|
48
53
|
def all_fields
|
@@ -1,30 +1,35 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
6
|
-
|
7
|
-
class CascadingException < StandardError
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
1
|
+
# NativeException wrapper that prints the full nested stack trace of the Java
|
2
|
+
# exception and all of its causes wrapped by the NativeException.
|
3
|
+
# NativeException by default reveals only the first cause, which is
|
4
|
+
# insufficient for tracing cascading.jruby errors into JRuby code or revealing
|
5
|
+
# underlying Janino expression problems.
|
6
|
+
module Cascading
|
7
|
+
class CascadingException < StandardError
|
8
|
+
def initialize(native_exception, message)
|
9
|
+
@ne = native_exception
|
10
|
+
super("#{message}\n#{trace_causes(@ne, 1)}")
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
def cause(depth)
|
14
|
+
fetch_cause(@ne, depth)
|
15
|
+
end
|
16
16
|
|
17
|
-
|
17
|
+
private
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
def fetch_cause(ne, depth)
|
20
|
+
return ne if depth <= 1
|
21
|
+
fetch_cause(ne.cause, depth - 1)
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
24
|
+
def trace_causes(ne, depth)
|
25
|
+
return unless ne
|
26
|
+
trace = "Cause #{depth}: #{ne.respond_to?(:java_class) ? ne.java_class : ne.class}: #{ne}\n"
|
27
|
+
if ne.respond_to?(:stack_trace)
|
28
|
+
trace += "#{ne.stack_trace.map{ |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n")}\n"
|
29
|
+
elsif ne.respond_to?(:backtrace)
|
30
|
+
trace += " #{ne.backtrace.join("\n ")}\n"
|
31
|
+
end
|
32
|
+
trace += "#{trace_causes(ne.cause, depth + 1)}"
|
33
|
+
end
|
29
34
|
end
|
30
35
|
end
|
data/lib/cascading/expr_stub.rb
CHANGED
@@ -1,33 +1,164 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
module Cascading
|
2
|
+
class ExprStub
|
3
|
+
attr_accessor :expression, :types, :input_expression
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
# ExprStub requires a Janino expression decorated with field types. For
|
6
|
+
# example: '"Found: " + (x:int + y:int) + " " + z:string'. Type names are
|
7
|
+
# defined in Cascading::JAVA_TYPE_MAP.
|
8
|
+
def initialize(expression)
|
9
|
+
@input_expression = expression
|
10
|
+
@expression = expression.dup
|
11
|
+
@types = {}
|
7
12
|
|
8
|
-
|
13
|
+
# Simple regexp based parser for types
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
+
JAVA_TYPE_MAP.each do |sym, klass|
|
16
|
+
@expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
|
17
|
+
name = match.split(/:/).first.gsub(/\s+/, "")
|
18
|
+
@types[name] = klass
|
19
|
+
match.gsub(/:#{sym.to_s}/, "")
|
20
|
+
end
|
15
21
|
end
|
16
22
|
end
|
17
|
-
end
|
18
23
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
def to_s
|
25
|
+
@input_expression
|
26
|
+
end
|
27
|
+
|
28
|
+
# Convenience constructor for an ExprStub that optionally performs
|
29
|
+
# validation. Takes a string to use as a Janino expression and an optional
|
30
|
+
# params hash. By default, the param :validate is set to true (performs
|
31
|
+
# expression validation using default actual argument values) and the param
|
32
|
+
# :validate_with is set to {} (which doesn't override any of the default
|
33
|
+
# actual argument values used for validation).
|
34
|
+
def self.expr(expression, params = {})
|
35
|
+
params = { :validate => true, :validate_with => {} }.merge(params)
|
36
|
+
expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
|
37
|
+
expr_stub.validate(params[:validate_with]) if params[:validate]
|
38
|
+
puts "Expression validation is disabled for '#{expression}'" unless params[:validate]
|
39
|
+
expr_stub
|
40
|
+
end
|
41
|
+
|
42
|
+
# Scan, parse, and compile expression, then return this ExprStub upon
|
43
|
+
# success. Throws a CascadingException upon failure.
|
44
|
+
def compile
|
45
|
+
evaluator
|
46
|
+
self
|
47
|
+
end
|
48
|
+
|
49
|
+
# Evaluates this ExprStub given a hash mapping argument names to argument
|
50
|
+
# values. Names may be strings or symbols. Throws a CascadingException
|
51
|
+
# upon failure.
|
52
|
+
def eval(actual_args)
|
53
|
+
actual_args = actual_args.inject({}) do |string_keys, (arg, value)|
|
54
|
+
string_keys[arg.to_s] = specific_to_java(value, @types[arg.to_s])
|
55
|
+
string_keys
|
56
|
+
end
|
57
|
+
args, values = split_hash(actual_args)
|
58
|
+
unused = validate_fields(args)
|
59
|
+
return self.eval(actual_args.reject{ |arg, value| unused.include?(arg) }) unless unused.empty?
|
60
|
+
evaluate(values)
|
61
|
+
end
|
62
|
+
|
63
|
+
# Evaluates this ExprStub with default values for each actual argument.
|
64
|
+
# Values may be overridden with the optional actual_args argument, which
|
65
|
+
# accepts a hash like ExprStub#eval. Throws a CascadingException upon
|
66
|
+
# failure.
|
67
|
+
def validate(actual_args = {})
|
68
|
+
self.eval(test_values.merge(actual_args))
|
69
|
+
end
|
70
|
+
|
71
|
+
def validate_scope(scope)
|
72
|
+
validate_fields(scope.values_fields.to_a)
|
73
|
+
end
|
74
|
+
|
75
|
+
# Throws an exception if any arguments required by this ExprStub are
|
76
|
+
# missing from fields. Returns those fields which are unused. Throws an
|
77
|
+
# ExprArgException upon failure.
|
78
|
+
def validate_fields(fields)
|
79
|
+
names = @types.keys.sort
|
80
|
+
missing = names - fields
|
81
|
+
raise ExprArgException.new("Expression '#{@expression}' is missing these fields: #{missing.inspect}\nRequires: #{names.inspect}, found: #{fields.inspect}") unless missing.empty?
|
82
|
+
fields - names
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
def split_hash(h)
|
88
|
+
keys, values = h.sort.inject([[], []]) do |(keys, values), (key, value)|
|
89
|
+
[keys << key, values << value]
|
90
|
+
end
|
91
|
+
[keys, values]
|
92
|
+
end
|
93
|
+
|
94
|
+
# Evaluate this ExprStub given an array of actual arguments. Throws a
|
95
|
+
# CascadingException upon failure. GOTCHA: requires values to be in order
|
96
|
+
# of lexicographically sorted formal arguments.
|
97
|
+
def evaluate(values)
|
98
|
+
begin
|
99
|
+
evaluator.evaluate(values.to_java)
|
100
|
+
rescue NativeException => ne
|
101
|
+
raise CascadingException.new(ne, "Exception encountered while evaluating '#{@expression}' with arguments: #{values.inspect}")
|
102
|
+
end
|
23
103
|
end
|
24
|
-
[keys, values]
|
25
|
-
end
|
26
104
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
105
|
+
# Building an evaluator ensures that the expression scans, parses, and
|
106
|
+
# compiles
|
107
|
+
def evaluator
|
108
|
+
begin
|
109
|
+
names, types = names_and_types
|
110
|
+
Java::OrgCodehausJanino::ExpressionEvaluator.new(@expression, java.lang.Comparable.java_class, names, types)
|
111
|
+
rescue NativeException => ne
|
112
|
+
raise CascadingException.new(ne, "Exception encountered while compiling '#{@expression}'")
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# Extract Java names and types from @types hash
|
117
|
+
def names_and_types
|
118
|
+
names, types = split_hash(@types)
|
119
|
+
[names.to_java(java.lang.String), types.to_java(java.lang.Class)]
|
120
|
+
end
|
121
|
+
|
122
|
+
# Makes best effort to convert Ruby numbers into the Java numeric type
|
123
|
+
# expected by a Janino expression. However, if the conversion fails, it
|
124
|
+
# returns the original value so that the exception thrown will be from
|
125
|
+
# Janino, not this code.
|
126
|
+
def specific_to_java(value, type)
|
127
|
+
# GOTCHA: Java's Float and Long have constructors that take strings and
|
128
|
+
# parse them. If value is a string representation of a number, this code
|
129
|
+
# could coerce it to a number whereas invocation of the Janino expression
|
130
|
+
# would fail. We therefore punt if value is a String.
|
131
|
+
return value if value.kind_of?(::String)
|
132
|
+
if type == java.lang.Float.java_class
|
133
|
+
return value if value.kind_of?(::Integer)
|
134
|
+
java.lang.Float.new(value) rescue value
|
135
|
+
elsif type == java.lang.Long.java_class && JRUBY_VERSION <= '1.2.0'
|
136
|
+
return value if value.kind_of?(::Float)
|
137
|
+
java.lang.Long.new(value) rescue value
|
138
|
+
elsif type == java.lang.Integer.java_class && JRUBY_VERSION > '1.2.0'
|
139
|
+
return value if value.kind_of?(::Float)
|
140
|
+
java.lang.Integer.new(value) rescue value
|
141
|
+
else
|
142
|
+
value
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
@@defaults = {
|
147
|
+
java.lang.Integer.java_class => JRUBY_VERSION > '1.2.0' ? java.lang.Integer.new(0) : 0,
|
148
|
+
java.lang.Boolean.java_class => false,
|
149
|
+
java.lang.Double.java_class => 0.0,
|
150
|
+
java.lang.Float.java_class => java.lang.Float.new(0.0),
|
151
|
+
java.lang.Long.java_class => JRUBY_VERSION > '1.2.0' ? 0 : java.lang.Long.new(0),
|
152
|
+
java.lang.String.java_class => nil,
|
153
|
+
}
|
154
|
+
|
155
|
+
def test_values
|
156
|
+
@types.sort.inject({}) do |test_values, (name, type)|
|
157
|
+
test_values[name] = @@defaults[type]
|
158
|
+
test_values
|
159
|
+
end
|
160
|
+
end
|
32
161
|
end
|
162
|
+
|
163
|
+
class ExprArgException < StandardError; end
|
33
164
|
end
|
data/lib/cascading/flow.rb
CHANGED
@@ -8,10 +8,10 @@ module Cascading
|
|
8
8
|
class Flow < Cascading::Node
|
9
9
|
extend Registerable
|
10
10
|
|
11
|
-
attr_accessor :properties, :sources, :sinks, :outgoing_scopes, :listeners
|
11
|
+
attr_accessor :properties, :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
|
12
12
|
|
13
13
|
def initialize(name, parent)
|
14
|
-
@properties, @sources, @sinks, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
|
14
|
+
@properties, @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, {}, []
|
15
15
|
super(name, parent)
|
16
16
|
self.class.add(name, self)
|
17
17
|
end
|
@@ -29,9 +29,9 @@ module Cascading
|
|
29
29
|
# reference a path.
|
30
30
|
def sink(*args)
|
31
31
|
if (args.size == 2)
|
32
|
-
|
32
|
+
sinks[args[0]] = args[1]
|
33
33
|
elsif (args.size == 1)
|
34
|
-
|
34
|
+
sinks[name] = args[0]
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
@@ -40,14 +40,24 @@ module Cascading
|
|
40
40
|
# reference a path.
|
41
41
|
def source(*args)
|
42
42
|
if (args.size == 2)
|
43
|
-
|
44
|
-
|
43
|
+
sources[args[0]] = args[1]
|
44
|
+
incoming_scopes[args[0]] = Scope.tap_scope(args[1], args[0])
|
45
|
+
outgoing_scopes[args[0]] = incoming_scopes[args[0]]
|
45
46
|
elsif (args.size == 1)
|
46
|
-
|
47
|
-
|
47
|
+
sources[name] = args[0]
|
48
|
+
incoming_scopes[name] = Scope.empty_scope(name)
|
49
|
+
outgoing_scopes[name] = incoming_scopes[name]
|
48
50
|
end
|
49
51
|
end
|
50
52
|
|
53
|
+
def describe(offset = '')
|
54
|
+
description = "#{offset}#{name}:flow\n"
|
55
|
+
description += "#{sources.keys.map{ |source| "#{offset} #{source}:source :: #{incoming_scopes[source].values_fields.to_a.inspect}" }.join("\n")}\n"
|
56
|
+
description += "#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}\n"
|
57
|
+
description += "#{sinks.keys.map{ |sink| "#{offset} #{sink}:sink :: #{outgoing_scopes[sink].values_fields.to_a.inspect}" }.join("\n")}"
|
58
|
+
description
|
59
|
+
end
|
60
|
+
|
51
61
|
def scope(name = nil)
|
52
62
|
raise 'Must specify name if no children have been defined yet' unless name || last_child
|
53
63
|
name ||= last_child.name
|
data/lib/cascading.rb
CHANGED
data/samples/copy.rb
CHANGED
@@ -10,7 +10,8 @@ cascade 'copy' do
|
|
10
10
|
|
11
11
|
assembly 'input' do
|
12
12
|
rename 'line' => 'value'
|
13
|
-
|
13
|
+
# We override validate_with because we know line will never be null
|
14
|
+
reject 'value:string.indexOf("R") == -1', :validate_with => { :value => 'nothinghere' }
|
14
15
|
end
|
15
16
|
|
16
17
|
sink 'input', tap('output/copy', :sink_mode => :replace)
|
data/spec/cascading_spec.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
|
-
|
3
|
+
context Cascading do
|
4
|
+
it 'should handle string and integer field names' do
|
5
|
+
f = fields(['a', 1, 'b', 2])
|
6
|
+
f.to_a.should == ['a', 1, 'b', 2]
|
7
|
+
end
|
8
|
+
|
4
9
|
it 'should dedup field names from multiple sources' do
|
5
10
|
left_names = ['a', 'b', 'c', 'd', 'e']
|
6
11
|
mid_names = ['a', 'f']
|