cascading.jruby 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +17 -0
- data/README.md +2 -0
- data/lib/cascading/assembly.rb +40 -25
- data/lib/cascading/base.rb +8 -1
- data/lib/cascading/cascade.rb +4 -0
- data/lib/cascading/cascading.rb +9 -4
- data/lib/cascading/cascading_exception.rb +29 -24
- data/lib/cascading/expr_stub.rb +154 -23
- data/lib/cascading/flow.rb +18 -8
- data/lib/cascading.rb +1 -1
- data/samples/copy.rb +2 -1
- data/spec/cascading_spec.rb +6 -1
- data/spec/expr_spec.rb +221 -1
- data/spec/jruby_version_spec.rb +72 -0
- data/spec/primary_key_spec.rb +1 -1
- data/spec/scope_spec.rb +1 -1
- data/spec/spec_util.rb +1 -1
- data/tasks/spec.rake +1 -1
- metadata +117 -126
- data/tags +0 -238
data/History.txt
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
0.0.5 - Addressing Janino pain
|
2
|
+
|
3
|
+
This release expands upon the ExprStub class, adding composition-time compilation
|
4
|
+
and validation of Janino expressions. This causes faulty Janino expressions to
|
5
|
+
fail early rather than hours into a job.
|
6
|
+
|
7
|
+
0.0.4 - Cleanup
|
8
|
+
|
9
|
+
This release merges in a good deal of work from Etsy and addresses many internal
|
10
|
+
structural issues with cascading.jruby node hierarchies. Greatly expands unit
|
11
|
+
tests and makes samples easily executable as a form of functional testing.
|
12
|
+
|
13
|
+
0.0.3 - Etsy's first version of cascading.jruby
|
14
|
+
|
15
|
+
First major commit of work from Etsy that introduces scope propagation to
|
16
|
+
cascading.jruby scripts in support of field propagation and primary keys.
|
17
|
+
Mainly a cleanup and testing release.
|
data/README.md
CHANGED
data/lib/cascading/assembly.rb
CHANGED
@@ -10,7 +10,7 @@ module Cascading
|
|
10
10
|
class Assembly < Cascading::Node
|
11
11
|
include Operations
|
12
12
|
|
13
|
-
attr_accessor :tail_pipe, :
|
13
|
+
attr_accessor :head_pipe, :tail_pipe, :incoming_scopes, :outgoing_scopes
|
14
14
|
|
15
15
|
def initialize(name, parent, outgoing_scopes = {})
|
16
16
|
super(name, parent)
|
@@ -27,6 +27,15 @@ module Cascading
|
|
27
27
|
@outgoing_scopes[name] ||= Scope.empty_scope(name)
|
28
28
|
end
|
29
29
|
@tail_pipe = @head_pipe
|
30
|
+
@incoming_scopes = [scope]
|
31
|
+
end
|
32
|
+
|
33
|
+
def describe(offset = '')
|
34
|
+
incoming_scopes_desc = "#{incoming_scopes.map{ |incoming_scope| incoming_scope.values_fields.to_a.inspect }.join(', ')}"
|
35
|
+
incoming_scopes_desc = "(#{incoming_scopes_desc})" unless incoming_scopes.size == 1
|
36
|
+
description = "#{offset}#{name}:assembly :: #{incoming_scopes_desc} -> #{scope.values_fields.to_a.inspect}"
|
37
|
+
description += "\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}" unless children.empty?
|
38
|
+
description
|
30
39
|
end
|
31
40
|
|
32
41
|
def parent_flow
|
@@ -90,7 +99,7 @@ module Cascading
|
|
90
99
|
first_fields = first_fields - scope.grouping_fields.to_a
|
91
100
|
if first_fields.size > 0
|
92
101
|
first *first_fields
|
93
|
-
puts "Firsting: #{first_fields.inspect} in assembly: #{
|
102
|
+
puts "Firsting: #{first_fields.inspect} in assembly: #{name}"
|
94
103
|
end
|
95
104
|
|
96
105
|
bind_names scope.grouping_fields.to_a if every_applied?
|
@@ -102,14 +111,14 @@ module Cascading
|
|
102
111
|
end
|
103
112
|
|
104
113
|
def to_s
|
105
|
-
"#{
|
114
|
+
"#{name} : head pipe : #{@head_pipe} - tail pipe: #{@tail_pipe}"
|
106
115
|
end
|
107
116
|
|
108
117
|
# Builds a join (CoGroup) pipe. Requires a list of assembly names to join.
|
109
118
|
def join(*args, &block)
|
110
119
|
options = args.extract_options!
|
111
120
|
|
112
|
-
pipes, incoming_scopes = [], []
|
121
|
+
pipes, @incoming_scopes = [], []
|
113
122
|
args.each do |assembly_name|
|
114
123
|
assembly = parent_flow.find_child(assembly_name)
|
115
124
|
raise "Could not find assembly '#{assembly_name}' in join" unless assembly
|
@@ -129,7 +138,7 @@ module Cascading
|
|
129
138
|
group_fields << fields(group_fields_args)
|
130
139
|
end
|
131
140
|
elsif group_fields_args.kind_of?(Hash)
|
132
|
-
pipes, incoming_scopes = [], []
|
141
|
+
pipes, @incoming_scopes = [], []
|
133
142
|
keys = group_fields_args.keys.sort
|
134
143
|
keys.each do |assembly_name|
|
135
144
|
v = group_fields_args[assembly_name]
|
@@ -233,7 +242,7 @@ module Cascading
|
|
233
242
|
# This actually creates a GroupBy pipe.
|
234
243
|
# It expects a list of assembly names as parameter.
|
235
244
|
def union_pipes(*args)
|
236
|
-
pipes, incoming_scopes = [], []
|
245
|
+
pipes, @incoming_scopes = [], []
|
237
246
|
args[0].each do |assembly_name|
|
238
247
|
assembly = parent_flow.find_child(assembly_name)
|
239
248
|
pipes << assembly.tail_pipe
|
@@ -281,9 +290,7 @@ module Cascading
|
|
281
290
|
# Example:
|
282
291
|
# project "field1", "field2"
|
283
292
|
def project(*args)
|
284
|
-
|
285
|
-
operation = Java::CascadingOperation::Identity.new
|
286
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
|
293
|
+
each fields(args), :function => Java::CascadingOperation::Identity.new
|
287
294
|
end
|
288
295
|
|
289
296
|
# Removes the specified fields from the current assembly.
|
@@ -301,9 +308,7 @@ module Cascading
|
|
301
308
|
# Example:
|
302
309
|
# bind_names "field1", "field2"
|
303
310
|
def bind_names(*new_names)
|
304
|
-
|
305
|
-
operation = Java::CascadingOperation::Identity.new(new_fields)
|
306
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
|
311
|
+
each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
|
307
312
|
end
|
308
313
|
|
309
314
|
# Renames fields according to the mapping provided.
|
@@ -319,9 +324,7 @@ module Cascading
|
|
319
324
|
old_key = scope.primary_key_fields.to_a
|
320
325
|
new_key = old_key.map{ |name| name_map[name] || name }
|
321
326
|
|
322
|
-
|
323
|
-
operation = Java::CascadingOperation::Identity.new(new_fields)
|
324
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
|
327
|
+
each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
|
325
328
|
primary(*new_key)
|
326
329
|
end
|
327
330
|
|
@@ -330,22 +333,19 @@ module Cascading
|
|
330
333
|
types = JAVA_TYPE_MAP.values_at(*type_map.values_at(*names))
|
331
334
|
fields = fields(names)
|
332
335
|
types = types.to_java(java.lang.Class)
|
333
|
-
|
334
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, fields, operation)
|
336
|
+
each fields, :function => Java::CascadingOperation::Identity.new(fields, types)
|
335
337
|
end
|
336
338
|
|
337
339
|
def copy(*args)
|
338
340
|
options = args.extract_options!
|
339
341
|
from = args[0] || all_fields
|
340
342
|
into = args[1] || options[:into] || all_fields
|
341
|
-
|
342
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, fields(from), operation, Java::CascadingTuple::Fields::ALL)
|
343
|
+
each fields(from), :function => Java::CascadingOperation::Identity.new(fields(into)), :output => all_fields
|
343
344
|
end
|
344
345
|
|
345
346
|
# A pipe that does nothing.
|
346
347
|
def pass(*args)
|
347
|
-
|
348
|
-
make_each(Java::CascadingPipe::Each, @tail_pipe, all_fields, operation)
|
348
|
+
each all_fields, :function => Java::CascadingOperation::Identity.new
|
349
349
|
end
|
350
350
|
|
351
351
|
def assert(*args)
|
@@ -593,9 +593,8 @@ module Cascading
|
|
593
593
|
value = args[field_name]
|
594
594
|
|
595
595
|
if value.kind_of?(ExprStub)
|
596
|
-
|
597
|
-
|
598
|
-
:parameters => value.types), :output => all_fields
|
596
|
+
value.validate_scope(scope)
|
597
|
+
each all_fields, :function => expression_function(field_name, :expression => value.expression, :parameters => value.types), :output => all_fields
|
599
598
|
else
|
600
599
|
each all_fields, :function => insert_function([field_name], :values => [value]), :output => all_fields
|
601
600
|
end
|
@@ -612,15 +611,23 @@ module Cascading
|
|
612
611
|
# * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
|
613
612
|
# same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
|
614
613
|
# expression-based. This is incompatible with the _pattern_ option.
|
614
|
+
# * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
|
615
|
+
# expression validation. Defaults to true.
|
616
|
+
# * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
|
617
|
+
# expression validation. Defaults to {}.
|
615
618
|
def filter(*args)
|
616
619
|
options = args.extract_options!
|
617
620
|
from = options.delete(:from) || all_fields
|
618
621
|
expression = options.delete(:expression) || args.shift
|
619
622
|
regex = options.delete(:pattern)
|
623
|
+
validate = options.has_key?(:validate) ? options.delete(:validate) : true
|
624
|
+
validate_with = options.has_key?(:validate_with) ? options.delete(:validate_with) : {}
|
625
|
+
|
620
626
|
if expression
|
621
|
-
stub =
|
627
|
+
stub = expr(expression, { :validate => validate, :validate_with => validate_with })
|
622
628
|
types, expression = stub.types, stub.expression
|
623
629
|
|
630
|
+
stub.validate_scope(scope)
|
624
631
|
each from, :filter => expression_filter(
|
625
632
|
:parameters => types,
|
626
633
|
:expression => expression
|
@@ -650,6 +657,10 @@ module Cascading
|
|
650
657
|
# * <tt>:expression</tt> a string. Specifies a Janino expression used to filter the tuples. This option has the
|
651
658
|
# same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
|
652
659
|
# expression-based.
|
660
|
+
# * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
|
661
|
+
# expression validation. Defaults to true.
|
662
|
+
# * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
|
663
|
+
# expression validation. Defaults to {}.
|
653
664
|
def reject(*args)
|
654
665
|
options = args.extract_options
|
655
666
|
raise "Regex not allowed" if options && options[:pattern]
|
@@ -665,6 +676,10 @@ module Cascading
|
|
665
676
|
# * <tt>:expression</tt> a string. Specifies a Janino expression used to select the tuples. This option has the
|
666
677
|
# same effect as providing it as the first unnamed argument. If this option is provided, then the filter is Janino
|
667
678
|
# expression-based.
|
679
|
+
# * <tt>:validate</tt> a boolean. Passed into Cascading#expr to enable or disable
|
680
|
+
# expression validation. Defaults to true.
|
681
|
+
# * <tt>:validate_with</tt> a hash. Actual arguments used by Cascading#expr for
|
682
|
+
# expression validation. Defaults to {}.
|
668
683
|
def where(*args)
|
669
684
|
options = args.extract_options
|
670
685
|
raise "Regex not allowed" if options && options[:pattern]
|
data/lib/cascading/base.rb
CHANGED
@@ -4,21 +4,28 @@
|
|
4
4
|
|
5
5
|
module Cascading
|
6
6
|
class Node
|
7
|
-
attr_accessor :name, :parent, :children, :last_child
|
7
|
+
attr_accessor :name, :parent, :children, :child_names, :last_child
|
8
8
|
|
9
9
|
def initialize(name, parent)
|
10
10
|
@name = name
|
11
11
|
@parent = parent
|
12
12
|
@children = {}
|
13
|
+
@child_names = []
|
13
14
|
@last_child = nil
|
14
15
|
end
|
15
16
|
|
16
17
|
def add_child(node)
|
17
18
|
@children[node.name] = node
|
19
|
+
@child_names << node.name
|
18
20
|
@last_child = node
|
19
21
|
node
|
20
22
|
end
|
21
23
|
|
24
|
+
def describe(offset = '')
|
25
|
+
"#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
26
|
+
end
|
27
|
+
alias desc describe
|
28
|
+
|
22
29
|
def find_child(name)
|
23
30
|
children.each do |child_name, child|
|
24
31
|
return child if child_name == name
|
data/lib/cascading/cascade.rb
CHANGED
@@ -22,6 +22,10 @@ module Cascading
|
|
22
22
|
flow
|
23
23
|
end
|
24
24
|
|
25
|
+
def describe(offset = '')
|
26
|
+
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
27
|
+
end
|
28
|
+
|
25
29
|
def draw(dir, properties = nil)
|
26
30
|
@children.each do |name, flow|
|
27
31
|
flow.connect(properties).writeDOT("#{dir}/#{name}.dot")
|
data/lib/cascading/cascading.rb
CHANGED
@@ -25,9 +25,14 @@ module Cascading
|
|
25
25
|
flow
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
28
|
+
def describe
|
29
|
+
Cascade.all.map{ |cascade| cascade.describe }.join("\n")
|
30
|
+
end
|
31
|
+
alias desc describe
|
32
|
+
|
33
|
+
# See ExprStub.expr
|
34
|
+
def expr(expression, params = {})
|
35
|
+
ExprStub.expr(expression, params)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Creates a cascading.tuple.Fields instance from a string or an array of strings.
|
@@ -42,7 +47,7 @@ module Cascading
|
|
42
47
|
end
|
43
48
|
raise "Fields cannot be nil: #{fields.inspect}" if fields.include?(nil)
|
44
49
|
end
|
45
|
-
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum)
|
50
|
+
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
|
46
51
|
end
|
47
52
|
|
48
53
|
def all_fields
|
@@ -1,30 +1,35 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
6
|
-
|
7
|
-
class CascadingException < StandardError
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
1
|
+
# NativeException wrapper that prints the full nested stack trace of the Java
|
2
|
+
# exception and all of its causes wrapped by the NativeException.
|
3
|
+
# NativeException by default reveals only the first cause, which is
|
4
|
+
# insufficient for tracing cascading.jruby errors into JRuby code or revealing
|
5
|
+
# underlying Janino expression problems.
|
6
|
+
module Cascading
|
7
|
+
class CascadingException < StandardError
|
8
|
+
def initialize(native_exception, message)
|
9
|
+
@ne = native_exception
|
10
|
+
super("#{message}\n#{trace_causes(@ne, 1)}")
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
def cause(depth)
|
14
|
+
fetch_cause(@ne, depth)
|
15
|
+
end
|
16
16
|
|
17
|
-
|
17
|
+
private
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
def fetch_cause(ne, depth)
|
20
|
+
return ne if depth <= 1
|
21
|
+
fetch_cause(ne.cause, depth - 1)
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
24
|
+
def trace_causes(ne, depth)
|
25
|
+
return unless ne
|
26
|
+
trace = "Cause #{depth}: #{ne.respond_to?(:java_class) ? ne.java_class : ne.class}: #{ne}\n"
|
27
|
+
if ne.respond_to?(:stack_trace)
|
28
|
+
trace += "#{ne.stack_trace.map{ |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n")}\n"
|
29
|
+
elsif ne.respond_to?(:backtrace)
|
30
|
+
trace += " #{ne.backtrace.join("\n ")}\n"
|
31
|
+
end
|
32
|
+
trace += "#{trace_causes(ne.cause, depth + 1)}"
|
33
|
+
end
|
29
34
|
end
|
30
35
|
end
|
data/lib/cascading/expr_stub.rb
CHANGED
@@ -1,33 +1,164 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
module Cascading
|
2
|
+
class ExprStub
|
3
|
+
attr_accessor :expression, :types, :input_expression
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
# ExprStub requires a Janino expression decorated with field types. For
|
6
|
+
# example: '"Found: " + (x:int + y:int) + " " + z:string'. Type names are
|
7
|
+
# defined in Cascading::JAVA_TYPE_MAP.
|
8
|
+
def initialize(expression)
|
9
|
+
@input_expression = expression
|
10
|
+
@expression = expression.dup
|
11
|
+
@types = {}
|
7
12
|
|
8
|
-
|
13
|
+
# Simple regexp based parser for types
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
+
JAVA_TYPE_MAP.each do |sym, klass|
|
16
|
+
@expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
|
17
|
+
name = match.split(/:/).first.gsub(/\s+/, "")
|
18
|
+
@types[name] = klass
|
19
|
+
match.gsub(/:#{sym.to_s}/, "")
|
20
|
+
end
|
15
21
|
end
|
16
22
|
end
|
17
|
-
end
|
18
23
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
def to_s
|
25
|
+
@input_expression
|
26
|
+
end
|
27
|
+
|
28
|
+
# Convenience constructor for an ExprStub that optionally performs
|
29
|
+
# validation. Takes a string to use as a Janino expression and an optional
|
30
|
+
# params hash. By default, the param :validate is set to true (performs
|
31
|
+
# expression validation using default actual argument values) and the param
|
32
|
+
# :validate_with is set to {} (which doesn't override any of the default
|
33
|
+
# actual argument values used for validation).
|
34
|
+
def self.expr(expression, params = {})
|
35
|
+
params = { :validate => true, :validate_with => {} }.merge(params)
|
36
|
+
expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
|
37
|
+
expr_stub.validate(params[:validate_with]) if params[:validate]
|
38
|
+
puts "Expression validation is disabled for '#{expression}'" unless params[:validate]
|
39
|
+
expr_stub
|
40
|
+
end
|
41
|
+
|
42
|
+
# Scan, parse, and compile expression, then return this ExprStub upon
|
43
|
+
# success. Throws a CascadingException upon failure.
|
44
|
+
def compile
|
45
|
+
evaluator
|
46
|
+
self
|
47
|
+
end
|
48
|
+
|
49
|
+
# Evaluates this ExprStub given a hash mapping argument names to argument
|
50
|
+
# values. Names may be strings or symbols. Throws a CascadingException
|
51
|
+
# upon failure.
|
52
|
+
def eval(actual_args)
|
53
|
+
actual_args = actual_args.inject({}) do |string_keys, (arg, value)|
|
54
|
+
string_keys[arg.to_s] = specific_to_java(value, @types[arg.to_s])
|
55
|
+
string_keys
|
56
|
+
end
|
57
|
+
args, values = split_hash(actual_args)
|
58
|
+
unused = validate_fields(args)
|
59
|
+
return self.eval(actual_args.reject{ |arg, value| unused.include?(arg) }) unless unused.empty?
|
60
|
+
evaluate(values)
|
61
|
+
end
|
62
|
+
|
63
|
+
# Evaluates this ExprStub with default values for each actual argument.
|
64
|
+
# Values may be overridden with the optional actual_args argument, which
|
65
|
+
# accepts a hash like ExprStub#eval. Throws a CascadingException upon
|
66
|
+
# failure.
|
67
|
+
def validate(actual_args = {})
|
68
|
+
self.eval(test_values.merge(actual_args))
|
69
|
+
end
|
70
|
+
|
71
|
+
def validate_scope(scope)
|
72
|
+
validate_fields(scope.values_fields.to_a)
|
73
|
+
end
|
74
|
+
|
75
|
+
# Throws an exception if any arguments required by this ExprStub are
|
76
|
+
# missing from fields. Returns those fields which are unused. Throws an
|
77
|
+
# ExprArgException upon failure.
|
78
|
+
def validate_fields(fields)
|
79
|
+
names = @types.keys.sort
|
80
|
+
missing = names - fields
|
81
|
+
raise ExprArgException.new("Expression '#{@expression}' is missing these fields: #{missing.inspect}\nRequires: #{names.inspect}, found: #{fields.inspect}") unless missing.empty?
|
82
|
+
fields - names
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
def split_hash(h)
|
88
|
+
keys, values = h.sort.inject([[], []]) do |(keys, values), (key, value)|
|
89
|
+
[keys << key, values << value]
|
90
|
+
end
|
91
|
+
[keys, values]
|
92
|
+
end
|
93
|
+
|
94
|
+
# Evaluate this ExprStub given an array of actual arguments. Throws an
|
95
|
+
# CascadingException upon failure. GOTCHA: requires values to be in order
|
96
|
+
# of lexicographically sorted formal arguments.
|
97
|
+
def evaluate(values)
|
98
|
+
begin
|
99
|
+
evaluator.evaluate(values.to_java)
|
100
|
+
rescue NativeException => ne
|
101
|
+
raise CascadingException.new(ne, "Exception encountered while evaluating '#{@expression}' with arguments: #{values.inspect}")
|
102
|
+
end
|
23
103
|
end
|
24
|
-
[keys, values]
|
25
|
-
end
|
26
104
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
105
|
+
# Building an evaluator ensures that the expression scans, parses, and
|
106
|
+
# compiles
|
107
|
+
def evaluator
|
108
|
+
begin
|
109
|
+
names, types = names_and_types
|
110
|
+
Java::OrgCodehausJanino::ExpressionEvaluator.new(@expression, java.lang.Comparable.java_class, names, types)
|
111
|
+
rescue NativeException => ne
|
112
|
+
raise CascadingException.new(ne, "Exception encountered while compiling '#{@expression}'")
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# Extract Java names and types from @types hash
|
117
|
+
def names_and_types
|
118
|
+
names, types = split_hash(@types)
|
119
|
+
[names.to_java(java.lang.String), types.to_java(java.lang.Class)]
|
120
|
+
end
|
121
|
+
|
122
|
+
# Makes best effort to convert Ruby numbers into the Java numeric type
|
123
|
+
# exepcted by a Janino expression. However, if the conversion fails, it
|
124
|
+
# returns the original value so that the exception thrown will be from
|
125
|
+
# Janino, not this code.
|
126
|
+
def specific_to_java(value, type)
|
127
|
+
# GOTCHA: Java's Float and Long have constructors that take strings and
|
128
|
+
# parse them. If value is a string representation of a number, this code
|
129
|
+
# could coerce it to a number whereas invocation of the Janino expression
|
130
|
+
# would fail. We therefore punt if value is a String.
|
131
|
+
return value if value.kind_of?(::String)
|
132
|
+
if type == java.lang.Float.java_class
|
133
|
+
return value if value.kind_of?(::Integer)
|
134
|
+
java.lang.Float.new(value) rescue value
|
135
|
+
elsif type == java.lang.Long.java_class && JRUBY_VERSION <= '1.2.0'
|
136
|
+
return value if value.kind_of?(::Float)
|
137
|
+
java.lang.Long.new(value) rescue value
|
138
|
+
elsif type == java.lang.Integer.java_class && JRUBY_VERSION > '1.2.0'
|
139
|
+
return value if value.kind_of?(::Float)
|
140
|
+
java.lang.Integer.new(value) rescue value
|
141
|
+
else
|
142
|
+
value
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
@@defaults = {
|
147
|
+
java.lang.Integer.java_class => JRUBY_VERSION > '1.2.0' ? java.lang.Integer.new(0) : 0,
|
148
|
+
java.lang.Boolean.java_class => false,
|
149
|
+
java.lang.Double.java_class => 0.0,
|
150
|
+
java.lang.Float.java_class => java.lang.Float.new(0.0),
|
151
|
+
java.lang.Long.java_class => JRUBY_VERSION > '1.2.0' ? 0 : java.lang.Long.new(0),
|
152
|
+
java.lang.String.java_class => nil,
|
153
|
+
}
|
154
|
+
|
155
|
+
def test_values
|
156
|
+
@types.sort.inject({}) do |test_values, (name, type)|
|
157
|
+
test_values[name] = @@defaults[type]
|
158
|
+
test_values
|
159
|
+
end
|
160
|
+
end
|
32
161
|
end
|
162
|
+
|
163
|
+
class ExprArgException < StandardError; end
|
33
164
|
end
|
data/lib/cascading/flow.rb
CHANGED
@@ -8,10 +8,10 @@ module Cascading
|
|
8
8
|
class Flow < Cascading::Node
|
9
9
|
extend Registerable
|
10
10
|
|
11
|
-
attr_accessor :properties, :sources, :sinks, :outgoing_scopes, :listeners
|
11
|
+
attr_accessor :properties, :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
|
12
12
|
|
13
13
|
def initialize(name, parent)
|
14
|
-
@properties, @sources, @sinks, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
|
14
|
+
@properties, @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, {}, []
|
15
15
|
super(name, parent)
|
16
16
|
self.class.add(name, self)
|
17
17
|
end
|
@@ -29,9 +29,9 @@ module Cascading
|
|
29
29
|
# reference a path.
|
30
30
|
def sink(*args)
|
31
31
|
if (args.size == 2)
|
32
|
-
|
32
|
+
sinks[args[0]] = args[1]
|
33
33
|
elsif (args.size == 1)
|
34
|
-
|
34
|
+
sinks[name] = args[0]
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
@@ -40,14 +40,24 @@ module Cascading
|
|
40
40
|
# reference a path.
|
41
41
|
def source(*args)
|
42
42
|
if (args.size == 2)
|
43
|
-
|
44
|
-
|
43
|
+
sources[args[0]] = args[1]
|
44
|
+
incoming_scopes[args[0]] = Scope.tap_scope(args[1], args[0])
|
45
|
+
outgoing_scopes[args[0]] = incoming_scopes[args[0]]
|
45
46
|
elsif (args.size == 1)
|
46
|
-
|
47
|
-
|
47
|
+
sources[name] = args[0]
|
48
|
+
incoming_scopes[name] = Scope.empty_scope(name)
|
49
|
+
outgoing_scopes[name] = incoming_scopes[name]
|
48
50
|
end
|
49
51
|
end
|
50
52
|
|
53
|
+
def describe(offset = '')
|
54
|
+
description = "#{offset}#{name}:flow\n"
|
55
|
+
description += "#{sources.keys.map{ |source| "#{offset} #{source}:source :: #{incoming_scopes[source].values_fields.to_a.inspect}" }.join("\n")}\n"
|
56
|
+
description += "#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}\n"
|
57
|
+
description += "#{sinks.keys.map{ |sink| "#{offset} #{sink}:sink :: #{outgoing_scopes[sink].values_fields.to_a.inspect}" }.join("\n")}"
|
58
|
+
description
|
59
|
+
end
|
60
|
+
|
51
61
|
def scope(name = nil)
|
52
62
|
raise 'Must specify name if no children have been defined yet' unless name || last_child
|
53
63
|
name ||= last_child.name
|
data/lib/cascading.rb
CHANGED
data/samples/copy.rb
CHANGED
@@ -10,7 +10,8 @@ cascade 'copy' do
|
|
10
10
|
|
11
11
|
assembly 'input' do
|
12
12
|
rename 'line' => 'value'
|
13
|
-
|
13
|
+
# We override validate_with because we know line will never be null
|
14
|
+
reject 'value:string.indexOf("R") == -1', :validate_with => { :value => 'nothinghere' }
|
14
15
|
end
|
15
16
|
|
16
17
|
sink 'input', tap('output/copy', :sink_mode => :replace)
|
data/spec/cascading_spec.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
|
-
|
3
|
+
context Cascading do
|
4
|
+
it 'should handle string and integer field names' do
|
5
|
+
f = fields(['a', 1, 'b', 2])
|
6
|
+
f.to_a.should == ['a', 1, 'b', 2]
|
7
|
+
end
|
8
|
+
|
4
9
|
it 'should dedup field names from multiple sources' do
|
5
10
|
left_names = ['a', 'b', 'c', 'd', 'e']
|
6
11
|
mid_names = ['a', 'f']
|