cascading.jruby 0.0.10 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +13 -160
- data/README.md +35 -0
- data/lib/cascading.rb +8 -41
- data/lib/cascading/aggregations.rb +216 -71
- data/lib/cascading/assembly.rb +409 -606
- data/lib/cascading/base.rb +22 -0
- data/lib/cascading/cascade.rb +55 -18
- data/lib/cascading/cascading.rb +137 -47
- data/lib/cascading/expr_stub.rb +31 -17
- data/lib/cascading/ext/array.rb +17 -0
- data/lib/cascading/filter_operations.rb +101 -0
- data/lib/cascading/flow.rb +87 -23
- data/lib/cascading/identity_operations.rb +82 -0
- data/lib/cascading/mode.rb +14 -10
- data/lib/cascading/operations.rb +109 -174
- data/lib/cascading/regex_operations.rb +133 -0
- data/lib/cascading/scope.rb +32 -9
- data/lib/cascading/sub_assembly.rb +8 -5
- data/lib/cascading/tap.rb +41 -17
- data/lib/cascading/text_operations.rb +67 -0
- data/test/mock_assemblies.rb +55 -0
- data/test/test_assembly.rb +23 -25
- data/test/test_local_execution.rb +7 -7
- data/test/test_operations.rb +0 -10
- metadata +76 -74
- data/History.txt +0 -58
data/lib/cascading/base.rb
CHANGED
@@ -1,7 +1,22 @@
|
|
1
1
|
module Cascading
|
2
|
+
# A Node is a Cascade, Flow, or Assembly, all of which are composite
|
3
|
+
# structures that describe the hierarchical structure of your job. A Cascade
|
4
|
+
# may contain many Flows and a Flow and Assembly may contain many Assemblies
|
5
|
+
# (branches in the case of the Assembly). Nodes are named, contain parent
|
6
|
+
# and child pointers, and keep track of their children both by name and by
|
7
|
+
# insertion order.
|
8
|
+
#
|
9
|
+
# Nodes must be uniquely named within the scope of their parent so that they
|
10
|
+
# can be unambiguously looked up for connecting pipes within a flow. However, we
|
11
|
+
# only ensure that children are uniquely named upon insertion; full
|
12
|
+
# uniqueness isn't required until Node#find_child is called (this allows for
|
13
|
+
# name reuse in a few limited circumstances that was important when migrating
|
14
|
+
# the Etsy workload to enforce these constraints).
|
2
15
|
class Node
|
3
16
|
attr_accessor :name, :parent, :children, :child_names, :last_child
|
4
17
|
|
18
|
+
# A Node requires a name and a parent when it is constructed. Children are
|
19
|
+
# added later with Node#add_child.
|
5
20
|
def initialize(name, parent)
|
6
21
|
@name = name
|
7
22
|
@parent = parent
|
@@ -23,10 +38,15 @@ module Cascading
|
|
23
38
|
node
|
24
39
|
end
|
25
40
|
|
41
|
+
# The qualified name of a node is formed from the name of all nodes in the
|
42
|
+
# path from the root to that node.
|
26
43
|
def qualified_name
|
27
44
|
parent ? "#{parent.qualified_name}.#{name}" : name
|
28
45
|
end
|
29
46
|
|
47
|
+
# Produces a textual description of this Node. This method is overridden
|
48
|
+
# by all classes inheriting Node, so it serves mainly as a template for
|
49
|
+
# describing a node with children.
|
30
50
|
def describe(offset = '')
|
31
51
|
"#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
32
52
|
end
|
@@ -44,6 +64,8 @@ module Cascading
|
|
44
64
|
all_children_with_name.first
|
45
65
|
end
|
46
66
|
|
67
|
+
# Returns the root Node, the topmost parent of the hierarchy (typically a
|
68
|
+
# Cascade or Flow).
|
47
69
|
def root
|
48
70
|
return self unless parent
|
49
71
|
parent.root
|
data/lib/cascading/cascade.rb
CHANGED
@@ -2,6 +2,13 @@ require 'cascading/base'
|
|
2
2
|
require 'yaml'
|
3
3
|
|
4
4
|
module Cascading
|
5
|
+
# A Cascade wraps a c.c.Cascade. A Cascade is composed of Flows, which are
|
6
|
+
# constructed using the Cascade#flow method within the block passed to the
|
7
|
+
# Cascading::cascade constructor. Many flows may be nested within a Cascade.
|
8
|
+
#
|
9
|
+
# Note that you are not required to use a Cascade to wrap your job. Instead,
|
10
|
+
# you could start with a top-level Flow, which you might prefer if you have
|
11
|
+
# no need of a c.c.Cascade's make-like semantics wrt sinks.
|
5
12
|
class Cascade < Cascading::Node
|
6
13
|
extend Registerable
|
7
14
|
|
@@ -10,46 +17,72 @@ module Cascading
|
|
10
17
|
# Do not use this constructor directly; instead, use Cascading::cascade to
|
11
18
|
# build cascades.
|
12
19
|
#
|
13
|
-
# Builds a
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
# Builds a Cascade given a name.
|
21
|
+
#
|
22
|
+
# The named options are:
|
23
|
+
# [properties] Properties hash which will be used as the default properties
|
24
|
+
# for all child flows. Properties must be a Ruby Hash with
|
25
|
+
# string keys and values and will be copied before being
|
26
|
+
# passed into each flow in the cascade. See Flow#initialize
|
27
|
+
# for details on how flows handle properties.
|
28
|
+
# [mode] Mode which will be used as the default mode for all child flows.
|
29
|
+
# See Mode.parse for details.
|
30
|
+
def initialize(name, options = {})
|
31
|
+
@properties = options[:properties] || {}
|
32
|
+
@mode = options[:mode]
|
23
33
|
super(name, nil) # A Cascade cannot have a parent
|
24
34
|
self.class.add(name, self)
|
25
35
|
end
|
26
36
|
|
27
|
-
# Builds a child
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
|
37
|
+
# Builds a child Flow in this Cascade given a name and block.
|
38
|
+
#
|
39
|
+
# The named options are:
|
40
|
+
# [properties] Properties hash which will override the default properties
|
41
|
+
# stored in this cascade.
|
42
|
+
# [mode] Mode which will override the default mode stored in this cascade.
|
43
|
+
#
|
44
|
+
# Example:
|
45
|
+
# cascade 'wordcount', :mode => :local do
|
46
|
+
# flow 'first_step' do
|
47
|
+
# ...
|
48
|
+
# end
|
49
|
+
#
|
50
|
+
# flow 'second_step' do
|
51
|
+
# ...
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
def flow(name, options = {}, &block)
|
32
55
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
33
56
|
|
34
|
-
|
35
|
-
|
57
|
+
options[:properties] ||= properties.dup
|
58
|
+
options[:mode] ||= mode
|
36
59
|
|
37
|
-
flow = Flow.new(name, self,
|
60
|
+
flow = Flow.new(name, self, options)
|
38
61
|
add_child(flow)
|
39
62
|
flow.instance_eval(&block)
|
40
63
|
flow
|
41
64
|
end
|
42
65
|
|
66
|
+
# Produces a textual description of this Cascade. The description details
|
67
|
+
# the structure of the Cascade, the sources and sinks of each Flow, and the
|
68
|
+
# input and output fields of each Assembly. The offset parameter allows
|
69
|
+
# for this describe to be nested within a calling context, which lets us
|
70
|
+
# indent the structural hierarchy of a job.
|
43
71
|
def describe(offset = '')
|
44
72
|
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
45
73
|
end
|
46
74
|
|
75
|
+
# Writes out the DOT file describing the structure of this Cascade.
|
76
|
+
#
|
77
|
+
# NOTE: will be moved to Job in a later version and also present on Flow
|
47
78
|
def draw(dir)
|
48
79
|
@children.each do |name, flow|
|
49
80
|
flow.connect.writeDOT("#{dir}/#{name}.dot")
|
50
81
|
end
|
51
82
|
end
|
52
83
|
|
84
|
+
# Builds a map, keyed by flow name, of the sink metadata for each child
|
85
|
+
# flow. Currently, this contains only the field names of each sink.
|
53
86
|
def sink_metadata
|
54
87
|
@children.inject({}) do |sink_fields, (name, flow)|
|
55
88
|
sink_fields[name] = flow.sink_metadata
|
@@ -57,12 +90,16 @@ module Cascading
|
|
57
90
|
end
|
58
91
|
end
|
59
92
|
|
93
|
+
# Writes the mapping produced by Cascade#sink_metadata to a file at the
|
94
|
+
# given path in YAML.
|
60
95
|
def write_sink_metadata(file_name)
|
61
96
|
File.open(file_name, 'w') do |file|
|
62
97
|
YAML.dump(sink_metadata, file)
|
63
98
|
end
|
64
99
|
end
|
65
100
|
|
101
|
+
# Connects this Cascade, producing a c.c.Cascade, which is then completed,
|
102
|
+
# executing it. Child flows are connected, so no parameters are required.
|
66
103
|
def complete
|
67
104
|
begin
|
68
105
|
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
|
data/lib/cascading/cascading.rb
CHANGED
@@ -1,6 +1,33 @@
|
|
1
|
+
require 'cascading/cascade'
|
2
|
+
require 'cascading/flow'
|
1
3
|
require 'cascading/expr_stub'
|
2
4
|
|
5
|
+
# The Cascading module contains all of the cascading.jruby DSL. Inserting the
|
6
|
+
# following into your script:
|
7
|
+
# require 'rubygems'
|
8
|
+
# require 'cascading'
|
9
|
+
# includes this module at the top level, making all of its features available.
|
10
|
+
#
|
11
|
+
# To build a dataflow like the one in the README.md or
|
12
|
+
# {samples}[http://github.com/mrwalker/cascading.jruby/tree/master/samples],
|
13
|
+
# start by looking at Cascade or Flow. These are the
|
14
|
+
# highest level structures you'll use to put together your job.
|
15
|
+
#
|
16
|
+
# Within a flow, you'll connect sources to sinks by way of Assembly, which
|
17
|
+
# refers to "pipe assemblies" from Cascading. Within an Assembly, you'll use
|
18
|
+
# functions and filters (see Operations, IdentityOperations, RegexOperations,
|
19
|
+
# FilterOperations, and TextOperations) as well as Assembly#group_by,
|
20
|
+
# Assembly#union, and Assembly#join. You can provide those last pipes with a
|
21
|
+
# block that can select operations from Aggregations.
|
22
|
+
#
|
23
|
+
# Finally, you'll want to address the execution of your job, whether it be
|
24
|
+
# locally testing or running remotely on a Hadoop cluster. See the Mode class
|
25
|
+
# for the available modes, and parameterize your script such that it can operate
|
26
|
+
# in Cascading local mode locally and in Hadoop mode when run in a jar produced
|
27
|
+
# with {Jading}[http://github.com/mrwalker/jading].
|
3
28
|
module Cascading
|
29
|
+
# Mapping that defines a convenient syntax for specifying Java classes, used
|
30
|
+
# in Janino expressions and elsewhere.
|
4
31
|
JAVA_TYPE_MAP = {
|
5
32
|
:int => java.lang.Integer.java_class, :long => java.lang.Long.java_class,
|
6
33
|
:bool => java.lang.Boolean.java_class, :double => java.lang.Double.java_class,
|
@@ -24,44 +51,84 @@ module Cascading
|
|
24
51
|
# directly building their own cascades and flows so that jading can send them
|
25
52
|
# default properties.
|
26
53
|
|
27
|
-
# Builds a top-level
|
28
|
-
#
|
29
|
-
|
54
|
+
# Builds a top-level Cascade given a name and a block.
|
55
|
+
#
|
56
|
+
# The named options are:
|
57
|
+
# [properties] See Cascade#initialize
|
58
|
+
# [mode] See Cascade#initialize
|
59
|
+
#
|
60
|
+
# Example:
|
61
|
+
# cascade 'wordcount', :mode => :local do
|
62
|
+
# flow 'first_step' do
|
63
|
+
# ...
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# flow 'second_step' do
|
67
|
+
# ...
|
68
|
+
# end
|
69
|
+
# end
|
70
|
+
def cascade(name, options = {}, &block)
|
30
71
|
raise "Could not build cascade '#{name}'; block required" unless block_given?
|
31
|
-
raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if
|
72
|
+
raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if options[:properties]
|
32
73
|
|
33
|
-
|
74
|
+
options[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
|
34
75
|
|
35
|
-
cascade = Cascade.new(name,
|
76
|
+
cascade = Cascade.new(name, options)
|
36
77
|
cascade.instance_eval(&block)
|
37
78
|
cascade
|
38
79
|
end
|
39
80
|
|
40
|
-
# Builds a top-level
|
41
|
-
# flows with no cascades.
|
42
|
-
#
|
43
|
-
|
81
|
+
# Builds a top-level Flow given a name and block for applications built of
|
82
|
+
# flows with no cascades.
|
83
|
+
#
|
84
|
+
# The named options are:
|
85
|
+
# [properties] See Flow#initialize
|
86
|
+
# [mode] See Flow#initialize
|
87
|
+
#
|
88
|
+
# Example:
|
89
|
+
# flow 'wordcount', :mode => :local do
|
90
|
+
# assembly 'first_step' do
|
91
|
+
# ...
|
92
|
+
# end
|
93
|
+
#
|
94
|
+
# assembly 'second_step' do
|
95
|
+
# ...
|
96
|
+
# end
|
97
|
+
# end
|
98
|
+
def flow(name, options = {}, &block)
|
44
99
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
45
|
-
raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if
|
100
|
+
raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if options[:properties]
|
46
101
|
|
47
|
-
|
102
|
+
options[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
|
48
103
|
|
49
|
-
flow = Flow.new(name, nil,
|
104
|
+
flow = Flow.new(name, nil, options)
|
50
105
|
flow.instance_eval(&block)
|
51
106
|
flow
|
52
107
|
end
|
53
108
|
|
109
|
+
# Produces a textual description of all Cascades in the global registry. The
|
110
|
+
# description details the structure of the Cascades, the sources and sinks of
|
111
|
+
# each Flow, and the input and output fields of each Assembly.
|
112
|
+
#
|
113
|
+
# NOTE: will be moved to Job in later version
|
54
114
|
def describe
|
55
115
|
Cascade.all.map{ |cascade| cascade.describe }.join("\n")
|
56
116
|
end
|
57
117
|
alias desc describe
|
58
118
|
|
59
119
|
# See ExprStub.expr
|
60
|
-
def expr(expression,
|
61
|
-
ExprStub.expr(expression,
|
120
|
+
def expr(expression, options = {})
|
121
|
+
ExprStub.expr(expression, options)
|
62
122
|
end
|
63
123
|
|
64
|
-
#
|
124
|
+
# Utility method for creating Cascading c.t.Fields from a field name (string)
|
125
|
+
# or list of field names (array of strings). If the input fields is already a
|
126
|
+
# c.t.Fields or nil, it is passed through. This allows for flexible use of
|
127
|
+
# the method at multiple layers in the DSL.
|
128
|
+
#
|
129
|
+
# Example:
|
130
|
+
# cascading_fields = fields(['first', 'second', 'third'])
|
131
|
+
# # cascading_fields.to_a == ['first', 'second', 'third']
|
65
132
|
def fields(fields)
|
66
133
|
if fields.nil?
|
67
134
|
return nil
|
@@ -76,27 +143,45 @@ module Cascading
|
|
76
143
|
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
|
77
144
|
end
|
78
145
|
|
146
|
+
# Convenience method wrapping c.t.Fields::ALL
|
79
147
|
def all_fields
|
80
148
|
Java::CascadingTuple::Fields::ALL
|
81
149
|
end
|
82
150
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
def difference_fields(*fields)
|
88
|
-
fields(fields[1..-1].inject(fields.first.to_a){ |acc, arr| acc - arr.to_a })
|
151
|
+
# Convenience method wrapping c.t.Fields::VALUES
|
152
|
+
def last_grouping_fields
|
153
|
+
Java::CascadingTuple::Fields::VALUES
|
89
154
|
end
|
90
155
|
|
91
|
-
|
92
|
-
|
156
|
+
# Computes fields formed by removing remove_fields from base_fields. Operates
|
157
|
+
# only on named fields, not positional fields.
|
158
|
+
#
|
159
|
+
# Example:
|
160
|
+
# base_fields = fields(['a', 'b', 'c'])
|
161
|
+
# remove_fields = fields(['b'])
|
162
|
+
# result_fields = difference_fields(base_fields, remove_fields)
|
163
|
+
# # result_fields.to_a == ['a', 'c']
|
164
|
+
def difference_fields(base_fields, remove_fields)
|
165
|
+
fields(base_fields.to_a - remove_fields.to_a)
|
93
166
|
end
|
94
167
|
|
168
|
+
# Combines fields deduplicating them with trailing underscores as necessary.
|
169
|
+
# This is used in joins to avoid requiring the caller to deduplicate fields before
|
170
|
+
# they are joined.
|
95
171
|
def dedup_fields(*fields)
|
96
172
|
raise 'Can only be applied to declarators' unless fields.all?{ |f| f.is_declarator? }
|
97
173
|
fields(dedup_field_names(*fields.map{ |f| f.to_a }))
|
98
174
|
end
|
99
175
|
|
176
|
+
# Helper used by dedup_fields that operates on arrays of field names rather
|
177
|
+
# than fields objects.
|
178
|
+
#
|
179
|
+
# Example:
|
180
|
+
# left_names = ['a', 'b']
|
181
|
+
# mid_names = ['a', 'c']
|
182
|
+
# right_names = ['a', 'd']
|
183
|
+
# deduped_names = dedup_field_names(left_names, mid_names, right_names)
|
184
|
+
# # deduped_names == ['a', 'b', 'a_', 'c', 'a__', 'd']
|
100
185
|
def dedup_field_names(*names)
|
101
186
|
names.inject([]) do |acc, arr|
|
102
187
|
acc + arr.map{ |e| search_field_name(acc, e) }
|
@@ -106,30 +191,22 @@ module Cascading
|
|
106
191
|
def search_field_name(names, candidate)
|
107
192
|
names.include?(candidate) ? search_field_name(names, "#{candidate}_") : candidate
|
108
193
|
end
|
109
|
-
|
110
|
-
def last_grouping_fields
|
111
|
-
Java::CascadingTuple::Fields::VALUES
|
112
|
-
end
|
113
|
-
|
114
|
-
def results_fields
|
115
|
-
Java::CascadingTuple::Fields::RESULTS
|
116
|
-
end
|
194
|
+
private :search_field_name
|
117
195
|
|
118
196
|
# Creates a TextLine scheme (can be used in both Cascading local and hadoop
|
119
|
-
# modes). Positional args are used if
|
120
|
-
# provided.
|
197
|
+
# modes). Positional args are used if :source_fields is not provided.
|
121
198
|
#
|
122
199
|
# The named options are:
|
123
|
-
#
|
124
|
-
#
|
125
|
-
#
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
#
|
130
|
-
def text_line_scheme(*
|
131
|
-
options =
|
132
|
-
source_fields = fields(options[:source_fields] || (
|
200
|
+
# [source_fields] Fields to be read from a source with this scheme. Defaults
|
201
|
+
# to ['offset', 'line'].
|
202
|
+
# [sink_fields] Fields to be written to a sink with this scheme. Defaults to
|
203
|
+
# all_fields.
|
204
|
+
# [compression] A symbol, either :enable or :disable, that
|
205
|
+
# governs the TextLine scheme's compression. Defaults to the
|
206
|
+
# default TextLine compression (only applies to c.s.h.TextLine).
|
207
|
+
def text_line_scheme(*args_with_options)
|
208
|
+
options, source_fields = args_with_options.extract_options!, args_with_options
|
209
|
+
source_fields = fields(options[:source_fields] || (source_fields.empty? ? ['offset', 'line'] : source_fields))
|
133
210
|
sink_fields = fields(options[:sink_fields]) || all_fields
|
134
211
|
sink_compression = case options[:compression]
|
135
212
|
when :enable then Java::CascadingSchemeHadoop::TextLine::Compress::ENABLE
|
@@ -153,17 +230,30 @@ module Cascading
|
|
153
230
|
}
|
154
231
|
end
|
155
232
|
|
233
|
+
# Convenience access to MultiTap.multi_source_tap. This constructor is more
|
234
|
+
# "DSL-like" because it allows you to pass taps directly as actual args rather
|
235
|
+
# than in an array:
|
236
|
+
# multi_source_tap tap1, tap2, tap3, ..., tapn
|
237
|
+
#
|
238
|
+
# See MultiTap.multi_source_tap for more details.
|
156
239
|
def multi_source_tap(*taps)
|
157
240
|
MultiTap.multi_source_tap(taps)
|
158
241
|
end
|
159
242
|
|
243
|
+
# Convenience access to MultiTap.multi_sink_tap. This constructor is more
|
244
|
+
# "DSL-like" because it allows you to pass taps directly as actual args rather
|
245
|
+
# than in an array:
|
246
|
+
# multi_sink_tap tap1, tap2, tap3, ..., tapn
|
247
|
+
#
|
248
|
+
# See MultiTap.multi_sink_tap for more details.
|
160
249
|
def multi_sink_tap(*taps)
|
161
250
|
MultiTap.multi_sink_tap(taps)
|
162
251
|
end
|
163
252
|
|
164
|
-
#
|
165
|
-
|
166
|
-
|
253
|
+
# Convenience constructor for a Tap, that accepts the same options as that
|
254
|
+
# class' constructor. See Tap for more details.
|
255
|
+
def tap(path, options = {})
|
256
|
+
Tap.new(path, options)
|
167
257
|
end
|
168
258
|
|
169
259
|
# Constructs properties to be passed to Flow#complete or Cascade#complete
|
data/lib/cascading/expr_stub.rb
CHANGED
@@ -3,15 +3,15 @@ module Cascading
|
|
3
3
|
attr_accessor :expression, :types, :input_expression
|
4
4
|
|
5
5
|
# ExprStub requires a Janino expression decorated with field types. For
|
6
|
-
# example:
|
7
|
-
#
|
6
|
+
# example:
|
7
|
+
# expr('"Found: " + (x:int + y:int) + " " + z:string')
|
8
|
+
# Type names are defined in Cascading::JAVA_TYPE_MAP.
|
8
9
|
def initialize(expression)
|
9
10
|
@input_expression = expression
|
10
11
|
@expression = expression.dup
|
11
12
|
@types = {}
|
12
13
|
|
13
14
|
# Simple regexp based parser for types
|
14
|
-
|
15
15
|
JAVA_TYPE_MAP.each do |sym, klass|
|
16
16
|
@expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
|
17
17
|
name = match.split(/:/).first.gsub(/\s+/, "")
|
@@ -21,21 +21,38 @@ module Cascading
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
# Extract Java names and types from @types hash. Cascading constructors
|
25
|
+
# often require two separate Java Arrays in this fashion.
|
26
|
+
def names_and_types
|
27
|
+
names, types = split_hash(@types)
|
28
|
+
[names.to_java(java.lang.String), types.to_java(java.lang.Class)]
|
29
|
+
end
|
30
|
+
|
31
|
+
# Prints the original input expression.
|
24
32
|
def to_s
|
25
33
|
@input_expression
|
26
34
|
end
|
27
35
|
|
28
36
|
# Convenience constructor for an ExprStub that optionally performs
|
29
37
|
# validation. Takes a string to use as a Janino expression and an optional
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
|
35
|
-
|
38
|
+
# options hash.
|
39
|
+
#
|
40
|
+
# The named options are:
|
41
|
+
# [validate] A boolean indicating whether expression validation using
|
42
|
+
# default actual argument values should be performed. Defaults
|
43
|
+
# to true.
|
44
|
+
# [validate_with] A hash mapping field names (or symbols) to the value that
|
45
|
+
# should be used for validation. Strings default to nil,
|
46
|
+
# so if you have previously filtered nulls you might use a
|
47
|
+
# marker value like 'nulls_filtered'. Defaults to {}.
|
48
|
+
#
|
49
|
+
# Example:
|
50
|
+
# insert 'x_eq_y' => expr('x:string.equals(y:string)', :validate_with => { :x => 'nulls_filtered' })
|
51
|
+
def self.expr(expression, options = {})
|
52
|
+
options = { :validate => true, :validate_with => {} }.merge(options)
|
36
53
|
expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
|
37
|
-
expr_stub.validate(
|
38
|
-
puts "Expression validation is disabled for '#{expression}'" unless
|
54
|
+
expr_stub.validate(options[:validate_with]) if options[:validate]
|
55
|
+
puts "Expression validation is disabled for '#{expression}'" unless options[:validate]
|
39
56
|
expr_stub
|
40
57
|
end
|
41
58
|
|
@@ -68,6 +85,9 @@ module Cascading
|
|
68
85
|
self.eval(test_values.merge(actual_args))
|
69
86
|
end
|
70
87
|
|
88
|
+
# Given a scope, validates that the fields required by this ExprStub are
|
89
|
+
# available in the values fields of the scope. Returns those values fields
|
90
|
+
# which are unused in the expression.
|
71
91
|
def validate_scope(scope)
|
72
92
|
validate_fields(scope.values_fields.to_a)
|
73
93
|
end
|
@@ -113,12 +133,6 @@ module Cascading
|
|
113
133
|
end
|
114
134
|
end
|
115
135
|
|
116
|
-
# Extract Java names and types from @types hash
|
117
|
-
def names_and_types
|
118
|
-
names, types = split_hash(@types)
|
119
|
-
[names.to_java(java.lang.String), types.to_java(java.lang.Class)]
|
120
|
-
end
|
121
|
-
|
122
136
|
# Makes best effort to convert Ruby numbers into the Java numeric type
|
123
137
|
# expected by a Janino expression. However, if the conversion fails, it
|
124
138
|
# returns the original value so that the exception thrown will be from
|