cascading.jruby 0.0.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +13 -160
- data/README.md +35 -0
- data/lib/cascading.rb +8 -41
- data/lib/cascading/aggregations.rb +216 -71
- data/lib/cascading/assembly.rb +409 -606
- data/lib/cascading/base.rb +22 -0
- data/lib/cascading/cascade.rb +55 -18
- data/lib/cascading/cascading.rb +137 -47
- data/lib/cascading/expr_stub.rb +31 -17
- data/lib/cascading/ext/array.rb +17 -0
- data/lib/cascading/filter_operations.rb +101 -0
- data/lib/cascading/flow.rb +87 -23
- data/lib/cascading/identity_operations.rb +82 -0
- data/lib/cascading/mode.rb +14 -10
- data/lib/cascading/operations.rb +109 -174
- data/lib/cascading/regex_operations.rb +133 -0
- data/lib/cascading/scope.rb +32 -9
- data/lib/cascading/sub_assembly.rb +8 -5
- data/lib/cascading/tap.rb +41 -17
- data/lib/cascading/text_operations.rb +67 -0
- data/test/mock_assemblies.rb +55 -0
- data/test/test_assembly.rb +23 -25
- data/test/test_local_execution.rb +7 -7
- data/test/test_operations.rb +0 -10
- metadata +76 -74
- data/History.txt +0 -58
data/lib/cascading/base.rb
CHANGED
@@ -1,7 +1,22 @@
|
|
1
1
|
module Cascading
|
2
|
+
# A Node is a Cascade, Flow, or Assembly, all of which are composite
|
3
|
+
# structures that describe the hierarchical structure of your job. A Cascade
|
4
|
+
# may contain many Flows and a Flow and Assembly may contain many Assemblies
|
5
|
+
# (branches in the case of the Assembly). Nodes are named, contain parent
|
6
|
+
# and child pointers, and keep track of their children both by name and by
|
7
|
+
# insertion order.
|
8
|
+
#
|
9
|
+
# Nodes must be uniquely named within the scope of their parent so that they
|
10
|
+
# can be unambiguously looked up for connecting pipes within a flow. However, we
|
11
|
+
# only ensure that children are uniquely named upon insertion; full
|
12
|
+
# uniqueness isn't required until Node#find_child is called (this allows for
|
13
|
+
# name reuse in a few limited circumstances that was important when migrating
|
14
|
+
# the Etsy workload to enforce these constraints).
|
2
15
|
class Node
|
3
16
|
attr_accessor :name, :parent, :children, :child_names, :last_child
|
4
17
|
|
18
|
+
# A Node requires a name and a parent when it is constructed. Children are
|
19
|
+
# added later with Node#add_child.
|
5
20
|
def initialize(name, parent)
|
6
21
|
@name = name
|
7
22
|
@parent = parent
|
@@ -23,10 +38,15 @@ module Cascading
|
|
23
38
|
node
|
24
39
|
end
|
25
40
|
|
41
|
+
# The qualified name of a node is formed from the name of all nodes in the
|
42
|
+
# path from the root to that node.
|
26
43
|
def qualified_name
|
27
44
|
parent ? "#{parent.qualified_name}.#{name}" : name
|
28
45
|
end
|
29
46
|
|
47
|
+
# Produces a textual description of this Node. This method is overridden
|
48
|
+
# by all classes inheriting Node, so it serves mainly as a template for
|
49
|
+
# describing a node with children.
|
30
50
|
def describe(offset = '')
|
31
51
|
"#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
32
52
|
end
|
@@ -44,6 +64,8 @@ module Cascading
|
|
44
64
|
all_children_with_name.first
|
45
65
|
end
|
46
66
|
|
67
|
+
# Returns the root Node, the topmost parent of the hierarchy (typically a
|
68
|
+
# Cascade or Flow).
|
47
69
|
def root
|
48
70
|
return self unless parent
|
49
71
|
parent.root
|
data/lib/cascading/cascade.rb
CHANGED
@@ -2,6 +2,13 @@ require 'cascading/base'
|
|
2
2
|
require 'yaml'
|
3
3
|
|
4
4
|
module Cascading
|
5
|
+
# A Cascade wraps a c.c.Cascade. A Cascade is composed of Flows, which are
|
6
|
+
# constructed using the Cascade#flow method within the block passed to the
|
7
|
+
# Cascading::cascade constructor. Many flows may be nested within a Cascade.
|
8
|
+
#
|
9
|
+
# Note that you are not required to use a Cascade to wrap your job. Instead,
|
10
|
+
# you could start with a top-level Flow, which you might prefer if you have
|
11
|
+
# no need of a c.c.Cascade's make-like semantics wrt sinks.
|
5
12
|
class Cascade < Cascading::Node
|
6
13
|
extend Registerable
|
7
14
|
|
@@ -10,46 +17,72 @@ module Cascading
|
|
10
17
|
# Do not use this constructor directly; instead, use Cascading::cascade to
|
11
18
|
# build cascades.
|
12
19
|
#
|
13
|
-
# Builds a
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
# Builds a Cascade given a name.
|
21
|
+
#
|
22
|
+
# The named options are:
|
23
|
+
# [properties] Properties hash which will be used as the default properties
|
24
|
+
# for all child flows. Properties must be a Ruby Hash with
|
25
|
+
# string keys and values and will be copied before being
|
26
|
+
# passed into each flow in the cascade. See Flow#initialize
|
27
|
+
# for details on how flows handle properties.
|
28
|
+
# [mode] Mode which will be used as the default mode for all child flows.
|
29
|
+
# See Mode.parse for details.
|
30
|
+
def initialize(name, options = {})
|
31
|
+
@properties = options[:properties] || {}
|
32
|
+
@mode = options[:mode]
|
23
33
|
super(name, nil) # A Cascade cannot have a parent
|
24
34
|
self.class.add(name, self)
|
25
35
|
end
|
26
36
|
|
27
|
-
# Builds a child
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
|
37
|
+
# Builds a child Flow in this Cascade given a name and block.
|
38
|
+
#
|
39
|
+
# The named options are:
|
40
|
+
# [properties] Properties hash which will override the default properties
|
41
|
+
# stored in this cascade.
|
42
|
+
# [mode] Mode which will override the default mode stored in this cascade.
|
43
|
+
#
|
44
|
+
# Example:
|
45
|
+
# cascade 'wordcount', :mode => :local do
|
46
|
+
# flow 'first_step' do
|
47
|
+
# ...
|
48
|
+
# end
|
49
|
+
#
|
50
|
+
# flow 'second_step' do
|
51
|
+
# ...
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
def flow(name, options = {}, &block)
|
32
55
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
33
56
|
|
34
|
-
|
35
|
-
|
57
|
+
options[:properties] ||= properties.dup
|
58
|
+
options[:mode] ||= mode
|
36
59
|
|
37
|
-
flow = Flow.new(name, self,
|
60
|
+
flow = Flow.new(name, self, options)
|
38
61
|
add_child(flow)
|
39
62
|
flow.instance_eval(&block)
|
40
63
|
flow
|
41
64
|
end
|
42
65
|
|
66
|
+
# Produces a textual description of this Cascade. The description details
|
67
|
+
# the structure of the Cascade, the sources and sinks of each Flow, and the
|
68
|
+
# input and output fields of each Assembly. The offset parameter allows
|
69
|
+
# for this describe to be nested within a calling context, which lets us
|
70
|
+
# indent the structural hierarchy of a job.
|
43
71
|
def describe(offset = '')
|
44
72
|
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
45
73
|
end
|
46
74
|
|
75
|
+
# Writes out the DOT file describing the structure of this Cascade.
|
76
|
+
#
|
77
|
+
# NOTE: will be moved to Job in a later version and will also be present on Flow
|
47
78
|
def draw(dir)
|
48
79
|
@children.each do |name, flow|
|
49
80
|
flow.connect.writeDOT("#{dir}/#{name}.dot")
|
50
81
|
end
|
51
82
|
end
|
52
83
|
|
84
|
+
# Builds a map, keyed by flow name, of the sink metadata for each child
|
85
|
+
# flow. Currently, this contains only the field names of each sink.
|
53
86
|
def sink_metadata
|
54
87
|
@children.inject({}) do |sink_fields, (name, flow)|
|
55
88
|
sink_fields[name] = flow.sink_metadata
|
@@ -57,12 +90,16 @@ module Cascading
|
|
57
90
|
end
|
58
91
|
end
|
59
92
|
|
93
|
+
# Writes the mapping produced by Cascade#sink_metadata to a file at the
|
94
|
+
# given path in YAML.
|
60
95
|
def write_sink_metadata(file_name)
|
61
96
|
File.open(file_name, 'w') do |file|
|
62
97
|
YAML.dump(sink_metadata, file)
|
63
98
|
end
|
64
99
|
end
|
65
100
|
|
101
|
+
# Connects this Cascade, producing a c.c.Cascade, which is then completed,
|
102
|
+
# executing it. Child flows are connected, so no parameters are required.
|
66
103
|
def complete
|
67
104
|
begin
|
68
105
|
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
|
data/lib/cascading/cascading.rb
CHANGED
@@ -1,6 +1,33 @@
|
|
1
|
+
require 'cascading/cascade'
|
2
|
+
require 'cascading/flow'
|
1
3
|
require 'cascading/expr_stub'
|
2
4
|
|
5
|
+
# The Cascading module contains all of the cascading.jruby DSL. Inserting the
|
6
|
+
# following into your script:
|
7
|
+
# require 'rubygems'
|
8
|
+
# require 'cascading'
|
9
|
+
# includes this module at the top level, making all of its features available.
|
10
|
+
#
|
11
|
+
# To build a dataflow like the one in the README.md or
|
12
|
+
# {samples}[http://github.com/mrwalker/cascading.jruby/tree/master/samples],
|
13
|
+
# start by looking at Cascade or Flow. These are the
|
14
|
+
# highest level structures you'll use to put together your job.
|
15
|
+
#
|
16
|
+
# Within a flow, you'll connect sources to sinks by way of Assembly, which
|
17
|
+
# refers to "pipe assemblies" from Cascading. Within an Assembly, you'll use
|
18
|
+
# functions and filters (see Operations, IdentityOperations, RegexOperations,
|
19
|
+
# FilterOperations, and TextOperations) as well as Assembly#group_by,
|
20
|
+
# Assembly#union, and Assembly#join. You can provide those last pipes with a
|
21
|
+
# block that can select operations from Aggregations.
|
22
|
+
#
|
23
|
+
# Finally, you'll want to address the execution of your job, whether it be
|
24
|
+
# locally testing or running remotely on a Hadoop cluster. See the Mode class
|
25
|
+
# for the available modes, and parameterize your script such that it can operate
|
26
|
+
# in Cascading local mode locally and in Hadoop mode when run in a jar produced
|
27
|
+
# with {Jading}[http://github.com/mrwalker/jading].
|
3
28
|
module Cascading
|
29
|
+
# Mapping that defines a convenient syntax for specifying Java classes, used
|
30
|
+
# in Janino expressions and elsewhere.
|
4
31
|
JAVA_TYPE_MAP = {
|
5
32
|
:int => java.lang.Integer.java_class, :long => java.lang.Long.java_class,
|
6
33
|
:bool => java.lang.Boolean.java_class, :double => java.lang.Double.java_class,
|
@@ -24,44 +51,84 @@ module Cascading
|
|
24
51
|
# directly building their own cascades and flows so that jading can send them
|
25
52
|
# default properties.
|
26
53
|
|
27
|
-
# Builds a top-level
|
28
|
-
#
|
29
|
-
|
54
|
+
# Builds a top-level Cascade given a name and a block.
|
55
|
+
#
|
56
|
+
# The named options are:
|
57
|
+
# [properties] See Cascade#initialize
|
58
|
+
# [mode] See Cascade#initialize
|
59
|
+
#
|
60
|
+
# Example:
|
61
|
+
# cascade 'wordcount', :mode => :local do
|
62
|
+
# flow 'first_step' do
|
63
|
+
# ...
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# flow 'second_step' do
|
67
|
+
# ...
|
68
|
+
# end
|
69
|
+
# end
|
70
|
+
def cascade(name, options = {}, &block)
|
30
71
|
raise "Could not build cascade '#{name}'; block required" unless block_given?
|
31
|
-
raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if
|
72
|
+
raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if options[:properties]
|
32
73
|
|
33
|
-
|
74
|
+
options[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
|
34
75
|
|
35
|
-
cascade = Cascade.new(name,
|
76
|
+
cascade = Cascade.new(name, options)
|
36
77
|
cascade.instance_eval(&block)
|
37
78
|
cascade
|
38
79
|
end
|
39
80
|
|
40
|
-
# Builds a top-level
|
41
|
-
# flows with no cascades.
|
42
|
-
#
|
43
|
-
|
81
|
+
# Builds a top-level Flow given a name and block for applications built of
|
82
|
+
# flows with no cascades.
|
83
|
+
#
|
84
|
+
# The named options are:
|
85
|
+
# [properties] See Flow#initialize
|
86
|
+
# [mode] See Flow#initialize
|
87
|
+
#
|
88
|
+
# Example:
|
89
|
+
# flow 'wordcount', :mode => :local do
|
90
|
+
# assembly 'first_step' do
|
91
|
+
# ...
|
92
|
+
# end
|
93
|
+
#
|
94
|
+
# assembly 'second_step' do
|
95
|
+
# ...
|
96
|
+
# end
|
97
|
+
# end
|
98
|
+
def flow(name, options = {}, &block)
|
44
99
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
45
|
-
raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if
|
100
|
+
raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if options[:properties]
|
46
101
|
|
47
|
-
|
102
|
+
options[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
|
48
103
|
|
49
|
-
flow = Flow.new(name, nil,
|
104
|
+
flow = Flow.new(name, nil, options)
|
50
105
|
flow.instance_eval(&block)
|
51
106
|
flow
|
52
107
|
end
|
53
108
|
|
109
|
+
# Produces a textual description of all Cascades in the global registry. The
|
110
|
+
# description details the structure of the Cascades, the sources and sinks of
|
111
|
+
# each Flow, and the input and output fields of each Assembly.
|
112
|
+
#
|
113
|
+
# NOTE: will be moved to Job in later version
|
54
114
|
def describe
|
55
115
|
Cascade.all.map{ |cascade| cascade.describe }.join("\n")
|
56
116
|
end
|
57
117
|
alias desc describe
|
58
118
|
|
59
119
|
# See ExprStub.expr
|
60
|
-
def expr(expression,
|
61
|
-
ExprStub.expr(expression,
|
120
|
+
def expr(expression, options = {})
|
121
|
+
ExprStub.expr(expression, options)
|
62
122
|
end
|
63
123
|
|
64
|
-
#
|
124
|
+
# Utility method for creating Cascading c.t.Fields from a field name (string)
|
125
|
+
# or list of field names (array of strings). If the input fields is already a
|
126
|
+
# c.t.Fields or nil, it is passed through. This allows for flexible use of
|
127
|
+
# the method at multiple layers in the DSL.
|
128
|
+
#
|
129
|
+
# Example:
|
130
|
+
# cascading_fields = fields(['first', 'second', 'third'])
|
131
|
+
# # cascading_fields.to_a == ['first', 'second', 'third']
|
65
132
|
def fields(fields)
|
66
133
|
if fields.nil?
|
67
134
|
return nil
|
@@ -76,27 +143,45 @@ module Cascading
|
|
76
143
|
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
|
77
144
|
end
|
78
145
|
|
146
|
+
# Convenience method wrapping c.t.Fields::ALL
|
79
147
|
def all_fields
|
80
148
|
Java::CascadingTuple::Fields::ALL
|
81
149
|
end
|
82
150
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
def difference_fields(*fields)
|
88
|
-
fields(fields[1..-1].inject(fields.first.to_a){ |acc, arr| acc - arr.to_a })
|
151
|
+
# Convenience method wrapping c.t.Fields::VALUES
|
152
|
+
def last_grouping_fields
|
153
|
+
Java::CascadingTuple::Fields::VALUES
|
89
154
|
end
|
90
155
|
|
91
|
-
|
92
|
-
|
156
|
+
# Computes fields formed by removing remove_fields from base_fields. Operates
|
157
|
+
# only on named fields, not positional fields.
|
158
|
+
#
|
159
|
+
# Example:
|
160
|
+
# base_fields = fields(['a', 'b', 'c'])
|
161
|
+
# remove_fields = fields(['b'])
|
162
|
+
# result_fields = difference_fields(base_fields, remove_fields)
|
163
|
+
# # result_fields.to_a == ['a', 'c']
|
164
|
+
def difference_fields(base_fields, remove_fields)
|
165
|
+
fields(base_fields.to_a - remove_fields.to_a)
|
93
166
|
end
|
94
167
|
|
168
|
+
# Combines fields deduplicating them with trailing underscores as necessary.
|
169
|
+
# This is used in joins to avoid requiring the caller to unique fields before
|
170
|
+
# they are joined.
|
95
171
|
def dedup_fields(*fields)
|
96
172
|
raise 'Can only be applied to declarators' unless fields.all?{ |f| f.is_declarator? }
|
97
173
|
fields(dedup_field_names(*fields.map{ |f| f.to_a }))
|
98
174
|
end
|
99
175
|
|
176
|
+
# Helper used by dedup_fields that operates on arrays of field names rather
|
177
|
+
# than fields objects.
|
178
|
+
#
|
179
|
+
# Example:
|
180
|
+
# left_names = ['a', 'b']
|
181
|
+
# mid_names = ['a', 'c']
|
182
|
+
# right_names = ['a', 'd']
|
183
|
+
# deduped_names = dedup_field_names(left_names, mid_names, right_names)
|
184
|
+
# # deduped_names == ['a', 'b', 'a_', 'c', 'a__', 'd']
|
100
185
|
def dedup_field_names(*names)
|
101
186
|
names.inject([]) do |acc, arr|
|
102
187
|
acc + arr.map{ |e| search_field_name(acc, e) }
|
@@ -106,30 +191,22 @@ module Cascading
|
|
106
191
|
def search_field_name(names, candidate)
|
107
192
|
names.include?(candidate) ? search_field_name(names, "#{candidate}_") : candidate
|
108
193
|
end
|
109
|
-
|
110
|
-
def last_grouping_fields
|
111
|
-
Java::CascadingTuple::Fields::VALUES
|
112
|
-
end
|
113
|
-
|
114
|
-
def results_fields
|
115
|
-
Java::CascadingTuple::Fields::RESULTS
|
116
|
-
end
|
194
|
+
private :search_field_name
|
117
195
|
|
118
196
|
# Creates a TextLine scheme (can be used in both Cascading local and hadoop
|
119
|
-
# modes). Positional args are used if
|
120
|
-
# provided.
|
197
|
+
# modes). Positional args are used if :source_fields is not provided.
|
121
198
|
#
|
122
199
|
# The named options are:
|
123
|
-
#
|
124
|
-
#
|
125
|
-
#
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
#
|
130
|
-
def text_line_scheme(*
|
131
|
-
options =
|
132
|
-
source_fields = fields(options[:source_fields] || (
|
200
|
+
# [source_fields] Fields to be read from a source with this scheme. Defaults
|
201
|
+
# to ['offset', 'line'].
|
202
|
+
# [sink_fields] Fields to be written to a sink with this scheme. Defaults to
|
203
|
+
# all_fields.
|
204
|
+
# [compression] A symbol, either :enable or :disable, that
|
205
|
+
# governs the TextLine scheme's compression. Defaults to the
|
206
|
+
# default TextLine compression (only applies to c.s.h.TextLine).
|
207
|
+
def text_line_scheme(*args_with_options)
|
208
|
+
options, source_fields = args_with_options.extract_options!, args_with_options
|
209
|
+
source_fields = fields(options[:source_fields] || (source_fields.empty? ? ['offset', 'line'] : source_fields))
|
133
210
|
sink_fields = fields(options[:sink_fields]) || all_fields
|
134
211
|
sink_compression = case options[:compression]
|
135
212
|
when :enable then Java::CascadingSchemeHadoop::TextLine::Compress::ENABLE
|
@@ -153,17 +230,30 @@ module Cascading
|
|
153
230
|
}
|
154
231
|
end
|
155
232
|
|
233
|
+
# Convenience access to MultiTap.multi_source_tap. This constructor is more
|
234
|
+
# "DSL-like" because it allows you to pass taps directly as actual args rather
|
235
|
+
# than in an array:
|
236
|
+
# multi_source_tap tap1, tap2, tap3, ..., tapn
|
237
|
+
#
|
238
|
+
# See MultiTap.multi_source_tap for more details.
|
156
239
|
def multi_source_tap(*taps)
|
157
240
|
MultiTap.multi_source_tap(taps)
|
158
241
|
end
|
159
242
|
|
243
|
+
# Convenience access to MultiTap.multi_sink_tap. This constructor is more
|
244
|
+
# "DSL-like" because it allows you to pass taps directly as actual args rather
|
245
|
+
# than in an array:
|
246
|
+
# multi_sink_tap tap1, tap2, tap3, ..., tapn
|
247
|
+
#
|
248
|
+
# See MultiTap.multi_sink_tap for more details.
|
160
249
|
def multi_sink_tap(*taps)
|
161
250
|
MultiTap.multi_sink_tap(taps)
|
162
251
|
end
|
163
252
|
|
164
|
-
#
|
165
|
-
|
166
|
-
|
253
|
+
# Convenience constructor for a Tap, that accepts the same options as that
|
254
|
+
# class' constructor. See Tap for more details.
|
255
|
+
def tap(path, options = {})
|
256
|
+
Tap.new(path, options)
|
167
257
|
end
|
168
258
|
|
169
259
|
# Constructs properties to be passed to Flow#complete or Cascade#complete
|
data/lib/cascading/expr_stub.rb
CHANGED
@@ -3,15 +3,15 @@ module Cascading
|
|
3
3
|
attr_accessor :expression, :types, :input_expression
|
4
4
|
|
5
5
|
# ExprStub requires a Janino expression decorated with field types. For
|
6
|
-
# example:
|
7
|
-
#
|
6
|
+
# example:
|
7
|
+
# expr('"Found: " + (x:int + y:int) + " " + z:string')
|
8
|
+
# Type names are defined in Cascading::JAVA_TYPE_MAP.
|
8
9
|
def initialize(expression)
|
9
10
|
@input_expression = expression
|
10
11
|
@expression = expression.dup
|
11
12
|
@types = {}
|
12
13
|
|
13
14
|
# Simple regexp based parser for types
|
14
|
-
|
15
15
|
JAVA_TYPE_MAP.each do |sym, klass|
|
16
16
|
@expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
|
17
17
|
name = match.split(/:/).first.gsub(/\s+/, "")
|
@@ -21,21 +21,38 @@ module Cascading
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
# Extract Java names and types from @types hash. Cascading constructors
|
25
|
+
# often require two separate Java Arrays in this fashion.
|
26
|
+
def names_and_types
|
27
|
+
names, types = split_hash(@types)
|
28
|
+
[names.to_java(java.lang.String), types.to_java(java.lang.Class)]
|
29
|
+
end
|
30
|
+
|
31
|
+
# Prints the original input expression.
|
24
32
|
def to_s
|
25
33
|
@input_expression
|
26
34
|
end
|
27
35
|
|
28
36
|
# Convenience constructor for an ExprStub that optionally performs
|
29
37
|
# validation. Takes a string to use as a Janino expression and an optional
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
|
35
|
-
|
38
|
+
# options hash.
|
39
|
+
#
|
40
|
+
# The named options are:
|
41
|
+
# [validate] A boolean indicating whether expression validation using
|
42
|
+
# default actual argument values should be performed. Defaults
|
43
|
+
# to true.
|
44
|
+
# [validate_with] A hash mapping field names (or symbols) to the value that
|
45
|
+
# should be used for validation. Strings default to nil,
|
46
|
+
# so if you have previously filtered nulls you might use a
|
47
|
+
# marker value like 'nulls_filtered'. Defaults to {}.
|
48
|
+
#
|
49
|
+
# Example:
|
50
|
+
# insert 'x_eq_y' => expr('x:string.equals(y:string)', :validate_with => { :x => 'nulls_filtered' })
|
51
|
+
def self.expr(expression, options = {})
|
52
|
+
options = { :validate => true, :validate_with => {} }.merge(options)
|
36
53
|
expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
|
37
|
-
expr_stub.validate(
|
38
|
-
puts "Expression validation is disabled for '#{expression}'" unless
|
54
|
+
expr_stub.validate(options[:validate_with]) if options[:validate]
|
55
|
+
puts "Expression validation is disabled for '#{expression}'" unless options[:validate]
|
39
56
|
expr_stub
|
40
57
|
end
|
41
58
|
|
@@ -68,6 +85,9 @@ module Cascading
|
|
68
85
|
self.eval(test_values.merge(actual_args))
|
69
86
|
end
|
70
87
|
|
88
|
+
# Given a scope, validates that the fields required by this ExprStub are
|
89
|
+
# available in the values fields of the scope. Returns those values fields
|
90
|
+
# which are unused in the expression.
|
71
91
|
def validate_scope(scope)
|
72
92
|
validate_fields(scope.values_fields.to_a)
|
73
93
|
end
|
@@ -113,12 +133,6 @@ module Cascading
|
|
113
133
|
end
|
114
134
|
end
|
115
135
|
|
116
|
-
# Extract Java names and types from @types hash
|
117
|
-
def names_and_types
|
118
|
-
names, types = split_hash(@types)
|
119
|
-
[names.to_java(java.lang.String), types.to_java(java.lang.Class)]
|
120
|
-
end
|
121
|
-
|
122
136
|
# Makes best effort to convert Ruby numbers into the Java numeric type
|
123
137
|
# expected by a Janino expression. However, if the conversion fails, it
|
124
138
|
# returns the original value so that the exception thrown will be from
|