cascading.jruby 0.0.10 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +13 -160
- data/README.md +35 -0
- data/lib/cascading.rb +8 -41
- data/lib/cascading/aggregations.rb +216 -71
- data/lib/cascading/assembly.rb +409 -606
- data/lib/cascading/base.rb +22 -0
- data/lib/cascading/cascade.rb +55 -18
- data/lib/cascading/cascading.rb +137 -47
- data/lib/cascading/expr_stub.rb +31 -17
- data/lib/cascading/ext/array.rb +17 -0
- data/lib/cascading/filter_operations.rb +101 -0
- data/lib/cascading/flow.rb +87 -23
- data/lib/cascading/identity_operations.rb +82 -0
- data/lib/cascading/mode.rb +14 -10
- data/lib/cascading/operations.rb +109 -174
- data/lib/cascading/regex_operations.rb +133 -0
- data/lib/cascading/scope.rb +32 -9
- data/lib/cascading/sub_assembly.rb +8 -5
- data/lib/cascading/tap.rb +41 -17
- data/lib/cascading/text_operations.rb +67 -0
- data/test/mock_assemblies.rb +55 -0
- data/test/test_assembly.rb +23 -25
- data/test/test_local_execution.rb +7 -7
- data/test/test_operations.rb +0 -10
- metadata +76 -74
- data/History.txt +0 -58
data/lib/cascading/operations.rb
CHANGED
@@ -1,116 +1,118 @@
|
|
1
1
|
module Cascading
|
2
|
-
# The Cascading::Operations module is deprecated. The original idea from long
|
3
|
-
# ago is that it would be useful to mixin operator wrappers to places other
|
4
|
-
# than Cascading::Assembly, but this is not true. Instead, put Eaches in
|
5
|
-
# Cascading::Assembly, Everies in Cascading::Aggregations, and any more
|
6
|
-
# generally useful utility code directly in the Cascading module
|
7
|
-
# (cascading/cascading.rb).
|
8
|
-
#
|
9
|
-
# Further, the entire *args pattern should be deprecated as it leads to
|
10
|
-
# functions that can only be understood by reading their code. Instead,
|
11
|
-
# idiomatic Ruby (positional required params and a params hash for optional
|
12
|
-
# args) should be used. See Cascading::Assembly#set_value for an example.
|
13
2
|
module Operations
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
def regex_split_generator(*args)
|
63
|
-
options = args.extract_options!
|
64
|
-
|
65
|
-
fields = Cascading.fields(args)
|
66
|
-
pattern = options[:pattern].to_s
|
67
|
-
parameters = [fields, pattern].compact
|
68
|
-
Java::CascadingOperationRegex::RegexSplitGenerator.new(*parameters)
|
69
|
-
end
|
70
|
-
|
71
|
-
def regex_generator(*args)
|
72
|
-
options = args.extract_options!
|
73
|
-
|
74
|
-
fields = Cascading.fields(args)
|
75
|
-
pattern = options[:pattern].to_s
|
76
|
-
parameters = [fields, pattern].compact
|
77
|
-
Java::CascadingOperationRegex::RegexGenerator.new(*parameters)
|
78
|
-
end
|
79
|
-
|
80
|
-
def expression_function(*args)
|
81
|
-
options = args.extract_options!
|
82
|
-
|
83
|
-
fields = Cascading.fields(args)
|
84
|
-
expression = options[:expression].to_s
|
85
|
-
parameters = options[:parameters]
|
86
|
-
parameter_names = []
|
87
|
-
parameter_types = []
|
88
|
-
if parameters.is_a? ::Hash
|
89
|
-
parameters.each do |name, type|
|
90
|
-
parameter_names << name
|
91
|
-
parameter_types << type
|
3
|
+
# Debugs the current assembly at runtime, printing every tuple and fields
|
4
|
+
# every 10 tuples by default.
|
5
|
+
#
|
6
|
+
# The named options are:
|
7
|
+
# [prefix] String to prefix prints with.
|
8
|
+
# [print_fields] Boolean controlling field printing, defaults to false.
|
9
|
+
# [tuple_interval] Integer specifying interval between printed tuples
|
10
|
+
# [fields_interval] Integer specifying interval between printing fields
|
11
|
+
#
|
12
|
+
# Example:
|
13
|
+
# debug :prefix => 'DEBUG', :print_fields => true, :fields_interval => 1000
|
14
|
+
def debug(options = {})
|
15
|
+
input_fields = options[:input] || all_fields
|
16
|
+
prefix = options[:prefix]
|
17
|
+
print_fields = options[:print_fields]
|
18
|
+
|
19
|
+
debug = Java::CascadingOperation::Debug.new(*[prefix, print_fields].compact)
|
20
|
+
|
21
|
+
debug.print_tuple_every = options[:tuple_interval] || 1
|
22
|
+
debug.print_fields_every = options[:fields_interval] || 10
|
23
|
+
|
24
|
+
each(input_fields, :filter => debug)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Inserts new fields into the current assembly. Values may be constants or
|
28
|
+
# expressions (see Cascading::expr). Fields will be inserted in
|
29
|
+
# lexicographic order (not necessarily the order provided).
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# insert 'field1' => 'constant_string', 'field2' => 0, 'field3' => expr('fieldA:long + fieldB:long')
|
33
|
+
def insert(insert_map)
|
34
|
+
insert_map.keys.sort.each do |field_name|
|
35
|
+
value = insert_map[field_name]
|
36
|
+
|
37
|
+
if value.kind_of?(ExprStub)
|
38
|
+
value.validate_scope(scope)
|
39
|
+
names, types = value.names_and_types
|
40
|
+
each(
|
41
|
+
all_fields,
|
42
|
+
:function => Java::CascadingOperationExpression::ExpressionFunction.new(fields(field_name), value.expression, names, types),
|
43
|
+
:output => all_fields
|
44
|
+
)
|
45
|
+
else # value is a constant
|
46
|
+
each(
|
47
|
+
all_fields,
|
48
|
+
:function => Java::CascadingOperation::Insert.new(fields(field_name), to_java_comparable_array([value])),
|
49
|
+
:output => all_fields
|
50
|
+
)
|
92
51
|
end
|
93
|
-
parameter_names = parameter_names.to_java(java.lang.String)
|
94
|
-
parameter_types = parameter_types.to_java(java.lang.Class)
|
95
|
-
|
96
|
-
arguments = [fields, expression, parameter_names, parameter_types].compact
|
97
|
-
elsif !parameters.nil?
|
98
|
-
arguments = [fields, expression, parameters.java_class].compact
|
99
|
-
else
|
100
|
-
arguments = [fields, expression, java.lang.String.java_class].compact
|
101
52
|
end
|
102
|
-
|
103
|
-
Java::CascadingOperationExpression::ExpressionFunction.new(*arguments)
|
104
53
|
end
|
105
54
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
55
|
+
# Ungroups, or unpivots, a tuple (see Cascading's {UnGroup}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/function/UnGroup.html]).
|
56
|
+
#
|
57
|
+
# You must provide exactly one of :value_selectors and :num_values.
|
58
|
+
#
|
59
|
+
# The named options are:
|
60
|
+
# [value_selectors] Array of field names to ungroup. Each field will be
|
61
|
+
# ungrouped into an output tuple along with the key fields
|
62
|
+
# in the order provided.
|
63
|
+
# [num_values] Integer specifying the number of fields to ungroup into each
|
64
|
+
# output tuple (excluding the key fields). All input fields
|
65
|
+
# will be ungrouped.
|
66
|
+
#
|
67
|
+
# Example:
|
68
|
+
# ungroup 'key', ['new_key', 'val'], :value_selectors => ['val1', 'val2', 'val3'], :output => ['new_key', 'val']
|
69
|
+
def ungroup(key, into_fields, options = {})
|
70
|
+
input_fields = options[:input] || all_fields
|
71
|
+
output = options[:output] || all_fields
|
72
|
+
|
73
|
+
raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
|
74
|
+
value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
|
75
|
+
num_values = options[:num_values] if options.has_key?(:num_values)
|
76
|
+
|
77
|
+
parameters = [fields(into_fields), fields(key), value_selectors, num_values].compact
|
78
|
+
each input_fields, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
|
79
|
+
end
|
80
|
+
|
81
|
+
# Inserts one of two values into the dataflow based upon the result of the
|
82
|
+
# supplied filter on the input_fields. This is primarily useful for
|
83
|
+
# creating indicators from filters. keep_value specifies the Java value to
|
84
|
+
# produce when the filter would keep the given input and remove_value
|
85
|
+
# specifies the Java value to produce when the filter would remove the given
|
86
|
+
# input.
|
87
|
+
#
|
88
|
+
# Example:
|
89
|
+
# set_value 'field1', Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, 'is_field1_null'
|
90
|
+
def set_value(input_fields, filter, keep_value, remove_value, into_field, options = {})
|
91
|
+
output = options[:output] || all_fields
|
92
|
+
each input_fields, :function => Java::CascadingOperationFunction::SetValue.new(fields(into_field), filter, keep_value, remove_value), :output => output
|
93
|
+
end
|
94
|
+
|
95
|
+
# Efficient way of inserting a null indicator for any field, even one that
|
96
|
+
# cannot be coerced to a string. This is accomplished using Cascading's
|
97
|
+
# FilterNull and SetValue operators rather than Janino. 1 is produced if
|
98
|
+
# the field is null and 0 otherwise.
|
99
|
+
#
|
100
|
+
# Example:
|
101
|
+
# null_indicator 'field1', 'is_field1_null'
|
102
|
+
def null_indicator(input_field, into_field, options = {})
|
103
|
+
set_value input_field, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, into_field, :output => options[:output]
|
104
|
+
end
|
105
|
+
|
106
|
+
# Given an input_field and a regex, returns an indicator that is 1 if the string
|
107
|
+
# contains at least 1 match and 0 otherwise.
|
108
|
+
#
|
109
|
+
# Example:
|
110
|
+
# regex_contains 'field1', /\w+\s+\w+/, 'does_field1_contain_pair'
|
111
|
+
# Writes a 1/0 indicator into into_field by delegating to set_value with a
# RegexFilter built from regex: 1 is the keep value, 0 the remove value.
#
# input_field:: field whose value is tested against regex
# regex:: Regexp or String; converted with to_s before reaching RegexFilter
# into_field:: name of the indicator field to produce
# options:: only :output is read, and it is forwarded to set_value
def regex_contains(input_field, regex, into_field, options = {})
  # Build the filter from the regex parameter. The earlier body referenced an
  # undefined name (pattern), so every call failed with NameError.
  filter = Java::CascadingOperationRegex::RegexFilter.new(regex.to_s)
  set_value input_field, filter, 1.to_java, 0.to_java, into_field, :output => options[:output]
end
|
114
|
+
|
115
|
+
private
|
114
116
|
|
115
117
|
def to_java_comparable_array(arr)
|
116
118
|
(arr.map do |v|
|
@@ -130,72 +132,5 @@ module Cascading
|
|
130
132
|
java.lang.String.new(v.to_s)
|
131
133
|
end
|
132
134
|
end
|
133
|
-
|
134
|
-
def expression_filter(*args)
|
135
|
-
options = args.extract_options!
|
136
|
-
expression = (args[0] || options[:expression]).to_s
|
137
|
-
parameters = options[:parameters]
|
138
|
-
parameter_names = []
|
139
|
-
parameter_types = []
|
140
|
-
if parameters.is_a? ::Hash
|
141
|
-
parameters.each do |name, type|
|
142
|
-
parameter_names << name
|
143
|
-
parameter_types << type
|
144
|
-
end
|
145
|
-
parameter_names = parameter_names.to_java(java.lang.String)
|
146
|
-
parameter_types = parameter_types.to_java(java.lang.Class)
|
147
|
-
|
148
|
-
arguments = [expression, parameter_names, parameter_types].compact
|
149
|
-
elsif !parameters.nil?
|
150
|
-
arguments = [expression, parameters.java_class].compact
|
151
|
-
else
|
152
|
-
arguments = [expression, java.lang.String.java_class].compact
|
153
|
-
end
|
154
|
-
|
155
|
-
Java::CascadingOperationExpression::ExpressionFilter.new(*arguments)
|
156
|
-
end
|
157
|
-
|
158
|
-
def date_parser(field, format)
|
159
|
-
fields = fields(field)
|
160
|
-
Java::CascadingOperationText::DateParser.new(fields, format)
|
161
|
-
end
|
162
|
-
|
163
|
-
def date_formatter(fields, format, timezone=nil)
|
164
|
-
fields = fields(fields)
|
165
|
-
timezone = Java::JavaUtil::TimeZone.get_time_zone(timezone) if timezone
|
166
|
-
arguments = [fields, format, timezone].compact
|
167
|
-
Java::CascadingOperationText::DateFormatter.new(*arguments)
|
168
|
-
end
|
169
|
-
|
170
|
-
def regex_filter(*args)
|
171
|
-
options = args.extract_options!
|
172
|
-
|
173
|
-
pattern = args[0]
|
174
|
-
remove_match = options[:remove_match]
|
175
|
-
match_each_element = options[:match_each_element]
|
176
|
-
parameters = [pattern.to_s, remove_match, match_each_element].compact
|
177
|
-
Java::CascadingOperationRegex::RegexFilter.new(*parameters)
|
178
|
-
end
|
179
|
-
|
180
|
-
def regex_replace(*args)
|
181
|
-
options = args.extract_options!
|
182
|
-
|
183
|
-
fields = fields(args[0])
|
184
|
-
pattern = args[1]
|
185
|
-
replacement = args[2]
|
186
|
-
replace_all = options[:replace_all]
|
187
|
-
|
188
|
-
parameters = [fields, pattern.to_s, replacement.to_s, replace_all].compact
|
189
|
-
Java::CascadingOperationRegex::RegexReplace.new(*parameters)
|
190
|
-
end
|
191
|
-
|
192
|
-
def field_joiner(*args)
|
193
|
-
options = args.extract_options!
|
194
|
-
delimiter = options[:delimiter] || ','
|
195
|
-
fields = fields(options[:into])
|
196
|
-
|
197
|
-
parameters = [fields, delimiter].compact
|
198
|
-
Java::CascadingOperationText::FieldJoiner.new(*parameters)
|
199
|
-
end
|
200
135
|
end
|
201
136
|
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Cascading
|
2
|
+
# Module of pipe assemblies that wrap operations defined in the Cascading
|
3
|
+
# cascading.operation.regex package. These are split out only to group
|
4
|
+
# similar functionality.
|
5
|
+
#
|
6
|
+
# All DSL regex pipes require an input_field, a regex, and either a single
|
7
|
+
# into_field or one or more into_fields. Requiring a single input field
|
8
|
+
# allows us to raise an exception early if the wrong input is specified and
|
9
|
+
# avoids the non-intuitive situation where the first of many fields is
|
10
|
+
# silently taken as in Cascading. Requiring a regex means you don't have to
|
11
|
+
# go looking for defaults in code. And into_field(s) means we can propagate
|
12
|
+
# field names through the dataflow.
|
13
|
+
#
|
14
|
+
# Mapping of DSL pipes into Cascading regex operations:
|
15
|
+
# parse:: {RegexParser}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexParser.html]
|
16
|
+
# split:: {RegexSplitter}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexSplitter.html]
|
17
|
+
# split\_rows:: {RegexSplitGenerator}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexSplitGenerator.html]
|
18
|
+
# match\_rows:: {RegexGenerator}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexGenerator.html]
|
19
|
+
# replace:: {RegexReplace}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexReplace.html]
|
20
|
+
module RegexOperations
|
21
|
+
# Parses the given input_field using the specified regular expression to
|
22
|
+
# produce one output per group in that expression.
|
23
|
+
#
|
24
|
+
# The named options are:
|
25
|
+
# [groups] Array of integers specifying which groups to capture if you want
|
26
|
+
# a subset of groups.
|
27
|
+
#
|
28
|
+
# Example:
|
29
|
+
# parse 'field1', /(\w+)\s+(\w+)/, ['out1', 'out2'], :groups => [1, 2]
|
30
|
+
def parse(input_field, regex, into_fields, options = {})
|
31
|
+
groups = options[:groups].to_java(:int) if options[:groups]
|
32
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
33
|
+
|
34
|
+
input_field = fields(input_field)
|
35
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
36
|
+
|
37
|
+
parameters = [fields(into_fields), regex.to_s, groups].compact
|
38
|
+
each(
|
39
|
+
input_field,
|
40
|
+
:function => Java::CascadingOperationRegex::RegexParser.new(*parameters),
|
41
|
+
:output => output
|
42
|
+
)
|
43
|
+
end
|
44
|
+
alias regex_parser parse
|
45
|
+
|
46
|
+
# Splits the given input_field into multiple fields using the specified
|
47
|
+
# regular expression.
|
48
|
+
#
|
49
|
+
# Example:
|
50
|
+
# split 'line', /\s+/, ['out1', 'out2']
|
51
|
+
def split(input_field, regex, into_fields, options = {})
|
52
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
53
|
+
|
54
|
+
input_field = fields(input_field)
|
55
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
56
|
+
|
57
|
+
each(
|
58
|
+
input_field,
|
59
|
+
:function => Java::CascadingOperationRegex::RegexSplitter.new(fields(into_fields), regex.to_s),
|
60
|
+
:output => output
|
61
|
+
)
|
62
|
+
end
|
63
|
+
alias regex_splitter split
|
64
|
+
|
65
|
+
# Splits the given input_field into new rows using the specified regular
|
66
|
+
# expression.
|
67
|
+
#
|
68
|
+
# Example:
|
69
|
+
# split_rows 'line', /\s+/, 'word'
|
70
|
+
def split_rows(input_field, regex, into_field, options = {})
|
71
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
72
|
+
|
73
|
+
input_field = fields(input_field)
|
74
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
75
|
+
into_field = fields(into_field)
|
76
|
+
raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
|
77
|
+
|
78
|
+
each(
|
79
|
+
input_field,
|
80
|
+
:function => Java::CascadingOperationRegex::RegexSplitGenerator.new(into_field, regex.to_s),
|
81
|
+
:output => output
|
82
|
+
)
|
83
|
+
end
|
84
|
+
alias regex_split_generator split_rows
|
85
|
+
|
86
|
+
# Emits a new row for each regex group matched in input_field using the
|
87
|
+
# specified regular expression.
|
88
|
+
#
|
89
|
+
# Example:
|
90
|
+
# match_rows 'line', /(\w+)\s+(\w+)/, 'word'
|
91
|
+
def match_rows(input_field, regex, into_field, options = {})
|
92
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
93
|
+
|
94
|
+
input_field = fields(input_field)
|
95
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
96
|
+
into_field = fields(into_field)
|
97
|
+
raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
|
98
|
+
|
99
|
+
each(
|
100
|
+
input_field,
|
101
|
+
:function => Java::CascadingOperationRegex::RegexGenerator.new(into_field, regex.to_s),
|
102
|
+
:output => output
|
103
|
+
)
|
104
|
+
end
|
105
|
+
alias regex_generator match_rows
|
106
|
+
|
107
|
+
# Performs a query/replace on the given input_field using the specified
|
108
|
+
# regular expression and replacement.
|
109
|
+
#
|
110
|
+
# The named options are:
|
111
|
+
# [replace_all] Boolean indicating if all matches should be replaced;
|
112
|
+
# defaults to true (the Cascading default).
|
113
|
+
#
|
114
|
+
# Example:
|
115
|
+
# replace 'line', /[.,]*\s+/, 'tab_separated_line', "\t"
|
116
|
+
def replace(input_field, regex, into_field, replacement, options = {})
|
117
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
118
|
+
|
119
|
+
input_field = fields(input_field)
|
120
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
121
|
+
into_field = fields(into_field)
|
122
|
+
raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
|
123
|
+
|
124
|
+
parameters = [into_field, regex.to_s, replacement.to_s, options[:replace_all]].compact
|
125
|
+
each(
|
126
|
+
input_field,
|
127
|
+
:function => Java::CascadingOperationRegex::RegexReplace.new(*parameters),
|
128
|
+
:output => output
|
129
|
+
)
|
130
|
+
end
|
131
|
+
alias regex_replace replace
|
132
|
+
end
|
133
|
+
end
|
data/lib/cascading/scope.rb
CHANGED
@@ -1,23 +1,35 @@
|
|
1
1
|
module Cascading
|
2
|
+
# Scope is a wrapper for the private Cascading c.f.p.Scope object used to
|
3
|
+
# connect the dataflow graph by resolving fields. cascading.jruby wraps this
|
4
|
+
# facility so that it may be used to propagate field names at composition
|
5
|
+
# time (not Cascading plan time) in the same way they will later be
|
6
|
+
# propagated by the planner.
|
2
7
|
class Scope
|
3
8
|
attr_accessor :scope
|
4
9
|
|
10
|
+
# Construct a Scope given the Cascading c.f.p.Scope to wrap.
|
5
11
|
def initialize(scope)
|
6
12
|
@scope = scope
|
7
13
|
end
|
8
14
|
|
15
|
+
# Copy one Scope into another; relies upon the copy constructor of
|
16
|
+
# c.f.p.Scope.
|
9
17
|
def copy
|
10
18
|
Scope.new(Java::CascadingFlowPlanner::Scope.new(@scope))
|
11
19
|
end
|
12
20
|
|
21
|
+
# Build a c.f.p.Scope for a Flow, which is empty except for its name.
|
13
22
|
def self.flow_scope(name)
|
14
23
|
Java::CascadingFlowPlanner::Scope.new(name)
|
15
24
|
end
|
16
25
|
|
26
|
+
# Build an empty Scope, wrapping an empty c.f.p.Scope.
|
17
27
|
def self.empty_scope(name)
|
18
28
|
Scope.new(Java::CascadingFlowPlanner::Scope.new(name))
|
19
29
|
end
|
20
30
|
|
31
|
+
# Build a Scope for a single source Tap. The flow_scope is propagated
|
32
|
+
# through this call into a new Scope.
|
21
33
|
def self.source_scope(name, tap, flow_scope)
|
22
34
|
incoming_scopes = java.util.HashSet.new
|
23
35
|
incoming_scopes.add(flow_scope)
|
@@ -27,28 +39,30 @@ module Cascading
|
|
27
39
|
Scope.new(java_scope)
|
28
40
|
end
|
29
41
|
|
42
|
+
# Build a Scope for an arbitrary flow element. This is used to update the
|
43
|
+
# Scope at each stage in a pipe Assembly.
|
30
44
|
def self.outgoing_scope(flow_element, incoming_scopes)
|
31
45
|
java_scopes = incoming_scopes.compact.map{ |s| s.scope }
|
32
46
|
Scope.new(outgoing_scope_for(flow_element, java.util.HashSet.new(java_scopes)))
|
33
47
|
end
|
34
48
|
|
49
|
+
# The values fields of the Scope, which indicate the fields in the current
|
50
|
+
# dataflow tuple.
|
35
51
|
def values_fields
|
36
52
|
@scope.out_values_fields
|
37
53
|
end
|
38
54
|
|
55
|
+
# The grouping fields of the Scope, which indicate the keys of an
|
56
|
+
# group/cogroup.
|
39
57
|
def grouping_fields
|
40
58
|
@scope.out_grouping_fields
|
41
59
|
end
|
42
60
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
'ERROR'
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
61
|
+
# Prints a detailed description of this Scope, including its type and
|
62
|
+
# various selectors, fields, and key fields. Data is bubbled up directly
|
63
|
+
# from the Cascading c.f.p.Scope. This output can be useful for debugging
|
64
|
+
# the propagation of fields through your job (see Flow#debug_scope and
|
65
|
+
# Assembly#debug_scope, which both rely upon this method).
|
52
66
|
def to_s
|
53
67
|
kind = 'Unknown'
|
54
68
|
kind = 'Tap' if @scope.tap?
|
@@ -77,6 +91,15 @@ END
|
|
77
91
|
|
78
92
|
private
|
79
93
|
|
94
|
+
# Renders the fields returned by one accessor of the wrapped scope.
#
# Sends accessor to @scope and returns 'null' when the result is nil, the
# result's to_s otherwise, and the literal 'ERROR' when the call raises.
def scope_fields_to_s(accessor)
  fields = @scope.send(accessor)
  fields.nil? ? 'null' : fields.to_s
rescue Exception
  # NOTE(review): the broad rescue is kept as-is; presumably it is meant to
  # also trap errors surfacing from the wrapped Java object -- confirm
  # before narrowing it.
  'ERROR'
end
|
102
|
+
|
80
103
|
def self.outgoing_scope_for(flow_element, incoming_scopes)
|
81
104
|
begin
|
82
105
|
flow_element.outgoing_scope_for(incoming_scopes)
|