cascading.jruby 0.0.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +13 -160
- data/README.md +35 -0
- data/lib/cascading.rb +8 -41
- data/lib/cascading/aggregations.rb +216 -71
- data/lib/cascading/assembly.rb +409 -606
- data/lib/cascading/base.rb +22 -0
- data/lib/cascading/cascade.rb +55 -18
- data/lib/cascading/cascading.rb +137 -47
- data/lib/cascading/expr_stub.rb +31 -17
- data/lib/cascading/ext/array.rb +17 -0
- data/lib/cascading/filter_operations.rb +101 -0
- data/lib/cascading/flow.rb +87 -23
- data/lib/cascading/identity_operations.rb +82 -0
- data/lib/cascading/mode.rb +14 -10
- data/lib/cascading/operations.rb +109 -174
- data/lib/cascading/regex_operations.rb +133 -0
- data/lib/cascading/scope.rb +32 -9
- data/lib/cascading/sub_assembly.rb +8 -5
- data/lib/cascading/tap.rb +41 -17
- data/lib/cascading/text_operations.rb +67 -0
- data/test/mock_assemblies.rb +55 -0
- data/test/test_assembly.rb +23 -25
- data/test/test_local_execution.rb +7 -7
- data/test/test_operations.rb +0 -10
- metadata +76 -74
- data/History.txt +0 -58
data/lib/cascading/operations.rb
CHANGED
@@ -1,116 +1,118 @@
|
|
1
1
|
module Cascading
|
2
|
-
# The Cascading::Operations module is deprecated. The original idea from long
|
3
|
-
# ago is that it would be useful to mixin operator wrappers to places other
|
4
|
-
# than Cascading::Assembly, but this is not true. Instead, put Eaches in
|
5
|
-
# Cascading::Assembly, Everies in Cascading::Aggregations, and any more
|
6
|
-
# generally useful utility code directly in the Cascading module
|
7
|
-
# (cascading/cascading.rb).
|
8
|
-
#
|
9
|
-
# Further, the entire *args pattern should be deprecated as it leads to
|
10
|
-
# functions that can only be understood by reading their code. Instead,
|
11
|
-
# idiomatic Ruby (positional required params and a params hash for optional
|
12
|
-
# args) should be used. See Cascading::Assembly#set_value for an example.
|
13
2
|
module Operations
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
def regex_split_generator(*args)
|
63
|
-
options = args.extract_options!
|
64
|
-
|
65
|
-
fields = Cascading.fields(args)
|
66
|
-
pattern = options[:pattern].to_s
|
67
|
-
parameters = [fields, pattern].compact
|
68
|
-
Java::CascadingOperationRegex::RegexSplitGenerator.new(*parameters)
|
69
|
-
end
|
70
|
-
|
71
|
-
def regex_generator(*args)
|
72
|
-
options = args.extract_options!
|
73
|
-
|
74
|
-
fields = Cascading.fields(args)
|
75
|
-
pattern = options[:pattern].to_s
|
76
|
-
parameters = [fields, pattern].compact
|
77
|
-
Java::CascadingOperationRegex::RegexGenerator.new(*parameters)
|
78
|
-
end
|
79
|
-
|
80
|
-
def expression_function(*args)
|
81
|
-
options = args.extract_options!
|
82
|
-
|
83
|
-
fields = Cascading.fields(args)
|
84
|
-
expression = options[:expression].to_s
|
85
|
-
parameters = options[:parameters]
|
86
|
-
parameter_names = []
|
87
|
-
parameter_types = []
|
88
|
-
if parameters.is_a? ::Hash
|
89
|
-
parameters.each do |name, type|
|
90
|
-
parameter_names << name
|
91
|
-
parameter_types << type
|
3
|
+
# Debugs the current assembly at runtime, printing every tuple and fields
|
4
|
+
# every 10 tuples by default.
|
5
|
+
#
|
6
|
+
# The named options are:
|
7
|
+
# [prefix] String to prefix prints with.
|
8
|
+
# [print_fields] Boolean controlling field printing, defaults to false.
|
9
|
+
# [tuple_interval] Integer specifying interval between printed tuples
|
10
|
+
# [fields_interval] Integer specifying interval between printing fields
|
11
|
+
#
|
12
|
+
# Example:
|
13
|
+
# debug :prefix => 'DEBUG', :print_fields => true, :fields_interval => 1000
|
14
|
+
def debug(options = {})
|
15
|
+
input_fields = options[:input] || all_fields
|
16
|
+
prefix = options[:prefix]
|
17
|
+
print_fields = options[:print_fields]
|
18
|
+
|
19
|
+
debug = Java::CascadingOperation::Debug.new(*[prefix, print_fields].compact)
|
20
|
+
|
21
|
+
debug.print_tuple_every = options[:tuple_interval] || 1
|
22
|
+
debug.print_fields_every = options[:fields_interval] || 10
|
23
|
+
|
24
|
+
each(input_fields, :filter => debug)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Inserts new fields into the current assembly. Values may be constants or
|
28
|
+
# expressions (see Cascading::expr). Fields will be inserted in
|
29
|
+
# lexicographic order (not necessarily the order provided).
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# insert 'field1' => 'constant_string', 'field2' => 0, 'field3' => expr('fieldA:long + fieldB:long')
|
33
|
+
def insert(insert_map)
|
34
|
+
insert_map.keys.sort.each do |field_name|
|
35
|
+
value = insert_map[field_name]
|
36
|
+
|
37
|
+
if value.kind_of?(ExprStub)
|
38
|
+
value.validate_scope(scope)
|
39
|
+
names, types = value.names_and_types
|
40
|
+
each(
|
41
|
+
all_fields,
|
42
|
+
:function => Java::CascadingOperationExpression::ExpressionFunction.new(fields(field_name), value.expression, names, types),
|
43
|
+
:output => all_fields
|
44
|
+
)
|
45
|
+
else # value is a constant
|
46
|
+
each(
|
47
|
+
all_fields,
|
48
|
+
:function => Java::CascadingOperation::Insert.new(fields(field_name), to_java_comparable_array([value])),
|
49
|
+
:output => all_fields
|
50
|
+
)
|
92
51
|
end
|
93
|
-
parameter_names = parameter_names.to_java(java.lang.String)
|
94
|
-
parameter_types = parameter_types.to_java(java.lang.Class)
|
95
|
-
|
96
|
-
arguments = [fields, expression, parameter_names, parameter_types].compact
|
97
|
-
elsif !parameters.nil?
|
98
|
-
arguments = [fields, expression, parameters.java_class].compact
|
99
|
-
else
|
100
|
-
arguments = [fields, expression, java.lang.String.java_class].compact
|
101
52
|
end
|
102
|
-
|
103
|
-
Java::CascadingOperationExpression::ExpressionFunction.new(*arguments)
|
104
53
|
end
|
105
54
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
55
|
+
# Ungroups, or unpivots, a tuple (see Cascading's {UnGroup}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/function/UnGroup.html]).
|
56
|
+
#
|
57
|
+
# You must provide exactly one of :value_selectors and :num_values.
|
58
|
+
#
|
59
|
+
# The named options are:
|
60
|
+
# [value_selectors] Array of field names to ungroup. Each field will be
|
61
|
+
# ungrouped into an output tuple along with the key fields
|
62
|
+
# in the order provided.
|
63
|
+
# [num_values] Integer specifying the number of fields to ungroup into each
|
64
|
+
# output tuple (excluding the key fields). All input fields
|
65
|
+
# will be ungrouped.
|
66
|
+
#
|
67
|
+
# Example:
|
68
|
+
# ungroup 'key', ['new_key', 'val'], :value_selectors => ['val1', 'val2', 'val3'], :output => ['new_key', 'val']
|
69
|
+
def ungroup(key, into_fields, options = {})
|
70
|
+
input_fields = options[:input] || all_fields
|
71
|
+
output = options[:output] || all_fields
|
72
|
+
|
73
|
+
raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
|
74
|
+
value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
|
75
|
+
num_values = options[:num_values] if options.has_key?(:num_values)
|
76
|
+
|
77
|
+
parameters = [fields(into_fields), fields(key), value_selectors, num_values].compact
|
78
|
+
each input_fields, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
|
79
|
+
end
|
80
|
+
|
81
|
+
# Inserts one of two values into the dataflow based upon the result of the
|
82
|
+
# supplied filter on the input_fields. This is primarily useful for
|
83
|
+
# creating indicators from filters. keep_value specifies the Java value to
|
84
|
+
# produce when the filter would keep the given input and remove_value
|
85
|
+
# specifies the Java value to produce when the filter would remove the given
|
86
|
+
# input.
|
87
|
+
#
|
88
|
+
# Example:
|
89
|
+
# set_value 'field1', Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, 'is_field1_null'
|
90
|
+
def set_value(input_fields, filter, keep_value, remove_value, into_field, options = {})
|
91
|
+
output = options[:output] || all_fields
|
92
|
+
each input_fields, :function => Java::CascadingOperationFunction::SetValue.new(fields(into_field), filter, keep_value, remove_value), :output => output
|
93
|
+
end
|
94
|
+
|
95
|
+
# Efficient way of inserting a null indicator for any field, even one that
|
96
|
+
# cannot be coerced to a string. This is accomplished using Cascading's
|
97
|
+
# FilterNull and SetValue operators rather than Janino. 1 is produced if
|
98
|
+
# the field is null and 0 otherwise.
|
99
|
+
#
|
100
|
+
# Example:
|
101
|
+
# null_indicator 'field1', 'is_field1_null'
|
102
|
+
def null_indicator(input_field, into_field, options = {})
|
103
|
+
set_value input_field, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, into_field, :output => options[:output]
|
104
|
+
end
|
105
|
+
|
106
|
+
# Given an input_field and a regex, returns an indicator that is 1 if the string
|
107
|
+
# contains at least 1 match and 0 otherwise.
|
108
|
+
#
|
109
|
+
# Example:
|
110
|
+
# regex_contains 'field1', /\w+\s+\w+/, 'does_field1_contain_pair'
|
111
|
+
def regex_contains(input_field, regex, into_field, options = {})
  # Build the filter from the +regex+ argument. The previous code referenced
  # an undefined local named +pattern+, which raised NameError on every call.
  filter = Java::CascadingOperationRegex::RegexFilter.new(regex.to_s)

  # set_value emits keep_value (1) when the filter would keep the input and
  # remove_value (0) when it would remove it. options[:output] may be nil, in
  # which case set_value falls back to all_fields.
  set_value input_field, filter, 1.to_java, 0.to_java, into_field, :output => options[:output]
end
|
114
|
+
|
115
|
+
private
|
114
116
|
|
115
117
|
def to_java_comparable_array(arr)
|
116
118
|
(arr.map do |v|
|
@@ -130,72 +132,5 @@ module Cascading
|
|
130
132
|
java.lang.String.new(v.to_s)
|
131
133
|
end
|
132
134
|
end
|
133
|
-
|
134
|
-
def expression_filter(*args)
|
135
|
-
options = args.extract_options!
|
136
|
-
expression = (args[0] || options[:expression]).to_s
|
137
|
-
parameters = options[:parameters]
|
138
|
-
parameter_names = []
|
139
|
-
parameter_types = []
|
140
|
-
if parameters.is_a? ::Hash
|
141
|
-
parameters.each do |name, type|
|
142
|
-
parameter_names << name
|
143
|
-
parameter_types << type
|
144
|
-
end
|
145
|
-
parameter_names = parameter_names.to_java(java.lang.String)
|
146
|
-
parameter_types = parameter_types.to_java(java.lang.Class)
|
147
|
-
|
148
|
-
arguments = [expression, parameter_names, parameter_types].compact
|
149
|
-
elsif !parameters.nil?
|
150
|
-
arguments = [expression, parameters.java_class].compact
|
151
|
-
else
|
152
|
-
arguments = [expression, java.lang.String.java_class].compact
|
153
|
-
end
|
154
|
-
|
155
|
-
Java::CascadingOperationExpression::ExpressionFilter.new(*arguments)
|
156
|
-
end
|
157
|
-
|
158
|
-
def date_parser(field, format)
|
159
|
-
fields = fields(field)
|
160
|
-
Java::CascadingOperationText::DateParser.new(fields, format)
|
161
|
-
end
|
162
|
-
|
163
|
-
def date_formatter(fields, format, timezone=nil)
|
164
|
-
fields = fields(fields)
|
165
|
-
timezone = Java::JavaUtil::TimeZone.get_time_zone(timezone) if timezone
|
166
|
-
arguments = [fields, format, timezone].compact
|
167
|
-
Java::CascadingOperationText::DateFormatter.new(*arguments)
|
168
|
-
end
|
169
|
-
|
170
|
-
def regex_filter(*args)
|
171
|
-
options = args.extract_options!
|
172
|
-
|
173
|
-
pattern = args[0]
|
174
|
-
remove_match = options[:remove_match]
|
175
|
-
match_each_element = options[:match_each_element]
|
176
|
-
parameters = [pattern.to_s, remove_match, match_each_element].compact
|
177
|
-
Java::CascadingOperationRegex::RegexFilter.new(*parameters)
|
178
|
-
end
|
179
|
-
|
180
|
-
def regex_replace(*args)
|
181
|
-
options = args.extract_options!
|
182
|
-
|
183
|
-
fields = fields(args[0])
|
184
|
-
pattern = args[1]
|
185
|
-
replacement = args[2]
|
186
|
-
replace_all = options[:replace_all]
|
187
|
-
|
188
|
-
parameters = [fields, pattern.to_s, replacement.to_s, replace_all].compact
|
189
|
-
Java::CascadingOperationRegex::RegexReplace.new(*parameters)
|
190
|
-
end
|
191
|
-
|
192
|
-
def field_joiner(*args)
|
193
|
-
options = args.extract_options!
|
194
|
-
delimiter = options[:delimiter] || ','
|
195
|
-
fields = fields(options[:into])
|
196
|
-
|
197
|
-
parameters = [fields, delimiter].compact
|
198
|
-
Java::CascadingOperationText::FieldJoiner.new(*parameters)
|
199
|
-
end
|
200
135
|
end
|
201
136
|
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Cascading
|
2
|
+
# Module of pipe assemblies that wrap operations defined in the Cascading
|
3
|
+
# cascading.operation.regex package. These are split out only to group
|
4
|
+
# similar functionality.
|
5
|
+
#
|
6
|
+
# All DSL regex pipes require an input_field, a regex, and either a single
|
7
|
+
# into_field or one or more into_fields. Requiring a single input field
|
8
|
+
# allows us to raise an exception early if the wrong input is specified and
|
9
|
+
# avoids the non-intuitive situation where the first of many fields is
|
10
|
+
# silently taken as in Cascading. Requiring a regex means you don't have to
|
11
|
+
# go looking for defaults in code. And into_field(s) means we can propagate
|
12
|
+
# field names through the dataflow.
|
13
|
+
#
|
14
|
+
# Mapping of DSL pipes into Cascading regex operations:
|
15
|
+
# parse:: {RegexParser}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexParser.html]
|
16
|
+
# split:: {RegexSplitter}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexSplitter.html]
|
17
|
+
# split\_rows:: {RegexSplitGenerator}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexSplitGenerator.html]
|
18
|
+
# match\_rows:: {RegexGenerator}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexGenerator.html]
|
19
|
+
# replace:: {RegexReplace}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexReplace.html]
|
20
|
+
module RegexOperations
|
21
|
+
# Parses the given input_field using the specified regular expression to
|
22
|
+
# produce one output per group in that expression.
|
23
|
+
#
|
24
|
+
# The named options are:
|
25
|
+
# [groups] Array of integers specifying which groups to capture if you want
|
26
|
+
# a subset of groups.
|
27
|
+
#
|
28
|
+
# Example:
|
29
|
+
# parse 'field1', /(\w+)\s+(\w+)/, ['out1', 'out2'], :groups => [1, 2]
|
30
|
+
def parse(input_field, regex, into_fields, options = {})
|
31
|
+
groups = options[:groups].to_java(:int) if options[:groups]
|
32
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
33
|
+
|
34
|
+
input_field = fields(input_field)
|
35
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
36
|
+
|
37
|
+
parameters = [fields(into_fields), regex.to_s, groups].compact
|
38
|
+
each(
|
39
|
+
input_field,
|
40
|
+
:function => Java::CascadingOperationRegex::RegexParser.new(*parameters),
|
41
|
+
:output => output
|
42
|
+
)
|
43
|
+
end
|
44
|
+
alias regex_parser parse
|
45
|
+
|
46
|
+
# Splits the given input_field into multiple fields using the specified
|
47
|
+
# regular expression.
|
48
|
+
#
|
49
|
+
# Example:
|
50
|
+
# split 'line', /\s+/, ['out1', 'out2']
|
51
|
+
def split(input_field, regex, into_fields, options = {})
|
52
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
53
|
+
|
54
|
+
input_field = fields(input_field)
|
55
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
56
|
+
|
57
|
+
each(
|
58
|
+
input_field,
|
59
|
+
:function => Java::CascadingOperationRegex::RegexSplitter.new(fields(into_fields), regex.to_s),
|
60
|
+
:output => output
|
61
|
+
)
|
62
|
+
end
|
63
|
+
alias regex_splitter split
|
64
|
+
|
65
|
+
# Splits the given input_field into new rows using the specified regular
|
66
|
+
# expression.
|
67
|
+
#
|
68
|
+
# Example:
|
69
|
+
# split_rows 'line', /\s+/, 'word'
|
70
|
+
def split_rows(input_field, regex, into_field, options = {})
|
71
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
72
|
+
|
73
|
+
input_field = fields(input_field)
|
74
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
75
|
+
into_field = fields(into_field)
|
76
|
+
raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
|
77
|
+
|
78
|
+
each(
|
79
|
+
input_field,
|
80
|
+
:function => Java::CascadingOperationRegex::RegexSplitGenerator.new(into_field, regex.to_s),
|
81
|
+
:output => output
|
82
|
+
)
|
83
|
+
end
|
84
|
+
alias regex_split_generator split_rows
|
85
|
+
|
86
|
+
# Emits a new row for each regex group matched in input_field using the
|
87
|
+
# specified regular expression.
|
88
|
+
#
|
89
|
+
# Example:
|
90
|
+
# match_rows 'line', /(\w+)\s+(\w+)/, 'word'
|
91
|
+
def match_rows(input_field, regex, into_field, options = {})
|
92
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
93
|
+
|
94
|
+
input_field = fields(input_field)
|
95
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
96
|
+
into_field = fields(into_field)
|
97
|
+
raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
|
98
|
+
|
99
|
+
each(
|
100
|
+
input_field,
|
101
|
+
:function => Java::CascadingOperationRegex::RegexGenerator.new(into_field, regex.to_s),
|
102
|
+
:output => output
|
103
|
+
)
|
104
|
+
end
|
105
|
+
alias regex_generator match_rows
|
106
|
+
|
107
|
+
# Performs a query/replace on the given input_field using the specified
|
108
|
+
# regular expression and replacement.
|
109
|
+
#
|
110
|
+
# The named options are:
|
111
|
+
# [replace_all] Boolean indicating if all matches should be replaced;
|
112
|
+
# defaults to true (the Cascading default).
|
113
|
+
#
|
114
|
+
# Example:
|
115
|
+
# replace 'line', /[.,]*\s+/, 'tab_separated_line', "\t"
|
116
|
+
def replace(input_field, regex, into_field, replacement, options = {})
|
117
|
+
output = options[:output] || all_fields # Overrides Cascading default
|
118
|
+
|
119
|
+
input_field = fields(input_field)
|
120
|
+
raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
|
121
|
+
into_field = fields(into_field)
|
122
|
+
raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
|
123
|
+
|
124
|
+
parameters = [into_field, regex.to_s, replacement.to_s, options[:replace_all]].compact
|
125
|
+
each(
|
126
|
+
input_field,
|
127
|
+
:function => Java::CascadingOperationRegex::RegexReplace.new(*parameters),
|
128
|
+
:output => output
|
129
|
+
)
|
130
|
+
end
|
131
|
+
alias regex_replace replace
|
132
|
+
end
|
133
|
+
end
|
data/lib/cascading/scope.rb
CHANGED
@@ -1,23 +1,35 @@
|
|
1
1
|
module Cascading
|
2
|
+
# Scope is a wrapper for the private Cascading c.f.p.Scope object used to
|
3
|
+
# connect the dataflow graph by resolving fields. cascading.jruby wraps this
|
4
|
+
# facility so that it may be used to propagate field names at composition
|
5
|
+
# time (not Cascading plan time) in the same way they will later be
|
6
|
+
# propagated by the planner.
|
2
7
|
class Scope
|
3
8
|
attr_accessor :scope
|
4
9
|
|
10
|
+
# Construct a Scope given the Cascading c.f.p.Scope to wrap.
|
5
11
|
def initialize(scope)
|
6
12
|
@scope = scope
|
7
13
|
end
|
8
14
|
|
15
|
+
# Copy one Scope into another; relies upon the copy constructor of
|
16
|
+
# c.f.p.Scope.
|
9
17
|
def copy
|
10
18
|
Scope.new(Java::CascadingFlowPlanner::Scope.new(@scope))
|
11
19
|
end
|
12
20
|
|
21
|
+
# Build a c.f.p.Scope for a Flow, which is empty except for its name.
|
13
22
|
def self.flow_scope(name)
|
14
23
|
Java::CascadingFlowPlanner::Scope.new(name)
|
15
24
|
end
|
16
25
|
|
26
|
+
# Build an empty Scope, wrapping an empty c.f.p.Scope.
|
17
27
|
def self.empty_scope(name)
|
18
28
|
Scope.new(Java::CascadingFlowPlanner::Scope.new(name))
|
19
29
|
end
|
20
30
|
|
31
|
+
# Build a Scope for a single source Tap. The flow_scope is propagated
|
32
|
+
# through this call into a new Scope.
|
21
33
|
def self.source_scope(name, tap, flow_scope)
|
22
34
|
incoming_scopes = java.util.HashSet.new
|
23
35
|
incoming_scopes.add(flow_scope)
|
@@ -27,28 +39,30 @@ module Cascading
|
|
27
39
|
Scope.new(java_scope)
|
28
40
|
end
|
29
41
|
|
42
|
+
# Build a Scope for an arbitrary flow element. This is used to update the
|
43
|
+
# Scope at each stage in a pipe Assembly.
|
30
44
|
def self.outgoing_scope(flow_element, incoming_scopes)
|
31
45
|
java_scopes = incoming_scopes.compact.map{ |s| s.scope }
|
32
46
|
Scope.new(outgoing_scope_for(flow_element, java.util.HashSet.new(java_scopes)))
|
33
47
|
end
|
34
48
|
|
49
|
+
# The values fields of the Scope, which indicate the fields in the current
|
50
|
+
# dataflow tuple.
|
35
51
|
def values_fields
|
36
52
|
@scope.out_values_fields
|
37
53
|
end
|
38
54
|
|
55
|
+
# The grouping fields of the Scope, which indicate the keys of an
|
56
|
+
# group/cogroup.
|
39
57
|
def grouping_fields
|
40
58
|
@scope.out_grouping_fields
|
41
59
|
end
|
42
60
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
'ERROR'
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
61
|
+
# Prints a detailed description of this Scope, including its type and
|
62
|
+
# various selectors, fields, and key fields. Data is bubbled up directly
|
63
|
+
# from the Cascading c.f.p.Scope. This output can be useful for debugging
|
64
|
+
# the propagation of fields through your job (see Flow#debug_scope and
|
65
|
+
# Assembly#debug_scope, which both rely upon this method).
|
52
66
|
def to_s
|
53
67
|
kind = 'Unknown'
|
54
68
|
kind = 'Tap' if @scope.tap?
|
@@ -77,6 +91,15 @@ END
|
|
77
91
|
|
78
92
|
private
|
79
93
|
|
94
|
+
def scope_fields_to_s(accessor)
|
95
|
+
begin
|
96
|
+
fields = @scope.send(accessor)
|
97
|
+
fields.nil? ? 'null' : fields.to_s
|
98
|
+
rescue Exception => e
|
99
|
+
'ERROR'
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
80
103
|
def self.outgoing_scope_for(flow_element, incoming_scopes)
|
81
104
|
begin
|
82
105
|
flow_element.outgoing_scope_for(incoming_scopes)
|