cascading.jruby 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/lib/cascading/assembly.rb +138 -17
- data/lib/cascading/base.rb +0 -4
- data/lib/cascading/cascade.rb +25 -16
- data/lib/cascading/cascading.rb +25 -5
- data/lib/cascading/ext/array.rb +1 -7
- data/lib/cascading/flow.rb +18 -19
- data/lib/cascading/mode.rb +5 -1
- data/lib/cascading/operations.rb +11 -4
- data/lib/cascading/tap.rb +4 -0
- data/lib/cascading.rb +1 -5
- data/test/test_assembly.rb +135 -29
- data/test/test_cascade.rb +80 -0
- data/test/test_flow.rb +20 -0
- data/test/test_operations.rb +3 -2
- metadata +6 -76
- data/.travis.yml +0 -6
- data/Gemfile +0 -6
- data/Gemfile.lock +0 -12
- data/HACKING.md +0 -23
- data/README.md +0 -9
- data/Rakefile +0 -46
- data/TODO +0 -13
- data/bin/make_job +0 -81
- data/ivy.xml +0 -25
- data/ivysettings.xml +0 -7
- data/samples/branch.rb +0 -30
- data/samples/copy.rb +0 -20
- data/samples/data/data2.txt +0 -88799
- data/samples/data/data_group_by.txt +0 -7
- data/samples/data/data_join1.txt +0 -3
- data/samples/data/data_join2.txt +0 -3
- data/samples/data/data_join3.txt +0 -3
- data/samples/data/genealogy/names/dist.all.last +0 -88799
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
- data/samples/group_by.rb +0 -61
- data/samples/join.rb +0 -31
- data/samples/logwordcount.rb +0 -22
- data/samples/project.rb +0 -23
- data/samples/rename.rb +0 -20
- data/samples/scorenames.rb +0 -20
- data/samples/splitter.rb +0 -19
- data/samples/sub_assembly.rb +0 -30
- data/samples/union.rb +0 -36
- data/spec/cascading_spec.rb +0 -105
- data/spec/expr_spec.rb +0 -230
- data/spec/jruby_version_spec.rb +0 -72
- data/spec/resource/join_input.txt +0 -3
- data/spec/resource/test_input.txt +0 -4
- data/spec/scope_spec.rb +0 -149
- data/spec/spec.opts +0 -6
- data/spec/spec_helper.rb +0 -5
- data/spec/spec_util.rb +0 -92
- data/src/cascading/jruby/Main.java +0 -38
- data/src/cascading/jruby/runner.rb +0 -6
- data/tags +0 -342
- data/tasks/ann.rake +0 -80
- data/tasks/ant.rake +0 -23
- data/tasks/bones.rake +0 -20
- data/tasks/gem.rake +0 -206
- data/tasks/git.rake +0 -40
- data/tasks/notes.rake +0 -27
- data/tasks/post_load.rake +0 -34
- data/tasks/rdoc.rake +0 -50
- data/tasks/rubyforge.rake +0 -55
- data/tasks/samples.rake +0 -19
- data/tasks/setup.rb +0 -300
- data/tasks/spec.rake +0 -59
- data/tasks/svn.rake +0 -47
- data/tasks/test.rake +0 -42
- data/test/data/data1.txt +0 -14
- data/test/data/data2.txt +0 -14
- data/test/mock_assemblies.rb +0 -55
data/History.txt
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
0.0.10 - Introduce Jading, cleanup, and library standardization
|
2
|
+
|
3
|
+
This release removes the code associated with running jobs on a Hadoop cluster
|
4
|
+
as that functionality is now moved to Jading. This separates the DSL library (a
|
5
|
+
gem) from the repository required to build and package job jars (Jading).
|
6
|
+
Additionally, this release removes a lot of the auto-generated Rake garbage
|
7
|
+
inherited from the original version and greatly streamlines the packaged gem.
|
8
|
+
Finally, it starts the precedent for making the contract of operations idiomatic
|
9
|
+
Ruby, removing the unnecessary Operations module, and eventually eliminating
|
10
|
+
global cascade and flow registries. A non-backwards compatible change was made
|
11
|
+
to the contract of complete and draw which allowed the mechanism for propagating
|
12
|
+
properties through a job to be righted and made more Hadoop-idiomatic.
|
13
|
+
|
14
|
+
This release removes all the cruft associated with running jobs
|
15
|
+
|
1
16
|
0.0.9 - Cascading local mode and upgrade to Cascading 2.0.0
|
2
17
|
|
3
18
|
This release upgrades to Cascading 2.0.0 (final) and introduces Cascading local
|
data/lib/cascading/assembly.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
3
|
-
#
|
4
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
5
|
-
|
6
1
|
require 'cascading/base'
|
7
2
|
require 'cascading/operations'
|
8
3
|
require 'cascading/aggregations'
|
@@ -110,9 +105,7 @@ module Cascading
|
|
110
105
|
"#{name} : head pipe : #{head_pipe} - tail pipe: #{tail_pipe}"
|
111
106
|
end
|
112
107
|
|
113
|
-
|
114
|
-
# and :on to specify the group_fields.
|
115
|
-
def join(*args, &block)
|
108
|
+
def prepare_join(*args, &block)
|
116
109
|
options = args.extract_options!
|
117
110
|
|
118
111
|
pipes, _ = populate_incoming_scopes(args)
|
@@ -140,6 +133,7 @@ module Cascading
|
|
140
133
|
incoming_fields = @incoming_scopes.map{ |s| s.values_fields }
|
141
134
|
declared_fields = fields(options[:declared_fields] || dedup_fields(*incoming_fields))
|
142
135
|
joiner = options[:joiner]
|
136
|
+
is_hash_join = options[:hash] || false
|
143
137
|
|
144
138
|
case joiner
|
145
139
|
when :inner, 'inner', nil
|
@@ -160,15 +154,48 @@ module Cascading
|
|
160
154
|
end
|
161
155
|
joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
|
162
156
|
end
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
157
|
+
|
158
|
+
if is_hash_join
|
159
|
+
raise ArgumentError, "hash joins don't support aggregations" if block_given?
|
160
|
+
parameters = [
|
161
|
+
pipes.to_java(Java::CascadingPipe::Pipe),
|
162
|
+
group_fields,
|
163
|
+
declared_fields,
|
164
|
+
joiner
|
165
|
+
]
|
166
|
+
group_assembly = Java::CascadingPipe::HashJoin.new(*parameters)
|
167
|
+
else
|
168
|
+
result_group_fields = dedup_fields(*group_fields)
|
169
|
+
parameters = [
|
170
|
+
pipes.to_java(Java::CascadingPipe::Pipe),
|
171
|
+
group_fields,
|
172
|
+
declared_fields,
|
173
|
+
result_group_fields,
|
174
|
+
joiner
|
175
|
+
]
|
176
|
+
group_assembly = Java::CascadingPipe::CoGroup.new(*parameters)
|
177
|
+
end
|
178
|
+
apply_aggregations(group_assembly, @incoming_scopes, &block)
|
179
|
+
end
|
180
|
+
private :prepare_join
|
181
|
+
|
182
|
+
# Builds a HashJoin pipe. This should be used carefully, as the right side
|
183
|
+
# of the join is accumulated entirely in memory. Requires a list of assembly
|
184
|
+
# names to join and :on to specify the join_fields.
|
185
|
+
def hash_join(*args, &block)
|
186
|
+
options = args.extract_options!
|
187
|
+
options[:hash] = true
|
188
|
+
args << options
|
189
|
+
prepare_join(*args, &block)
|
190
|
+
end
|
191
|
+
|
192
|
+
# Builds a join (CoGroup) pipe. Requires a list of assembly names to join
|
193
|
+
# and :on to specify the group_fields.
|
194
|
+
def join(*args, &block)
|
195
|
+
options = args.extract_options!
|
196
|
+
options[:hash] = false
|
197
|
+
args << options
|
198
|
+
prepare_join(*args, &block)
|
172
199
|
end
|
173
200
|
alias co_group join
|
174
201
|
|
@@ -650,5 +677,99 @@ module Cascading
|
|
650
677
|
|
651
678
|
each args, :function => field_joiner(options), :output => output
|
652
679
|
end
|
680
|
+
|
681
|
+
# Ungroups, or unpivots, a tuple (see Cascading's UnGroup at http://docs.cascading.org/cascading/2.0/javadoc/cascading/operation/function/UnGroup.html).
|
682
|
+
#
|
683
|
+
# You must provide :key and you must provide only one of :value_selectors
|
684
|
+
# and :num_values.
|
685
|
+
#
|
686
|
+
# The named options are:
|
687
|
+
# * <tt>:key</tt> required array of field names to replicate on every
|
688
|
+
# output row in an ungrouped group.
|
689
|
+
# * <tt>:value_selectors</tt> an array of field names to ungroup. Each
|
690
|
+
# field will be ungrouped into an output tuple along with the key fields
|
691
|
+
# in the order provided.
|
692
|
+
# * <tt>:num_values</tt> an integer specifying the number of fields to
|
693
|
+
# ungroup into each output tuple (excluding the key fields). All input
|
694
|
+
# fields will be ungrouped.
|
695
|
+
# * <tt>:input</tt> an array of field names that specifies the fields to
|
696
|
+
# input to UnGroup. Defaults to all_fields.
|
697
|
+
# * <tt>:into</tt> an array of field names. Default set by UnGroup.
|
698
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
699
|
+
# produce as output of UnGroup. Defaults to all_fields.
|
700
|
+
def ungroup(*args)
|
701
|
+
options = args.extract_options!
|
702
|
+
input = options[:input] || all_fields
|
703
|
+
into = fields(options[:into])
|
704
|
+
output = options[:output] || all_fields
|
705
|
+
key = fields(options[:key])
|
706
|
+
|
707
|
+
raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
|
708
|
+
value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
|
709
|
+
num_values = options[:num_values] if options.has_key?(:num_values)
|
710
|
+
|
711
|
+
parameters = [into, key, value_selectors, num_values].compact
|
712
|
+
each input, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
|
713
|
+
end
|
714
|
+
|
715
|
+
# Inserts one of two values into the dataflow based upon the result of the
|
716
|
+
# supplied filter on the input fields. This is primarily useful for
|
717
|
+
# creating indicators from filters.
|
718
|
+
#
|
719
|
+
# Parameters:
|
720
|
+
# * <tt>input</tt> name of field to apply the filter.
|
721
|
+
# * <tt>filter</tt> Cascading Filter to apply.
|
722
|
+
# * <tt>keep_value</tt> Java value to produce when the filter would keep
|
723
|
+
# the given input.
|
724
|
+
# * <tt>remove_value</tt> Java value to produce when the filter would
|
725
|
+
# remove the given input.
|
726
|
+
#
|
727
|
+
# The named options are:
|
728
|
+
# * <tt>:into</tt> an output field name, defaulting to 'filter_value'.
|
729
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
730
|
+
# retain in the output tuple. Defaults to all_fields.
|
731
|
+
def set_value(input, filter, keep_value, remove_value, params = {})
|
732
|
+
into = fields(params[:into] || 'filter_value')
|
733
|
+
output = params[:output] || all_fields
|
734
|
+
each input, :function => Java::CascadingOperationFunction::SetValue.new(into, filter, keep_value, remove_value), :output => output
|
735
|
+
end
|
736
|
+
|
737
|
+
# Efficient way of inserting a null indicator for any field, even one that
|
738
|
+
# cannot be coerced to a string. This is accomplished using Cascading's
|
739
|
+
# FilterNull and SetValue operators rather than Janino. 1 is produced if
|
740
|
+
# the field is null and 0 otherwise.
|
741
|
+
#
|
742
|
+
# Parameters:
|
743
|
+
# * <tt>input</tt> name of field to check for null.
|
744
|
+
#
|
745
|
+
# The named options are:
|
746
|
+
# * <tt>:into</tt> an output field name, defaulting to 'is_null'.
|
747
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
748
|
+
# retain in the output tuple. Defaults to all_fields.
|
749
|
+
def null_indicator(input, params = {})
|
750
|
+
into = fields(params[:into] || 'is_null')
|
751
|
+
output = params[:output] || all_fields
|
752
|
+
set_value input, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, :into => into, :output => output
|
753
|
+
end
|
754
|
+
|
755
|
+
# Given a field and a regex, returns an indicator that is 1 if the string
|
756
|
+
# contains at least 1 match and 0 otherwise.
|
757
|
+
#
|
758
|
+
# Parameters:
|
759
|
+
# * <tt>input</tt> field name or names that specifies the fields over which
|
760
|
+
# to perform the match.
|
761
|
+
# * <tt>pattern</tt> regex to apply to the input.
|
762
|
+
#
|
763
|
+
# The named options are:
|
764
|
+
# * <tt>:into</tt> an output field name, defaulting to 'regex_contains'.
|
765
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
766
|
+
# retain in the output tuple. Defaults to all_fields.
|
767
|
+
def regex_contains(input, pattern, params = {})
|
768
|
+
input = fields(input)
|
769
|
+
pattern = pattern.to_s # Supports JRuby regexes
|
770
|
+
into = fields(params[:into] || 'regex_contains')
|
771
|
+
output = params[:output] || all_fields
|
772
|
+
set_value input, Java::CascadingOperationRegex::RegexFilter.new(pattern), 1.to_java, 0.to_java, :into => into, :output => output
|
773
|
+
end
|
653
774
|
end
|
654
775
|
end
|
data/lib/cascading/base.rb
CHANGED
data/lib/cascading/cascade.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
4
|
-
|
5
1
|
require 'cascading/base'
|
6
2
|
require 'yaml'
|
7
3
|
|
@@ -9,22 +5,35 @@ module Cascading
|
|
9
5
|
class Cascade < Cascading::Node
|
10
6
|
extend Registerable
|
11
7
|
|
12
|
-
attr_reader :mode
|
8
|
+
attr_reader :properties, :mode
|
13
9
|
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
10
|
+
# Do not use this constructor directly; instead, use Cascading::cascade to
|
11
|
+
# build cascades.
|
12
|
+
#
|
13
|
+
# Builds a cascade given the specified name. Optionally accepts
|
14
|
+
# :properties which will be used as the default properties for all child
|
15
|
+
# flows. Properties must be a Ruby Hash with string keys and values and
|
16
|
+
# will be copied before being passed into each flow in the cascade. See
|
17
|
+
# Cascading::Flow#initialize for details on how flows handle properties.
|
18
|
+
# Optionally accepts a :mode which will be used as the default mode for all
|
19
|
+
# child flows. See Cascading::Mode.parse for details.
|
17
20
|
def initialize(name, params = {})
|
21
|
+
@properties = params[:properties] || {}
|
18
22
|
@mode = params[:mode]
|
19
23
|
super(name, nil) # A Cascade cannot have a parent
|
20
24
|
self.class.add(name, self)
|
21
25
|
end
|
22
26
|
|
23
|
-
# Builds a child flow given a name and block. Optionally accepts
|
24
|
-
# which will override the default
|
27
|
+
# Builds a child flow given a name and block. Optionally accepts
|
28
|
+
# :properties which will override the default properties stored in this
|
29
|
+
# cascade. Optionally accepts a :mode, which will override the default
|
30
|
+
# mode stored in this cascade.
|
25
31
|
def flow(name, params = {}, &block)
|
26
32
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
33
|
+
|
34
|
+
params[:properties] ||= properties.dup
|
27
35
|
params[:mode] ||= mode
|
36
|
+
|
28
37
|
flow = Flow.new(name, self, params)
|
29
38
|
add_child(flow)
|
30
39
|
flow.instance_eval(&block)
|
@@ -35,9 +44,9 @@ module Cascading
|
|
35
44
|
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
36
45
|
end
|
37
46
|
|
38
|
-
def draw(dir
|
47
|
+
def draw(dir)
|
39
48
|
@children.each do |name, flow|
|
40
|
-
flow.connect
|
49
|
+
flow.connect.writeDOT("#{dir}/#{name}.dot")
|
41
50
|
end
|
42
51
|
end
|
43
52
|
|
@@ -54,9 +63,9 @@ module Cascading
|
|
54
63
|
end
|
55
64
|
end
|
56
65
|
|
57
|
-
def complete
|
66
|
+
def complete
|
58
67
|
begin
|
59
|
-
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children
|
68
|
+
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
|
60
69
|
rescue NativeException => e
|
61
70
|
raise CascadingException.new(e, 'Error completing cascade')
|
62
71
|
end
|
@@ -64,9 +73,9 @@ module Cascading
|
|
64
73
|
|
65
74
|
private
|
66
75
|
|
67
|
-
def make_flows(flows
|
76
|
+
def make_flows(flows)
|
68
77
|
flow_instances = flows.map do |name, flow|
|
69
|
-
cascading_flow = flow.connect
|
78
|
+
cascading_flow = flow.connect
|
70
79
|
flow.listeners.each { |l| cascading_flow.addListener(l) }
|
71
80
|
cascading_flow
|
72
81
|
end
|
data/lib/cascading/cascading.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
3
|
-
#
|
4
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
5
|
-
|
6
1
|
require 'cascading/expr_stub'
|
7
2
|
|
8
3
|
module Cascading
|
@@ -12,10 +7,31 @@ module Cascading
|
|
12
7
|
:float => java.lang.Float.java_class, :string => java.lang.String.java_class,
|
13
8
|
}
|
14
9
|
|
10
|
+
# FIXME: I consider $jobconf_properties to be a hack forced on us by the lack
|
11
|
+
# of properties handling in earlier versions of the gem. Fully removing the
|
12
|
+
# hack would look like introducing a Job abstraction which instantiates user
|
13
|
+
# code, and allowing jading's runner to pass properties into that. I've
|
14
|
+
# already taken the step to thread properties through cascades and flows
|
15
|
+
# rather than merge properties before connect, but we still require the
|
16
|
+
# global properties hack to integrate with external runner code (jading).
|
17
|
+
#
|
18
|
+
# Note that this would also mean we can get rid of the global "registries" of
|
19
|
+
# cascades and flows. I've already eliminated most uses of these registries,
|
20
|
+
# but they are still required for the runner to find user code required in a
|
21
|
+
# previous step. A Job abstraction would clean this up, as well.
|
22
|
+
#
|
23
|
+
# For now, it is important that people use these constructors rather than
|
24
|
+
# directly building their own cascades and flows so that jading can send them
|
25
|
+
# default properties.
|
26
|
+
|
15
27
|
# Builds a top-level cascade given a name and a block. Optionally accepts a
|
16
28
|
# :mode, as explained in Cascading::Cascade#initialize.
|
17
29
|
def cascade(name, params = {}, &block)
|
18
30
|
raise "Could not build cascade '#{name}'; block required" unless block_given?
|
31
|
+
raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if params[:properties]
|
32
|
+
|
33
|
+
params[:properties] = $jobconf_properties.dup if $jobconf_properties
|
34
|
+
|
19
35
|
cascade = Cascade.new(name, params)
|
20
36
|
cascade.instance_eval(&block)
|
21
37
|
cascade
|
@@ -26,6 +42,10 @@ module Cascading
|
|
26
42
|
# Cascading::Flow#initialize.
|
27
43
|
def flow(name, params = {}, &block)
|
28
44
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
45
|
+
raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if params[:properties]
|
46
|
+
|
47
|
+
params[:properties] = $jobconf_properties.dup if $jobconf_properties
|
48
|
+
|
29
49
|
flow = Flow.new(name, nil, params)
|
30
50
|
flow.instance_eval(&block)
|
31
51
|
flow
|
data/lib/cascading/ext/array.rb
CHANGED
@@ -1,9 +1,3 @@
|
|
1
|
-
# ext.rb : some extensions to basic types
|
2
|
-
#
|
3
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
4
|
-
#
|
5
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
6
|
-
|
7
1
|
class Array
|
8
2
|
def extract_options!
|
9
3
|
last.is_a?(::Hash) ? pop : {}
|
@@ -12,4 +6,4 @@ class Array
|
|
12
6
|
def extract_options
|
13
7
|
last.is_a?(::Hash) ? last : {}
|
14
8
|
end
|
15
|
-
end
|
9
|
+
end
|
data/lib/cascading/flow.rb
CHANGED
@@ -1,22 +1,26 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
3
|
-
#
|
4
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
5
|
-
|
6
1
|
require 'cascading/assembly'
|
7
2
|
|
8
3
|
module Cascading
|
9
4
|
class Flow < Cascading::Node
|
10
5
|
extend Registerable
|
11
6
|
|
12
|
-
attr_accessor :
|
13
|
-
attr_reader :mode
|
7
|
+
attr_accessor :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
|
8
|
+
attr_reader :properties, :mode
|
14
9
|
|
10
|
+
# Do not use this constructor directly. Instead, use Cascading::flow to
|
11
|
+
# build top-level flows and Cascade#flow to build flows within a Cascade.
|
12
|
+
#
|
15
13
|
# Builds a flow given a name and a parent node (a cascade or nil).
|
16
|
-
# Optionally accepts
|
17
|
-
# this flow.
|
14
|
+
# Optionally accepts :properties which allows external configuration of
|
15
|
+
# this flow. The flow will side-effect the properties during composition,
|
16
|
+
# then pass the modified properties along to the FlowConnector for
|
17
|
+
# execution. See Cascading::Cascade#initialize for details on how
|
18
|
+
# properties are propagated through cascades. Optionally accepts a :mode
|
19
|
+
# which will determine the execution mode of this flow. See
|
20
|
+
# Cascading::Mode.parse for details.
|
18
21
|
def initialize(name, parent, params = {})
|
19
|
-
@
|
22
|
+
@sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
|
23
|
+
@properties = params[:properties] || {}
|
20
24
|
@mode = Mode.parse(params[:mode])
|
21
25
|
@flow_scope = Scope.flow_scope(name)
|
22
26
|
super(name, parent)
|
@@ -128,14 +132,9 @@ module Cascading
|
|
128
132
|
end
|
129
133
|
end
|
130
134
|
|
131
|
-
def connect
|
132
|
-
# This ensures we have a hash, and that it is a Ruby Hash (because we
|
133
|
-
# also accept java.util.HashMap), then merges it with Flow properties
|
134
|
-
properties ||= {}
|
135
|
-
properties = java.util.HashMap.new(@properties.merge(Hash[*properties.to_a.flatten]))
|
136
|
-
|
135
|
+
def connect
|
137
136
|
puts "Connecting flow '#{name}' with properties:"
|
138
|
-
properties.
|
137
|
+
properties.keys.sort.each do |key|
|
139
138
|
puts "#{key}=#{properties[key]}"
|
140
139
|
end
|
141
140
|
|
@@ -149,9 +148,9 @@ module Cascading
|
|
149
148
|
mode.connect_flow(properties, name, sources, sinks, pipes)
|
150
149
|
end
|
151
150
|
|
152
|
-
def complete
|
151
|
+
def complete
|
153
152
|
begin
|
154
|
-
flow = connect
|
153
|
+
flow = connect
|
155
154
|
@listeners.each { |l| flow.addListener(l) }
|
156
155
|
flow.complete
|
157
156
|
rescue NativeException => e
|
data/lib/cascading/mode.rb
CHANGED
@@ -41,7 +41,11 @@ module Cascading
|
|
41
41
|
update_local_mode(sources, sinks)
|
42
42
|
sources = select_taps(sources)
|
43
43
|
sinks = select_taps(sinks)
|
44
|
-
|
44
|
+
|
45
|
+
# Report execution mode to stdout before connecting
|
46
|
+
puts "Connecting flow '#{name}' in #{local ? 'Cascading local mode' : 'Hadoop mode'}"
|
47
|
+
|
48
|
+
flow_connector_class.new(java.util.HashMap.new(properties)).connect(name, sources, sinks, pipes)
|
45
49
|
end
|
46
50
|
|
47
51
|
private
|
data/lib/cascading/operations.rb
CHANGED
@@ -1,8 +1,15 @@
|
|
1
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
4
|
-
|
5
1
|
module Cascading
|
2
|
+
# The Cascading::Operations module is deprecated. The original idea from long
|
3
|
+
# ago is that it would be useful to mixin operator wrappers to places other
|
4
|
+
# than Cascading::Assembly, but this is not true. Instead, put Eaches in
|
5
|
+
# Cascading::Assembly, Everies in Cascading::Aggregations, and any more
|
6
|
+
# generally useful utility code directly in the Cascading module
|
7
|
+
# (cascading/cascading.rb).
|
8
|
+
#
|
9
|
+
# Further, the entire *args pattern should be deprecated as it leads to
|
10
|
+
# functions that can only be understood by reading their code. Instead,
|
11
|
+
# idiomatic Ruby (positional required params and a params hash for optional
|
12
|
+
# args) should be used. See Cascading::Assembly#set_value for an example.
|
6
13
|
module Operations
|
7
14
|
def identity
|
8
15
|
Java::CascadingOperation::Identity.new
|
data/lib/cascading/tap.rb
CHANGED
data/lib/cascading.rb
CHANGED
@@ -1,12 +1,8 @@
|
|
1
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
4
|
-
|
5
1
|
require 'java'
|
6
2
|
|
7
3
|
module Cascading
|
8
4
|
# :stopdoc:
|
9
|
-
VERSION = '0.0.
|
5
|
+
VERSION = '0.0.10'
|
10
6
|
LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
|
11
7
|
PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
|
12
8
|
CASCADING_HOME = ENV['CASCADING_HOME']
|