cascading.jruby 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/lib/cascading/assembly.rb +138 -17
- data/lib/cascading/base.rb +0 -4
- data/lib/cascading/cascade.rb +25 -16
- data/lib/cascading/cascading.rb +25 -5
- data/lib/cascading/ext/array.rb +1 -7
- data/lib/cascading/flow.rb +18 -19
- data/lib/cascading/mode.rb +5 -1
- data/lib/cascading/operations.rb +11 -4
- data/lib/cascading/tap.rb +4 -0
- data/lib/cascading.rb +1 -5
- data/test/test_assembly.rb +135 -29
- data/test/test_cascade.rb +80 -0
- data/test/test_flow.rb +20 -0
- data/test/test_operations.rb +3 -2
- metadata +6 -76
- data/.travis.yml +0 -6
- data/Gemfile +0 -6
- data/Gemfile.lock +0 -12
- data/HACKING.md +0 -23
- data/README.md +0 -9
- data/Rakefile +0 -46
- data/TODO +0 -13
- data/bin/make_job +0 -81
- data/ivy.xml +0 -25
- data/ivysettings.xml +0 -7
- data/samples/branch.rb +0 -30
- data/samples/copy.rb +0 -20
- data/samples/data/data2.txt +0 -88799
- data/samples/data/data_group_by.txt +0 -7
- data/samples/data/data_join1.txt +0 -3
- data/samples/data/data_join2.txt +0 -3
- data/samples/data/data_join3.txt +0 -3
- data/samples/data/genealogy/names/dist.all.last +0 -88799
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
- data/samples/group_by.rb +0 -61
- data/samples/join.rb +0 -31
- data/samples/logwordcount.rb +0 -22
- data/samples/project.rb +0 -23
- data/samples/rename.rb +0 -20
- data/samples/scorenames.rb +0 -20
- data/samples/splitter.rb +0 -19
- data/samples/sub_assembly.rb +0 -30
- data/samples/union.rb +0 -36
- data/spec/cascading_spec.rb +0 -105
- data/spec/expr_spec.rb +0 -230
- data/spec/jruby_version_spec.rb +0 -72
- data/spec/resource/join_input.txt +0 -3
- data/spec/resource/test_input.txt +0 -4
- data/spec/scope_spec.rb +0 -149
- data/spec/spec.opts +0 -6
- data/spec/spec_helper.rb +0 -5
- data/spec/spec_util.rb +0 -92
- data/src/cascading/jruby/Main.java +0 -38
- data/src/cascading/jruby/runner.rb +0 -6
- data/tags +0 -342
- data/tasks/ann.rake +0 -80
- data/tasks/ant.rake +0 -23
- data/tasks/bones.rake +0 -20
- data/tasks/gem.rake +0 -206
- data/tasks/git.rake +0 -40
- data/tasks/notes.rake +0 -27
- data/tasks/post_load.rake +0 -34
- data/tasks/rdoc.rake +0 -50
- data/tasks/rubyforge.rake +0 -55
- data/tasks/samples.rake +0 -19
- data/tasks/setup.rb +0 -300
- data/tasks/spec.rake +0 -59
- data/tasks/svn.rake +0 -47
- data/tasks/test.rake +0 -42
- data/test/data/data1.txt +0 -14
- data/test/data/data2.txt +0 -14
- data/test/mock_assemblies.rb +0 -55
data/History.txt
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
0.0.10 - Introduce Jading, cleanup, and library standardization
|
2
|
+
|
3
|
+
This release removes the code associated with running jobs on a Hadoop cluster
|
4
|
+
as that functionality is now moved to Jading. This separates the DSL library (a
|
5
|
+
gem) from the repository required to build and package job jars (Jading).
|
6
|
+
Additionally, this release removes a lot of the auto-generated Rake garbage
|
7
|
+
inherited from the original version and greatly streamlines the packaged gem.
|
8
|
+
Finally, it starts the precedent for making the contract of operations idiomatic
|
9
|
+
Ruby, removing the unnecessary Operations module, and eventually eliminating
|
10
|
+
global cascade and flow registries. A non-backwards compatible change was made
|
11
|
+
to the contract of complete and draw which allowed the mechanism for propagating
|
12
|
+
properties through a job to be righted and made more Hadoop-idiomatic.
|
13
|
+
|
14
|
+
This release removes all the cruft associated with running jobs
|
15
|
+
|
1
16
|
0.0.9 - Cascading local mode and upgrade to Cascading 2.0.0
|
2
17
|
|
3
18
|
This release upgrades to Cascading 2.0.0 (final) and introduces Cascading local
|
data/lib/cascading/assembly.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
3
|
-
#
|
4
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
5
|
-
|
6
1
|
require 'cascading/base'
|
7
2
|
require 'cascading/operations'
|
8
3
|
require 'cascading/aggregations'
|
@@ -110,9 +105,7 @@ module Cascading
|
|
110
105
|
"#{name} : head pipe : #{head_pipe} - tail pipe: #{tail_pipe}"
|
111
106
|
end
|
112
107
|
|
113
|
-
|
114
|
-
# and :on to specify the group_fields.
|
115
|
-
def join(*args, &block)
|
108
|
+
def prepare_join(*args, &block)
|
116
109
|
options = args.extract_options!
|
117
110
|
|
118
111
|
pipes, _ = populate_incoming_scopes(args)
|
@@ -140,6 +133,7 @@ module Cascading
|
|
140
133
|
incoming_fields = @incoming_scopes.map{ |s| s.values_fields }
|
141
134
|
declared_fields = fields(options[:declared_fields] || dedup_fields(*incoming_fields))
|
142
135
|
joiner = options[:joiner]
|
136
|
+
is_hash_join = options[:hash] || false
|
143
137
|
|
144
138
|
case joiner
|
145
139
|
when :inner, 'inner', nil
|
@@ -160,15 +154,48 @@ module Cascading
|
|
160
154
|
end
|
161
155
|
joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
|
162
156
|
end
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
157
|
+
|
158
|
+
if is_hash_join
|
159
|
+
raise ArgumentError, "hash joins don't support aggregations" if block_given?
|
160
|
+
parameters = [
|
161
|
+
pipes.to_java(Java::CascadingPipe::Pipe),
|
162
|
+
group_fields,
|
163
|
+
declared_fields,
|
164
|
+
joiner
|
165
|
+
]
|
166
|
+
group_assembly = Java::CascadingPipe::HashJoin.new(*parameters)
|
167
|
+
else
|
168
|
+
result_group_fields = dedup_fields(*group_fields)
|
169
|
+
parameters = [
|
170
|
+
pipes.to_java(Java::CascadingPipe::Pipe),
|
171
|
+
group_fields,
|
172
|
+
declared_fields,
|
173
|
+
result_group_fields,
|
174
|
+
joiner
|
175
|
+
]
|
176
|
+
group_assembly = Java::CascadingPipe::CoGroup.new(*parameters)
|
177
|
+
end
|
178
|
+
apply_aggregations(group_assembly, @incoming_scopes, &block)
|
179
|
+
end
|
180
|
+
private :prepare_join
|
181
|
+
|
182
|
+
# Builds a HashJoin pipe. This should be used carefully, as the right side
|
183
|
+
# of the join is accumulated entirely in memory. Requires a list of assembly
|
184
|
+
# names to join and :on to specify the join_fields.
|
185
|
+
def hash_join(*args, &block)
|
186
|
+
options = args.extract_options!
|
187
|
+
options[:hash] = true
|
188
|
+
args << options
|
189
|
+
prepare_join(*args, &block)
|
190
|
+
end
|
191
|
+
|
192
|
+
# Builds a join (CoGroup) pipe. Requires a list of assembly names to join
|
193
|
+
# and :on to specify the group_fields.
|
194
|
+
def join(*args, &block)
|
195
|
+
options = args.extract_options!
|
196
|
+
options[:hash] = false
|
197
|
+
args << options
|
198
|
+
prepare_join(*args, &block)
|
172
199
|
end
|
173
200
|
alias co_group join
|
174
201
|
|
@@ -650,5 +677,99 @@ module Cascading
|
|
650
677
|
|
651
678
|
each args, :function => field_joiner(options), :output => output
|
652
679
|
end
|
680
|
+
|
681
|
+
# Ungroups, or unpivots, a tuple (see Cascading's UnGroup at http://docs.cascading.org/cascading/2.0/javadoc/cascading/operation/function/UnGroup.html).
|
682
|
+
#
|
683
|
+
# You must provide :key and you must provide only one of :value_selectors
|
684
|
+
# and :num_values.
|
685
|
+
#
|
686
|
+
# The named options are:
|
687
|
+
# * <tt>:key</tt> required array of field names to replicate on every
|
688
|
+
# output row in an ungrouped group.
|
689
|
+
# * <tt>:value_selectors</tt> an array of field names to ungroup. Each
|
690
|
+
# field will be ungrouped into an output tuple along with the key fields
|
691
|
+
# in the order provided.
|
692
|
+
# * <tt>:num_values</tt> an integer specifying the number of fields to
|
693
|
+
# ungroup into each output tuple (excluding the key fields). All input
|
694
|
+
# fields will be ungrouped.
|
695
|
+
# * <tt>:input</tt> an array of field names that specifies the fields to
|
696
|
+
# input to UnGroup. Defaults to all_fields.
|
697
|
+
# * <tt>:into</tt> an array of field names. Default set by UnGroup.
|
698
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
699
|
+
# produce as output of UnGroup. Defaults to all_fields.
|
700
|
+
def ungroup(*args)
|
701
|
+
options = args.extract_options!
|
702
|
+
input = options[:input] || all_fields
|
703
|
+
into = fields(options[:into])
|
704
|
+
output = options[:output] || all_fields
|
705
|
+
key = fields(options[:key])
|
706
|
+
|
707
|
+
raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
|
708
|
+
value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
|
709
|
+
num_values = options[:num_values] if options.has_key?(:num_values)
|
710
|
+
|
711
|
+
parameters = [into, key, value_selectors, num_values].compact
|
712
|
+
each input, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
|
713
|
+
end
|
714
|
+
|
715
|
+
# Inserts one of two values into the dataflow based upon the result of the
|
716
|
+
# supplied filter on the input fields. This is primarily useful for
|
717
|
+
# creating indicators from filters.
|
718
|
+
#
|
719
|
+
# Parameters:
|
720
|
+
# * <tt>input</tt> name of field to apply the filter.
|
721
|
+
# * <tt>filter</tt> Cascading Filter to apply.
|
722
|
+
# * <tt>keep_value</tt> Java value to produce when the filter would keep
|
723
|
+
# the given input.
|
724
|
+
# * <tt>remove_value</tt> Java value to produce when the filter would
|
725
|
+
# remove the given input.
|
726
|
+
#
|
727
|
+
# The named options are:
|
728
|
+
# * <tt>:into</tt> an output field name, defaulting to 'filter_value'.
|
729
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
730
|
+
# retain in the output tuple. Defaults to all_fields.
|
731
|
+
def set_value(input, filter, keep_value, remove_value, params = {})
|
732
|
+
into = fields(params[:into] || 'filter_value')
|
733
|
+
output = params[:output] || all_fields
|
734
|
+
each input, :function => Java::CascadingOperationFunction::SetValue.new(into, filter, keep_value, remove_value), :output => output
|
735
|
+
end
|
736
|
+
|
737
|
+
# Efficient way of inserting a null indicator for any field, even one that
|
738
|
+
# cannot be coerced to a string. This is accomplished using Cascading's
|
739
|
+
# FilterNull and SetValue operators rather than Janino. 1 is produced if
|
740
|
+
# the field is null and 0 otherwise.
|
741
|
+
#
|
742
|
+
# Parameters:
|
743
|
+
# * <tt>input</tt> name of field to check for null.
|
744
|
+
#
|
745
|
+
# The named options are:
|
746
|
+
# * <tt>:into</tt> an output field name, defaulting to 'is_null'.
|
747
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
748
|
+
# retain in the output tuple. Defaults to all_fields.
|
749
|
+
def null_indicator(input, params = {})
|
750
|
+
into = fields(params[:into] || 'is_null')
|
751
|
+
output = params[:output] || all_fields
|
752
|
+
set_value input, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, :into => into, :output => output
|
753
|
+
end
|
754
|
+
|
755
|
+
# Given a field and a regex, returns an indicator that is 1 if the string
|
756
|
+
# contains at least 1 match and 0 otherwise.
|
757
|
+
#
|
758
|
+
# Parameters:
|
759
|
+
# * <tt>input</tt> field name or names that specifies the fields over which
|
760
|
+
# to perform the match.
|
761
|
+
# * <tt>pattern</tt> regex to apply to the input.
|
762
|
+
#
|
763
|
+
# The named options are:
|
764
|
+
# * <tt>:into</tt> an output field name, defaulting to 'regex_contains'.
|
765
|
+
# * <tt>:output</tt> an array of field names that specifies the fields to
|
766
|
+
# retain in the output tuple. Defaults to all_fields.
|
767
|
+
def regex_contains(input, pattern, params = {})
|
768
|
+
input = fields(input)
|
769
|
+
pattern = pattern.to_s # Supports JRuby regexes
|
770
|
+
into = fields(params[:into] || 'regex_contains')
|
771
|
+
output = params[:output] || all_fields
|
772
|
+
set_value input, Java::CascadingOperationRegex::RegexFilter.new(pattern), 1.to_java, 0.to_java, :into => into, :output => output
|
773
|
+
end
|
653
774
|
end
|
654
775
|
end
|
data/lib/cascading/base.rb
CHANGED
data/lib/cascading/cascade.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
4
|
-
|
5
1
|
require 'cascading/base'
|
6
2
|
require 'yaml'
|
7
3
|
|
@@ -9,22 +5,35 @@ module Cascading
|
|
9
5
|
class Cascade < Cascading::Node
|
10
6
|
extend Registerable
|
11
7
|
|
12
|
-
attr_reader :mode
|
8
|
+
attr_reader :properties, :mode
|
13
9
|
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
10
|
+
# Do not use this constructor directly; instead, use Cascading::cascade to
|
11
|
+
# build cascades.
|
12
|
+
#
|
13
|
+
# Builds a cascade given the specified name. Optionally accepts
|
14
|
+
# :properties which will be used as the default properties for all child
|
15
|
+
# flows. Properties must be a Ruby Hash with string keys and values and
|
16
|
+
# will be copied before being passed into each flow in the cascade. See
|
17
|
+
# Cascading::Flow#initialize for details on how flows handle properties.
|
18
|
+
# Optionally accepts a :mode which will be used as the default mode for all
|
19
|
+
# child flows. See Cascading::Mode.parse for details.
|
17
20
|
def initialize(name, params = {})
|
21
|
+
@properties = params[:properties] || {}
|
18
22
|
@mode = params[:mode]
|
19
23
|
super(name, nil) # A Cascade cannot have a parent
|
20
24
|
self.class.add(name, self)
|
21
25
|
end
|
22
26
|
|
23
|
-
# Builds a child flow given a name and block. Optionally accepts
|
24
|
-
# which will override the default
|
27
|
+
# Builds a child flow given a name and block. Optionally accepts
|
28
|
+
# :properties which will override the default properties stored in this
|
29
|
+
# cascade. Optionally accepts a :mode, which will override the default
|
30
|
+
# mode stored in this cascade.
|
25
31
|
def flow(name, params = {}, &block)
|
26
32
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
33
|
+
|
34
|
+
params[:properties] ||= properties.dup
|
27
35
|
params[:mode] ||= mode
|
36
|
+
|
28
37
|
flow = Flow.new(name, self, params)
|
29
38
|
add_child(flow)
|
30
39
|
flow.instance_eval(&block)
|
@@ -35,9 +44,9 @@ module Cascading
|
|
35
44
|
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
|
36
45
|
end
|
37
46
|
|
38
|
-
def draw(dir
|
47
|
+
def draw(dir)
|
39
48
|
@children.each do |name, flow|
|
40
|
-
flow.connect
|
49
|
+
flow.connect.writeDOT("#{dir}/#{name}.dot")
|
41
50
|
end
|
42
51
|
end
|
43
52
|
|
@@ -54,9 +63,9 @@ module Cascading
|
|
54
63
|
end
|
55
64
|
end
|
56
65
|
|
57
|
-
def complete
|
66
|
+
def complete
|
58
67
|
begin
|
59
|
-
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children
|
68
|
+
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
|
60
69
|
rescue NativeException => e
|
61
70
|
raise CascadingException.new(e, 'Error completing cascade')
|
62
71
|
end
|
@@ -64,9 +73,9 @@ module Cascading
|
|
64
73
|
|
65
74
|
private
|
66
75
|
|
67
|
-
def make_flows(flows
|
76
|
+
def make_flows(flows)
|
68
77
|
flow_instances = flows.map do |name, flow|
|
69
|
-
cascading_flow = flow.connect
|
78
|
+
cascading_flow = flow.connect
|
70
79
|
flow.listeners.each { |l| cascading_flow.addListener(l) }
|
71
80
|
cascading_flow
|
72
81
|
end
|
data/lib/cascading/cascading.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
3
|
-
#
|
4
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
5
|
-
|
6
1
|
require 'cascading/expr_stub'
|
7
2
|
|
8
3
|
module Cascading
|
@@ -12,10 +7,31 @@ module Cascading
|
|
12
7
|
:float => java.lang.Float.java_class, :string => java.lang.String.java_class,
|
13
8
|
}
|
14
9
|
|
10
|
+
# FIXME: I consider $jobconf_properties to be a hack forced on us by the lack
|
11
|
+
# of properties handling in earlier versions of the gem. Fully removing the
|
12
|
+
# hack would look like introducing a Job abstraction which instantiates user
|
13
|
+
# code, and allowing jading's runner to pass properties into that. I've
|
14
|
+
# already taken the step to thread properties through cascades and flows
|
15
|
+
# rather than merge properties before connect, but we still require the
|
16
|
+
# global properties hack to integrate with external runner code (jading).
|
17
|
+
#
|
18
|
+
# Note that this would also mean we can get rid of the global "registries" of
|
19
|
+
# cascades and flows. I've already eliminated most uses of these registries,
|
20
|
+
# but they are still required for the runner to find user code required in a
|
21
|
+
# previous step. A Job abstraction would clean this up, as well.
|
22
|
+
#
|
23
|
+
# For now, it is important that people use these constructors rather than
|
24
|
+
# directly building their own cascades and flows so that jading can send them
|
25
|
+
# default properties.
|
26
|
+
|
15
27
|
# Builds a top-level cascade given a name and a block. Optionally accepts a
|
16
28
|
# :mode, as explained in Cascading::Cascade#initialize.
|
17
29
|
def cascade(name, params = {}, &block)
|
18
30
|
raise "Could not build cascade '#{name}'; block required" unless block_given?
|
31
|
+
raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if params[:properties]
|
32
|
+
|
33
|
+
params[:properties] = $jobconf_properties.dup if $jobconf_properties
|
34
|
+
|
19
35
|
cascade = Cascade.new(name, params)
|
20
36
|
cascade.instance_eval(&block)
|
21
37
|
cascade
|
@@ -26,6 +42,10 @@ module Cascading
|
|
26
42
|
# Cascading::Flow#initialize.
|
27
43
|
def flow(name, params = {}, &block)
|
28
44
|
raise "Could not build flow '#{name}'; block required" unless block_given?
|
45
|
+
raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if params[:properties]
|
46
|
+
|
47
|
+
params[:properties] = $jobconf_properties.dup if $jobconf_properties
|
48
|
+
|
29
49
|
flow = Flow.new(name, nil, params)
|
30
50
|
flow.instance_eval(&block)
|
31
51
|
flow
|
data/lib/cascading/ext/array.rb
CHANGED
@@ -1,9 +1,3 @@
|
|
1
|
-
# ext.rb : some extensions to basic types
|
2
|
-
#
|
3
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
4
|
-
#
|
5
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
6
|
-
|
7
1
|
class Array
|
8
2
|
def extract_options!
|
9
3
|
last.is_a?(::Hash) ? pop : {}
|
@@ -12,4 +6,4 @@ class Array
|
|
12
6
|
def extract_options
|
13
7
|
last.is_a?(::Hash) ? last : {}
|
14
8
|
end
|
15
|
-
end
|
9
|
+
end
|
data/lib/cascading/flow.rb
CHANGED
@@ -1,22 +1,26 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
3
|
-
#
|
4
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
5
|
-
|
6
1
|
require 'cascading/assembly'
|
7
2
|
|
8
3
|
module Cascading
|
9
4
|
class Flow < Cascading::Node
|
10
5
|
extend Registerable
|
11
6
|
|
12
|
-
attr_accessor :
|
13
|
-
attr_reader :mode
|
7
|
+
attr_accessor :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
|
8
|
+
attr_reader :properties, :mode
|
14
9
|
|
10
|
+
# Do not use this constructor directly. Instead, use Cascading::flow to
|
11
|
+
# build top-level flows and Cascade#flow to build flows within a Cascade.
|
12
|
+
#
|
15
13
|
# Builds a flow given a name and a parent node (a cascade or nil).
|
16
|
-
# Optionally accepts
|
17
|
-
# this flow.
|
14
|
+
# Optionally accepts :properties which allows external configuration of
|
15
|
+
# this flow. The flow will side-effect the properties during composition,
|
16
|
+
# then pass the modified properties along to the FlowConnector for
|
17
|
+
# execution. See Cascading::Cascade#initialize for details on how
|
18
|
+
# properties are propagated through cascades. Optionally accepts a :mode
|
19
|
+
# which will determine the execution mode of this flow. See
|
20
|
+
# Cascading::Mode.parse for details.
|
18
21
|
def initialize(name, parent, params = {})
|
19
|
-
@
|
22
|
+
@sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
|
23
|
+
@properties = params[:properties] || {}
|
20
24
|
@mode = Mode.parse(params[:mode])
|
21
25
|
@flow_scope = Scope.flow_scope(name)
|
22
26
|
super(name, parent)
|
@@ -128,14 +132,9 @@ module Cascading
|
|
128
132
|
end
|
129
133
|
end
|
130
134
|
|
131
|
-
def connect
|
132
|
-
# This ensures we have a hash, and that it is a Ruby Hash (because we
|
133
|
-
# also accept java.util.HashMap), then merges it with Flow properties
|
134
|
-
properties ||= {}
|
135
|
-
properties = java.util.HashMap.new(@properties.merge(Hash[*properties.to_a.flatten]))
|
136
|
-
|
135
|
+
def connect
|
137
136
|
puts "Connecting flow '#{name}' with properties:"
|
138
|
-
properties.
|
137
|
+
properties.keys.sort.each do |key|
|
139
138
|
puts "#{key}=#{properties[key]}"
|
140
139
|
end
|
141
140
|
|
@@ -149,9 +148,9 @@ module Cascading
|
|
149
148
|
mode.connect_flow(properties, name, sources, sinks, pipes)
|
150
149
|
end
|
151
150
|
|
152
|
-
def complete
|
151
|
+
def complete
|
153
152
|
begin
|
154
|
-
flow = connect
|
153
|
+
flow = connect
|
155
154
|
@listeners.each { |l| flow.addListener(l) }
|
156
155
|
flow.complete
|
157
156
|
rescue NativeException => e
|
data/lib/cascading/mode.rb
CHANGED
@@ -41,7 +41,11 @@ module Cascading
|
|
41
41
|
update_local_mode(sources, sinks)
|
42
42
|
sources = select_taps(sources)
|
43
43
|
sinks = select_taps(sinks)
|
44
|
-
|
44
|
+
|
45
|
+
# Report execution mode to stdout before connecting
|
46
|
+
puts "Connecting flow '#{name}' in #{local ? 'Cascading local mode' : 'Hadoop mode'}"
|
47
|
+
|
48
|
+
flow_connector_class.new(java.util.HashMap.new(properties)).connect(name, sources, sinks, pipes)
|
45
49
|
end
|
46
50
|
|
47
51
|
private
|
data/lib/cascading/operations.rb
CHANGED
@@ -1,8 +1,15 @@
|
|
1
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
4
|
-
|
5
1
|
module Cascading
|
2
|
+
# The Cascading::Operations module is deprecated. The original idea from long
|
3
|
+
# ago is that it would be useful to mixin operator wrappers to places other
|
4
|
+
# than Cascading::Assembly, but this is not true. Instead, put Eaches in
|
5
|
+
# Cascading::Assembly, Everies in Cascading::Aggregations, and any more
|
6
|
+
# generally useful utility code directly in the Cascading module
|
7
|
+
# (cascading/cascading.rb).
|
8
|
+
#
|
9
|
+
# Further, the entire *args pattern should be deprecated as it leads to
|
10
|
+
# functions that can only be understood by reading their code. Instead,
|
11
|
+
# idiomatic Ruby (positional required params and a params hash for optional
|
12
|
+
# args) should be used. See Cascading::Assembly#set_value for an example.
|
6
13
|
module Operations
|
7
14
|
def identity
|
8
15
|
Java::CascadingOperation::Identity.new
|
data/lib/cascading/tap.rb
CHANGED
data/lib/cascading.rb
CHANGED
@@ -1,12 +1,8 @@
|
|
1
|
-
# Copyright 2009, Grégoire Marabout. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# This is free software. Please see the LICENSE and COPYING files for details.
|
4
|
-
|
5
1
|
require 'java'
|
6
2
|
|
7
3
|
module Cascading
|
8
4
|
# :stopdoc:
|
9
|
-
VERSION = '0.0.
|
5
|
+
VERSION = '0.0.10'
|
10
6
|
LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
|
11
7
|
PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
|
12
8
|
CASCADING_HOME = ENV['CASCADING_HOME']
|