cascading.jruby 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/lib/cascading/assembly.rb +138 -17
- data/lib/cascading/base.rb +0 -4
- data/lib/cascading/cascade.rb +25 -16
- data/lib/cascading/cascading.rb +25 -5
- data/lib/cascading/ext/array.rb +1 -7
- data/lib/cascading/flow.rb +18 -19
- data/lib/cascading/mode.rb +5 -1
- data/lib/cascading/operations.rb +11 -4
- data/lib/cascading/tap.rb +4 -0
- data/lib/cascading.rb +1 -5
- data/test/test_assembly.rb +135 -29
- data/test/test_cascade.rb +80 -0
- data/test/test_flow.rb +20 -0
- data/test/test_operations.rb +3 -2
- metadata +6 -76
- data/.travis.yml +0 -6
- data/Gemfile +0 -6
- data/Gemfile.lock +0 -12
- data/HACKING.md +0 -23
- data/README.md +0 -9
- data/Rakefile +0 -46
- data/TODO +0 -13
- data/bin/make_job +0 -81
- data/ivy.xml +0 -25
- data/ivysettings.xml +0 -7
- data/samples/branch.rb +0 -30
- data/samples/copy.rb +0 -20
- data/samples/data/data2.txt +0 -88799
- data/samples/data/data_group_by.txt +0 -7
- data/samples/data/data_join1.txt +0 -3
- data/samples/data/data_join2.txt +0 -3
- data/samples/data/data_join3.txt +0 -3
- data/samples/data/genealogy/names/dist.all.last +0 -88799
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
- data/samples/group_by.rb +0 -61
- data/samples/join.rb +0 -31
- data/samples/logwordcount.rb +0 -22
- data/samples/project.rb +0 -23
- data/samples/rename.rb +0 -20
- data/samples/scorenames.rb +0 -20
- data/samples/splitter.rb +0 -19
- data/samples/sub_assembly.rb +0 -30
- data/samples/union.rb +0 -36
- data/spec/cascading_spec.rb +0 -105
- data/spec/expr_spec.rb +0 -230
- data/spec/jruby_version_spec.rb +0 -72
- data/spec/resource/join_input.txt +0 -3
- data/spec/resource/test_input.txt +0 -4
- data/spec/scope_spec.rb +0 -149
- data/spec/spec.opts +0 -6
- data/spec/spec_helper.rb +0 -5
- data/spec/spec_util.rb +0 -92
- data/src/cascading/jruby/Main.java +0 -38
- data/src/cascading/jruby/runner.rb +0 -6
- data/tags +0 -342
- data/tasks/ann.rake +0 -80
- data/tasks/ant.rake +0 -23
- data/tasks/bones.rake +0 -20
- data/tasks/gem.rake +0 -206
- data/tasks/git.rake +0 -40
- data/tasks/notes.rake +0 -27
- data/tasks/post_load.rake +0 -34
- data/tasks/rdoc.rake +0 -50
- data/tasks/rubyforge.rake +0 -55
- data/tasks/samples.rake +0 -19
- data/tasks/setup.rb +0 -300
- data/tasks/spec.rake +0 -59
- data/tasks/svn.rake +0 -47
- data/tasks/test.rake +0 -42
- data/test/data/data1.txt +0 -14
- data/test/data/data2.txt +0 -14
- data/test/mock_assemblies.rb +0 -55
data/spec/scope_spec.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
|
3
|
-
context Cascading::Scope do
|
4
|
-
it 'should match Cascading fields names from source tap scheme' do
|
5
|
-
test_assembly do
|
6
|
-
# Pass that uses our scope instead of all_fields
|
7
|
-
each scope.values_fields, :function => Java::CascadingOperation::Identity.new
|
8
|
-
check_scope :values_fields => ['offset', 'line']
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should pick up names from source tap scheme' do
|
13
|
-
test_assembly do
|
14
|
-
pass
|
15
|
-
|
16
|
-
check_scope :values_fields => ['offset', 'line']
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
it 'should propagate names through Each' do
|
21
|
-
test_assembly do
|
22
|
-
check_scope :values_fields => ['offset', 'line']
|
23
|
-
assert_size_equals 2
|
24
|
-
|
25
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
26
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y']
|
27
|
-
assert_size_equals 4
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
it 'should allow field filtration at Each' do
|
32
|
-
test_assembly do
|
33
|
-
check_scope :values_fields => ['offset', 'line']
|
34
|
-
assert_size_equals 2
|
35
|
-
|
36
|
-
split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
|
37
|
-
check_scope :values_fields => ['x', 'y']
|
38
|
-
assert_size_equals 2
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'should propagate names through CoGroup' do
|
43
|
-
test_join_assembly do
|
44
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
45
|
-
:grouping_fields => ['x', 'x_']
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
it 'should propagate names through CoGroup with no Aggregations' do
|
50
|
-
post_join_block = lambda do
|
51
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
52
|
-
:grouping_fields => ['x', 'x_']
|
53
|
-
end
|
54
|
-
|
55
|
-
test_join_assembly(:post_join_block => post_join_block)
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'should pass grouping fields to Every' do
|
59
|
-
test_join_assembly do
|
60
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
61
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
62
|
-
:grouping_fields => ['x', 'x_', 'x_sum']
|
63
|
-
assert_group_size_equals 1
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
it 'should pass grouping fields through chained Every' do
|
68
|
-
test_join_assembly do
|
69
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
70
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
71
|
-
:grouping_fields => ['x', 'x_', 'x_sum']
|
72
|
-
assert_group_size_equals 1
|
73
|
-
|
74
|
-
sum :mapping => {'y' => 'y_sum'}, :type => :int
|
75
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
76
|
-
:grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
|
77
|
-
assert_group_size_equals 1
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
it 'should propagate names through Every' do
|
82
|
-
post_join_block = lambda do
|
83
|
-
check_scope :values_fields => ['x', 'x_', 'x_sum', 'y_sum']
|
84
|
-
assert_size_equals 4
|
85
|
-
end
|
86
|
-
|
87
|
-
test_join_assembly :post_join_block => post_join_block do
|
88
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
89
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
90
|
-
:grouping_fields => ['x', 'x_', 'x_sum']
|
91
|
-
assert_group_size_equals 1
|
92
|
-
|
93
|
-
sum :mapping => {'y' => 'y_sum'}, :type => :int
|
94
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
95
|
-
:grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
|
96
|
-
assert_group_size_equals 1
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
|
101
|
-
post_join_block = lambda do
|
102
|
-
assert_size_equals 10
|
103
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
104
|
-
end
|
105
|
-
test_join_assembly(:post_join_block => post_join_block)
|
106
|
-
end
|
107
|
-
|
108
|
-
it 'should fail to pass grouping fields to Every immediately following Each' do
|
109
|
-
post_join_block = lambda do
|
110
|
-
pass
|
111
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
112
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
113
|
-
end
|
114
|
-
|
115
|
-
lambda do # Composition fails
|
116
|
-
test_join_assembly(:post_join_block => post_join_block)
|
117
|
-
# sum doesn't exist outside of Aggregations (where block of join is
|
118
|
-
# evaluated)
|
119
|
-
end.should raise_error NoMethodError
|
120
|
-
end
|
121
|
-
|
122
|
-
it 'should propagate values fields and field names into branch' do
|
123
|
-
post_join_block = lambda do
|
124
|
-
branch 'data_tuple' do
|
125
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
126
|
-
:grouping_fields => ['x', 'x_']
|
127
|
-
assert_size_equals 10
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
test_join_assembly(:branches => ['data_tuple'], :post_join_block => post_join_block)
|
132
|
-
end
|
133
|
-
|
134
|
-
it 'should propagate names through GroupBy' do
|
135
|
-
test_assembly do
|
136
|
-
group_by 'line' do
|
137
|
-
count
|
138
|
-
end
|
139
|
-
check_scope :values_fields => ['line', 'count']
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
it 'should propagate names through blockless GroupBy' do
|
144
|
-
test_assembly do
|
145
|
-
group_by 'line'
|
146
|
-
check_scope :values_fields => ['offset', 'line'], :grouping_fields => ['line']
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
data/spec/spec.opts
DELETED
data/spec/spec_helper.rb
DELETED
data/spec/spec_util.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
OUTPUT_DIR = 'output'
|
2
|
-
BUILD_DIR = 'build/spec'
|
3
|
-
|
4
|
-
module ScopeTests
|
5
|
-
def check_scope(params = {})
|
6
|
-
name_params = [params[:source]].compact
|
7
|
-
scope = scope(*name_params)
|
8
|
-
values_fields = params[:values_fields]
|
9
|
-
grouping_fields = params[:grouping_fields] || values_fields
|
10
|
-
|
11
|
-
debug = params[:debug]
|
12
|
-
debug_scope(*name_params) if debug
|
13
|
-
|
14
|
-
scope.values_fields.to_a.should == values_fields
|
15
|
-
scope.grouping_fields.to_a.should == grouping_fields
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
module Cascading
|
20
|
-
class Flow; include ScopeTests; end
|
21
|
-
class Assembly; include ScopeTests; end
|
22
|
-
class Aggregations; include ScopeTests; end
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_flow(&block)
|
26
|
-
cascade = cascade 'test_app', :mode => :local do
|
27
|
-
flow 'test', &block
|
28
|
-
end
|
29
|
-
cascade.complete
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_assembly(params = {}, &block)
|
33
|
-
branches = params[:branches] || []
|
34
|
-
|
35
|
-
test_flow do
|
36
|
-
source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
|
37
|
-
|
38
|
-
# Default Fields defined by TextLineScheme
|
39
|
-
check_scope :source => 'input', :values_fields => ['offset', 'line']
|
40
|
-
|
41
|
-
assembly 'input', &block
|
42
|
-
|
43
|
-
sink 'input', tap("#{OUTPUT_DIR}/out.txt", :sink_mode => :replace)
|
44
|
-
|
45
|
-
# Branches must be sunk so that they (and their assertions) will be run
|
46
|
-
branches.each do |branch|
|
47
|
-
sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_join_assembly(params = {}, &block)
|
53
|
-
branches = params[:branches] || []
|
54
|
-
post_join_block = params[:post_join_block]
|
55
|
-
|
56
|
-
test_flow do
|
57
|
-
source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
|
58
|
-
source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
|
59
|
-
|
60
|
-
# Default Fields defined by TextLineScheme
|
61
|
-
check_scope :source => 'left', :values_fields => ['offset', 'line']
|
62
|
-
check_scope :source => 'right', :values_fields => ['offset', 'line']
|
63
|
-
|
64
|
-
assembly 'left' do
|
65
|
-
check_scope :values_fields => ['offset', 'line']
|
66
|
-
split 'line', ['x', 'y', 'z'], :pattern => /,/
|
67
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
|
68
|
-
end
|
69
|
-
|
70
|
-
assembly 'right' do
|
71
|
-
check_scope :values_fields => ['offset', 'line']
|
72
|
-
split 'line', ['x', 'y', 'z'], :pattern => /,/
|
73
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
|
74
|
-
end
|
75
|
-
|
76
|
-
assembly 'join' do
|
77
|
-
# Empty scope because there is no 'join' source or assembly
|
78
|
-
check_scope :values_fields => []
|
79
|
-
|
80
|
-
left_join 'left', 'right', :on => ['x'], &block
|
81
|
-
|
82
|
-
instance_eval &post_join_block if post_join_block
|
83
|
-
end
|
84
|
-
|
85
|
-
sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :sink_mode => :replace)
|
86
|
-
|
87
|
-
# Branches must be sunk so that they (and their assertions) will be run
|
88
|
-
branches.each do |branch|
|
89
|
-
sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
package cascading.jruby;
|
2
|
-
|
3
|
-
import org.jruby.Ruby;
|
4
|
-
import org.jruby.RubyInstanceConfig;
|
5
|
-
|
6
|
-
public class Main {
|
7
|
-
private final static String JRUBY_HOME = "/opt/jruby";
|
8
|
-
|
9
|
-
/**
|
10
|
-
* Starts a Hadoop job by reading the specified JRuby script.
|
11
|
-
*
|
12
|
-
* @param args
|
13
|
-
*/
|
14
|
-
public static void main(String[] args) {
|
15
|
-
String name = args[0]; // c.j script name
|
16
|
-
if (!name.startsWith("/"))
|
17
|
-
name = "/" + name;
|
18
|
-
|
19
|
-
// c.j script args
|
20
|
-
String[] newArgs = new String[args.length - 1];
|
21
|
-
System.arraycopy(args, 1, newArgs, 0, args.length - 1);
|
22
|
-
RubyInstanceConfig config = new RubyInstanceConfig();
|
23
|
-
config.setJRubyHome(JRUBY_HOME); // mwalker
|
24
|
-
config.processArguments(newArgs);
|
25
|
-
|
26
|
-
System.out.println("Arguments: ");
|
27
|
-
for (String arg : config.getArgv())
|
28
|
-
System.out.println(arg);
|
29
|
-
|
30
|
-
Ruby runtime = Ruby.newInstance(config);
|
31
|
-
|
32
|
-
System.out.println("Requiring '" + name + "'");
|
33
|
-
runtime.executeScript("require '" + name + "'", name);
|
34
|
-
|
35
|
-
System.out.println("Requiring 'cascading/jruby/runner'");
|
36
|
-
runtime.executeScript("require 'cascading/jruby/runner'", "runner"); // gfodor
|
37
|
-
}
|
38
|
-
}
|