cascading.jruby 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/lib/cascading/assembly.rb +138 -17
- data/lib/cascading/base.rb +0 -4
- data/lib/cascading/cascade.rb +25 -16
- data/lib/cascading/cascading.rb +25 -5
- data/lib/cascading/ext/array.rb +1 -7
- data/lib/cascading/flow.rb +18 -19
- data/lib/cascading/mode.rb +5 -1
- data/lib/cascading/operations.rb +11 -4
- data/lib/cascading/tap.rb +4 -0
- data/lib/cascading.rb +1 -5
- data/test/test_assembly.rb +135 -29
- data/test/test_cascade.rb +80 -0
- data/test/test_flow.rb +20 -0
- data/test/test_operations.rb +3 -2
- metadata +6 -76
- data/.travis.yml +0 -6
- data/Gemfile +0 -6
- data/Gemfile.lock +0 -12
- data/HACKING.md +0 -23
- data/README.md +0 -9
- data/Rakefile +0 -46
- data/TODO +0 -13
- data/bin/make_job +0 -81
- data/ivy.xml +0 -25
- data/ivysettings.xml +0 -7
- data/samples/branch.rb +0 -30
- data/samples/copy.rb +0 -20
- data/samples/data/data2.txt +0 -88799
- data/samples/data/data_group_by.txt +0 -7
- data/samples/data/data_join1.txt +0 -3
- data/samples/data/data_join2.txt +0 -3
- data/samples/data/data_join3.txt +0 -3
- data/samples/data/genealogy/names/dist.all.last +0 -88799
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
- data/samples/group_by.rb +0 -61
- data/samples/join.rb +0 -31
- data/samples/logwordcount.rb +0 -22
- data/samples/project.rb +0 -23
- data/samples/rename.rb +0 -20
- data/samples/scorenames.rb +0 -20
- data/samples/splitter.rb +0 -19
- data/samples/sub_assembly.rb +0 -30
- data/samples/union.rb +0 -36
- data/spec/cascading_spec.rb +0 -105
- data/spec/expr_spec.rb +0 -230
- data/spec/jruby_version_spec.rb +0 -72
- data/spec/resource/join_input.txt +0 -3
- data/spec/resource/test_input.txt +0 -4
- data/spec/scope_spec.rb +0 -149
- data/spec/spec.opts +0 -6
- data/spec/spec_helper.rb +0 -5
- data/spec/spec_util.rb +0 -92
- data/src/cascading/jruby/Main.java +0 -38
- data/src/cascading/jruby/runner.rb +0 -6
- data/tags +0 -342
- data/tasks/ann.rake +0 -80
- data/tasks/ant.rake +0 -23
- data/tasks/bones.rake +0 -20
- data/tasks/gem.rake +0 -206
- data/tasks/git.rake +0 -40
- data/tasks/notes.rake +0 -27
- data/tasks/post_load.rake +0 -34
- data/tasks/rdoc.rake +0 -50
- data/tasks/rubyforge.rake +0 -55
- data/tasks/samples.rake +0 -19
- data/tasks/setup.rb +0 -300
- data/tasks/spec.rake +0 -59
- data/tasks/svn.rake +0 -47
- data/tasks/test.rake +0 -42
- data/test/data/data1.txt +0 -14
- data/test/data/data2.txt +0 -14
- data/test/mock_assemblies.rb +0 -55
data/spec/scope_spec.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
|
3
|
-
context Cascading::Scope do
|
4
|
-
it 'should match Cascading fields names from source tap scheme' do
|
5
|
-
test_assembly do
|
6
|
-
# Pass that uses our scope instead of all_fields
|
7
|
-
each scope.values_fields, :function => Java::CascadingOperation::Identity.new
|
8
|
-
check_scope :values_fields => ['offset', 'line']
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should pick up names from source tap scheme' do
|
13
|
-
test_assembly do
|
14
|
-
pass
|
15
|
-
|
16
|
-
check_scope :values_fields => ['offset', 'line']
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
it 'should propagate names through Each' do
|
21
|
-
test_assembly do
|
22
|
-
check_scope :values_fields => ['offset', 'line']
|
23
|
-
assert_size_equals 2
|
24
|
-
|
25
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
26
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y']
|
27
|
-
assert_size_equals 4
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
it 'should allow field filtration at Each' do
|
32
|
-
test_assembly do
|
33
|
-
check_scope :values_fields => ['offset', 'line']
|
34
|
-
assert_size_equals 2
|
35
|
-
|
36
|
-
split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
|
37
|
-
check_scope :values_fields => ['x', 'y']
|
38
|
-
assert_size_equals 2
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'should propagate names through CoGroup' do
|
43
|
-
test_join_assembly do
|
44
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
45
|
-
:grouping_fields => ['x', 'x_']
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
it 'should propagate names through CoGroup with no Aggregations' do
|
50
|
-
post_join_block = lambda do
|
51
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
52
|
-
:grouping_fields => ['x', 'x_']
|
53
|
-
end
|
54
|
-
|
55
|
-
test_join_assembly(:post_join_block => post_join_block)
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'should pass grouping fields to Every' do
|
59
|
-
test_join_assembly do
|
60
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
61
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
62
|
-
:grouping_fields => ['x', 'x_', 'x_sum']
|
63
|
-
assert_group_size_equals 1
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
it 'should pass grouping fields through chained Every' do
|
68
|
-
test_join_assembly do
|
69
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
70
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
71
|
-
:grouping_fields => ['x', 'x_', 'x_sum']
|
72
|
-
assert_group_size_equals 1
|
73
|
-
|
74
|
-
sum :mapping => {'y' => 'y_sum'}, :type => :int
|
75
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
76
|
-
:grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
|
77
|
-
assert_group_size_equals 1
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
it 'should propagate names through Every' do
|
82
|
-
post_join_block = lambda do
|
83
|
-
check_scope :values_fields => ['x', 'x_', 'x_sum', 'y_sum']
|
84
|
-
assert_size_equals 4
|
85
|
-
end
|
86
|
-
|
87
|
-
test_join_assembly :post_join_block => post_join_block do
|
88
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
89
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
90
|
-
:grouping_fields => ['x', 'x_', 'x_sum']
|
91
|
-
assert_group_size_equals 1
|
92
|
-
|
93
|
-
sum :mapping => {'y' => 'y_sum'}, :type => :int
|
94
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
95
|
-
:grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
|
96
|
-
assert_group_size_equals 1
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
|
101
|
-
post_join_block = lambda do
|
102
|
-
assert_size_equals 10
|
103
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
104
|
-
end
|
105
|
-
test_join_assembly(:post_join_block => post_join_block)
|
106
|
-
end
|
107
|
-
|
108
|
-
it 'should fail to pass grouping fields to Every immediately following Each' do
|
109
|
-
post_join_block = lambda do
|
110
|
-
pass
|
111
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
|
112
|
-
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
113
|
-
end
|
114
|
-
|
115
|
-
lambda do # Composition fails
|
116
|
-
test_join_assembly(:post_join_block => post_join_block)
|
117
|
-
# sum doesn't exist outside of Aggregations (where block of join is
|
118
|
-
# evaluated)
|
119
|
-
end.should raise_error NoMethodError
|
120
|
-
end
|
121
|
-
|
122
|
-
it 'should propagate values fields and field names into branch' do
|
123
|
-
post_join_block = lambda do
|
124
|
-
branch 'data_tuple' do
|
125
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
126
|
-
:grouping_fields => ['x', 'x_']
|
127
|
-
assert_size_equals 10
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
test_join_assembly(:branches => ['data_tuple'], :post_join_block => post_join_block)
|
132
|
-
end
|
133
|
-
|
134
|
-
it 'should propagate names through GroupBy' do
|
135
|
-
test_assembly do
|
136
|
-
group_by 'line' do
|
137
|
-
count
|
138
|
-
end
|
139
|
-
check_scope :values_fields => ['line', 'count']
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
it 'should propagate names through blockless GroupBy' do
|
144
|
-
test_assembly do
|
145
|
-
group_by 'line'
|
146
|
-
check_scope :values_fields => ['offset', 'line'], :grouping_fields => ['line']
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
data/spec/spec.opts
DELETED
data/spec/spec_helper.rb
DELETED
data/spec/spec_util.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
OUTPUT_DIR = 'output'
|
2
|
-
BUILD_DIR = 'build/spec'
|
3
|
-
|
4
|
-
module ScopeTests
|
5
|
-
def check_scope(params = {})
|
6
|
-
name_params = [params[:source]].compact
|
7
|
-
scope = scope(*name_params)
|
8
|
-
values_fields = params[:values_fields]
|
9
|
-
grouping_fields = params[:grouping_fields] || values_fields
|
10
|
-
|
11
|
-
debug = params[:debug]
|
12
|
-
debug_scope(*name_params) if debug
|
13
|
-
|
14
|
-
scope.values_fields.to_a.should == values_fields
|
15
|
-
scope.grouping_fields.to_a.should == grouping_fields
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
module Cascading
|
20
|
-
class Flow; include ScopeTests; end
|
21
|
-
class Assembly; include ScopeTests; end
|
22
|
-
class Aggregations; include ScopeTests; end
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_flow(&block)
|
26
|
-
cascade = cascade 'test_app', :mode => :local do
|
27
|
-
flow 'test', &block
|
28
|
-
end
|
29
|
-
cascade.complete
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_assembly(params = {}, &block)
|
33
|
-
branches = params[:branches] || []
|
34
|
-
|
35
|
-
test_flow do
|
36
|
-
source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
|
37
|
-
|
38
|
-
# Default Fields defined by TextLineScheme
|
39
|
-
check_scope :source => 'input', :values_fields => ['offset', 'line']
|
40
|
-
|
41
|
-
assembly 'input', &block
|
42
|
-
|
43
|
-
sink 'input', tap("#{OUTPUT_DIR}/out.txt", :sink_mode => :replace)
|
44
|
-
|
45
|
-
# Branches must be sunk so that they (and their assertions) will be run
|
46
|
-
branches.each do |branch|
|
47
|
-
sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_join_assembly(params = {}, &block)
|
53
|
-
branches = params[:branches] || []
|
54
|
-
post_join_block = params[:post_join_block]
|
55
|
-
|
56
|
-
test_flow do
|
57
|
-
source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
|
58
|
-
source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
|
59
|
-
|
60
|
-
# Default Fields defined by TextLineScheme
|
61
|
-
check_scope :source => 'left', :values_fields => ['offset', 'line']
|
62
|
-
check_scope :source => 'right', :values_fields => ['offset', 'line']
|
63
|
-
|
64
|
-
assembly 'left' do
|
65
|
-
check_scope :values_fields => ['offset', 'line']
|
66
|
-
split 'line', ['x', 'y', 'z'], :pattern => /,/
|
67
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
|
68
|
-
end
|
69
|
-
|
70
|
-
assembly 'right' do
|
71
|
-
check_scope :values_fields => ['offset', 'line']
|
72
|
-
split 'line', ['x', 'y', 'z'], :pattern => /,/
|
73
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
|
74
|
-
end
|
75
|
-
|
76
|
-
assembly 'join' do
|
77
|
-
# Empty scope because there is no 'join' source or assembly
|
78
|
-
check_scope :values_fields => []
|
79
|
-
|
80
|
-
left_join 'left', 'right', :on => ['x'], &block
|
81
|
-
|
82
|
-
instance_eval &post_join_block if post_join_block
|
83
|
-
end
|
84
|
-
|
85
|
-
sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :sink_mode => :replace)
|
86
|
-
|
87
|
-
# Branches must be sunk so that they (and their assertions) will be run
|
88
|
-
branches.each do |branch|
|
89
|
-
sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
data/src/cascading/jruby/Main.java
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
package cascading.jruby;
|
2
|
-
|
3
|
-
import org.jruby.Ruby;
|
4
|
-
import org.jruby.RubyInstanceConfig;
|
5
|
-
|
6
|
-
public class Main {
|
7
|
-
private final static String JRUBY_HOME = "/opt/jruby";
|
8
|
-
|
9
|
-
/**
|
10
|
-
* Starts a Hadoop job by reading the specified JRuby script.
|
11
|
-
*
|
12
|
-
* @param args
|
13
|
-
*/
|
14
|
-
public static void main(String[] args) {
|
15
|
-
String name = args[0]; // c.j script name
|
16
|
-
if (!name.startsWith("/"))
|
17
|
-
name = "/" + name;
|
18
|
-
|
19
|
-
// c.j script args
|
20
|
-
String[] newArgs = new String[args.length - 1];
|
21
|
-
System.arraycopy(args, 1, newArgs, 0, args.length - 1);
|
22
|
-
RubyInstanceConfig config = new RubyInstanceConfig();
|
23
|
-
config.setJRubyHome(JRUBY_HOME); // mwalker
|
24
|
-
config.processArguments(newArgs);
|
25
|
-
|
26
|
-
System.out.println("Arguments: ");
|
27
|
-
for (String arg : config.getArgv())
|
28
|
-
System.out.println(arg);
|
29
|
-
|
30
|
-
Ruby runtime = Ruby.newInstance(config);
|
31
|
-
|
32
|
-
System.out.println("Requiring '" + name + "'");
|
33
|
-
runtime.executeScript("require '" + name + "'", name);
|
34
|
-
|
35
|
-
System.out.println("Requiring 'cascading/jruby/runner'");
|
36
|
-
runtime.executeScript("require 'cascading/jruby/runner'", "runner"); // gfodor
|
37
|
-
}
|
38
|
-
}
|