cascading.jruby 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +6 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +12 -0
- data/HACKING.md +12 -4
- data/History.txt +8 -0
- data/README.md +2 -2
- data/Rakefile +3 -2
- data/TODO +2 -2
- data/ivy.xml +25 -0
- data/ivysettings.xml +7 -0
- data/lib/cascading.rb +1 -1
- data/lib/cascading/aggregations.rb +190 -0
- data/lib/cascading/assembly.rb +138 -194
- data/lib/cascading/cascading.rb +8 -3
- data/lib/cascading/cascading_exception.rb +12 -10
- data/lib/cascading/flow.rb +3 -2
- data/lib/cascading/operations.rb +1 -23
- data/lib/cascading/scope.rb +27 -24
- data/lib/cascading/sub_assembly.rb +93 -0
- data/samples/copy.rb +3 -1
- data/samples/data/data_group_by.txt +7 -0
- data/samples/data/genealogy/names/dist.all.last +88799 -0
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +12761 -0
- data/samples/group_by.rb +61 -0
- data/samples/logwordcount.rb +3 -1
- data/samples/scorenames.rb +2 -1
- data/samples/sub_assembly.rb +30 -0
- data/samples/union.rb +3 -1
- data/spec/scope_spec.rb +47 -66
- data/spec/spec_util.rb +4 -4
- data/tags +69 -44
- data/tasks/ant.rake +9 -5
- data/tasks/samples.rake +6 -0
- data/tasks/test.rake +1 -1
- data/test/mock_assemblies.rb +55 -0
- data/test/test_aggregations.rb +443 -0
- data/test/test_assembly.rb +437 -196
- data/test/test_exceptions.rb +3 -3
- data/test/test_local_execution.rb +168 -0
- data/test/test_operations.rb +0 -7
- metadata +23 -2
data/test/test_exceptions.rb
CHANGED
@@ -9,16 +9,16 @@ class TC_Exceptions < Test::Unit::TestCase
|
|
9
9
|
e = CascadingException.new(ne1, 'cascading.jruby wrapper exception')
|
10
10
|
|
11
11
|
assert_equal ne1, e.ne
|
12
|
-
|
12
|
+
assert_match /^cascading\.jruby wrapper exception/, e.message
|
13
|
+
assert_match /^Exception summary for: cascading\.jruby wrapper exception/, e.message
|
13
14
|
assert_equal 3, e.depth
|
14
15
|
|
15
|
-
|
16
16
|
assert_equal ne1, e.cause(1)
|
17
17
|
assert_equal 'Exception Cascading hands us', e.cause(1).message
|
18
18
|
|
19
19
|
assert_equal ne2, e.cause(2)
|
20
20
|
# Cascading inserts Operator#to_s, here
|
21
|
-
assert_match /Exception thrown by Cascading
|
21
|
+
assert_match /Exception thrown by Cascading$/, e.cause(2).message
|
22
22
|
|
23
23
|
assert_equal ne3, e.cause(3)
|
24
24
|
assert_equal 'Root cause', e.cause(3).message
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'cascading'
|
3
|
+
|
4
|
+
class TC_LocalExecution < Test::Unit::TestCase
|
5
|
+
def test_smoke_test_multi_source_tap
|
6
|
+
cascade 'splitter' do
|
7
|
+
flow 'splitter' do
|
8
|
+
tap1 = tap 'test/data/data1.txt'
|
9
|
+
tap2 = tap 'test/data/data2.txt'
|
10
|
+
source 'data', multi_source_tap(tap1, tap2)
|
11
|
+
|
12
|
+
assembly 'data' do
|
13
|
+
pass
|
14
|
+
end
|
15
|
+
|
16
|
+
sink 'data', tap('output/test_smoke_test_multi_source_tap')
|
17
|
+
end
|
18
|
+
end.complete
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_smoke_test_sequence_file_scheme
|
22
|
+
cascade 'smoke' do
|
23
|
+
flow 'smoke' do
|
24
|
+
source 'input', tap('test/data/data1.txt')
|
25
|
+
assembly 'input' do
|
26
|
+
pass
|
27
|
+
end
|
28
|
+
compress_output :default, :block
|
29
|
+
sink 'input', tap('output/test_smoke_test_sequence_file_scheme', :scheme => sequence_file_scheme)
|
30
|
+
end
|
31
|
+
end.complete
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_splitter
|
35
|
+
flow = flow 'splitter' do
|
36
|
+
source 'copy', tap('test/data/data1.txt')
|
37
|
+
|
38
|
+
assembly 'copy' do
|
39
|
+
split 'line', :pattern => /[.,]*\s+/, :into=>['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
40
|
+
assert_size_equals 4
|
41
|
+
assert_not_null
|
42
|
+
debug :print_fields => true
|
43
|
+
end
|
44
|
+
|
45
|
+
sink 'copy', tap('output/test_splitter', :sink_mode => :replace)
|
46
|
+
end.complete
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_smoke_test_multi_source_tap
|
50
|
+
cascade 'multi_source_tap' do
|
51
|
+
flow 'multi_source_tap' do
|
52
|
+
tap1 = tap 'test/data/data1.txt'
|
53
|
+
tap2 = tap 'test/data/data2.txt'
|
54
|
+
source 'data', multi_source_tap(tap1, tap2)
|
55
|
+
|
56
|
+
assembly 'data' do
|
57
|
+
pass
|
58
|
+
end
|
59
|
+
|
60
|
+
sink 'data', tap('output/test_smoke_test_multi_source_tap')
|
61
|
+
end
|
62
|
+
end.complete
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_join1
|
66
|
+
join_grouping_fields, join_values_fields = nil, nil
|
67
|
+
cascade 'splitter' do
|
68
|
+
flow 'splitter' do
|
69
|
+
source 'data1', tap('test/data/data1.txt')
|
70
|
+
source 'data2', tap('test/data/data2.txt')
|
71
|
+
|
72
|
+
assembly1 = assembly 'data1' do
|
73
|
+
split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
74
|
+
assert_size_equals 4
|
75
|
+
assert_not_null
|
76
|
+
debug :print_fields => true
|
77
|
+
end
|
78
|
+
|
79
|
+
assembly2 = assembly 'data2' do
|
80
|
+
split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'id', 'town'], :output => ['name', 'id', 'town']
|
81
|
+
assert_size_equals 3
|
82
|
+
assert_not_null
|
83
|
+
debug :print_fields => true
|
84
|
+
end
|
85
|
+
|
86
|
+
assembly 'joined' do
|
87
|
+
join assembly1.name, assembly2.name, :on => ['name', 'id'], :declared_fields => ['name', 'score1', 'score2', 'id', 'name2', 'id2', 'town']
|
88
|
+
join_grouping_fields = scope.grouping_fields.to_a
|
89
|
+
join_values_fields = scope.values_fields.to_a
|
90
|
+
|
91
|
+
assert_size_equals 7
|
92
|
+
assert_not_null
|
93
|
+
end
|
94
|
+
|
95
|
+
sink 'joined', tap('output/test_join1', :sink_mode => :replace)
|
96
|
+
end
|
97
|
+
end.complete
|
98
|
+
assert_equal ['name', 'id', 'name_', 'id_'], join_grouping_fields
|
99
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name2', 'id2', 'town'], join_values_fields
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_join2
|
103
|
+
join_grouping_fields, join_values_fields = nil, nil
|
104
|
+
flow = flow 'splitter' do
|
105
|
+
source 'data1', tap('test/data/data1.txt')
|
106
|
+
source 'data2', tap('test/data/data2.txt')
|
107
|
+
|
108
|
+
assembly 'data1' do
|
109
|
+
split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
110
|
+
debug :print_fields => true
|
111
|
+
end
|
112
|
+
|
113
|
+
assembly 'data2' do
|
114
|
+
split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'code', 'town'], :output => ['name', 'code', 'town']
|
115
|
+
debug :print_fields => true
|
116
|
+
end
|
117
|
+
|
118
|
+
assembly 'joined' do
|
119
|
+
join :on => {'data1' => ['name', 'id'], 'data2' => ['name', 'code']}, :declared_fields => ['name', 'score1', 'score2', 'id', 'name2', 'code', 'town']
|
120
|
+
join_grouping_fields = scope.grouping_fields.to_a
|
121
|
+
join_values_fields = scope.values_fields.to_a
|
122
|
+
end
|
123
|
+
|
124
|
+
sink 'joined', tap('output/test_join2', :sink_mode => :replace)
|
125
|
+
end.complete
|
126
|
+
assert_equal ['name', 'id', 'name_', 'code'], join_grouping_fields
|
127
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name2', 'code', 'town'], join_values_fields
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_union
|
131
|
+
union_grouping_fields, union_values_fields = nil, nil
|
132
|
+
cascade 'union' do
|
133
|
+
flow 'union' do
|
134
|
+
source 'data1', tap('test/data/data1.txt')
|
135
|
+
source 'data2', tap('test/data/data2.txt')
|
136
|
+
|
137
|
+
assembly 'data1' do
|
138
|
+
split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
|
139
|
+
assert_size_equals 4
|
140
|
+
assert_not_null
|
141
|
+
|
142
|
+
project 'name', 'id'
|
143
|
+
assert_size_equals 2
|
144
|
+
end
|
145
|
+
|
146
|
+
assembly 'data2' do
|
147
|
+
split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'code', 'town'], :output => ['name', 'code', 'town']
|
148
|
+
assert_size_equals 3
|
149
|
+
assert_not_null
|
150
|
+
|
151
|
+
rename 'code' => 'id'
|
152
|
+
project 'name', 'id'
|
153
|
+
assert_size_equals 2
|
154
|
+
end
|
155
|
+
|
156
|
+
assembly 'union' do
|
157
|
+
union 'data1', 'data2'
|
158
|
+
union_grouping_fields = scope.grouping_fields.to_a
|
159
|
+
union_values_fields = scope.values_fields.to_a
|
160
|
+
end
|
161
|
+
|
162
|
+
sink 'union', tap('output/test_union', :sink_mode => :replace)
|
163
|
+
end
|
164
|
+
end.complete
|
165
|
+
assert_equal ['name'], union_grouping_fields
|
166
|
+
assert_equal ['name', 'id'], union_values_fields
|
167
|
+
end
|
168
|
+
end
|
data/test/test_operations.rb
CHANGED
@@ -13,11 +13,4 @@ class TC_Operations < Test::Unit::TestCase
|
|
13
13
|
first = first_function 'first_field', :ignore => [Java::CascadingTuple::Tuple.new(-1)].to_java(Java::CascadingTuple::Tuple)
|
14
14
|
assert_not_nil first
|
15
15
|
end
|
16
|
-
|
17
|
-
def test_aggregator_function_ignore_exception
|
18
|
-
assert_raise RuntimeError do
|
19
|
-
avg = average_function 'avg_field', :ignore => [nil].to_java(:string)
|
20
|
-
assert_not_nil avg
|
21
|
-
end
|
22
|
-
end
|
23
16
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cascading.jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.7
|
5
|
+
version: 0.0.8
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Matt Walker
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-05-15 00:00:00 Z
|
15
15
|
dependencies: []
|
16
16
|
|
17
17
|
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
@@ -25,6 +25,7 @@ extra_rdoc_files:
|
|
25
25
|
- LICENSE.txt
|
26
26
|
- bin/make_job
|
27
27
|
- samples/data/data2.txt
|
28
|
+
- samples/data/data_group_by.txt
|
28
29
|
- samples/data/data_join1.txt
|
29
30
|
- samples/data/data_join2.txt
|
30
31
|
- samples/data/data_join3.txt
|
@@ -33,6 +34,9 @@ extra_rdoc_files:
|
|
33
34
|
- test/data/data1.txt
|
34
35
|
- test/data/data2.txt
|
35
36
|
files:
|
37
|
+
- .travis.yml
|
38
|
+
- Gemfile
|
39
|
+
- Gemfile.lock
|
36
40
|
- HACKING.md
|
37
41
|
- History.txt
|
38
42
|
- LICENSE.txt
|
@@ -40,7 +44,10 @@ files:
|
|
40
44
|
- Rakefile
|
41
45
|
- TODO
|
42
46
|
- bin/make_job
|
47
|
+
- ivy.xml
|
48
|
+
- ivysettings.xml
|
43
49
|
- lib/cascading.rb
|
50
|
+
- lib/cascading/aggregations.rb
|
44
51
|
- lib/cascading/assembly.rb
|
45
52
|
- lib/cascading/base.rb
|
46
53
|
- lib/cascading/cascade.rb
|
@@ -51,18 +58,24 @@ files:
|
|
51
58
|
- lib/cascading/flow.rb
|
52
59
|
- lib/cascading/operations.rb
|
53
60
|
- lib/cascading/scope.rb
|
61
|
+
- lib/cascading/sub_assembly.rb
|
54
62
|
- samples/branch.rb
|
55
63
|
- samples/copy.rb
|
56
64
|
- samples/data/data2.txt
|
65
|
+
- samples/data/data_group_by.txt
|
57
66
|
- samples/data/data_join1.txt
|
58
67
|
- samples/data/data_join2.txt
|
59
68
|
- samples/data/data_join3.txt
|
69
|
+
- samples/data/genealogy/names/dist.all.last
|
70
|
+
- samples/data/gutenberg/the_outline_of_science_vol_1
|
71
|
+
- samples/group_by.rb
|
60
72
|
- samples/join.rb
|
61
73
|
- samples/logwordcount.rb
|
62
74
|
- samples/project.rb
|
63
75
|
- samples/rename.rb
|
64
76
|
- samples/scorenames.rb
|
65
77
|
- samples/splitter.rb
|
78
|
+
- samples/sub_assembly.rb
|
66
79
|
- samples/union.rb
|
67
80
|
- spec/cascading_spec.rb
|
68
81
|
- spec/expr_spec.rb
|
@@ -92,11 +105,14 @@ files:
|
|
92
105
|
- tasks/test.rake
|
93
106
|
- test/data/data1.txt
|
94
107
|
- test/data/data2.txt
|
108
|
+
- test/mock_assemblies.rb
|
109
|
+
- test/test_aggregations.rb
|
95
110
|
- test/test_assembly.rb
|
96
111
|
- test/test_cascade.rb
|
97
112
|
- test/test_cascading.rb
|
98
113
|
- test/test_exceptions.rb
|
99
114
|
- test/test_flow.rb
|
115
|
+
- test/test_local_execution.rb
|
100
116
|
- test/test_operations.rb
|
101
117
|
homepage: http://github.com/etsy/cascading.jruby
|
102
118
|
licenses: []
|
@@ -112,6 +128,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
112
128
|
requirements:
|
113
129
|
- - ">="
|
114
130
|
- !ruby/object:Gem::Version
|
131
|
+
hash: 2
|
132
|
+
segments:
|
133
|
+
- 0
|
115
134
|
version: "0"
|
116
135
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
136
|
none: false
|
@@ -127,9 +146,11 @@ signing_key:
|
|
127
146
|
specification_version: 3
|
128
147
|
summary: A JRuby DSL for Cascading
|
129
148
|
test_files:
|
149
|
+
- test/test_aggregations.rb
|
130
150
|
- test/test_assembly.rb
|
131
151
|
- test/test_cascade.rb
|
132
152
|
- test/test_cascading.rb
|
133
153
|
- test/test_exceptions.rb
|
134
154
|
- test/test_flow.rb
|
155
|
+
- test/test_local_execution.rb
|
135
156
|
- test/test_operations.rb
|