cascading.jruby 0.0.7 → 0.0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -9,16 +9,16 @@ class TC_Exceptions < Test::Unit::TestCase
9
9
  e = CascadingException.new(ne1, 'cascading.jruby wrapper exception')
10
10
 
11
11
  assert_equal ne1, e.ne
12
- assert_equal 'cascading.jruby wrapper exception', e.message
12
+ assert_match /^cascading\.jruby wrapper exception/, e.message
13
+ assert_match /^Exception summary for: cascading\.jruby wrapper exception/, e.message
13
14
  assert_equal 3, e.depth
14
15
 
15
-
16
16
  assert_equal ne1, e.cause(1)
17
17
  assert_equal 'Exception Cascading hands us', e.cause(1).message
18
18
 
19
19
  assert_equal ne2, e.cause(2)
20
20
  # Cascading inserts Operator#to_s, here
21
- assert_match /Exception thrown by Cascading/, e.cause(2).message
21
+ assert_match /Exception thrown by Cascading$/, e.cause(2).message
22
22
 
23
23
  assert_equal ne3, e.cause(3)
24
24
  assert_equal 'Root cause', e.cause(3).message
@@ -0,0 +1,168 @@
1
+ require 'test/unit'
2
+ require 'cascading'
3
+
4
+ class TC_LocalExecution < Test::Unit::TestCase
5
+ def test_smoke_test_multi_source_tap
6
+ cascade 'splitter' do
7
+ flow 'splitter' do
8
+ tap1 = tap 'test/data/data1.txt'
9
+ tap2 = tap 'test/data/data2.txt'
10
+ source 'data', multi_source_tap(tap1, tap2)
11
+
12
+ assembly 'data' do
13
+ pass
14
+ end
15
+
16
+ sink 'data', tap('output/test_smoke_test_multi_source_tap')
17
+ end
18
+ end.complete
19
+ end
20
+
21
+ def test_smoke_test_sequence_file_scheme
22
+ cascade 'smoke' do
23
+ flow 'smoke' do
24
+ source 'input', tap('test/data/data1.txt')
25
+ assembly 'input' do
26
+ pass
27
+ end
28
+ compress_output :default, :block
29
+ sink 'input', tap('output/test_smoke_test_sequence_file_scheme', :scheme => sequence_file_scheme)
30
+ end
31
+ end.complete
32
+ end
33
+
34
+ def test_splitter
35
+ flow = flow 'splitter' do
36
+ source 'copy', tap('test/data/data1.txt')
37
+
38
+ assembly 'copy' do
39
+ split 'line', :pattern => /[.,]*\s+/, :into=>['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
40
+ assert_size_equals 4
41
+ assert_not_null
42
+ debug :print_fields => true
43
+ end
44
+
45
+ sink 'copy', tap('output/test_splitter', :sink_mode => :replace)
46
+ end.complete
47
+ end
48
+
49
+ def test_smoke_test_multi_source_tap
50
+ cascade 'multi_source_tap' do
51
+ flow 'multi_source_tap' do
52
+ tap1 = tap 'test/data/data1.txt'
53
+ tap2 = tap 'test/data/data2.txt'
54
+ source 'data', multi_source_tap(tap1, tap2)
55
+
56
+ assembly 'data' do
57
+ pass
58
+ end
59
+
60
+ sink 'data', tap('output/test_smoke_test_multi_source_tap')
61
+ end
62
+ end.complete
63
+ end
64
+
65
+ def test_join1
66
+ join_grouping_fields, join_values_fields = nil, nil
67
+ cascade 'splitter' do
68
+ flow 'splitter' do
69
+ source 'data1', tap('test/data/data1.txt')
70
+ source 'data2', tap('test/data/data2.txt')
71
+
72
+ assembly1 = assembly 'data1' do
73
+ split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
74
+ assert_size_equals 4
75
+ assert_not_null
76
+ debug :print_fields => true
77
+ end
78
+
79
+ assembly2 = assembly 'data2' do
80
+ split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'id', 'town'], :output => ['name', 'id', 'town']
81
+ assert_size_equals 3
82
+ assert_not_null
83
+ debug :print_fields => true
84
+ end
85
+
86
+ assembly 'joined' do
87
+ join assembly1.name, assembly2.name, :on => ['name', 'id'], :declared_fields => ['name', 'score1', 'score2', 'id', 'name2', 'id2', 'town']
88
+ join_grouping_fields = scope.grouping_fields.to_a
89
+ join_values_fields = scope.values_fields.to_a
90
+
91
+ assert_size_equals 7
92
+ assert_not_null
93
+ end
94
+
95
+ sink 'joined', tap('output/test_join1', :sink_mode => :replace)
96
+ end
97
+ end.complete
98
+ assert_equal ['name', 'id', 'name_', 'id_'], join_grouping_fields
99
+ assert_equal ['name', 'score1', 'score2', 'id', 'name2', 'id2', 'town'], join_values_fields
100
+ end
101
+
102
+ def test_join2
103
+ join_grouping_fields, join_values_fields = nil, nil
104
+ flow = flow 'splitter' do
105
+ source 'data1', tap('test/data/data1.txt')
106
+ source 'data2', tap('test/data/data2.txt')
107
+
108
+ assembly 'data1' do
109
+ split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
110
+ debug :print_fields => true
111
+ end
112
+
113
+ assembly 'data2' do
114
+ split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'code', 'town'], :output => ['name', 'code', 'town']
115
+ debug :print_fields => true
116
+ end
117
+
118
+ assembly 'joined' do
119
+ join :on => {'data1' => ['name', 'id'], 'data2' => ['name', 'code']}, :declared_fields => ['name', 'score1', 'score2', 'id', 'name2', 'code', 'town']
120
+ join_grouping_fields = scope.grouping_fields.to_a
121
+ join_values_fields = scope.values_fields.to_a
122
+ end
123
+
124
+ sink 'joined', tap('output/test_join2', :sink_mode => :replace)
125
+ end.complete
126
+ assert_equal ['name', 'id', 'name_', 'code'], join_grouping_fields
127
+ assert_equal ['name', 'score1', 'score2', 'id', 'name2', 'code', 'town'], join_values_fields
128
+ end
129
+
130
+ def test_union
131
+ union_grouping_fields, union_values_fields = nil, nil
132
+ cascade 'union' do
133
+ flow 'union' do
134
+ source 'data1', tap('test/data/data1.txt')
135
+ source 'data2', tap('test/data/data2.txt')
136
+
137
+ assembly 'data1' do
138
+ split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
139
+ assert_size_equals 4
140
+ assert_not_null
141
+
142
+ project 'name', 'id'
143
+ assert_size_equals 2
144
+ end
145
+
146
+ assembly 'data2' do
147
+ split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'code', 'town'], :output => ['name', 'code', 'town']
148
+ assert_size_equals 3
149
+ assert_not_null
150
+
151
+ rename 'code' => 'id'
152
+ project 'name', 'id'
153
+ assert_size_equals 2
154
+ end
155
+
156
+ assembly 'union' do
157
+ union 'data1', 'data2'
158
+ union_grouping_fields = scope.grouping_fields.to_a
159
+ union_values_fields = scope.values_fields.to_a
160
+ end
161
+
162
+ sink 'union', tap('output/test_union', :sink_mode => :replace)
163
+ end
164
+ end.complete
165
+ assert_equal ['name'], union_grouping_fields
166
+ assert_equal ['name', 'id'], union_values_fields
167
+ end
168
+ end
@@ -13,11 +13,4 @@ class TC_Operations < Test::Unit::TestCase
13
13
  first = first_function 'first_field', :ignore => [Java::CascadingTuple::Tuple.new(-1)].to_java(Java::CascadingTuple::Tuple)
14
14
  assert_not_nil first
15
15
  end
16
-
17
- def test_aggregator_function_ignore_exception
18
- assert_raise RuntimeError do
19
- avg = average_function 'avg_field', :ignore => [nil].to_java(:string)
20
- assert_not_nil avg
21
- end
22
- end
23
16
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: cascading.jruby
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.7
5
+ version: 0.0.8
6
6
  platform: ruby
7
7
  authors:
8
8
  - Matt Walker
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2012-04-02 00:00:00 Z
14
+ date: 2012-05-15 00:00:00 Z
15
15
  dependencies: []
16
16
 
17
17
  description: cascading.jruby is a small DSL above Cascading, written in JRuby
@@ -25,6 +25,7 @@ extra_rdoc_files:
25
25
  - LICENSE.txt
26
26
  - bin/make_job
27
27
  - samples/data/data2.txt
28
+ - samples/data/data_group_by.txt
28
29
  - samples/data/data_join1.txt
29
30
  - samples/data/data_join2.txt
30
31
  - samples/data/data_join3.txt
@@ -33,6 +34,9 @@ extra_rdoc_files:
33
34
  - test/data/data1.txt
34
35
  - test/data/data2.txt
35
36
  files:
37
+ - .travis.yml
38
+ - Gemfile
39
+ - Gemfile.lock
36
40
  - HACKING.md
37
41
  - History.txt
38
42
  - LICENSE.txt
@@ -40,7 +44,10 @@ files:
40
44
  - Rakefile
41
45
  - TODO
42
46
  - bin/make_job
47
+ - ivy.xml
48
+ - ivysettings.xml
43
49
  - lib/cascading.rb
50
+ - lib/cascading/aggregations.rb
44
51
  - lib/cascading/assembly.rb
45
52
  - lib/cascading/base.rb
46
53
  - lib/cascading/cascade.rb
@@ -51,18 +58,24 @@ files:
51
58
  - lib/cascading/flow.rb
52
59
  - lib/cascading/operations.rb
53
60
  - lib/cascading/scope.rb
61
+ - lib/cascading/sub_assembly.rb
54
62
  - samples/branch.rb
55
63
  - samples/copy.rb
56
64
  - samples/data/data2.txt
65
+ - samples/data/data_group_by.txt
57
66
  - samples/data/data_join1.txt
58
67
  - samples/data/data_join2.txt
59
68
  - samples/data/data_join3.txt
69
+ - samples/data/genealogy/names/dist.all.last
70
+ - samples/data/gutenberg/the_outline_of_science_vol_1
71
+ - samples/group_by.rb
60
72
  - samples/join.rb
61
73
  - samples/logwordcount.rb
62
74
  - samples/project.rb
63
75
  - samples/rename.rb
64
76
  - samples/scorenames.rb
65
77
  - samples/splitter.rb
78
+ - samples/sub_assembly.rb
66
79
  - samples/union.rb
67
80
  - spec/cascading_spec.rb
68
81
  - spec/expr_spec.rb
@@ -92,11 +105,14 @@ files:
92
105
  - tasks/test.rake
93
106
  - test/data/data1.txt
94
107
  - test/data/data2.txt
108
+ - test/mock_assemblies.rb
109
+ - test/test_aggregations.rb
95
110
  - test/test_assembly.rb
96
111
  - test/test_cascade.rb
97
112
  - test/test_cascading.rb
98
113
  - test/test_exceptions.rb
99
114
  - test/test_flow.rb
115
+ - test/test_local_execution.rb
100
116
  - test/test_operations.rb
101
117
  homepage: http://github.com/etsy/cascading.jruby
102
118
  licenses: []
@@ -112,6 +128,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
128
  requirements:
113
129
  - - ">="
114
130
  - !ruby/object:Gem::Version
131
+ hash: 2
132
+ segments:
133
+ - 0
115
134
  version: "0"
116
135
  required_rubygems_version: !ruby/object:Gem::Requirement
117
136
  none: false
@@ -127,9 +146,11 @@ signing_key:
127
146
  specification_version: 3
128
147
  summary: A JRuby DSL for Cascading
129
148
  test_files:
149
+ - test/test_aggregations.rb
130
150
  - test/test_assembly.rb
131
151
  - test/test_cascade.rb
132
152
  - test/test_cascading.rb
133
153
  - test/test_exceptions.rb
134
154
  - test/test_flow.rb
155
+ - test/test_local_execution.rb
135
156
  - test/test_operations.rb