cascading.jruby 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,41 +1,29 @@
1
1
  require 'test/unit'
2
2
  require 'cascading'
3
-
4
- def compare_with_references(test_name)
5
- result = compare_files("test/references/#{test_name}.txt", "output/#{test_name}/part-00000")
6
- assert_nil(result)
7
- end
8
-
9
- # Convenience for basic assembly tests; not valid for applications
10
- def assembly(name, &block)
11
- assembly = Assembly.new(name, nil)
12
- assembly.instance_eval(&block)
13
- assembly
14
- end
3
+ require 'test/mock_assemblies'
15
4
 
16
5
  class TC_Assembly < Test::Unit::TestCase
17
- def mock_assembly(&block)
18
- assembly = nil
19
- flow 'test' do
20
- source 'test', tap('test/data/data1.txt')
21
- assembly = assembly 'test', &block
22
- end
23
- assembly
24
- end
6
+ include MockAssemblies
25
7
 
26
8
  def test_create_assembly_simple
27
- assembly = assembly "assembly1" do
28
- # Empty assembly
9
+ assembly = nil
10
+ flow 'test_create_assembly_simple' do
11
+ assembly = assembly 'assembly1' do
12
+ # Empty assembly
13
+ end
29
14
  end
30
15
 
31
16
  assert_not_nil assembly
17
+ assert_equal assembly.name, 'assembly1'
18
+ assert_equal 0, assembly.children.size
19
+
32
20
  pipe = assembly.tail_pipe
33
- assert pipe.is_a? Java::CascadingPipe::Pipe
21
+ assert_equal Java::CascadingPipe::Pipe, pipe.class
34
22
  end
35
23
 
36
24
  def test_each_identity
37
25
  assembly = mock_assembly do
38
- each 'offset', :filter => identity
26
+ each 'offset', :function => identity
39
27
  end
40
28
 
41
29
  flow = assembly.parent
@@ -47,148 +35,424 @@ class TC_Assembly < Test::Unit::TestCase
47
35
  def test_create_each
48
36
  # You can apply an Each to 0 fields
49
37
  assembly = mock_assembly do
50
- each(:filter => identity)
38
+ each(:function => identity)
51
39
  end
52
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
40
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
53
41
 
54
42
  # In which case, it has empty argument and output selectors
55
43
  assert_equal 0, assembly.tail_pipe.argument_selector.size
56
44
  assert_equal 0, assembly.tail_pipe.output_selector.size
57
45
 
58
46
  assembly = mock_assembly do
59
- each('offset', :output => 'offset_copy',
60
- :filter => Java::CascadingOperation::Identity.new(fields('offset_copy')))
47
+ each 'offset', :output => 'offset_copy', :function => Java::CascadingOperation::Identity.new(fields('offset_copy'))
61
48
  end
62
49
  pipe = assembly.tail_pipe
63
50
 
64
- assert pipe.is_a? Java::CascadingPipe::Each
51
+ assert_equal Java::CascadingPipe::Each, pipe.class
65
52
 
66
- assert_equal 'offset', pipe.argument_selector.get(0)
67
- assert_equal 'offset_copy', pipe.output_selector.get(0)
53
+ assert_equal ['offset'], pipe.argument_selector.to_a
54
+ assert_equal ['offset_copy'], pipe.output_selector.to_a
68
55
  end
69
56
 
70
- # For now, replaced these tests with the trivial observation that you can't
71
- # follow a Tap with an Every. Eventually, should support testing within a
72
- # group_by block.
73
- def test_create_every
74
- assert_raise CascadingException do
57
+ def test_every_cannot_follow_tap
58
+ # Assembly#count is no longer defined; instead, it has moved to
59
+ # Aggregations#count
60
+ assert_raise NameError do
75
61
  assembly = mock_assembly do
76
- every(:aggregator => count_function)
62
+ count
77
63
  end
78
64
  pipe = assembly.tail_pipe
79
- assert pipe.is_a? Java::CascadingPipe::Every
65
+ assert Java::CascadingPipe::Every, pipe.class
80
66
  end
67
+ end
81
68
 
82
- assert_raise CascadingException do
69
+ def test_create_every
83
70
  assembly = mock_assembly do
84
- every(:aggregator => count_function("field1", "field2"))
71
+ group_by 'line' do
72
+ count_aggregator = Java::CascadingOperationAggregator::Count.new(fields('count'))
73
+ every 'line', :aggregator => count_aggregator, :output => 'count'
74
+ end
85
75
  end
86
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
87
- end
76
+ assert Java::CascadingPipe::Every, assembly.tail_pipe.class
77
+ assert_equal ['line'], assembly.tail_pipe.argument_selector.to_a
78
+ assert_equal ['count'], assembly.tail_pipe.output_selector.to_a
88
79
 
89
- assert_raise CascadingException do
90
80
  assembly = mock_assembly do
91
- every("Field1", :aggregator => count_function)
81
+ group_by 'line' do
82
+ count
83
+ end
92
84
  end
93
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
94
- assert_equal "Field1", assembly.tail_pipe.argument_selector.get(0)
95
- end
85
+ assert Java::CascadingPipe::Every, assembly.tail_pipe.class
96
86
 
97
- assert_raise CascadingException do
98
- assembly = mock_assembly do
99
- every('line', :aggregator => count_function, :output=>'line_count')
100
- end
101
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
102
- assert_equal 'line', assembly.tail_pipe.argument_selector.get(0)
103
- assert_equal 'line_count', assembly.tail_pipe.output_selector.get(0)
104
- end
87
+ # NOTE: this is not valid when we optimize using CountBy
88
+ #assert_equal last_grouping_fields, assembly.tail_pipe.argument_selector
89
+ assert_equal fields('count'), assembly.tail_pipe.argument_selector
90
+
91
+ assert_equal all_fields, assembly.tail_pipe.output_selector
105
92
  end
106
93
 
107
94
  def test_create_group_by
108
95
  assembly = mock_assembly do
109
- group_by('line')
96
+ group_by 'line'
110
97
  end
111
98
 
112
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
99
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
113
100
  grouping_fields = assembly.tail_pipe.key_selectors['test']
114
- assert_equal 'line', grouping_fields.get(0)
101
+ assert_equal ['line'], grouping_fields.to_a
102
+
103
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
104
+ assert_equal ['line'], assembly.scope.grouping_fields.to_a
115
105
 
116
106
  assembly = mock_assembly do
117
- group_by('line')
107
+ group_by 'offset'
118
108
  end
119
109
 
120
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
110
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
121
111
  grouping_fields = assembly.tail_pipe.key_selectors['test']
122
- assert_equal 'line', grouping_fields.get(0)
112
+ assert_equal ['offset'], grouping_fields.to_a
113
+
114
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
115
+ assert_equal ['offset'], assembly.scope.grouping_fields.to_a
123
116
  end
124
117
 
125
118
  def test_create_group_by_many_fields
126
119
  assembly = mock_assembly do
127
- group_by(['offset', 'line'])
120
+ group_by 'offset', 'line'
128
121
  end
129
122
 
130
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
123
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
131
124
  grouping_fields = assembly.tail_pipe.key_selectors['test']
132
- assert_equal 'offset', grouping_fields.get(0)
133
- assert_equal 'line', grouping_fields.get(1)
125
+ assert_equal ['offset', 'line'], grouping_fields.to_a
126
+
127
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
128
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
134
129
  end
135
130
 
136
131
  def test_create_group_by_with_sort
137
132
  assembly = mock_assembly do
138
- group_by('offset', 'line', :sort_by => ['line'])
133
+ group_by 'offset', 'line', :sort_by => 'line'
139
134
  end
140
135
 
141
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
136
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
142
137
  grouping_fields = assembly.tail_pipe.key_selectors['test']
143
138
  sorting_fields = assembly.tail_pipe.sorting_selectors['test']
144
139
 
145
- assert_equal 2, grouping_fields.size
146
- assert_equal 1, sorting_fields.size
140
+ assert assembly.tail_pipe.is_sorted
141
+ assert !assembly.tail_pipe.is_sort_reversed
147
142
 
148
- assert_equal 'offset', grouping_fields.get(0)
149
- assert_equal 'line', grouping_fields.get(1)
150
- assert assembly.tail_pipe.isSorted()
151
- assert !assembly.tail_pipe.isSortReversed()
152
- assert_equal 'line', sorting_fields.get(0)
143
+ assert_equal ['offset', 'line'], grouping_fields.to_a
144
+ assert_equal ['line'], sorting_fields.to_a
145
+
146
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
147
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
153
148
  end
154
149
 
155
150
  def test_create_group_by_with_sort_reverse
156
151
  assembly = mock_assembly do
157
- group_by('offset', 'line', :sort_by => ['line'], :reverse => true)
152
+ group_by 'offset', 'line', :sort_by => 'line', :reverse => true
158
153
  end
159
154
 
160
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
155
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
161
156
  grouping_fields = assembly.tail_pipe.key_selectors['test']
162
157
  sorting_fields = assembly.tail_pipe.sorting_selectors['test']
163
158
 
164
- assert_equal 2, grouping_fields.size
165
- assert_equal 1, sorting_fields.size
159
+ assert assembly.tail_pipe.is_sorted
160
+ assert assembly.tail_pipe.is_sort_reversed
161
+
162
+ assert_equal ['offset', 'line'], grouping_fields.to_a
163
+ assert_equal ['line'], sorting_fields.to_a
166
164
 
167
- assert_equal 'offset', grouping_fields.get(0)
168
- assert_equal 'line', grouping_fields.get(1)
169
- assert assembly.tail_pipe.isSorted()
170
- assert assembly.tail_pipe.isSortReversed()
171
- assert_equal 'line', sorting_fields.get(0)
165
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
166
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
172
167
  end
173
168
 
174
169
  def test_create_group_by_reverse
175
170
  assembly = mock_assembly do
176
- group_by('offset', 'line', :reverse => true)
171
+ group_by 'offset', 'line', :reverse => true
177
172
  end
178
173
 
179
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
174
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
180
175
  grouping_fields = assembly.tail_pipe.key_selectors['test']
181
176
  sorting_fields = assembly.tail_pipe.sorting_selectors['test']
182
177
 
183
- assert_equal 2, grouping_fields.size
184
- assert_equal 2, sorting_fields.size
178
+ assert !assembly.tail_pipe.is_sorted
179
+ assert assembly.tail_pipe.is_sort_reversed
180
+
181
+ assert_equal ['offset', 'line'], grouping_fields.to_a
182
+ assert_nil sorting_fields
183
+
184
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
185
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
186
+ end
187
+
188
+ def test_create_union
189
+ assembly = mock_branched_assembly do
190
+ union 'test1', 'test2', :on => 'line'
191
+ end
192
+
193
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
194
+
195
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
196
+ assert_equal ['line'], left_grouping_fields.to_a
197
+
198
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
199
+ assert_equal ['line'], right_grouping_fields.to_a
200
+
201
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
202
+ assert_equal ['line'], assembly.scope.grouping_fields.to_a
203
+
204
+ assembly = mock_branched_assembly do
205
+ union 'test1', 'test2', :on => 'offset'
206
+ end
207
+
208
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
209
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
210
+ assert_equal ['offset'], left_grouping_fields.to_a
211
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
212
+ assert_equal ['offset'], right_grouping_fields.to_a
213
+
214
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
215
+ assert_equal ['offset'], assembly.scope.grouping_fields.to_a
216
+
217
+ assembly = mock_branched_assembly do
218
+ union 'test1', 'test2'
219
+ end
220
+
221
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
222
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
223
+ assert_equal ['offset'], left_grouping_fields.to_a
224
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
225
+ assert_equal ['offset'], right_grouping_fields.to_a
226
+
227
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
228
+ assert_equal ['offset'], assembly.scope.grouping_fields.to_a
229
+ end
230
+
231
+ def test_create_union_many_fields
232
+ assembly = mock_branched_assembly do
233
+ union 'test1', 'test2', :on => ['offset', 'line']
234
+ end
235
+
236
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
237
+
238
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
239
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
240
+
241
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
242
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
243
+
244
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
245
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
246
+ end
247
+
248
+ def test_create_union_with_sort
249
+ assembly = mock_branched_assembly do
250
+ union 'test1', 'test2', :on => ['offset', 'line'], :sort_by => 'line'
251
+ end
252
+
253
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
254
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
255
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
256
+ left_sorting_fields = assembly.tail_pipe.sorting_selectors['test1']
257
+ right_sorting_fields = assembly.tail_pipe.sorting_selectors['test2']
258
+
259
+ assert assembly.tail_pipe.is_sorted
260
+ assert !assembly.tail_pipe.is_sort_reversed
261
+
262
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
263
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
264
+ assert_equal ['line'], left_sorting_fields.to_a
265
+ assert_equal ['line'], right_sorting_fields.to_a
266
+
267
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
268
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
269
+ end
270
+
271
+ def test_create_union_with_sort_reverse
272
+ assembly = mock_branched_assembly do
273
+ union 'test1', 'test2', :on => ['offset', 'line'], :sort_by => 'line', :reverse => true
274
+ end
275
+
276
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
277
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
278
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
279
+ left_sorting_fields = assembly.tail_pipe.sorting_selectors['test1']
280
+ right_sorting_fields = assembly.tail_pipe.sorting_selectors['test2']
281
+
282
+ assert assembly.tail_pipe.is_sorted
283
+ assert assembly.tail_pipe.is_sort_reversed
284
+
285
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
286
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
287
+ assert_equal ['line'], left_sorting_fields.to_a
288
+ assert_equal ['line'], right_sorting_fields.to_a
289
+
290
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
291
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
292
+ end
293
+
294
+ def test_create_union_reverse
295
+ assembly = mock_branched_assembly do
296
+ union 'test1', 'test2', :on => ['offset', 'line'], :reverse => true
297
+ end
298
+
299
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
300
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
301
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
302
+ left_sorting_fields = assembly.tail_pipe.sorting_selectors['test1']
303
+ right_sorting_fields = assembly.tail_pipe.sorting_selectors['test2']
304
+
305
+ assert assembly.tail_pipe.is_sorted # FIXME: Missing constructor in wip-255
306
+ assert assembly.tail_pipe.is_sort_reversed
307
+
308
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
309
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
310
+ assert_equal ['offset', 'line'], left_sorting_fields.to_a # FIXME: Missing constructor in wip-255
311
+ assert_equal ['offset', 'line'], right_sorting_fields.to_a # FIXME: Missing constructor in wip-255
312
+
313
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
314
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
315
+ end
316
+
317
+ def test_union_undefined_inputs
318
+ assert_raise RuntimeError, "Could not find assembly 'doesnotexist' in union" do
319
+ flow 'test_union_undefined_inputs' do
320
+ source 'data1', tap('test/data/data1.txt')
321
+
322
+ assembly 'data1' do
323
+ pass
324
+ end
325
+
326
+ assembly 'union' do
327
+ union 'doesnotexist', 'data1'
328
+ end
329
+
330
+ sink 'union', tap('output/test_union_undefined_inputs')
331
+ end
332
+ end
333
+ end
334
+
335
+ def test_create_join
336
+ assembly = mock_two_input_assembly do
337
+ join 'test1', 'test2', :on => 'name'
338
+ end
339
+
340
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
341
+
342
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
343
+ assert_equal ['name'], left_grouping_fields.to_a
344
+
345
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
346
+ assert_equal ['name'], right_grouping_fields.to_a
347
+
348
+ assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
349
+ assert_equal ['name', 'name_'], assembly.scope.grouping_fields.to_a
350
+
351
+ assembly = mock_two_input_assembly do
352
+ join 'test1', 'test2', :on => 'id'
353
+ end
354
+
355
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
356
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
357
+ assert_equal ['id'], left_grouping_fields.to_a
358
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
359
+ assert_equal ['id'], right_grouping_fields.to_a
360
+
361
+ assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
362
+ assert_equal ['id', 'id_'], assembly.scope.grouping_fields.to_a
363
+ end
364
+
365
+ def test_create_join_many_fields
366
+ assembly = mock_two_input_assembly do
367
+ join 'test1', 'test2', :on => ['name', 'id']
368
+ end
369
+
370
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
371
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
372
+ assert_equal ['name', 'id'], left_grouping_fields.to_a
373
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
374
+ assert_equal ['name', 'id'], right_grouping_fields.to_a
185
375
 
186
- assert_equal 'offset', grouping_fields.get(0)
187
- assert_equal 'line', grouping_fields.get(1)
188
- assert assembly.tail_pipe.isSorted()
189
- assert assembly.tail_pipe.isSortReversed()
190
- assert_equal 'offset', sorting_fields.get(0)
191
- assert_equal 'line', sorting_fields.get(1)
376
+ assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
377
+ assert_equal ['name', 'id', 'name_', 'id_'], assembly.scope.grouping_fields.to_a
378
+ end
379
+
380
+ def test_create_join_with_declared_fields
381
+ assembly = mock_two_input_assembly do
382
+ join 'test1', 'test2', :on => 'name', :declared_fields => ['a', 'b', 'c', 'd', 'e', 'f', 'g']
383
+ end
384
+
385
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
386
+
387
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
388
+ assert_equal ['name'], left_grouping_fields.to_a
389
+
390
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
391
+ assert_equal ['name'], right_grouping_fields.to_a
392
+
393
+ assert_equal ['a', 'b', 'c', 'd', 'e', 'f', 'g'], assembly.scope.values_fields.to_a
394
+ assert_equal ['name', 'name_'], assembly.scope.grouping_fields.to_a
395
+ end
396
+
397
+ def test_join_with_block
398
+ assembly = mock_two_input_assembly do
399
+ join 'test1', 'test2', :on => 'name' do
400
+ count
401
+ end
402
+ end
403
+
404
+ assert_equal Java::CascadingPipe::Every, assembly.tail_pipe.class
405
+
406
+ assert_equal ['name', 'name_', 'count'], assembly.scope.values_fields.to_a
407
+ assert_equal ['name', 'name_', 'count'], assembly.scope.grouping_fields.to_a
408
+ end
409
+
410
+ def test_join_undefined_inputs
411
+ assert_raise RuntimeError, "Could not find assembly 'doesnotexist' in join" do
412
+ flow 'test_join_undefined_inputs' do
413
+ source 'data1', tap('test/data/data1.txt')
414
+
415
+ assembly 'data1' do
416
+ pass
417
+ end
418
+
419
+ assembly 'join' do
420
+ join 'doesnotexist', 'data1', :on => 'name'
421
+ end
422
+
423
+ sink 'join', tap('output/test_join_undefined_inputs')
424
+ end
425
+ end
426
+ end
427
+
428
+ def test_join_without_on
429
+ assert_raise RuntimeError, 'join requires :on parameter' do
430
+ mock_two_input_assembly do
431
+ join 'test1', 'test2'
432
+ end
433
+ end
434
+ end
435
+
436
+ def test_join_invalid_on
437
+ assert_raise RuntimeError, "Unsupported data type for :on in join: 'Fixnum'" do
438
+ mock_two_input_assembly do
439
+ join 'test1', 'test2', :on => 1
440
+ end
441
+ end
442
+ end
443
+
444
+ def test_join_empty_on
445
+ assert_raise RuntimeError, 'join requres non-empty :on parameter' do
446
+ mock_two_input_assembly do
447
+ join 'test1', 'test2', :on => []
448
+ end
449
+ end
450
+
451
+ assert_raise RuntimeError, 'join requres non-empty :on parameter' do
452
+ mock_two_input_assembly do
453
+ join 'test1', 'test2', :on => {}
454
+ end
455
+ end
192
456
  end
193
457
 
194
458
  def test_branch_unique
@@ -198,7 +462,6 @@ class TC_Assembly < Test::Unit::TestCase
198
462
  end
199
463
 
200
464
  assert_equal 1, assembly.children.size
201
-
202
465
  end
203
466
 
204
467
  def test_branch_empty
@@ -231,20 +494,74 @@ class TC_Assembly < Test::Unit::TestCase
231
494
  assert_equal 0, assembly.children['branch1'].children['branch2'].children.size
232
495
  end
233
496
 
234
- # Fixed this test, but it isn't even valid. You shouldn't be able to follow
235
- # an Each with an Every.
236
- def test_full_assembly
237
- assert_raise CascadingException do
238
- assembly = mock_assembly do
239
- each('offset', :output => 'offset_copy',
240
- :filter => Java::CascadingOperation::Identity.new(fields('offset_copy')))
241
- every(:aggregator => count_function)
242
- end
497
+ def test_sub_assembly
498
+ assembly = mock_assembly do
499
+ sub_assembly Java::CascadingPipeAssembly::Discard.new(tail_pipe, fields('offset'))
500
+ end
501
+ assert_equal ['line'], assembly.scope.values_fields.to_a
243
502
 
244
- pipe = assembly.tail_pipe
503
+ assembly = mock_assembly do
504
+ sub_assembly Java::CascadingPipeAssembly::Retain.new(tail_pipe, fields('offset'))
505
+ end
506
+ assert_equal ['offset'], assembly.scope.values_fields.to_a
245
507
 
246
- assert pipe.is_a? Java::CascadingPipe::Every
508
+ assembly = mock_assembly do
509
+ sub_assembly Java::CascadingPipeAssembly::Rename.new(tail_pipe, fields(['offset', 'line']), fields(['byte', 'line']))
247
510
  end
511
+ assert_equal ['byte', 'line'], assembly.scope.values_fields.to_a
512
+
513
+ assembly = mock_assembly do
514
+ sub_assembly Java::CascadingPipeAssembly::Unique.new(tail_pipe, fields('line'))
515
+ end
516
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
517
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
518
+ end
519
+
520
+ def test_count_by_sub_assembly
521
+ assembly = mock_branched_assembly do
522
+ pipes, _ = populate_incoming_scopes(['test1', 'test2'])
523
+
524
+ aggregate_by = Java::CascadingPipeAssembly::AggregateBy.new(
525
+ name,
526
+ pipes.to_java(Java::CascadingPipe::Pipe),
527
+ fields('line'),
528
+ [Java::CascadingPipeAssembly::CountBy.new(fields('count'))].to_java(Java::CascadingPipeAssembly::AggregateBy)
529
+ )
530
+
531
+ sub_assembly aggregate_by, pipes, @incoming_scopes
532
+ end
533
+ assert_equal ['line', 'count'], assembly.scope.values_fields.to_a
534
+ assert_equal ['line', 'count'], assembly.scope.grouping_fields.to_a
535
+ end
536
+
537
+ def test_average_by_sub_assembly
538
+ assembly = mock_assembly do
539
+ aggregate_by = Java::CascadingPipeAssembly::AggregateBy.new(
540
+ name,
541
+ [tail_pipe].to_java(Java::CascadingPipe::Pipe),
542
+ fields('line'),
543
+ [Java::CascadingPipeAssembly::AverageBy.new(fields('offset'), fields('average'))].to_java(Java::CascadingPipeAssembly::AggregateBy)
544
+ )
545
+
546
+ sub_assembly aggregate_by
547
+ end
548
+ assert_equal ['line', 'average'], assembly.scope.values_fields.to_a
549
+ assert_equal ['line', 'average'], assembly.scope.grouping_fields.to_a
550
+ end
551
+
552
+ def test_sum_by_sub_assembly
553
+ assembly = mock_assembly do
554
+ aggregate_by = Java::CascadingPipeAssembly::AggregateBy.new(
555
+ name,
556
+ [tail_pipe].to_java(Java::CascadingPipe::Pipe),
557
+ fields('line'),
558
+ [Java::CascadingPipeAssembly::SumBy.new(fields('offset'), fields('sum'), Java::double.java_class)].to_java(Java::CascadingPipeAssembly::AggregateBy)
559
+ )
560
+
561
+ sub_assembly aggregate_by
562
+ end
563
+ assert_equal ['line', 'sum'], assembly.scope.values_fields.to_a
564
+ assert_equal ['line', 'sum'], assembly.scope.grouping_fields.to_a
248
565
  end
249
566
 
250
567
  def test_empty_where
@@ -252,10 +569,10 @@ class TC_Assembly < Test::Unit::TestCase
252
569
  split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
253
570
  where
254
571
  end
255
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
572
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
256
573
 
257
574
  # Empty where compiles away
258
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationRegex::RegexSplitter
575
+ assert_equal Java::CascadingOperationRegex::RegexSplitter, assembly.tail_pipe.operation.class
259
576
  end
260
577
 
261
578
  def test_where
@@ -263,8 +580,8 @@ class TC_Assembly < Test::Unit::TestCase
263
580
  split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
264
581
  where 'score1:double < score2:double'
265
582
  end
266
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
267
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
583
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
584
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
268
585
  end
269
586
 
270
587
  def test_where_with_expression
@@ -272,8 +589,8 @@ class TC_Assembly < Test::Unit::TestCase
272
589
  split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
273
590
  where :expression => 'score1:double < score2:double'
274
591
  end
275
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
276
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
592
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
593
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
277
594
  end
278
595
 
279
596
  def test_where_with_import
@@ -282,8 +599,8 @@ class TC_Assembly < Test::Unit::TestCase
282
599
  names = ['SMITH', 'JONES', 'BROWN']
283
600
  where "import java.util.Arrays;\nArrays.asList(new String[] { \"#{names.join('", "')}\" }).contains(name:string)"
284
601
  end
285
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
286
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
602
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
603
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
287
604
  end
288
605
 
289
606
  def test_smoke_test_debug_scope
@@ -291,92 +608,16 @@ class TC_Assembly < Test::Unit::TestCase
291
608
  flow 'smoke' do
292
609
  source 'input', tap('test/data/data1.txt')
293
610
  assembly 'input' do
294
- pass
611
+ debug_scope
612
+ group_by 'line' do
613
+ count
614
+ sum 'offset', :type => :long
615
+ debug_scope
616
+ end
295
617
  debug_scope
296
618
  end
297
- sink 'input', tap('output/smoke_test_debug_scope')
619
+ sink 'input', tap('output/test_smoke_test_debug_scope')
298
620
  end
299
621
  end
300
622
  end
301
623
  end
302
-
303
- class TC_AssemblyScenarii < Test::Unit::TestCase
304
- def test_smoke_test_sequence_file_scheme
305
- cascade 'smoke' do
306
- flow 'smoke' do
307
- source 'input', tap('test/data/data1.txt')
308
- assembly 'input' do
309
- pass
310
- end
311
- compress_output :default, :block
312
- sink 'input', tap('output/smoke_test_sequence_file_scheme', :scheme => sequence_file_scheme)
313
- end
314
- end.complete
315
- end
316
-
317
- def test_splitter
318
- flow = flow "splitter" do
319
- source "copy", tap("test/data/data1.txt")
320
- sink "copy", tap('output/splitter', :sink_mode => :replace)
321
-
322
- assembly "copy" do
323
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "score1", "score2", "id"], :output => ["name", "score1", "score2", "id"]
324
- assert_size_equals 4
325
- assert_not_null
326
- debug :print_fields => true
327
- end
328
- end.complete
329
- end
330
-
331
- def test_join1
332
- cascade 'splitter' do
333
- flow 'splitter' do
334
- source "data1", tap("test/data/data1.txt")
335
- source "data2", tap("test/data/data2.txt")
336
- sink "joined", tap('output/joined', :sink_mode => :replace)
337
-
338
- assembly1 = assembly "data1" do
339
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "score1", "score2", "id"], :output => ["name", "score1", "score2", "id"]
340
- assert_size_equals 4
341
- assert_not_null
342
- debug :print_fields => true
343
- end
344
-
345
- assembly2 = assembly "data2" do
346
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "id", "town"], :output => ["name", "id", "town"]
347
- assert_size_equals 3
348
- assert_not_null
349
- debug :print_fields => true
350
- end
351
-
352
- assembly "joined" do
353
- join assembly1.name, assembly2.name, :on => ["name", "id"], :declared_fields => ["name", "score1", "score2", "id", "name2", "id2", "town"]
354
- assert_size_equals 7
355
- assert_not_null
356
- end
357
- end
358
- end.complete
359
- end
360
-
361
- def test_join2
362
- flow = flow "splitter" do
363
- source "data1", tap("test/data/data1.txt")
364
- source "data2", tap("test/data/data2.txt")
365
- sink "joined", tap('output/joined', :sink_mode => :replace)
366
-
367
- assembly "data1" do
368
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "score1", "score2", "id"], :output => ["name", "score1", "score2", "id"]
369
- debug :print_fields => true
370
- end
371
-
372
- assembly "data2" do
373
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "code", "town"], :output => ["name", "code", "town"]
374
- debug :print_fields => true
375
- end
376
-
377
- assembly "joined" do
378
- join :on => {"data1"=>["name", "id"], "data2"=>["name", "code"]}, :declared_fields => ["name", "score1", "score2", "id", "name2", "code", "town"]
379
- end
380
- end.complete
381
- end
382
- end