cascading.jruby 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,41 +1,29 @@
1
1
  require 'test/unit'
2
2
  require 'cascading'
3
-
4
- def compare_with_references(test_name)
5
- result = compare_files("test/references/#{test_name}.txt", "output/#{test_name}/part-00000")
6
- assert_nil(result)
7
- end
8
-
9
- # Convenience for basic assembly tests; not valid for applications
10
- def assembly(name, &block)
11
- assembly = Assembly.new(name, nil)
12
- assembly.instance_eval(&block)
13
- assembly
14
- end
3
+ require 'test/mock_assemblies'
15
4
 
16
5
  class TC_Assembly < Test::Unit::TestCase
17
- def mock_assembly(&block)
18
- assembly = nil
19
- flow 'test' do
20
- source 'test', tap('test/data/data1.txt')
21
- assembly = assembly 'test', &block
22
- end
23
- assembly
24
- end
6
+ include MockAssemblies
25
7
 
26
8
  def test_create_assembly_simple
27
- assembly = assembly "assembly1" do
28
- # Empty assembly
9
+ assembly = nil
10
+ flow 'test_create_assembly_simple' do
11
+ assembly = assembly 'assembly1' do
12
+ # Empty assembly
13
+ end
29
14
  end
30
15
 
31
16
  assert_not_nil assembly
17
+ assert_equal assembly.name, 'assembly1'
18
+ assert_equal 0, assembly.children.size
19
+
32
20
  pipe = assembly.tail_pipe
33
- assert pipe.is_a? Java::CascadingPipe::Pipe
21
+ assert_equal Java::CascadingPipe::Pipe, pipe.class
34
22
  end
35
23
 
36
24
  def test_each_identity
37
25
  assembly = mock_assembly do
38
- each 'offset', :filter => identity
26
+ each 'offset', :function => identity
39
27
  end
40
28
 
41
29
  flow = assembly.parent
@@ -47,148 +35,424 @@ class TC_Assembly < Test::Unit::TestCase
47
35
  def test_create_each
48
36
  # You can apply an Each to 0 fields
49
37
  assembly = mock_assembly do
50
- each(:filter => identity)
38
+ each(:function => identity)
51
39
  end
52
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
40
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
53
41
 
54
42
  # In which case, it has empty argument and output selectors
55
43
  assert_equal 0, assembly.tail_pipe.argument_selector.size
56
44
  assert_equal 0, assembly.tail_pipe.output_selector.size
57
45
 
58
46
  assembly = mock_assembly do
59
- each('offset', :output => 'offset_copy',
60
- :filter => Java::CascadingOperation::Identity.new(fields('offset_copy')))
47
+ each 'offset', :output => 'offset_copy', :function => Java::CascadingOperation::Identity.new(fields('offset_copy'))
61
48
  end
62
49
  pipe = assembly.tail_pipe
63
50
 
64
- assert pipe.is_a? Java::CascadingPipe::Each
51
+ assert_equal Java::CascadingPipe::Each, pipe.class
65
52
 
66
- assert_equal 'offset', pipe.argument_selector.get(0)
67
- assert_equal 'offset_copy', pipe.output_selector.get(0)
53
+ assert_equal ['offset'], pipe.argument_selector.to_a
54
+ assert_equal ['offset_copy'], pipe.output_selector.to_a
68
55
  end
69
56
 
70
- # For now, replaced these tests with the trivial observation that you can't
71
- # follow a Tap with an Every. Eventually, should support testing within a
72
- # group_by block.
73
- def test_create_every
74
- assert_raise CascadingException do
57
+ def test_every_cannot_follow_tap
58
+ # Assembly#count is no longer defined; instead, it has moved to
59
+ # Aggregations#count
60
+ assert_raise NameError do
75
61
  assembly = mock_assembly do
76
- every(:aggregator => count_function)
62
+ count
77
63
  end
78
64
  pipe = assembly.tail_pipe
79
- assert pipe.is_a? Java::CascadingPipe::Every
65
+ assert Java::CascadingPipe::Every, pipe.class
80
66
  end
67
+ end
81
68
 
82
- assert_raise CascadingException do
69
+ def test_create_every
83
70
  assembly = mock_assembly do
84
- every(:aggregator => count_function("field1", "field2"))
71
+ group_by 'line' do
72
+ count_aggregator = Java::CascadingOperationAggregator::Count.new(fields('count'))
73
+ every 'line', :aggregator => count_aggregator, :output => 'count'
74
+ end
85
75
  end
86
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
87
- end
76
+ assert Java::CascadingPipe::Every, assembly.tail_pipe.class
77
+ assert_equal ['line'], assembly.tail_pipe.argument_selector.to_a
78
+ assert_equal ['count'], assembly.tail_pipe.output_selector.to_a
88
79
 
89
- assert_raise CascadingException do
90
80
  assembly = mock_assembly do
91
- every("Field1", :aggregator => count_function)
81
+ group_by 'line' do
82
+ count
83
+ end
92
84
  end
93
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
94
- assert_equal "Field1", assembly.tail_pipe.argument_selector.get(0)
95
- end
85
+ assert Java::CascadingPipe::Every, assembly.tail_pipe.class
96
86
 
97
- assert_raise CascadingException do
98
- assembly = mock_assembly do
99
- every('line', :aggregator => count_function, :output=>'line_count')
100
- end
101
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
102
- assert_equal 'line', assembly.tail_pipe.argument_selector.get(0)
103
- assert_equal 'line_count', assembly.tail_pipe.output_selector.get(0)
104
- end
87
+ # NOTE: this is not valid when we optimize using CountBy
88
+ #assert_equal last_grouping_fields, assembly.tail_pipe.argument_selector
89
+ assert_equal fields('count'), assembly.tail_pipe.argument_selector
90
+
91
+ assert_equal all_fields, assembly.tail_pipe.output_selector
105
92
  end
106
93
 
107
94
  def test_create_group_by
108
95
  assembly = mock_assembly do
109
- group_by('line')
96
+ group_by 'line'
110
97
  end
111
98
 
112
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
99
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
113
100
  grouping_fields = assembly.tail_pipe.key_selectors['test']
114
- assert_equal 'line', grouping_fields.get(0)
101
+ assert_equal ['line'], grouping_fields.to_a
102
+
103
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
104
+ assert_equal ['line'], assembly.scope.grouping_fields.to_a
115
105
 
116
106
  assembly = mock_assembly do
117
- group_by('line')
107
+ group_by 'offset'
118
108
  end
119
109
 
120
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
110
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
121
111
  grouping_fields = assembly.tail_pipe.key_selectors['test']
122
- assert_equal 'line', grouping_fields.get(0)
112
+ assert_equal ['offset'], grouping_fields.to_a
113
+
114
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
115
+ assert_equal ['offset'], assembly.scope.grouping_fields.to_a
123
116
  end
124
117
 
125
118
  def test_create_group_by_many_fields
126
119
  assembly = mock_assembly do
127
- group_by(['offset', 'line'])
120
+ group_by 'offset', 'line'
128
121
  end
129
122
 
130
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
123
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
131
124
  grouping_fields = assembly.tail_pipe.key_selectors['test']
132
- assert_equal 'offset', grouping_fields.get(0)
133
- assert_equal 'line', grouping_fields.get(1)
125
+ assert_equal ['offset', 'line'], grouping_fields.to_a
126
+
127
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
128
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
134
129
  end
135
130
 
136
131
  def test_create_group_by_with_sort
137
132
  assembly = mock_assembly do
138
- group_by('offset', 'line', :sort_by => ['line'])
133
+ group_by 'offset', 'line', :sort_by => 'line'
139
134
  end
140
135
 
141
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
136
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
142
137
  grouping_fields = assembly.tail_pipe.key_selectors['test']
143
138
  sorting_fields = assembly.tail_pipe.sorting_selectors['test']
144
139
 
145
- assert_equal 2, grouping_fields.size
146
- assert_equal 1, sorting_fields.size
140
+ assert assembly.tail_pipe.is_sorted
141
+ assert !assembly.tail_pipe.is_sort_reversed
147
142
 
148
- assert_equal 'offset', grouping_fields.get(0)
149
- assert_equal 'line', grouping_fields.get(1)
150
- assert assembly.tail_pipe.isSorted()
151
- assert !assembly.tail_pipe.isSortReversed()
152
- assert_equal 'line', sorting_fields.get(0)
143
+ assert_equal ['offset', 'line'], grouping_fields.to_a
144
+ assert_equal ['line'], sorting_fields.to_a
145
+
146
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
147
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
153
148
  end
154
149
 
155
150
  def test_create_group_by_with_sort_reverse
156
151
  assembly = mock_assembly do
157
- group_by('offset', 'line', :sort_by => ['line'], :reverse => true)
152
+ group_by 'offset', 'line', :sort_by => 'line', :reverse => true
158
153
  end
159
154
 
160
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
155
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
161
156
  grouping_fields = assembly.tail_pipe.key_selectors['test']
162
157
  sorting_fields = assembly.tail_pipe.sorting_selectors['test']
163
158
 
164
- assert_equal 2, grouping_fields.size
165
- assert_equal 1, sorting_fields.size
159
+ assert assembly.tail_pipe.is_sorted
160
+ assert assembly.tail_pipe.is_sort_reversed
161
+
162
+ assert_equal ['offset', 'line'], grouping_fields.to_a
163
+ assert_equal ['line'], sorting_fields.to_a
166
164
 
167
- assert_equal 'offset', grouping_fields.get(0)
168
- assert_equal 'line', grouping_fields.get(1)
169
- assert assembly.tail_pipe.isSorted()
170
- assert assembly.tail_pipe.isSortReversed()
171
- assert_equal 'line', sorting_fields.get(0)
165
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
166
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
172
167
  end
173
168
 
174
169
  def test_create_group_by_reverse
175
170
  assembly = mock_assembly do
176
- group_by('offset', 'line', :reverse => true)
171
+ group_by 'offset', 'line', :reverse => true
177
172
  end
178
173
 
179
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
174
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
180
175
  grouping_fields = assembly.tail_pipe.key_selectors['test']
181
176
  sorting_fields = assembly.tail_pipe.sorting_selectors['test']
182
177
 
183
- assert_equal 2, grouping_fields.size
184
- assert_equal 2, sorting_fields.size
178
+ assert !assembly.tail_pipe.is_sorted
179
+ assert assembly.tail_pipe.is_sort_reversed
180
+
181
+ assert_equal ['offset', 'line'], grouping_fields.to_a
182
+ assert_nil sorting_fields
183
+
184
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
185
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
186
+ end
187
+
188
+ def test_create_union
189
+ assembly = mock_branched_assembly do
190
+ union 'test1', 'test2', :on => 'line'
191
+ end
192
+
193
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
194
+
195
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
196
+ assert_equal ['line'], left_grouping_fields.to_a
197
+
198
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
199
+ assert_equal ['line'], right_grouping_fields.to_a
200
+
201
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
202
+ assert_equal ['line'], assembly.scope.grouping_fields.to_a
203
+
204
+ assembly = mock_branched_assembly do
205
+ union 'test1', 'test2', :on => 'offset'
206
+ end
207
+
208
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
209
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
210
+ assert_equal ['offset'], left_grouping_fields.to_a
211
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
212
+ assert_equal ['offset'], right_grouping_fields.to_a
213
+
214
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
215
+ assert_equal ['offset'], assembly.scope.grouping_fields.to_a
216
+
217
+ assembly = mock_branched_assembly do
218
+ union 'test1', 'test2'
219
+ end
220
+
221
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
222
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
223
+ assert_equal ['offset'], left_grouping_fields.to_a
224
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
225
+ assert_equal ['offset'], right_grouping_fields.to_a
226
+
227
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
228
+ assert_equal ['offset'], assembly.scope.grouping_fields.to_a
229
+ end
230
+
231
+ def test_create_union_many_fields
232
+ assembly = mock_branched_assembly do
233
+ union 'test1', 'test2', :on => ['offset', 'line']
234
+ end
235
+
236
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
237
+
238
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
239
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
240
+
241
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
242
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
243
+
244
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
245
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
246
+ end
247
+
248
+ def test_create_union_with_sort
249
+ assembly = mock_branched_assembly do
250
+ union 'test1', 'test2', :on => ['offset', 'line'], :sort_by => 'line'
251
+ end
252
+
253
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
254
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
255
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
256
+ left_sorting_fields = assembly.tail_pipe.sorting_selectors['test1']
257
+ right_sorting_fields = assembly.tail_pipe.sorting_selectors['test2']
258
+
259
+ assert assembly.tail_pipe.is_sorted
260
+ assert !assembly.tail_pipe.is_sort_reversed
261
+
262
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
263
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
264
+ assert_equal ['line'], left_sorting_fields.to_a
265
+ assert_equal ['line'], right_sorting_fields.to_a
266
+
267
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
268
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
269
+ end
270
+
271
+ def test_create_union_with_sort_reverse
272
+ assembly = mock_branched_assembly do
273
+ union 'test1', 'test2', :on => ['offset', 'line'], :sort_by => 'line', :reverse => true
274
+ end
275
+
276
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
277
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
278
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
279
+ left_sorting_fields = assembly.tail_pipe.sorting_selectors['test1']
280
+ right_sorting_fields = assembly.tail_pipe.sorting_selectors['test2']
281
+
282
+ assert assembly.tail_pipe.is_sorted
283
+ assert assembly.tail_pipe.is_sort_reversed
284
+
285
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
286
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
287
+ assert_equal ['line'], left_sorting_fields.to_a
288
+ assert_equal ['line'], right_sorting_fields.to_a
289
+
290
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
291
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
292
+ end
293
+
294
+ def test_create_union_reverse
295
+ assembly = mock_branched_assembly do
296
+ union 'test1', 'test2', :on => ['offset', 'line'], :reverse => true
297
+ end
298
+
299
+ assert_equal Java::CascadingPipe::GroupBy, assembly.tail_pipe.class
300
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
301
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
302
+ left_sorting_fields = assembly.tail_pipe.sorting_selectors['test1']
303
+ right_sorting_fields = assembly.tail_pipe.sorting_selectors['test2']
304
+
305
+ assert assembly.tail_pipe.is_sorted # FIXME: Missing constructor in wip-255
306
+ assert assembly.tail_pipe.is_sort_reversed
307
+
308
+ assert_equal ['offset', 'line'], left_grouping_fields.to_a
309
+ assert_equal ['offset', 'line'], right_grouping_fields.to_a
310
+ assert_equal ['offset', 'line'], left_sorting_fields.to_a # FIXME: Missing constructor in wip-255
311
+ assert_equal ['offset', 'line'], right_sorting_fields.to_a # FIXME: Missing constructor in wip-255
312
+
313
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
314
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
315
+ end
316
+
317
+ def test_union_undefined_inputs
318
+ assert_raise RuntimeError, "Could not find assembly 'doesnotexist' in union" do
319
+ flow 'test_union_undefined_inputs' do
320
+ source 'data1', tap('test/data/data1.txt')
321
+
322
+ assembly 'data1' do
323
+ pass
324
+ end
325
+
326
+ assembly 'union' do
327
+ union 'doesnotexist', 'data1'
328
+ end
329
+
330
+ sink 'union', tap('output/test_union_undefined_inputs')
331
+ end
332
+ end
333
+ end
334
+
335
+ def test_create_join
336
+ assembly = mock_two_input_assembly do
337
+ join 'test1', 'test2', :on => 'name'
338
+ end
339
+
340
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
341
+
342
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
343
+ assert_equal ['name'], left_grouping_fields.to_a
344
+
345
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
346
+ assert_equal ['name'], right_grouping_fields.to_a
347
+
348
+ assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
349
+ assert_equal ['name', 'name_'], assembly.scope.grouping_fields.to_a
350
+
351
+ assembly = mock_two_input_assembly do
352
+ join 'test1', 'test2', :on => 'id'
353
+ end
354
+
355
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
356
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
357
+ assert_equal ['id'], left_grouping_fields.to_a
358
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
359
+ assert_equal ['id'], right_grouping_fields.to_a
360
+
361
+ assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
362
+ assert_equal ['id', 'id_'], assembly.scope.grouping_fields.to_a
363
+ end
364
+
365
+ def test_create_join_many_fields
366
+ assembly = mock_two_input_assembly do
367
+ join 'test1', 'test2', :on => ['name', 'id']
368
+ end
369
+
370
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
371
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
372
+ assert_equal ['name', 'id'], left_grouping_fields.to_a
373
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
374
+ assert_equal ['name', 'id'], right_grouping_fields.to_a
185
375
 
186
- assert_equal 'offset', grouping_fields.get(0)
187
- assert_equal 'line', grouping_fields.get(1)
188
- assert assembly.tail_pipe.isSorted()
189
- assert assembly.tail_pipe.isSortReversed()
190
- assert_equal 'offset', sorting_fields.get(0)
191
- assert_equal 'line', sorting_fields.get(1)
376
+ assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
377
+ assert_equal ['name', 'id', 'name_', 'id_'], assembly.scope.grouping_fields.to_a
378
+ end
379
+
380
+ def test_create_join_with_declared_fields
381
+ assembly = mock_two_input_assembly do
382
+ join 'test1', 'test2', :on => 'name', :declared_fields => ['a', 'b', 'c', 'd', 'e', 'f', 'g']
383
+ end
384
+
385
+ assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
386
+
387
+ left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
388
+ assert_equal ['name'], left_grouping_fields.to_a
389
+
390
+ right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
391
+ assert_equal ['name'], right_grouping_fields.to_a
392
+
393
+ assert_equal ['a', 'b', 'c', 'd', 'e', 'f', 'g'], assembly.scope.values_fields.to_a
394
+ assert_equal ['name', 'name_'], assembly.scope.grouping_fields.to_a
395
+ end
396
+
397
+ def test_join_with_block
398
+ assembly = mock_two_input_assembly do
399
+ join 'test1', 'test2', :on => 'name' do
400
+ count
401
+ end
402
+ end
403
+
404
+ assert_equal Java::CascadingPipe::Every, assembly.tail_pipe.class
405
+
406
+ assert_equal ['name', 'name_', 'count'], assembly.scope.values_fields.to_a
407
+ assert_equal ['name', 'name_', 'count'], assembly.scope.grouping_fields.to_a
408
+ end
409
+
410
+ def test_join_undefined_inputs
411
+ assert_raise RuntimeError, "Could not find assembly 'doesnotexist' in join" do
412
+ flow 'test_join_undefined_inputs' do
413
+ source 'data1', tap('test/data/data1.txt')
414
+
415
+ assembly 'data1' do
416
+ pass
417
+ end
418
+
419
+ assembly 'join' do
420
+ join 'doesnotexist', 'data1', :on => 'name'
421
+ end
422
+
423
+ sink 'join', tap('output/test_join_undefined_inputs')
424
+ end
425
+ end
426
+ end
427
+
428
+ def test_join_without_on
429
+ assert_raise RuntimeError, 'join requires :on parameter' do
430
+ mock_two_input_assembly do
431
+ join 'test1', 'test2'
432
+ end
433
+ end
434
+ end
435
+
436
+ def test_join_invalid_on
437
+ assert_raise RuntimeError, "Unsupported data type for :on in join: 'Fixnum'" do
438
+ mock_two_input_assembly do
439
+ join 'test1', 'test2', :on => 1
440
+ end
441
+ end
442
+ end
443
+
444
+ def test_join_empty_on
445
+ assert_raise RuntimeError, 'join requres non-empty :on parameter' do
446
+ mock_two_input_assembly do
447
+ join 'test1', 'test2', :on => []
448
+ end
449
+ end
450
+
451
+ assert_raise RuntimeError, 'join requres non-empty :on parameter' do
452
+ mock_two_input_assembly do
453
+ join 'test1', 'test2', :on => {}
454
+ end
455
+ end
192
456
  end
193
457
 
194
458
  def test_branch_unique
@@ -198,7 +462,6 @@ class TC_Assembly < Test::Unit::TestCase
198
462
  end
199
463
 
200
464
  assert_equal 1, assembly.children.size
201
-
202
465
  end
203
466
 
204
467
  def test_branch_empty
@@ -231,20 +494,74 @@ class TC_Assembly < Test::Unit::TestCase
231
494
  assert_equal 0, assembly.children['branch1'].children['branch2'].children.size
232
495
  end
233
496
 
234
- # Fixed this test, but it isn't even valid. You shouldn't be able to follow
235
- # an Each with an Every.
236
- def test_full_assembly
237
- assert_raise CascadingException do
238
- assembly = mock_assembly do
239
- each('offset', :output => 'offset_copy',
240
- :filter => Java::CascadingOperation::Identity.new(fields('offset_copy')))
241
- every(:aggregator => count_function)
242
- end
497
+ def test_sub_assembly
498
+ assembly = mock_assembly do
499
+ sub_assembly Java::CascadingPipeAssembly::Discard.new(tail_pipe, fields('offset'))
500
+ end
501
+ assert_equal ['line'], assembly.scope.values_fields.to_a
243
502
 
244
- pipe = assembly.tail_pipe
503
+ assembly = mock_assembly do
504
+ sub_assembly Java::CascadingPipeAssembly::Retain.new(tail_pipe, fields('offset'))
505
+ end
506
+ assert_equal ['offset'], assembly.scope.values_fields.to_a
245
507
 
246
- assert pipe.is_a? Java::CascadingPipe::Every
508
+ assembly = mock_assembly do
509
+ sub_assembly Java::CascadingPipeAssembly::Rename.new(tail_pipe, fields(['offset', 'line']), fields(['byte', 'line']))
247
510
  end
511
+ assert_equal ['byte', 'line'], assembly.scope.values_fields.to_a
512
+
513
+ assembly = mock_assembly do
514
+ sub_assembly Java::CascadingPipeAssembly::Unique.new(tail_pipe, fields('line'))
515
+ end
516
+ assert_equal ['offset', 'line'], assembly.scope.values_fields.to_a
517
+ assert_equal ['offset', 'line'], assembly.scope.grouping_fields.to_a
518
+ end
519
+
520
+ def test_count_by_sub_assembly
521
+ assembly = mock_branched_assembly do
522
+ pipes, _ = populate_incoming_scopes(['test1', 'test2'])
523
+
524
+ aggregate_by = Java::CascadingPipeAssembly::AggregateBy.new(
525
+ name,
526
+ pipes.to_java(Java::CascadingPipe::Pipe),
527
+ fields('line'),
528
+ [Java::CascadingPipeAssembly::CountBy.new(fields('count'))].to_java(Java::CascadingPipeAssembly::AggregateBy)
529
+ )
530
+
531
+ sub_assembly aggregate_by, pipes, @incoming_scopes
532
+ end
533
+ assert_equal ['line', 'count'], assembly.scope.values_fields.to_a
534
+ assert_equal ['line', 'count'], assembly.scope.grouping_fields.to_a
535
+ end
536
+
537
+ def test_average_by_sub_assembly
538
+ assembly = mock_assembly do
539
+ aggregate_by = Java::CascadingPipeAssembly::AggregateBy.new(
540
+ name,
541
+ [tail_pipe].to_java(Java::CascadingPipe::Pipe),
542
+ fields('line'),
543
+ [Java::CascadingPipeAssembly::AverageBy.new(fields('offset'), fields('average'))].to_java(Java::CascadingPipeAssembly::AggregateBy)
544
+ )
545
+
546
+ sub_assembly aggregate_by
547
+ end
548
+ assert_equal ['line', 'average'], assembly.scope.values_fields.to_a
549
+ assert_equal ['line', 'average'], assembly.scope.grouping_fields.to_a
550
+ end
551
+
552
+ def test_sum_by_sub_assembly
553
+ assembly = mock_assembly do
554
+ aggregate_by = Java::CascadingPipeAssembly::AggregateBy.new(
555
+ name,
556
+ [tail_pipe].to_java(Java::CascadingPipe::Pipe),
557
+ fields('line'),
558
+ [Java::CascadingPipeAssembly::SumBy.new(fields('offset'), fields('sum'), Java::double.java_class)].to_java(Java::CascadingPipeAssembly::AggregateBy)
559
+ )
560
+
561
+ sub_assembly aggregate_by
562
+ end
563
+ assert_equal ['line', 'sum'], assembly.scope.values_fields.to_a
564
+ assert_equal ['line', 'sum'], assembly.scope.grouping_fields.to_a
248
565
  end
249
566
 
250
567
  def test_empty_where
@@ -252,10 +569,10 @@ class TC_Assembly < Test::Unit::TestCase
252
569
  split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
253
570
  where
254
571
  end
255
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
572
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
256
573
 
257
574
  # Empty where compiles away
258
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationRegex::RegexSplitter
575
+ assert_equal Java::CascadingOperationRegex::RegexSplitter, assembly.tail_pipe.operation.class
259
576
  end
260
577
 
261
578
  def test_where
@@ -263,8 +580,8 @@ class TC_Assembly < Test::Unit::TestCase
263
580
  split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
264
581
  where 'score1:double < score2:double'
265
582
  end
266
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
267
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
583
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
584
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
268
585
  end
269
586
 
270
587
  def test_where_with_expression
@@ -272,8 +589,8 @@ class TC_Assembly < Test::Unit::TestCase
272
589
  split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
273
590
  where :expression => 'score1:double < score2:double'
274
591
  end
275
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
276
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
592
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
593
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
277
594
  end
278
595
 
279
596
  def test_where_with_import
@@ -282,8 +599,8 @@ class TC_Assembly < Test::Unit::TestCase
282
599
  names = ['SMITH', 'JONES', 'BROWN']
283
600
  where "import java.util.Arrays;\nArrays.asList(new String[] { \"#{names.join('", "')}\" }).contains(name:string)"
284
601
  end
285
- assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
286
- assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
602
+ assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
603
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
287
604
  end
288
605
 
289
606
  def test_smoke_test_debug_scope
@@ -291,92 +608,16 @@ class TC_Assembly < Test::Unit::TestCase
291
608
  flow 'smoke' do
292
609
  source 'input', tap('test/data/data1.txt')
293
610
  assembly 'input' do
294
- pass
611
+ debug_scope
612
+ group_by 'line' do
613
+ count
614
+ sum 'offset', :type => :long
615
+ debug_scope
616
+ end
295
617
  debug_scope
296
618
  end
297
- sink 'input', tap('output/smoke_test_debug_scope')
619
+ sink 'input', tap('output/test_smoke_test_debug_scope')
298
620
  end
299
621
  end
300
622
  end
301
623
  end
302
-
303
- class TC_AssemblyScenarii < Test::Unit::TestCase
304
- def test_smoke_test_sequence_file_scheme
305
- cascade 'smoke' do
306
- flow 'smoke' do
307
- source 'input', tap('test/data/data1.txt')
308
- assembly 'input' do
309
- pass
310
- end
311
- compress_output :default, :block
312
- sink 'input', tap('output/smoke_test_sequence_file_scheme', :scheme => sequence_file_scheme)
313
- end
314
- end.complete
315
- end
316
-
317
- def test_splitter
318
- flow = flow "splitter" do
319
- source "copy", tap("test/data/data1.txt")
320
- sink "copy", tap('output/splitter', :sink_mode => :replace)
321
-
322
- assembly "copy" do
323
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "score1", "score2", "id"], :output => ["name", "score1", "score2", "id"]
324
- assert_size_equals 4
325
- assert_not_null
326
- debug :print_fields => true
327
- end
328
- end.complete
329
- end
330
-
331
- def test_join1
332
- cascade 'splitter' do
333
- flow 'splitter' do
334
- source "data1", tap("test/data/data1.txt")
335
- source "data2", tap("test/data/data2.txt")
336
- sink "joined", tap('output/joined', :sink_mode => :replace)
337
-
338
- assembly1 = assembly "data1" do
339
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "score1", "score2", "id"], :output => ["name", "score1", "score2", "id"]
340
- assert_size_equals 4
341
- assert_not_null
342
- debug :print_fields => true
343
- end
344
-
345
- assembly2 = assembly "data2" do
346
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "id", "town"], :output => ["name", "id", "town"]
347
- assert_size_equals 3
348
- assert_not_null
349
- debug :print_fields => true
350
- end
351
-
352
- assembly "joined" do
353
- join assembly1.name, assembly2.name, :on => ["name", "id"], :declared_fields => ["name", "score1", "score2", "id", "name2", "id2", "town"]
354
- assert_size_equals 7
355
- assert_not_null
356
- end
357
- end
358
- end.complete
359
- end
360
-
361
- def test_join2
362
- flow = flow "splitter" do
363
- source "data1", tap("test/data/data1.txt")
364
- source "data2", tap("test/data/data2.txt")
365
- sink "joined", tap('output/joined', :sink_mode => :replace)
366
-
367
- assembly "data1" do
368
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "score1", "score2", "id"], :output => ["name", "score1", "score2", "id"]
369
- debug :print_fields => true
370
- end
371
-
372
- assembly "data2" do
373
- split "line", :pattern => /[.,]*\s+/, :into=>["name", "code", "town"], :output => ["name", "code", "town"]
374
- debug :print_fields => true
375
- end
376
-
377
- assembly "joined" do
378
- join :on => {"data1"=>["name", "id"], "data2"=>["name", "code"]}, :declared_fields => ["name", "score1", "score2", "id", "name2", "code", "town"]
379
- end
380
- end.complete
381
- end
382
- end