cascading.jruby 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/lib/cascading/assembly.rb +138 -17
- data/lib/cascading/base.rb +0 -4
- data/lib/cascading/cascade.rb +25 -16
- data/lib/cascading/cascading.rb +25 -5
- data/lib/cascading/ext/array.rb +1 -7
- data/lib/cascading/flow.rb +18 -19
- data/lib/cascading/mode.rb +5 -1
- data/lib/cascading/operations.rb +11 -4
- data/lib/cascading/tap.rb +4 -0
- data/lib/cascading.rb +1 -5
- data/test/test_assembly.rb +135 -29
- data/test/test_cascade.rb +80 -0
- data/test/test_flow.rb +20 -0
- data/test/test_operations.rb +3 -2
- metadata +6 -76
- data/.travis.yml +0 -6
- data/Gemfile +0 -6
- data/Gemfile.lock +0 -12
- data/HACKING.md +0 -23
- data/README.md +0 -9
- data/Rakefile +0 -46
- data/TODO +0 -13
- data/bin/make_job +0 -81
- data/ivy.xml +0 -25
- data/ivysettings.xml +0 -7
- data/samples/branch.rb +0 -30
- data/samples/copy.rb +0 -20
- data/samples/data/data2.txt +0 -88799
- data/samples/data/data_group_by.txt +0 -7
- data/samples/data/data_join1.txt +0 -3
- data/samples/data/data_join2.txt +0 -3
- data/samples/data/data_join3.txt +0 -3
- data/samples/data/genealogy/names/dist.all.last +0 -88799
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
- data/samples/group_by.rb +0 -61
- data/samples/join.rb +0 -31
- data/samples/logwordcount.rb +0 -22
- data/samples/project.rb +0 -23
- data/samples/rename.rb +0 -20
- data/samples/scorenames.rb +0 -20
- data/samples/splitter.rb +0 -19
- data/samples/sub_assembly.rb +0 -30
- data/samples/union.rb +0 -36
- data/spec/cascading_spec.rb +0 -105
- data/spec/expr_spec.rb +0 -230
- data/spec/jruby_version_spec.rb +0 -72
- data/spec/resource/join_input.txt +0 -3
- data/spec/resource/test_input.txt +0 -4
- data/spec/scope_spec.rb +0 -149
- data/spec/spec.opts +0 -6
- data/spec/spec_helper.rb +0 -5
- data/spec/spec_util.rb +0 -92
- data/src/cascading/jruby/Main.java +0 -38
- data/src/cascading/jruby/runner.rb +0 -6
- data/tags +0 -342
- data/tasks/ann.rake +0 -80
- data/tasks/ant.rake +0 -23
- data/tasks/bones.rake +0 -20
- data/tasks/gem.rake +0 -206
- data/tasks/git.rake +0 -40
- data/tasks/notes.rake +0 -27
- data/tasks/post_load.rake +0 -34
- data/tasks/rdoc.rake +0 -50
- data/tasks/rubyforge.rake +0 -55
- data/tasks/samples.rake +0 -19
- data/tasks/setup.rb +0 -300
- data/tasks/spec.rake +0 -59
- data/tasks/svn.rake +0 -47
- data/tasks/test.rake +0 -42
- data/test/data/data1.txt +0 -14
- data/test/data/data2.txt +0 -14
- data/test/mock_assemblies.rb +0 -55
data/test/test_assembly.rb
CHANGED
@@ -62,7 +62,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
62
62
|
count
|
63
63
|
end
|
64
64
|
pipe = assembly.tail_pipe
|
65
|
-
|
65
|
+
assert_equal Java::CascadingPipe::Every, pipe.class
|
66
66
|
end
|
67
67
|
assert_match /^undefined local variable or method `count' for #<Cascading::Assembly:.*>$/, ex.message
|
68
68
|
end
|
@@ -74,7 +74,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
74
74
|
every 'line', :aggregator => count_aggregator, :output => 'count'
|
75
75
|
end
|
76
76
|
end
|
77
|
-
|
77
|
+
assert_equal Java::CascadingPipe::Every, assembly.tail_pipe.class
|
78
78
|
assert_equal ['line'], assembly.tail_pipe.argument_selector.to_a
|
79
79
|
assert_equal ['count'], assembly.tail_pipe.output_selector.to_a
|
80
80
|
|
@@ -83,7 +83,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
83
83
|
count
|
84
84
|
end
|
85
85
|
end
|
86
|
-
|
86
|
+
assert_equal Java::CascadingPipe::Every, assembly.tail_pipe.class
|
87
87
|
|
88
88
|
# NOTE: this is not valid when we optimize using CountBy
|
89
89
|
#assert_equal last_grouping_fields, assembly.tail_pipe.argument_selector
|
@@ -370,8 +370,10 @@ class TC_Assembly < Test::Unit::TestCase
|
|
370
370
|
end
|
371
371
|
|
372
372
|
assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
|
373
|
+
|
373
374
|
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
374
375
|
assert_equal ['name', 'id'], left_grouping_fields.to_a
|
376
|
+
|
375
377
|
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
376
378
|
assert_equal ['name', 'id'], right_grouping_fields.to_a
|
377
379
|
|
@@ -410,56 +412,142 @@ class TC_Assembly < Test::Unit::TestCase
|
|
410
412
|
end
|
411
413
|
|
412
414
|
def test_join_undefined_inputs
|
413
|
-
|
414
|
-
|
415
|
-
|
415
|
+
[:join, :hash_join].each do |join|
|
416
|
+
ex = assert_raise RuntimeError do
|
417
|
+
flow 'test_join_undefined_inputs' do
|
418
|
+
source 'data1', tap('test/data/data1.txt')
|
416
419
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
+
assembly 'data1' do
|
421
|
+
pass
|
422
|
+
end
|
420
423
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
+
assembly 'join' do
|
425
|
+
send(join, 'doesnotexist', 'data1', :on => 'name')
|
426
|
+
end
|
424
427
|
|
425
|
-
|
428
|
+
sink 'join', tap('output/test_join_undefined_inputs')
|
429
|
+
end
|
426
430
|
end
|
431
|
+
assert_equal "Could not find assembly 'doesnotexist' from 'join'", ex.message
|
427
432
|
end
|
428
|
-
assert_equal "Could not find assembly 'doesnotexist' from 'join'", ex.message
|
429
433
|
end
|
430
434
|
|
431
435
|
def test_join_without_on
|
432
|
-
|
433
|
-
|
434
|
-
|
436
|
+
[:join, :hash_join].each do |join|
|
437
|
+
ex = assert_raise RuntimeError do
|
438
|
+
mock_two_input_assembly do
|
439
|
+
send(join, 'test1', 'test2')
|
440
|
+
end
|
435
441
|
end
|
442
|
+
assert_equal 'join requires :on parameter', ex.message
|
436
443
|
end
|
437
|
-
assert_equal 'join requires :on parameter', ex.message
|
438
444
|
end
|
439
445
|
|
440
446
|
def test_join_invalid_on
|
441
|
-
|
442
|
-
|
443
|
-
|
447
|
+
[:join, :hash_join].each do |join|
|
448
|
+
ex = assert_raise RuntimeError do
|
449
|
+
mock_two_input_assembly do
|
450
|
+
send(join, 'test1', 'test2', :on => 1)
|
451
|
+
end
|
444
452
|
end
|
453
|
+
assert_equal "Unsupported data type for :on in join: 'Fixnum'", ex.message
|
445
454
|
end
|
446
|
-
assert_equal "Unsupported data type for :on in join: 'Fixnum'", ex.message
|
447
455
|
end
|
448
456
|
|
449
457
|
def test_join_empty_on
|
450
|
-
|
451
|
-
|
452
|
-
|
458
|
+
[:join, :hash_join].each do |join|
|
459
|
+
ex = assert_raise RuntimeError do
|
460
|
+
mock_two_input_assembly do
|
461
|
+
send(join, 'test1', 'test2', :on => [])
|
462
|
+
end
|
463
|
+
end
|
464
|
+
assert_equal "join requires non-empty :on parameter", ex.message
|
465
|
+
|
466
|
+
ex = assert_raise RuntimeError do
|
467
|
+
mock_two_input_assembly do
|
468
|
+
send(join, 'test1', 'test2', :on => {})
|
469
|
+
end
|
453
470
|
end
|
471
|
+
assert_equal "join requires non-empty :on parameter", ex.message
|
454
472
|
end
|
455
|
-
|
473
|
+
end
|
456
474
|
|
457
|
-
|
475
|
+
def test_create_hash_join
|
476
|
+
assembly = mock_two_input_assembly do
|
477
|
+
hash_join 'test1', 'test2', :on => 'name'
|
478
|
+
end
|
479
|
+
|
480
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
481
|
+
|
482
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
483
|
+
assert_equal ['name'], left_grouping_fields.to_a
|
484
|
+
|
485
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
486
|
+
assert_equal ['name'], right_grouping_fields.to_a
|
487
|
+
|
488
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
|
489
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
490
|
+
assert_equal ['name'], assembly.scope.grouping_fields.to_a
|
491
|
+
|
492
|
+
assembly = mock_two_input_assembly do
|
493
|
+
hash_join 'test1', 'test2', :on => 'id'
|
494
|
+
end
|
495
|
+
|
496
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
497
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
498
|
+
assert_equal ['id'], left_grouping_fields.to_a
|
499
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
500
|
+
assert_equal ['id'], right_grouping_fields.to_a
|
501
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
|
502
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
503
|
+
assert_equal ['id'], assembly.scope.grouping_fields.to_a
|
504
|
+
end
|
505
|
+
|
506
|
+
def create_hash_join_many_fields
|
507
|
+
assembly = mock_two_input_assembly do
|
508
|
+
hash_join 'test1', 'test2', :on => ['name', 'id']
|
509
|
+
end
|
510
|
+
|
511
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
512
|
+
|
513
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
514
|
+
assert_equal ['name', 'id'], left_grouping_fields.to_a
|
515
|
+
|
516
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
517
|
+
assert_equal ['name', 'id'], right_grouping_fields.to_a
|
518
|
+
|
519
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
|
520
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
521
|
+
assert_equal ['name', 'id'], assembly.scope.grouping_fields.to_a
|
522
|
+
end
|
523
|
+
|
524
|
+
def create_hash_join_with_declared_fields
|
525
|
+
assembly = mock_two_input_assembly do
|
526
|
+
hash_join 'test1', 'test2', :on => 'name', :declared_fields => ['a', 'b', 'c', 'd', 'e', 'f', 'g']
|
527
|
+
end
|
528
|
+
|
529
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
530
|
+
|
531
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
532
|
+
assert_equal ['name'], left_grouping_fields.to_a
|
533
|
+
|
534
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
535
|
+
assert_equal ['name'], right_grouping_fields.to_a
|
536
|
+
|
537
|
+
assert_equal ['a', 'b', 'c', 'd', 'e', 'f', 'g'], assembly.scope.values_fields.to_a
|
538
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
539
|
+
assert_equal ['name'], assembly.scope.grouping_fields.to_a
|
540
|
+
end
|
541
|
+
|
542
|
+
def test_hash_join_with_block
|
543
|
+
ex = assert_raise ArgumentError do
|
458
544
|
mock_two_input_assembly do
|
459
|
-
|
545
|
+
hash_join 'test1', 'test2', :on => 'name' do
|
546
|
+
count
|
547
|
+
end
|
460
548
|
end
|
461
549
|
end
|
462
|
-
assert_equal "
|
550
|
+
assert_equal "hash joins don't support aggregations", ex.message
|
463
551
|
end
|
464
552
|
|
465
553
|
def test_branch_unique
|
@@ -610,6 +698,24 @@ class TC_Assembly < Test::Unit::TestCase
|
|
610
698
|
assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
|
611
699
|
end
|
612
700
|
|
701
|
+
def test_smoke_test_describe
|
702
|
+
cascade 'smoke' do
|
703
|
+
flow 'smoke' do
|
704
|
+
source 'input', tap('test/data/data1.txt')
|
705
|
+
assembly 'input' do
|
706
|
+
puts "Describe at assembly start: '#{describe}'"
|
707
|
+
group_by 'line' do
|
708
|
+
count
|
709
|
+
sum 'offset', :type => :long
|
710
|
+
puts "Describe at group_by end (falls out to top-level Cascading::describe): '#{describe}'"
|
711
|
+
end
|
712
|
+
puts "Describe at assembly end: '#{describe}'"
|
713
|
+
end
|
714
|
+
sink 'input', tap('output/test_smoke_test_debug_scope')
|
715
|
+
end
|
716
|
+
end
|
717
|
+
end
|
718
|
+
|
613
719
|
def test_smoke_test_debug_scope
|
614
720
|
cascade 'smoke' do
|
615
721
|
flow 'smoke' do
|
data/test/test_cascade.rb
CHANGED
@@ -45,6 +45,67 @@ class TC_Cascade < Test::Unit::TestCase
|
|
45
45
|
assert_equal 'cascade.flow2.assembly4', cascade.last_child.last_child.qualified_name
|
46
46
|
end
|
47
47
|
|
48
|
+
def test_default_properties
|
49
|
+
f1, f2 = nil, nil
|
50
|
+
cascade = cascade 'cascade' do
|
51
|
+
f1 = flow 'flow1' do
|
52
|
+
end
|
53
|
+
|
54
|
+
f2 = flow 'flow2' do
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
assert_equal({}, cascade.properties)
|
59
|
+
assert_equal({}, f1.properties)
|
60
|
+
assert_equal({}, f2.properties)
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_set_properties
|
64
|
+
# Simulate the global properties set by jading
|
65
|
+
$jobconf_properties = {
|
66
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
67
|
+
'external_overwrite' => 'external_overwrite',
|
68
|
+
}
|
69
|
+
|
70
|
+
f1, f2 = nil, nil
|
71
|
+
cascade = cascade 'cascade' do
|
72
|
+
properties['external_overwrite'] = 'overwritten'
|
73
|
+
properties['internal_no_overwrite'] = 'internal_no_overwrite'
|
74
|
+
properties['internal_overwrite'] = 'internal_overwrite'
|
75
|
+
|
76
|
+
f1 = flow 'flow1' do
|
77
|
+
properties['external_overwrite'] = 'overwritten_flow1'
|
78
|
+
properties['internal_overwrite'] = 'overwritten_flow1'
|
79
|
+
end
|
80
|
+
|
81
|
+
f2 = flow 'flow2' do
|
82
|
+
properties['external_overwrite'] = 'overwritten_flow2'
|
83
|
+
properties['internal_overwrite'] = 'overwritten_flow2'
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
assert_equal({
|
88
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
89
|
+
'external_overwrite' => 'overwritten',
|
90
|
+
'internal_no_overwrite' => 'internal_no_overwrite',
|
91
|
+
'internal_overwrite' => 'internal_overwrite',
|
92
|
+
}, cascade.properties)
|
93
|
+
|
94
|
+
assert_equal({
|
95
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
96
|
+
'external_overwrite' => 'overwritten_flow1',
|
97
|
+
'internal_no_overwrite' => 'internal_no_overwrite',
|
98
|
+
'internal_overwrite' => 'overwritten_flow1',
|
99
|
+
}, f1.properties)
|
100
|
+
|
101
|
+
assert_equal({
|
102
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
103
|
+
'external_overwrite' => 'overwritten_flow2',
|
104
|
+
'internal_no_overwrite' => 'internal_no_overwrite',
|
105
|
+
'internal_overwrite' => 'overwritten_flow2',
|
106
|
+
}, f2.properties)
|
107
|
+
end
|
108
|
+
|
48
109
|
def test_ambiguous_flow_names
|
49
110
|
ex = assert_raise AmbiguousNodeNameException do
|
50
111
|
f1, a1, a2, f2, a3, a4 = [nil] * 6
|
@@ -99,4 +160,23 @@ class TC_Cascade < Test::Unit::TestCase
|
|
99
160
|
# NOTE: Looking up flows this way is not a very common practice, so this is
|
100
161
|
# unlikely to cause issues
|
101
162
|
end
|
163
|
+
|
164
|
+
def test_smoke_test_describe
|
165
|
+
cascade 'smoke' do
|
166
|
+
puts "Describe at cascade start: '#{describe}'"
|
167
|
+
|
168
|
+
flow 'smoke' do
|
169
|
+
source 'input', tap('test/data/data1.txt')
|
170
|
+
assembly 'input' do
|
171
|
+
group_by 'line' do
|
172
|
+
count
|
173
|
+
sum 'offset', :type => :long
|
174
|
+
end
|
175
|
+
end
|
176
|
+
sink 'input', tap('output/test_smoke_test_debug_scope')
|
177
|
+
end
|
178
|
+
|
179
|
+
puts "Describe at cascade end: '#{describe}'"
|
180
|
+
end
|
181
|
+
end
|
102
182
|
end
|
data/test/test_flow.rb
CHANGED
@@ -113,4 +113,24 @@ class TC_Flow < Test::Unit::TestCase
|
|
113
113
|
end
|
114
114
|
assert_equal "Ambiguous lookup of child by name 'b'; found 'flow.b', 'flow.a.b'", ex.message
|
115
115
|
end
|
116
|
+
|
117
|
+
def test_smoke_test_describe
|
118
|
+
cascade 'smoke' do
|
119
|
+
flow 'smoke' do
|
120
|
+
puts "Describe at flow start: '#{describe}'"
|
121
|
+
source 'input', tap('test/data/data1.txt')
|
122
|
+
|
123
|
+
puts "Describe before first assembly: '#{describe}'"
|
124
|
+
assembly 'input' do
|
125
|
+
group_by 'line' do
|
126
|
+
count
|
127
|
+
sum 'offset', :type => :long
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
sink 'input', tap('output/test_smoke_test_debug_scope')
|
132
|
+
puts "Describe at flow end: '#{describe}'"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
116
136
|
end
|
data/test/test_operations.rb
CHANGED
@@ -62,11 +62,12 @@ class TC_Operations < Test::Unit::TestCase
|
|
62
62
|
end
|
63
63
|
|
64
64
|
def test_coerce_to_java_other
|
65
|
-
|
65
|
+
orig_val = [1,2,3]
|
66
|
+
result = coerce_to_java(orig_val)
|
66
67
|
|
67
68
|
assert_equal Java::JavaLang::String, result.class
|
68
69
|
assert_equal ''.to_java(java.lang.String).java_class, result.java_class
|
69
|
-
assert_equal
|
70
|
+
assert_equal orig_val.to_s.to_java(java.lang.String), result
|
70
71
|
end
|
71
72
|
|
72
73
|
def test_to_java_comparable_array
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cascading.jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.9
|
5
|
+
version: 0.0.10
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Matt Walker
|
@@ -11,41 +11,19 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-08-
|
14
|
+
date: 2012-08-09 00:00:00 Z
|
15
15
|
dependencies: []
|
16
16
|
|
17
17
|
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
18
18
|
email: mwalker@etsy.com
|
19
|
-
executables:
|
20
|
-
|
19
|
+
executables: []
|
20
|
+
|
21
21
|
extensions: []
|
22
22
|
|
23
23
|
extra_rdoc_files:
|
24
24
|
- History.txt
|
25
25
|
- LICENSE.txt
|
26
|
-
- bin/make_job
|
27
|
-
- samples/data/data2.txt
|
28
|
-
- samples/data/data_group_by.txt
|
29
|
-
- samples/data/data_join1.txt
|
30
|
-
- samples/data/data_join2.txt
|
31
|
-
- samples/data/data_join3.txt
|
32
|
-
- spec/resource/join_input.txt
|
33
|
-
- spec/resource/test_input.txt
|
34
|
-
- test/data/data1.txt
|
35
|
-
- test/data/data2.txt
|
36
26
|
files:
|
37
|
-
- .travis.yml
|
38
|
-
- Gemfile
|
39
|
-
- Gemfile.lock
|
40
|
-
- HACKING.md
|
41
|
-
- History.txt
|
42
|
-
- LICENSE.txt
|
43
|
-
- README.md
|
44
|
-
- Rakefile
|
45
|
-
- TODO
|
46
|
-
- bin/make_job
|
47
|
-
- ivy.xml
|
48
|
-
- ivysettings.xml
|
49
27
|
- lib/cascading.rb
|
50
28
|
- lib/cascading/aggregations.rb
|
51
29
|
- lib/cascading/assembly.rb
|
@@ -61,53 +39,8 @@ files:
|
|
61
39
|
- lib/cascading/scope.rb
|
62
40
|
- lib/cascading/sub_assembly.rb
|
63
41
|
- lib/cascading/tap.rb
|
64
|
-
-
|
65
|
-
-
|
66
|
-
- samples/data/data2.txt
|
67
|
-
- samples/data/data_group_by.txt
|
68
|
-
- samples/data/data_join1.txt
|
69
|
-
- samples/data/data_join2.txt
|
70
|
-
- samples/data/data_join3.txt
|
71
|
-
- samples/data/genealogy/names/dist.all.last
|
72
|
-
- samples/data/gutenberg/the_outline_of_science_vol_1
|
73
|
-
- samples/group_by.rb
|
74
|
-
- samples/join.rb
|
75
|
-
- samples/logwordcount.rb
|
76
|
-
- samples/project.rb
|
77
|
-
- samples/rename.rb
|
78
|
-
- samples/scorenames.rb
|
79
|
-
- samples/splitter.rb
|
80
|
-
- samples/sub_assembly.rb
|
81
|
-
- samples/union.rb
|
82
|
-
- spec/cascading_spec.rb
|
83
|
-
- spec/expr_spec.rb
|
84
|
-
- spec/jruby_version_spec.rb
|
85
|
-
- spec/resource/join_input.txt
|
86
|
-
- spec/resource/test_input.txt
|
87
|
-
- spec/scope_spec.rb
|
88
|
-
- spec/spec.opts
|
89
|
-
- spec/spec_helper.rb
|
90
|
-
- spec/spec_util.rb
|
91
|
-
- src/cascading/jruby/Main.java
|
92
|
-
- src/cascading/jruby/runner.rb
|
93
|
-
- tags
|
94
|
-
- tasks/ann.rake
|
95
|
-
- tasks/ant.rake
|
96
|
-
- tasks/bones.rake
|
97
|
-
- tasks/gem.rake
|
98
|
-
- tasks/git.rake
|
99
|
-
- tasks/notes.rake
|
100
|
-
- tasks/post_load.rake
|
101
|
-
- tasks/rdoc.rake
|
102
|
-
- tasks/rubyforge.rake
|
103
|
-
- tasks/samples.rake
|
104
|
-
- tasks/setup.rb
|
105
|
-
- tasks/spec.rake
|
106
|
-
- tasks/svn.rake
|
107
|
-
- tasks/test.rake
|
108
|
-
- test/data/data1.txt
|
109
|
-
- test/data/data2.txt
|
110
|
-
- test/mock_assemblies.rb
|
42
|
+
- History.txt
|
43
|
+
- LICENSE.txt
|
111
44
|
- test/test_aggregations.rb
|
112
45
|
- test/test_assembly.rb
|
113
46
|
- test/test_cascade.rb
|
@@ -130,9 +63,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
130
63
|
requirements:
|
131
64
|
- - ">="
|
132
65
|
- !ruby/object:Gem::Version
|
133
|
-
hash: 2
|
134
|
-
segments:
|
135
|
-
- 0
|
136
66
|
version: "0"
|
137
67
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
68
|
none: false
|
data/.travis.yml
DELETED
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
data/HACKING.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# Hacking
|
2
|
-
|
3
|
-
Some hacking info on `cascading.jruby`:
|
4
|
-
|
5
|
-
For local development, install with (requires [bundler](http://gembundler.com/)):
|
6
|
-
|
7
|
-
jruby -S bundle install
|
8
|
-
|
9
|
-
To run the tests (will download Cascading and Hadoop jars):
|
10
|
-
|
11
|
-
jruby -S bundle exec rake
|
12
|
-
|
13
|
-
To create the gem:
|
14
|
-
|
15
|
-
jruby -S bundle exec rake gem
|
16
|
-
|
17
|
-
To install it locally:
|
18
|
-
|
19
|
-
jruby -S gem install pkg/cascading.jruby-xxx.gem
|
20
|
-
|
21
|
-
The `Cascading::Operations` module is mixed-in the `Cascading::Assembly` class to provide some shortcuts for common operations.
|
22
|
-
|
23
|
-
The file cascading/cascading.rb defines global helper methods for cascading like tap creation, fields creation, etc.
|
data/README.md
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# Cascading.JRuby [![Build Status](https://secure.travis-ci.org/mrwalker/cascading.jruby.png)](http://travis-ci.org/mrwalker/cascading.jruby)
|
2
|
-
|
3
|
-
`cascading.jruby` is a small DSL above [Cascading](http://www.cascading.org/).
|
4
|
-
|
5
|
-
It requires Hadoop (>= 0.20.2) and [Cascading 2.0.0](http://files.cascading.org/cascading/2.0/cascading-2.0.0.tgz) to be set via the environment variables: `HADOOP_HOME` and `CASCADING_HOME`
|
6
|
-
|
7
|
-
It has been tested on JRuby versions 1.2.0, 1.4.0, 1.5.3, and 1.6.5.
|
8
|
-
|
9
|
-
Copyright 2009, Grégoire Marabout.
|
data/Rakefile
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
#! /usr/bin/env jruby
|
2
|
-
|
3
|
-
# Look in the tasks/setup.rb file for the various options that can be
|
4
|
-
# configured in this Rakefile. The .rake files in the tasks directory
|
5
|
-
# are where the options are used.
|
6
|
-
|
7
|
-
begin
|
8
|
-
require 'bones'
|
9
|
-
Bones.setup
|
10
|
-
rescue LoadError
|
11
|
-
begin
|
12
|
-
load 'tasks/setup.rb'
|
13
|
-
rescue LoadError
|
14
|
-
raise RuntimeError, '### please install the "bones" gem ###'
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
ensure_in_path 'lib'
|
19
|
-
|
20
|
-
require 'cascading'
|
21
|
-
require 'bundler/setup'
|
22
|
-
|
23
|
-
task :default => 'test'
|
24
|
-
|
25
|
-
task :run do
|
26
|
-
# ensure_in_path "samples"
|
27
|
-
puts "Running #{ARGS[0]}"
|
28
|
-
require "samples/#{ARGS[0]}"
|
29
|
-
end
|
30
|
-
|
31
|
-
desc 'Remove gem and Java build files'
|
32
|
-
task :clean => ['ant:clean', 'gem:clean', 'samples:clean'] do
|
33
|
-
puts 'Build files removed'
|
34
|
-
end
|
35
|
-
|
36
|
-
PROJ.name = 'cascading.jruby'
|
37
|
-
PROJ.authors = ['Matt Walker', 'Grégoire Marabout']
|
38
|
-
PROJ.email = 'mwalker@etsy.com'
|
39
|
-
PROJ.url = 'http://github.com/etsy/cascading.jruby'
|
40
|
-
PROJ.version = Cascading::VERSION
|
41
|
-
PROJ.summary = 'A JRuby DSL for Cascading'
|
42
|
-
PROJ.description = 'cascading.jruby is a small DSL above Cascading, written in JRuby'
|
43
|
-
PROJ.rubyforge.name = 'cascading.jruby'
|
44
|
-
PROJ.spec.opts << '--color'
|
45
|
-
|
46
|
-
# EOF
|
data/TODO
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
Documentation
|
2
|
-
|
3
|
-
Import local_job/Job from Etsy, which can eliminate registries
|
4
|
-
|
5
|
-
Bug fixes on github
|
6
|
-
Enforce more runtime rules at composition time
|
7
|
-
Standardize helper contracts
|
8
|
-
Possibly combine unit tests...into unit tests because RSpec sucks and swallows stack traces
|
9
|
-
|
10
|
-
(Jading)
|
11
|
-
Split out runner
|
12
|
-
Make runner implement Tool
|
13
|
-
Create build tool for job jar
|