cascading.jruby 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/lib/cascading/assembly.rb +138 -17
- data/lib/cascading/base.rb +0 -4
- data/lib/cascading/cascade.rb +25 -16
- data/lib/cascading/cascading.rb +25 -5
- data/lib/cascading/ext/array.rb +1 -7
- data/lib/cascading/flow.rb +18 -19
- data/lib/cascading/mode.rb +5 -1
- data/lib/cascading/operations.rb +11 -4
- data/lib/cascading/tap.rb +4 -0
- data/lib/cascading.rb +1 -5
- data/test/test_assembly.rb +135 -29
- data/test/test_cascade.rb +80 -0
- data/test/test_flow.rb +20 -0
- data/test/test_operations.rb +3 -2
- metadata +6 -76
- data/.travis.yml +0 -6
- data/Gemfile +0 -6
- data/Gemfile.lock +0 -12
- data/HACKING.md +0 -23
- data/README.md +0 -9
- data/Rakefile +0 -46
- data/TODO +0 -13
- data/bin/make_job +0 -81
- data/ivy.xml +0 -25
- data/ivysettings.xml +0 -7
- data/samples/branch.rb +0 -30
- data/samples/copy.rb +0 -20
- data/samples/data/data2.txt +0 -88799
- data/samples/data/data_group_by.txt +0 -7
- data/samples/data/data_join1.txt +0 -3
- data/samples/data/data_join2.txt +0 -3
- data/samples/data/data_join3.txt +0 -3
- data/samples/data/genealogy/names/dist.all.last +0 -88799
- data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
- data/samples/group_by.rb +0 -61
- data/samples/join.rb +0 -31
- data/samples/logwordcount.rb +0 -22
- data/samples/project.rb +0 -23
- data/samples/rename.rb +0 -20
- data/samples/scorenames.rb +0 -20
- data/samples/splitter.rb +0 -19
- data/samples/sub_assembly.rb +0 -30
- data/samples/union.rb +0 -36
- data/spec/cascading_spec.rb +0 -105
- data/spec/expr_spec.rb +0 -230
- data/spec/jruby_version_spec.rb +0 -72
- data/spec/resource/join_input.txt +0 -3
- data/spec/resource/test_input.txt +0 -4
- data/spec/scope_spec.rb +0 -149
- data/spec/spec.opts +0 -6
- data/spec/spec_helper.rb +0 -5
- data/spec/spec_util.rb +0 -92
- data/src/cascading/jruby/Main.java +0 -38
- data/src/cascading/jruby/runner.rb +0 -6
- data/tags +0 -342
- data/tasks/ann.rake +0 -80
- data/tasks/ant.rake +0 -23
- data/tasks/bones.rake +0 -20
- data/tasks/gem.rake +0 -206
- data/tasks/git.rake +0 -40
- data/tasks/notes.rake +0 -27
- data/tasks/post_load.rake +0 -34
- data/tasks/rdoc.rake +0 -50
- data/tasks/rubyforge.rake +0 -55
- data/tasks/samples.rake +0 -19
- data/tasks/setup.rb +0 -300
- data/tasks/spec.rake +0 -59
- data/tasks/svn.rake +0 -47
- data/tasks/test.rake +0 -42
- data/test/data/data1.txt +0 -14
- data/test/data/data2.txt +0 -14
- data/test/mock_assemblies.rb +0 -55
data/test/test_assembly.rb
CHANGED
@@ -62,7 +62,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
62
62
|
count
|
63
63
|
end
|
64
64
|
pipe = assembly.tail_pipe
|
65
|
-
|
65
|
+
assert_equal Java::CascadingPipe::Every, pipe.class
|
66
66
|
end
|
67
67
|
assert_match /^undefined local variable or method `count' for #<Cascading::Assembly:.*>$/, ex.message
|
68
68
|
end
|
@@ -74,7 +74,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
74
74
|
every 'line', :aggregator => count_aggregator, :output => 'count'
|
75
75
|
end
|
76
76
|
end
|
77
|
-
|
77
|
+
assert_equal Java::CascadingPipe::Every, assembly.tail_pipe.class
|
78
78
|
assert_equal ['line'], assembly.tail_pipe.argument_selector.to_a
|
79
79
|
assert_equal ['count'], assembly.tail_pipe.output_selector.to_a
|
80
80
|
|
@@ -83,7 +83,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
83
83
|
count
|
84
84
|
end
|
85
85
|
end
|
86
|
-
|
86
|
+
assert_equal Java::CascadingPipe::Every, assembly.tail_pipe.class
|
87
87
|
|
88
88
|
# NOTE: this is not valid when we optimize using CountBy
|
89
89
|
#assert_equal last_grouping_fields, assembly.tail_pipe.argument_selector
|
@@ -370,8 +370,10 @@ class TC_Assembly < Test::Unit::TestCase
|
|
370
370
|
end
|
371
371
|
|
372
372
|
assert_equal Java::CascadingPipe::CoGroup, assembly.tail_pipe.class
|
373
|
+
|
373
374
|
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
374
375
|
assert_equal ['name', 'id'], left_grouping_fields.to_a
|
376
|
+
|
375
377
|
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
376
378
|
assert_equal ['name', 'id'], right_grouping_fields.to_a
|
377
379
|
|
@@ -410,56 +412,142 @@ class TC_Assembly < Test::Unit::TestCase
|
|
410
412
|
end
|
411
413
|
|
412
414
|
def test_join_undefined_inputs
|
413
|
-
|
414
|
-
|
415
|
-
|
415
|
+
[:join, :hash_join].each do |join|
|
416
|
+
ex = assert_raise RuntimeError do
|
417
|
+
flow 'test_join_undefined_inputs' do
|
418
|
+
source 'data1', tap('test/data/data1.txt')
|
416
419
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
+
assembly 'data1' do
|
421
|
+
pass
|
422
|
+
end
|
420
423
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
+
assembly 'join' do
|
425
|
+
send(join, 'doesnotexist', 'data1', :on => 'name')
|
426
|
+
end
|
424
427
|
|
425
|
-
|
428
|
+
sink 'join', tap('output/test_join_undefined_inputs')
|
429
|
+
end
|
426
430
|
end
|
431
|
+
assert_equal "Could not find assembly 'doesnotexist' from 'join'", ex.message
|
427
432
|
end
|
428
|
-
assert_equal "Could not find assembly 'doesnotexist' from 'join'", ex.message
|
429
433
|
end
|
430
434
|
|
431
435
|
def test_join_without_on
|
432
|
-
|
433
|
-
|
434
|
-
|
436
|
+
[:join, :hash_join].each do |join|
|
437
|
+
ex = assert_raise RuntimeError do
|
438
|
+
mock_two_input_assembly do
|
439
|
+
send(join, 'test1', 'test2')
|
440
|
+
end
|
435
441
|
end
|
442
|
+
assert_equal 'join requires :on parameter', ex.message
|
436
443
|
end
|
437
|
-
assert_equal 'join requires :on parameter', ex.message
|
438
444
|
end
|
439
445
|
|
440
446
|
def test_join_invalid_on
|
441
|
-
|
442
|
-
|
443
|
-
|
447
|
+
[:join, :hash_join].each do |join|
|
448
|
+
ex = assert_raise RuntimeError do
|
449
|
+
mock_two_input_assembly do
|
450
|
+
send(join, 'test1', 'test2', :on => 1)
|
451
|
+
end
|
444
452
|
end
|
453
|
+
assert_equal "Unsupported data type for :on in join: 'Fixnum'", ex.message
|
445
454
|
end
|
446
|
-
assert_equal "Unsupported data type for :on in join: 'Fixnum'", ex.message
|
447
455
|
end
|
448
456
|
|
449
457
|
def test_join_empty_on
|
450
|
-
|
451
|
-
|
452
|
-
|
458
|
+
[:join, :hash_join].each do |join|
|
459
|
+
ex = assert_raise RuntimeError do
|
460
|
+
mock_two_input_assembly do
|
461
|
+
send(join, 'test1', 'test2', :on => [])
|
462
|
+
end
|
463
|
+
end
|
464
|
+
assert_equal "join requires non-empty :on parameter", ex.message
|
465
|
+
|
466
|
+
ex = assert_raise RuntimeError do
|
467
|
+
mock_two_input_assembly do
|
468
|
+
send(join, 'test1', 'test2', :on => {})
|
469
|
+
end
|
453
470
|
end
|
471
|
+
assert_equal "join requires non-empty :on parameter", ex.message
|
454
472
|
end
|
455
|
-
|
473
|
+
end
|
456
474
|
|
457
|
-
|
475
|
+
def test_create_hash_join
|
476
|
+
assembly = mock_two_input_assembly do
|
477
|
+
hash_join 'test1', 'test2', :on => 'name'
|
478
|
+
end
|
479
|
+
|
480
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
481
|
+
|
482
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
483
|
+
assert_equal ['name'], left_grouping_fields.to_a
|
484
|
+
|
485
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
486
|
+
assert_equal ['name'], right_grouping_fields.to_a
|
487
|
+
|
488
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
|
489
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
490
|
+
assert_equal ['name'], assembly.scope.grouping_fields.to_a
|
491
|
+
|
492
|
+
assembly = mock_two_input_assembly do
|
493
|
+
hash_join 'test1', 'test2', :on => 'id'
|
494
|
+
end
|
495
|
+
|
496
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
497
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
498
|
+
assert_equal ['id'], left_grouping_fields.to_a
|
499
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
500
|
+
assert_equal ['id'], right_grouping_fields.to_a
|
501
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
|
502
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
503
|
+
assert_equal ['id'], assembly.scope.grouping_fields.to_a
|
504
|
+
end
|
505
|
+
|
506
|
+
def create_hash_join_many_fields
|
507
|
+
assembly = mock_two_input_assembly do
|
508
|
+
hash_join 'test1', 'test2', :on => ['name', 'id']
|
509
|
+
end
|
510
|
+
|
511
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
512
|
+
|
513
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
514
|
+
assert_equal ['name', 'id'], left_grouping_fields.to_a
|
515
|
+
|
516
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
517
|
+
assert_equal ['name', 'id'], right_grouping_fields.to_a
|
518
|
+
|
519
|
+
assert_equal ['name', 'score1', 'score2', 'id', 'name_', 'id_', 'town'], assembly.scope.values_fields.to_a
|
520
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
521
|
+
assert_equal ['name', 'id'], assembly.scope.grouping_fields.to_a
|
522
|
+
end
|
523
|
+
|
524
|
+
def create_hash_join_with_declared_fields
|
525
|
+
assembly = mock_two_input_assembly do
|
526
|
+
hash_join 'test1', 'test2', :on => 'name', :declared_fields => ['a', 'b', 'c', 'd', 'e', 'f', 'g']
|
527
|
+
end
|
528
|
+
|
529
|
+
assert_equal Java::CascadingPipe::HashJoin, assembly.tail_pipe.class
|
530
|
+
|
531
|
+
left_grouping_fields = assembly.tail_pipe.key_selectors['test1']
|
532
|
+
assert_equal ['name'], left_grouping_fields.to_a
|
533
|
+
|
534
|
+
right_grouping_fields = assembly.tail_pipe.key_selectors['test2']
|
535
|
+
assert_equal ['name'], right_grouping_fields.to_a
|
536
|
+
|
537
|
+
assert_equal ['a', 'b', 'c', 'd', 'e', 'f', 'g'], assembly.scope.values_fields.to_a
|
538
|
+
# NOTE: Why does HashJoin produce different grouping fields than CoGroup?
|
539
|
+
assert_equal ['name'], assembly.scope.grouping_fields.to_a
|
540
|
+
end
|
541
|
+
|
542
|
+
def test_hash_join_with_block
|
543
|
+
ex = assert_raise ArgumentError do
|
458
544
|
mock_two_input_assembly do
|
459
|
-
|
545
|
+
hash_join 'test1', 'test2', :on => 'name' do
|
546
|
+
count
|
547
|
+
end
|
460
548
|
end
|
461
549
|
end
|
462
|
-
assert_equal "
|
550
|
+
assert_equal "hash joins don't support aggregations", ex.message
|
463
551
|
end
|
464
552
|
|
465
553
|
def test_branch_unique
|
@@ -610,6 +698,24 @@ class TC_Assembly < Test::Unit::TestCase
|
|
610
698
|
assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
|
611
699
|
end
|
612
700
|
|
701
|
+
def test_smoke_test_describe
|
702
|
+
cascade 'smoke' do
|
703
|
+
flow 'smoke' do
|
704
|
+
source 'input', tap('test/data/data1.txt')
|
705
|
+
assembly 'input' do
|
706
|
+
puts "Describe at assembly start: '#{describe}'"
|
707
|
+
group_by 'line' do
|
708
|
+
count
|
709
|
+
sum 'offset', :type => :long
|
710
|
+
puts "Describe at group_by end (falls out to top-level Cascading::describe): '#{describe}'"
|
711
|
+
end
|
712
|
+
puts "Describe at assembly end: '#{describe}'"
|
713
|
+
end
|
714
|
+
sink 'input', tap('output/test_smoke_test_debug_scope')
|
715
|
+
end
|
716
|
+
end
|
717
|
+
end
|
718
|
+
|
613
719
|
def test_smoke_test_debug_scope
|
614
720
|
cascade 'smoke' do
|
615
721
|
flow 'smoke' do
|
data/test/test_cascade.rb
CHANGED
@@ -45,6 +45,67 @@ class TC_Cascade < Test::Unit::TestCase
|
|
45
45
|
assert_equal 'cascade.flow2.assembly4', cascade.last_child.last_child.qualified_name
|
46
46
|
end
|
47
47
|
|
48
|
+
def test_default_properties
|
49
|
+
f1, f2 = nil, nil
|
50
|
+
cascade = cascade 'cascade' do
|
51
|
+
f1 = flow 'flow1' do
|
52
|
+
end
|
53
|
+
|
54
|
+
f2 = flow 'flow2' do
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
assert_equal({}, cascade.properties)
|
59
|
+
assert_equal({}, f1.properties)
|
60
|
+
assert_equal({}, f2.properties)
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_set_properties
|
64
|
+
# Simulate the global properties set by jading
|
65
|
+
$jobconf_properties = {
|
66
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
67
|
+
'external_overwrite' => 'external_overwrite',
|
68
|
+
}
|
69
|
+
|
70
|
+
f1, f2 = nil, nil
|
71
|
+
cascade = cascade 'cascade' do
|
72
|
+
properties['external_overwrite'] = 'overwritten'
|
73
|
+
properties['internal_no_overwrite'] = 'internal_no_overwrite'
|
74
|
+
properties['internal_overwrite'] = 'internal_overwrite'
|
75
|
+
|
76
|
+
f1 = flow 'flow1' do
|
77
|
+
properties['external_overwrite'] = 'overwritten_flow1'
|
78
|
+
properties['internal_overwrite'] = 'overwritten_flow1'
|
79
|
+
end
|
80
|
+
|
81
|
+
f2 = flow 'flow2' do
|
82
|
+
properties['external_overwrite'] = 'overwritten_flow2'
|
83
|
+
properties['internal_overwrite'] = 'overwritten_flow2'
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
assert_equal({
|
88
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
89
|
+
'external_overwrite' => 'overwritten',
|
90
|
+
'internal_no_overwrite' => 'internal_no_overwrite',
|
91
|
+
'internal_overwrite' => 'internal_overwrite',
|
92
|
+
}, cascade.properties)
|
93
|
+
|
94
|
+
assert_equal({
|
95
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
96
|
+
'external_overwrite' => 'overwritten_flow1',
|
97
|
+
'internal_no_overwrite' => 'internal_no_overwrite',
|
98
|
+
'internal_overwrite' => 'overwritten_flow1',
|
99
|
+
}, f1.properties)
|
100
|
+
|
101
|
+
assert_equal({
|
102
|
+
'external_no_overwrite' => 'external_no_overwrite',
|
103
|
+
'external_overwrite' => 'overwritten_flow2',
|
104
|
+
'internal_no_overwrite' => 'internal_no_overwrite',
|
105
|
+
'internal_overwrite' => 'overwritten_flow2',
|
106
|
+
}, f2.properties)
|
107
|
+
end
|
108
|
+
|
48
109
|
def test_ambiguous_flow_names
|
49
110
|
ex = assert_raise AmbiguousNodeNameException do
|
50
111
|
f1, a1, a2, f2, a3, a4 = [nil] * 6
|
@@ -99,4 +160,23 @@ class TC_Cascade < Test::Unit::TestCase
|
|
99
160
|
# NOTE: Looking up flows this way is not a very common practice, so this is
|
100
161
|
# unlikely to cause issues
|
101
162
|
end
|
163
|
+
|
164
|
+
def test_smoke_test_describe
|
165
|
+
cascade 'smoke' do
|
166
|
+
puts "Describe at cascade start: '#{describe}'"
|
167
|
+
|
168
|
+
flow 'smoke' do
|
169
|
+
source 'input', tap('test/data/data1.txt')
|
170
|
+
assembly 'input' do
|
171
|
+
group_by 'line' do
|
172
|
+
count
|
173
|
+
sum 'offset', :type => :long
|
174
|
+
end
|
175
|
+
end
|
176
|
+
sink 'input', tap('output/test_smoke_test_debug_scope')
|
177
|
+
end
|
178
|
+
|
179
|
+
puts "Describe at cascade end: '#{describe}'"
|
180
|
+
end
|
181
|
+
end
|
102
182
|
end
|
data/test/test_flow.rb
CHANGED
@@ -113,4 +113,24 @@ class TC_Flow < Test::Unit::TestCase
|
|
113
113
|
end
|
114
114
|
assert_equal "Ambiguous lookup of child by name 'b'; found 'flow.b', 'flow.a.b'", ex.message
|
115
115
|
end
|
116
|
+
|
117
|
+
def test_smoke_test_describe
|
118
|
+
cascade 'smoke' do
|
119
|
+
flow 'smoke' do
|
120
|
+
puts "Describe at flow start: '#{describe}'"
|
121
|
+
source 'input', tap('test/data/data1.txt')
|
122
|
+
|
123
|
+
puts "Describe before first assembly: '#{describe}'"
|
124
|
+
assembly 'input' do
|
125
|
+
group_by 'line' do
|
126
|
+
count
|
127
|
+
sum 'offset', :type => :long
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
sink 'input', tap('output/test_smoke_test_debug_scope')
|
132
|
+
puts "Describe at flow end: '#{describe}'"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
116
136
|
end
|
data/test/test_operations.rb
CHANGED
@@ -62,11 +62,12 @@ class TC_Operations < Test::Unit::TestCase
|
|
62
62
|
end
|
63
63
|
|
64
64
|
def test_coerce_to_java_other
|
65
|
-
|
65
|
+
orig_val = [1,2,3]
|
66
|
+
result = coerce_to_java(orig_val)
|
66
67
|
|
67
68
|
assert_equal Java::JavaLang::String, result.class
|
68
69
|
assert_equal ''.to_java(java.lang.String).java_class, result.java_class
|
69
|
-
assert_equal
|
70
|
+
assert_equal orig_val.to_s.to_java(java.lang.String), result
|
70
71
|
end
|
71
72
|
|
72
73
|
def test_to_java_comparable_array
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cascading.jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.9
|
5
|
+
version: 0.0.10
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Matt Walker
|
@@ -11,41 +11,19 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-08-
|
14
|
+
date: 2012-08-09 00:00:00 Z
|
15
15
|
dependencies: []
|
16
16
|
|
17
17
|
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
18
18
|
email: mwalker@etsy.com
|
19
|
-
executables:
|
20
|
-
|
19
|
+
executables: []
|
20
|
+
|
21
21
|
extensions: []
|
22
22
|
|
23
23
|
extra_rdoc_files:
|
24
24
|
- History.txt
|
25
25
|
- LICENSE.txt
|
26
|
-
- bin/make_job
|
27
|
-
- samples/data/data2.txt
|
28
|
-
- samples/data/data_group_by.txt
|
29
|
-
- samples/data/data_join1.txt
|
30
|
-
- samples/data/data_join2.txt
|
31
|
-
- samples/data/data_join3.txt
|
32
|
-
- spec/resource/join_input.txt
|
33
|
-
- spec/resource/test_input.txt
|
34
|
-
- test/data/data1.txt
|
35
|
-
- test/data/data2.txt
|
36
26
|
files:
|
37
|
-
- .travis.yml
|
38
|
-
- Gemfile
|
39
|
-
- Gemfile.lock
|
40
|
-
- HACKING.md
|
41
|
-
- History.txt
|
42
|
-
- LICENSE.txt
|
43
|
-
- README.md
|
44
|
-
- Rakefile
|
45
|
-
- TODO
|
46
|
-
- bin/make_job
|
47
|
-
- ivy.xml
|
48
|
-
- ivysettings.xml
|
49
27
|
- lib/cascading.rb
|
50
28
|
- lib/cascading/aggregations.rb
|
51
29
|
- lib/cascading/assembly.rb
|
@@ -61,53 +39,8 @@ files:
|
|
61
39
|
- lib/cascading/scope.rb
|
62
40
|
- lib/cascading/sub_assembly.rb
|
63
41
|
- lib/cascading/tap.rb
|
64
|
-
-
|
65
|
-
-
|
66
|
-
- samples/data/data2.txt
|
67
|
-
- samples/data/data_group_by.txt
|
68
|
-
- samples/data/data_join1.txt
|
69
|
-
- samples/data/data_join2.txt
|
70
|
-
- samples/data/data_join3.txt
|
71
|
-
- samples/data/genealogy/names/dist.all.last
|
72
|
-
- samples/data/gutenberg/the_outline_of_science_vol_1
|
73
|
-
- samples/group_by.rb
|
74
|
-
- samples/join.rb
|
75
|
-
- samples/logwordcount.rb
|
76
|
-
- samples/project.rb
|
77
|
-
- samples/rename.rb
|
78
|
-
- samples/scorenames.rb
|
79
|
-
- samples/splitter.rb
|
80
|
-
- samples/sub_assembly.rb
|
81
|
-
- samples/union.rb
|
82
|
-
- spec/cascading_spec.rb
|
83
|
-
- spec/expr_spec.rb
|
84
|
-
- spec/jruby_version_spec.rb
|
85
|
-
- spec/resource/join_input.txt
|
86
|
-
- spec/resource/test_input.txt
|
87
|
-
- spec/scope_spec.rb
|
88
|
-
- spec/spec.opts
|
89
|
-
- spec/spec_helper.rb
|
90
|
-
- spec/spec_util.rb
|
91
|
-
- src/cascading/jruby/Main.java
|
92
|
-
- src/cascading/jruby/runner.rb
|
93
|
-
- tags
|
94
|
-
- tasks/ann.rake
|
95
|
-
- tasks/ant.rake
|
96
|
-
- tasks/bones.rake
|
97
|
-
- tasks/gem.rake
|
98
|
-
- tasks/git.rake
|
99
|
-
- tasks/notes.rake
|
100
|
-
- tasks/post_load.rake
|
101
|
-
- tasks/rdoc.rake
|
102
|
-
- tasks/rubyforge.rake
|
103
|
-
- tasks/samples.rake
|
104
|
-
- tasks/setup.rb
|
105
|
-
- tasks/spec.rake
|
106
|
-
- tasks/svn.rake
|
107
|
-
- tasks/test.rake
|
108
|
-
- test/data/data1.txt
|
109
|
-
- test/data/data2.txt
|
110
|
-
- test/mock_assemblies.rb
|
42
|
+
- History.txt
|
43
|
+
- LICENSE.txt
|
111
44
|
- test/test_aggregations.rb
|
112
45
|
- test/test_assembly.rb
|
113
46
|
- test/test_cascade.rb
|
@@ -130,9 +63,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
130
63
|
requirements:
|
131
64
|
- - ">="
|
132
65
|
- !ruby/object:Gem::Version
|
133
|
-
hash: 2
|
134
|
-
segments:
|
135
|
-
- 0
|
136
66
|
version: "0"
|
137
67
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
68
|
none: false
|
data/.travis.yml
DELETED
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
data/HACKING.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# Hacking
|
2
|
-
|
3
|
-
Some hacking info on `cascading.jruby`:
|
4
|
-
|
5
|
-
For local development, install with (requires [bundler](http://gembundler.com/)):
|
6
|
-
|
7
|
-
jruby -S bundle install
|
8
|
-
|
9
|
-
To run the tests (will download Cascading and Hadoop jars):
|
10
|
-
|
11
|
-
jruby -S bundle exec rake
|
12
|
-
|
13
|
-
To create the gem:
|
14
|
-
|
15
|
-
jruby -S bundle exec rake gem
|
16
|
-
|
17
|
-
To install it locally:
|
18
|
-
|
19
|
-
jruby -S gem install pkg/cascading.jruby-xxx.gem
|
20
|
-
|
21
|
-
The `Cascading::Operations` module is mixed-in the `Cascading::Assembly` class to provide some shortcuts for common operations.
|
22
|
-
|
23
|
-
The file cascading/cascading.rb defines global helper methods for cascading like tap creation, fields creation, etc.
|
data/README.md
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
# Cascading.JRuby [Build Status](http://travis-ci.org/mrwalker/cascading.jruby)
|
2
|
-
|
3
|
-
`cascading.jruby` is a small DSL above [Cascading](http://www.cascading.org/).
|
4
|
-
|
5
|
-
It requires Hadoop (>= 0.20.2) and [Cascading 2.0.0](http://files.cascading.org/cascading/2.0/cascading-2.0.0.tgz) to be set via the environment variables: `HADOOP_HOME` and `CASCADING_HOME`
|
6
|
-
|
7
|
-
It has been tested on JRuby versions 1.2.0, 1.4.0, 1.5.3, and 1.6.5.
|
8
|
-
|
9
|
-
Copyright 2009, Grégoire Marabout.
|
data/Rakefile
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
#! /usr/bin/env jruby
|
2
|
-
|
3
|
-
# Look in the tasks/setup.rb file for the various options that can be
|
4
|
-
# configured in this Rakefile. The .rake files in the tasks directory
|
5
|
-
# are where the options are used.
|
6
|
-
|
7
|
-
begin
|
8
|
-
require 'bones'
|
9
|
-
Bones.setup
|
10
|
-
rescue LoadError
|
11
|
-
begin
|
12
|
-
load 'tasks/setup.rb'
|
13
|
-
rescue LoadError
|
14
|
-
raise RuntimeError, '### please install the "bones" gem ###'
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
ensure_in_path 'lib'
|
19
|
-
|
20
|
-
require 'cascading'
|
21
|
-
require 'bundler/setup'
|
22
|
-
|
23
|
-
task :default => 'test'
|
24
|
-
|
25
|
-
task :run do
|
26
|
-
# ensure_in_path "samples"
|
27
|
-
puts "Running #{ARGS[0]}"
|
28
|
-
require "samples/#{ARGS[0]}"
|
29
|
-
end
|
30
|
-
|
31
|
-
desc 'Remove gem and Java build files'
|
32
|
-
task :clean => ['ant:clean', 'gem:clean', 'samples:clean'] do
|
33
|
-
puts 'Build files removed'
|
34
|
-
end
|
35
|
-
|
36
|
-
PROJ.name = 'cascading.jruby'
|
37
|
-
PROJ.authors = ['Matt Walker', 'Grégoire Marabout']
|
38
|
-
PROJ.email = 'mwalker@etsy.com'
|
39
|
-
PROJ.url = 'http://github.com/etsy/cascading.jruby'
|
40
|
-
PROJ.version = Cascading::VERSION
|
41
|
-
PROJ.summary = 'A JRuby DSL for Cascading'
|
42
|
-
PROJ.description = 'cascading.jruby is a small DSL above Cascading, written in JRuby'
|
43
|
-
PROJ.rubyforge.name = 'cascading.jruby'
|
44
|
-
PROJ.spec.opts << '--color'
|
45
|
-
|
46
|
-
# EOF
|
data/TODO
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
Documentation
|
2
|
-
|
3
|
-
Import local_job/Job from Etsy, which can eliminate registries
|
4
|
-
|
5
|
-
Bug fixes on github
|
6
|
-
Enforce more runtime rules at composition time
|
7
|
-
Standardize helper contracts
|
8
|
-
Possibly combine unit tests...into unit tests because RSpec sucks and swallows stack traces
|
9
|
-
|
10
|
-
(Jading)
|
11
|
-
Split out runner
|
12
|
-
Make runner implement Tool
|
13
|
-
Create build tool for job jar
|