cascading.jruby 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/HACKING.md +1 -1
- data/History.txt +6 -0
- data/README.md +1 -1
- data/TODO +14 -0
- data/lib/cascading/assembly.rb +6 -37
- data/lib/cascading/base.rb +14 -1
- data/lib/cascading/cascading.rb +1 -0
- data/lib/cascading/flow.rb +10 -2
- data/lib/cascading/operations.rb +8 -2
- data/lib/cascading/scope.rb +4 -88
- data/lib/cascading.rb +1 -1
- data/spec/jruby_version_spec.rb +1 -1
- data/spec/spec_util.rb +0 -61
- data/tags +250 -0
- data/tasks/ant.rake +5 -2
- data/test/test_assembly.rb +42 -7
- data/test/test_cascade.rb +47 -0
- data/test/test_flow.rb +90 -6
- data/test/test_operations.rb +23 -0
- metadata +123 -117
- data/spec/primary_key_spec.rb +0 -119
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'cascading'
|
3
|
+
|
4
|
+
class TC_Operations < Test::Unit::TestCase
|
5
|
+
include Operations
|
6
|
+
|
7
|
+
def test_aggregator_function_ignore_values
|
8
|
+
min = min_function 'min_field', :ignore => [nil].to_java(:string)
|
9
|
+
assert_not_nil min
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_aggregator_function_ignore_tuples
|
13
|
+
first = first_function 'first_field', :ignore => [Java::CascadingTuple::Tuple.new(-1)].to_java(Java::CascadingTuple::Tuple)
|
14
|
+
assert_not_nil first
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_aggregator_function_ignore_exception
|
18
|
+
assert_raise RuntimeError do
|
19
|
+
avg = average_function 'avg_field', :ignore => [nil].to_java(:string)
|
20
|
+
assert_not_nil avg
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,128 +1,134 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
name: cascading.jruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matt Walker
|
9
|
+
- "Gr\xC3\xA9goire Marabout"
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
9
12
|
cert_chain: []
|
10
13
|
|
11
|
-
|
12
|
-
|
14
|
+
date: 2012-03-26 00:00:00 Z
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
18
|
+
email: mwalker@etsy.com
|
19
|
+
executables:
|
20
|
+
- make_job
|
21
|
+
extensions: []
|
22
|
+
|
13
23
|
extra_rdoc_files:
|
14
|
-
- History.txt
|
15
|
-
- LICENSE.txt
|
16
|
-
- bin/make_job
|
17
|
-
- samples/data/data2.txt
|
18
|
-
- samples/data/data_join1.txt
|
19
|
-
- samples/data/data_join2.txt
|
20
|
-
- samples/data/data_join3.txt
|
21
|
-
- spec/resource/join_input.txt
|
22
|
-
- spec/resource/test_input.txt
|
23
|
-
- test/data/data1.txt
|
24
|
-
- test/data/data2.txt
|
24
|
+
- History.txt
|
25
|
+
- LICENSE.txt
|
26
|
+
- bin/make_job
|
27
|
+
- samples/data/data2.txt
|
28
|
+
- samples/data/data_join1.txt
|
29
|
+
- samples/data/data_join2.txt
|
30
|
+
- samples/data/data_join3.txt
|
31
|
+
- spec/resource/join_input.txt
|
32
|
+
- spec/resource/test_input.txt
|
33
|
+
- test/data/data1.txt
|
34
|
+
- test/data/data2.txt
|
35
|
+
files:
|
36
|
+
- HACKING.md
|
37
|
+
- History.txt
|
38
|
+
- LICENSE.txt
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- TODO
|
42
|
+
- bin/make_job
|
43
|
+
- lib/cascading.rb
|
44
|
+
- lib/cascading/assembly.rb
|
45
|
+
- lib/cascading/base.rb
|
46
|
+
- lib/cascading/cascade.rb
|
47
|
+
- lib/cascading/cascading.rb
|
48
|
+
- lib/cascading/cascading_exception.rb
|
49
|
+
- lib/cascading/expr_stub.rb
|
50
|
+
- lib/cascading/ext/array.rb
|
51
|
+
- lib/cascading/flow.rb
|
52
|
+
- lib/cascading/operations.rb
|
53
|
+
- lib/cascading/scope.rb
|
54
|
+
- samples/branch.rb
|
55
|
+
- samples/cascading.rb
|
56
|
+
- samples/copy.rb
|
57
|
+
- samples/data/data2.txt
|
58
|
+
- samples/data/data_join1.txt
|
59
|
+
- samples/data/data_join2.txt
|
60
|
+
- samples/data/data_join3.txt
|
61
|
+
- samples/join.rb
|
62
|
+
- samples/logwordcount.rb
|
63
|
+
- samples/project.rb
|
64
|
+
- samples/rename.rb
|
65
|
+
- samples/scorenames.rb
|
66
|
+
- samples/splitter.rb
|
67
|
+
- samples/union.rb
|
68
|
+
- spec/cascading_spec.rb
|
69
|
+
- spec/expr_spec.rb
|
70
|
+
- spec/jruby_version_spec.rb
|
71
|
+
- spec/resource/join_input.txt
|
72
|
+
- spec/resource/test_input.txt
|
73
|
+
- spec/scope_spec.rb
|
74
|
+
- spec/spec.opts
|
75
|
+
- spec/spec_helper.rb
|
76
|
+
- spec/spec_util.rb
|
77
|
+
- src/cascading/jruby/Main.java
|
78
|
+
- src/cascading/jruby/runner.rb
|
79
|
+
- tags
|
80
|
+
- tasks/ann.rake
|
81
|
+
- tasks/ant.rake
|
82
|
+
- tasks/bones.rake
|
83
|
+
- tasks/gem.rake
|
84
|
+
- tasks/git.rake
|
85
|
+
- tasks/notes.rake
|
86
|
+
- tasks/post_load.rake
|
87
|
+
- tasks/rdoc.rake
|
88
|
+
- tasks/rubyforge.rake
|
89
|
+
- tasks/samples.rake
|
90
|
+
- tasks/setup.rb
|
91
|
+
- tasks/spec.rake
|
92
|
+
- tasks/svn.rake
|
93
|
+
- tasks/test.rake
|
94
|
+
- test/data/data1.txt
|
95
|
+
- test/data/data2.txt
|
96
|
+
- test/test_assembly.rb
|
97
|
+
- test/test_cascade.rb
|
98
|
+
- test/test_cascading.rb
|
99
|
+
- test/test_flow.rb
|
100
|
+
- test/test_operations.rb
|
25
101
|
homepage: http://github.com/etsy/cascading.jruby
|
26
|
-
|
27
|
-
|
102
|
+
licenses: []
|
103
|
+
|
104
|
+
post_install_message:
|
28
105
|
rdoc_options:
|
29
|
-
- --main
|
30
|
-
- README.md
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
- HACKING.md
|
40
|
-
- History.txt
|
41
|
-
- LICENSE.txt
|
42
|
-
- README.md
|
43
|
-
- Rakefile
|
44
|
-
- bin/make_job
|
45
|
-
- lib/cascading.rb
|
46
|
-
- lib/cascading/assembly.rb
|
47
|
-
- lib/cascading/base.rb
|
48
|
-
- lib/cascading/cascade.rb
|
49
|
-
- lib/cascading/cascading.rb
|
50
|
-
- lib/cascading/cascading_exception.rb
|
51
|
-
- lib/cascading/expr_stub.rb
|
52
|
-
- lib/cascading/ext/array.rb
|
53
|
-
- lib/cascading/flow.rb
|
54
|
-
- lib/cascading/operations.rb
|
55
|
-
- lib/cascading/scope.rb
|
56
|
-
- samples/branch.rb
|
57
|
-
- samples/cascading.rb
|
58
|
-
- samples/copy.rb
|
59
|
-
- samples/data/data2.txt
|
60
|
-
- samples/data/data_join1.txt
|
61
|
-
- samples/data/data_join2.txt
|
62
|
-
- samples/data/data_join3.txt
|
63
|
-
- samples/join.rb
|
64
|
-
- samples/logwordcount.rb
|
65
|
-
- samples/project.rb
|
66
|
-
- samples/rename.rb
|
67
|
-
- samples/scorenames.rb
|
68
|
-
- samples/splitter.rb
|
69
|
-
- samples/union.rb
|
70
|
-
- spec/cascading_spec.rb
|
71
|
-
- spec/expr_spec.rb
|
72
|
-
- spec/jruby_version_spec.rb
|
73
|
-
- spec/primary_key_spec.rb
|
74
|
-
- spec/resource/join_input.txt
|
75
|
-
- spec/resource/test_input.txt
|
76
|
-
- spec/scope_spec.rb
|
77
|
-
- spec/spec.opts
|
78
|
-
- spec/spec_helper.rb
|
79
|
-
- spec/spec_util.rb
|
80
|
-
- src/cascading/jruby/Main.java
|
81
|
-
- src/cascading/jruby/runner.rb
|
82
|
-
- tasks/ann.rake
|
83
|
-
- tasks/ant.rake
|
84
|
-
- tasks/bones.rake
|
85
|
-
- tasks/gem.rake
|
86
|
-
- tasks/git.rake
|
87
|
-
- tasks/notes.rake
|
88
|
-
- tasks/post_load.rake
|
89
|
-
- tasks/rdoc.rake
|
90
|
-
- tasks/rubyforge.rake
|
91
|
-
- tasks/samples.rake
|
92
|
-
- tasks/setup.rb
|
93
|
-
- tasks/spec.rake
|
94
|
-
- tasks/svn.rake
|
95
|
-
- tasks/test.rake
|
96
|
-
- test/data/data1.txt
|
97
|
-
- test/data/data2.txt
|
98
|
-
- test/test_assembly.rb
|
99
|
-
- test/test_cascading.rb
|
100
|
-
- test/test_flow.rb
|
106
|
+
- --main
|
107
|
+
- README.md
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: "0"
|
101
116
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
102
118
|
requirements:
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
version:
|
107
|
-
extensions: []
|
108
|
-
|
109
|
-
rubygems_version: 1.3.1
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
110
122
|
requirements: []
|
111
123
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
124
|
+
rubyforge_project: cascading.jruby
|
125
|
+
rubygems_version: 1.8.9
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: A JRuby DSL for Cascading
|
117
129
|
test_files:
|
118
|
-
- test/test_assembly.rb
|
119
|
-
- test/
|
120
|
-
- test/
|
121
|
-
|
122
|
-
|
123
|
-
require_paths:
|
124
|
-
- lib
|
125
|
-
dependencies: []
|
126
|
-
|
127
|
-
bindir: bin
|
128
|
-
has_rdoc: true
|
130
|
+
- test/test_assembly.rb
|
131
|
+
- test/test_cascade.rb
|
132
|
+
- test/test_cascading.rb
|
133
|
+
- test/test_flow.rb
|
134
|
+
- test/test_operations.rb
|
data/spec/primary_key_spec.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
|
3
|
-
context Cascading::Scope do
|
4
|
-
it 'should allow override of primary key' do
|
5
|
-
test_assembly do
|
6
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
7
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
8
|
-
:primary_key_fields => ['offset']
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should pass primary key through Each' do
|
13
|
-
test_assembly do
|
14
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
15
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
16
|
-
:primary_key_fields => ['offset']
|
17
|
-
pass
|
18
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
19
|
-
:primary_key_fields => ['offset']
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
it 'should support renaming primary keys' do
|
24
|
-
test_assembly do
|
25
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
26
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
27
|
-
:primary_key_fields => ['offset']
|
28
|
-
rename 'offset' => 'primary_key', 'line' => 'data'
|
29
|
-
check_scope :values_fields => ['primary_key', 'data', 'x', 'y'],
|
30
|
-
:primary_key_fields => ['primary_key']
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'should clear primary keys when a subset of their fields are discarded' do
|
35
|
-
test_assembly do
|
36
|
-
primary 'offset', 'line' # Make primary keys interesting
|
37
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
38
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
39
|
-
:primary_key_fields => ['offset', 'line']
|
40
|
-
project 'line', 'x', 'y'
|
41
|
-
check_scope :values_fields => ['line', 'x', 'y'],
|
42
|
-
:primary_key_fields => nil
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'should pass primary key through branch' do
|
47
|
-
test_assembly do
|
48
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
49
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
50
|
-
:primary_key_fields => ['offset']
|
51
|
-
|
52
|
-
branch 'check_keys' do
|
53
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
54
|
-
:primary_key_fields => ['offset']
|
55
|
-
pass
|
56
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
57
|
-
:primary_key_fields => ['offset']
|
58
|
-
end
|
59
|
-
|
60
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
61
|
-
:primary_key_fields => ['offset']
|
62
|
-
pass
|
63
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
64
|
-
:primary_key_fields => ['offset']
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
it 'should pass primary key through GroupBy followed by Each' do
|
69
|
-
test_assembly do
|
70
|
-
group_by 'offset'
|
71
|
-
check_scope :values_fields => ['offset', 'line'],
|
72
|
-
:grouping_fields => ['offset'],
|
73
|
-
:primary_key_fields => ['offset']
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
it 'should pass primary key through GroupBy followed by Every' do
|
78
|
-
test_assembly do
|
79
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
80
|
-
group_by 'offset', 'x' do
|
81
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
82
|
-
:grouping_fields => ['offset', 'x'],
|
83
|
-
:primary_key_fields => ['offset'],
|
84
|
-
:grouping_primary_key_fields => ['offset', 'x']
|
85
|
-
count 'line'
|
86
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
87
|
-
:grouping_fields => ['offset', 'x', 'line'],
|
88
|
-
:primary_key_fields => ['offset'],
|
89
|
-
:grouping_primary_key_fields => ['offset', 'x']
|
90
|
-
count 'y'
|
91
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
92
|
-
:grouping_fields => ['offset', 'x', 'line', 'y'],
|
93
|
-
:primary_key_fields => ['offset', 'x'], # FIXME: why has the pk changed?
|
94
|
-
:grouping_primary_key_fields => ['offset', 'x']
|
95
|
-
end
|
96
|
-
check_scope :values_fields => ['offset', 'x', 'line', 'y'],
|
97
|
-
:primary_key_fields => ['offset', 'x']
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
it 'should not clear primary key when grouping on other fields' do
|
102
|
-
test_assembly do
|
103
|
-
group_by 'line'
|
104
|
-
check_scope :values_fields => ['offset', 'line'],
|
105
|
-
:grouping_fields => ['line'],
|
106
|
-
:primary_key_fields => ['offset'],
|
107
|
-
:grouping_primary_key_fields => ['line']
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
it 'should pass primary key through CoGroup' do
|
112
|
-
test_join_assembly do
|
113
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
114
|
-
:grouping_fields => ['x'],
|
115
|
-
:primary_key_fields => ['offset'],
|
116
|
-
:grouping_primary_key_fields => ['x']
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|