cascading.jruby 0.0.5 → 0.0.6
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/HACKING.md +1 -1
- data/History.txt +6 -0
- data/README.md +1 -1
- data/TODO +14 -0
- data/lib/cascading/assembly.rb +6 -37
- data/lib/cascading/base.rb +14 -1
- data/lib/cascading/cascading.rb +1 -0
- data/lib/cascading/flow.rb +10 -2
- data/lib/cascading/operations.rb +8 -2
- data/lib/cascading/scope.rb +4 -88
- data/lib/cascading.rb +1 -1
- data/spec/jruby_version_spec.rb +1 -1
- data/spec/spec_util.rb +0 -61
- data/tags +250 -0
- data/tasks/ant.rake +5 -2
- data/test/test_assembly.rb +42 -7
- data/test/test_cascade.rb +47 -0
- data/test/test_flow.rb +90 -6
- data/test/test_operations.rb +23 -0
- metadata +123 -117
- data/spec/primary_key_spec.rb +0 -119
data/test/test_operations.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'cascading'
|
3
|
+
|
4
|
+
class TC_Operations < Test::Unit::TestCase
|
5
|
+
include Operations
|
6
|
+
|
7
|
+
def test_aggregator_function_ignore_values
|
8
|
+
min = min_function 'min_field', :ignore => [nil].to_java(:string)
|
9
|
+
assert_not_nil min
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_aggregator_function_ignore_tuples
|
13
|
+
first = first_function 'first_field', :ignore => [Java::CascadingTuple::Tuple.new(-1)].to_java(Java::CascadingTuple::Tuple)
|
14
|
+
assert_not_nil first
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_aggregator_function_ignore_exception
|
18
|
+
assert_raise RuntimeError do
|
19
|
+
avg = average_function 'avg_field', :ignore => [nil].to_java(:string)
|
20
|
+
assert_not_nil avg
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,128 +1,134 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
name: cascading.jruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matt Walker
|
9
|
+
- "Gr\xC3\xA9goire Marabout"
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
9
12
|
cert_chain: []
|
10
13
|
|
11
|
-
|
12
|
-
|
14
|
+
date: 2012-03-26 00:00:00 Z
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
18
|
+
email: mwalker@etsy.com
|
19
|
+
executables:
|
20
|
+
- make_job
|
21
|
+
extensions: []
|
22
|
+
|
13
23
|
extra_rdoc_files:
|
14
|
-
- History.txt
|
15
|
-
- LICENSE.txt
|
16
|
-
- bin/make_job
|
17
|
-
- samples/data/data2.txt
|
18
|
-
- samples/data/data_join1.txt
|
19
|
-
- samples/data/data_join2.txt
|
20
|
-
- samples/data/data_join3.txt
|
21
|
-
- spec/resource/join_input.txt
|
22
|
-
- spec/resource/test_input.txt
|
23
|
-
- test/data/data1.txt
|
24
|
-
- test/data/data2.txt
|
24
|
+
- History.txt
|
25
|
+
- LICENSE.txt
|
26
|
+
- bin/make_job
|
27
|
+
- samples/data/data2.txt
|
28
|
+
- samples/data/data_join1.txt
|
29
|
+
- samples/data/data_join2.txt
|
30
|
+
- samples/data/data_join3.txt
|
31
|
+
- spec/resource/join_input.txt
|
32
|
+
- spec/resource/test_input.txt
|
33
|
+
- test/data/data1.txt
|
34
|
+
- test/data/data2.txt
|
35
|
+
files:
|
36
|
+
- HACKING.md
|
37
|
+
- History.txt
|
38
|
+
- LICENSE.txt
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- TODO
|
42
|
+
- bin/make_job
|
43
|
+
- lib/cascading.rb
|
44
|
+
- lib/cascading/assembly.rb
|
45
|
+
- lib/cascading/base.rb
|
46
|
+
- lib/cascading/cascade.rb
|
47
|
+
- lib/cascading/cascading.rb
|
48
|
+
- lib/cascading/cascading_exception.rb
|
49
|
+
- lib/cascading/expr_stub.rb
|
50
|
+
- lib/cascading/ext/array.rb
|
51
|
+
- lib/cascading/flow.rb
|
52
|
+
- lib/cascading/operations.rb
|
53
|
+
- lib/cascading/scope.rb
|
54
|
+
- samples/branch.rb
|
55
|
+
- samples/cascading.rb
|
56
|
+
- samples/copy.rb
|
57
|
+
- samples/data/data2.txt
|
58
|
+
- samples/data/data_join1.txt
|
59
|
+
- samples/data/data_join2.txt
|
60
|
+
- samples/data/data_join3.txt
|
61
|
+
- samples/join.rb
|
62
|
+
- samples/logwordcount.rb
|
63
|
+
- samples/project.rb
|
64
|
+
- samples/rename.rb
|
65
|
+
- samples/scorenames.rb
|
66
|
+
- samples/splitter.rb
|
67
|
+
- samples/union.rb
|
68
|
+
- spec/cascading_spec.rb
|
69
|
+
- spec/expr_spec.rb
|
70
|
+
- spec/jruby_version_spec.rb
|
71
|
+
- spec/resource/join_input.txt
|
72
|
+
- spec/resource/test_input.txt
|
73
|
+
- spec/scope_spec.rb
|
74
|
+
- spec/spec.opts
|
75
|
+
- spec/spec_helper.rb
|
76
|
+
- spec/spec_util.rb
|
77
|
+
- src/cascading/jruby/Main.java
|
78
|
+
- src/cascading/jruby/runner.rb
|
79
|
+
- tags
|
80
|
+
- tasks/ann.rake
|
81
|
+
- tasks/ant.rake
|
82
|
+
- tasks/bones.rake
|
83
|
+
- tasks/gem.rake
|
84
|
+
- tasks/git.rake
|
85
|
+
- tasks/notes.rake
|
86
|
+
- tasks/post_load.rake
|
87
|
+
- tasks/rdoc.rake
|
88
|
+
- tasks/rubyforge.rake
|
89
|
+
- tasks/samples.rake
|
90
|
+
- tasks/setup.rb
|
91
|
+
- tasks/spec.rake
|
92
|
+
- tasks/svn.rake
|
93
|
+
- tasks/test.rake
|
94
|
+
- test/data/data1.txt
|
95
|
+
- test/data/data2.txt
|
96
|
+
- test/test_assembly.rb
|
97
|
+
- test/test_cascade.rb
|
98
|
+
- test/test_cascading.rb
|
99
|
+
- test/test_flow.rb
|
100
|
+
- test/test_operations.rb
|
25
101
|
homepage: http://github.com/etsy/cascading.jruby
|
26
|
-
|
27
|
-
|
102
|
+
licenses: []
|
103
|
+
|
104
|
+
post_install_message:
|
28
105
|
rdoc_options:
|
29
|
-
- --main
|
30
|
-
- README.md
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
- HACKING.md
|
40
|
-
- History.txt
|
41
|
-
- LICENSE.txt
|
42
|
-
- README.md
|
43
|
-
- Rakefile
|
44
|
-
- bin/make_job
|
45
|
-
- lib/cascading.rb
|
46
|
-
- lib/cascading/assembly.rb
|
47
|
-
- lib/cascading/base.rb
|
48
|
-
- lib/cascading/cascade.rb
|
49
|
-
- lib/cascading/cascading.rb
|
50
|
-
- lib/cascading/cascading_exception.rb
|
51
|
-
- lib/cascading/expr_stub.rb
|
52
|
-
- lib/cascading/ext/array.rb
|
53
|
-
- lib/cascading/flow.rb
|
54
|
-
- lib/cascading/operations.rb
|
55
|
-
- lib/cascading/scope.rb
|
56
|
-
- samples/branch.rb
|
57
|
-
- samples/cascading.rb
|
58
|
-
- samples/copy.rb
|
59
|
-
- samples/data/data2.txt
|
60
|
-
- samples/data/data_join1.txt
|
61
|
-
- samples/data/data_join2.txt
|
62
|
-
- samples/data/data_join3.txt
|
63
|
-
- samples/join.rb
|
64
|
-
- samples/logwordcount.rb
|
65
|
-
- samples/project.rb
|
66
|
-
- samples/rename.rb
|
67
|
-
- samples/scorenames.rb
|
68
|
-
- samples/splitter.rb
|
69
|
-
- samples/union.rb
|
70
|
-
- spec/cascading_spec.rb
|
71
|
-
- spec/expr_spec.rb
|
72
|
-
- spec/jruby_version_spec.rb
|
73
|
-
- spec/primary_key_spec.rb
|
74
|
-
- spec/resource/join_input.txt
|
75
|
-
- spec/resource/test_input.txt
|
76
|
-
- spec/scope_spec.rb
|
77
|
-
- spec/spec.opts
|
78
|
-
- spec/spec_helper.rb
|
79
|
-
- spec/spec_util.rb
|
80
|
-
- src/cascading/jruby/Main.java
|
81
|
-
- src/cascading/jruby/runner.rb
|
82
|
-
- tasks/ann.rake
|
83
|
-
- tasks/ant.rake
|
84
|
-
- tasks/bones.rake
|
85
|
-
- tasks/gem.rake
|
86
|
-
- tasks/git.rake
|
87
|
-
- tasks/notes.rake
|
88
|
-
- tasks/post_load.rake
|
89
|
-
- tasks/rdoc.rake
|
90
|
-
- tasks/rubyforge.rake
|
91
|
-
- tasks/samples.rake
|
92
|
-
- tasks/setup.rb
|
93
|
-
- tasks/spec.rake
|
94
|
-
- tasks/svn.rake
|
95
|
-
- tasks/test.rake
|
96
|
-
- test/data/data1.txt
|
97
|
-
- test/data/data2.txt
|
98
|
-
- test/test_assembly.rb
|
99
|
-
- test/test_cascading.rb
|
100
|
-
- test/test_flow.rb
|
106
|
+
- --main
|
107
|
+
- README.md
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: "0"
|
101
116
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
102
118
|
requirements:
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
version:
|
107
|
-
extensions: []
|
108
|
-
|
109
|
-
rubygems_version: 1.3.1
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
110
122
|
requirements: []
|
111
123
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
124
|
+
rubyforge_project: cascading.jruby
|
125
|
+
rubygems_version: 1.8.9
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: A JRuby DSL for Cascading
|
117
129
|
test_files:
|
118
|
-
- test/test_assembly.rb
|
119
|
-
- test/test_cascading.rb
|
120
|
-
- test/test_flow.rb
|
121
|
-
|
122
|
-
|
123
|
-
require_paths:
|
124
|
-
- lib
|
125
|
-
dependencies: []
|
126
|
-
|
127
|
-
bindir: bin
|
128
|
-
has_rdoc: true
|
130
|
+
- test/test_assembly.rb
|
131
|
+
- test/test_cascade.rb
|
132
|
+
- test/test_cascading.rb
|
133
|
+
- test/test_flow.rb
|
134
|
+
- test/test_operations.rb
|
data/spec/primary_key_spec.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
|
3
|
-
context Cascading::Scope do
|
4
|
-
it 'should allow override of primary key' do
|
5
|
-
test_assembly do
|
6
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
7
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
8
|
-
:primary_key_fields => ['offset']
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'should pass primary key through Each' do
|
13
|
-
test_assembly do
|
14
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
15
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
16
|
-
:primary_key_fields => ['offset']
|
17
|
-
pass
|
18
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
19
|
-
:primary_key_fields => ['offset']
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
it 'should support renaming primary keys' do
|
24
|
-
test_assembly do
|
25
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
26
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
27
|
-
:primary_key_fields => ['offset']
|
28
|
-
rename 'offset' => 'primary_key', 'line' => 'data'
|
29
|
-
check_scope :values_fields => ['primary_key', 'data', 'x', 'y'],
|
30
|
-
:primary_key_fields => ['primary_key']
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'should clear primary keys when a subset of their fields are discarded' do
|
35
|
-
test_assembly do
|
36
|
-
primary 'offset', 'line' # Make primary keys interesting
|
37
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
38
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
39
|
-
:primary_key_fields => ['offset', 'line']
|
40
|
-
project 'line', 'x', 'y'
|
41
|
-
check_scope :values_fields => ['line', 'x', 'y'],
|
42
|
-
:primary_key_fields => nil
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'should pass primary key through branch' do
|
47
|
-
test_assembly do
|
48
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
49
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
50
|
-
:primary_key_fields => ['offset']
|
51
|
-
|
52
|
-
branch 'check_keys' do
|
53
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
54
|
-
:primary_key_fields => ['offset']
|
55
|
-
pass
|
56
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
57
|
-
:primary_key_fields => ['offset']
|
58
|
-
end
|
59
|
-
|
60
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
61
|
-
:primary_key_fields => ['offset']
|
62
|
-
pass
|
63
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
64
|
-
:primary_key_fields => ['offset']
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
it 'should pass primary key through GroupBy followed by Each' do
|
69
|
-
test_assembly do
|
70
|
-
group_by 'offset'
|
71
|
-
check_scope :values_fields => ['offset', 'line'],
|
72
|
-
:grouping_fields => ['offset'],
|
73
|
-
:primary_key_fields => ['offset']
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
it 'should pass primary key through GroupBy followed by Every' do
|
78
|
-
test_assembly do
|
79
|
-
split 'line', ['x', 'y'], :pattern => /,/
|
80
|
-
group_by 'offset', 'x' do
|
81
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
82
|
-
:grouping_fields => ['offset', 'x'],
|
83
|
-
:primary_key_fields => ['offset'],
|
84
|
-
:grouping_primary_key_fields => ['offset', 'x']
|
85
|
-
count 'line'
|
86
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
87
|
-
:grouping_fields => ['offset', 'x', 'line'],
|
88
|
-
:primary_key_fields => ['offset'],
|
89
|
-
:grouping_primary_key_fields => ['offset', 'x']
|
90
|
-
count 'y'
|
91
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y'],
|
92
|
-
:grouping_fields => ['offset', 'x', 'line', 'y'],
|
93
|
-
:primary_key_fields => ['offset', 'x'], # FIXME: why has the pk changed?
|
94
|
-
:grouping_primary_key_fields => ['offset', 'x']
|
95
|
-
end
|
96
|
-
check_scope :values_fields => ['offset', 'x', 'line', 'y'],
|
97
|
-
:primary_key_fields => ['offset', 'x']
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
it 'should not clear primary key when grouping on other fields' do
|
102
|
-
test_assembly do
|
103
|
-
group_by 'line'
|
104
|
-
check_scope :values_fields => ['offset', 'line'],
|
105
|
-
:grouping_fields => ['line'],
|
106
|
-
:primary_key_fields => ['offset'],
|
107
|
-
:grouping_primary_key_fields => ['line']
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
it 'should pass primary key through CoGroup' do
|
112
|
-
test_join_assembly do
|
113
|
-
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
|
114
|
-
:grouping_fields => ['x'],
|
115
|
-
:primary_key_fields => ['offset'],
|
116
|
-
:grouping_primary_key_fields => ['x']
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|