cascading.jruby 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/README.md +1 -1
- data/TODO +0 -1
- data/lib/cascading.rb +1 -2
- data/lib/cascading/assembly.rb +5 -5
- data/lib/cascading/cascading.rb +50 -13
- data/lib/cascading/cascading_exception.rb +21 -5
- data/lib/cascading/flow.rb +5 -1
- data/lib/cascading/scope.rb +10 -7
- data/samples/branch.rb +1 -2
- data/samples/copy.rb +1 -2
- data/samples/join.rb +1 -2
- data/samples/logwordcount.rb +1 -2
- data/samples/project.rb +1 -2
- data/samples/rename.rb +1 -2
- data/samples/scorenames.rb +1 -2
- data/samples/splitter.rb +1 -2
- data/samples/union.rb +1 -2
- data/spec/scope_spec.rb +5 -11
- data/spec/spec_util.rb +1 -36
- data/tags +27 -11
- data/test/test_assembly.rb +47 -21
- data/test/test_cascading.rb +4 -4
- data/test/test_exceptions.rb +41 -0
- metadata +5 -4
- data/samples/cascading.rb +0 -41
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
0.0.7 - Upgrade to Cascading 2.0.0 wip-255
|
2
|
+
|
3
|
+
This release upgrades to Cascading 2.0.0 wip-255, but implements neither local
|
4
|
+
mode nor aggregations through AggregateBy. Local jobs continue to run in Hadoop
|
5
|
+
mode, but we intend to release local mode support soon.
|
6
|
+
|
1
7
|
0.0.6 - Removing primary key
|
2
8
|
|
3
9
|
The primary key feature was a source of great confusion at Etsy, so it's been
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
`cascading.jruby` is a small DSL above [Cascading](http://www.cascading.org/).
|
4
4
|
|
5
|
-
It requires Hadoop (>= 0.
|
5
|
+
It requires Hadoop (>= 0.20.2) and Cascading (>= 2.0.0) to be set via the environment variables: `HADOOP_HOME` and `CASCADING_HOME`
|
6
6
|
|
7
7
|
It has been tested on JRuby versions 1.2.0, 1.4.0, 1.5.3, and 1.6.5.
|
8
8
|
|
data/TODO
CHANGED
@@ -6,7 +6,6 @@ Look into totally eliminating registries
|
|
6
6
|
Bug fixes on github
|
7
7
|
Enforce more runtime rules at composition time
|
8
8
|
Standardize helper contracts
|
9
|
-
Upgrade Cascading (already upgraded JRuby)
|
10
9
|
Possibly combine unit tests...into unit tests because RSpec sucks and swallows stack traces
|
11
10
|
|
12
11
|
Split out runner
|
data/lib/cascading.rb
CHANGED
@@ -6,7 +6,7 @@ require 'java'
|
|
6
6
|
|
7
7
|
module Cascading
|
8
8
|
# :stopdoc:
|
9
|
-
VERSION = '0.0.
|
9
|
+
VERSION = '0.0.7'
|
10
10
|
LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
|
11
11
|
PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
|
12
12
|
CASCADING_HOME = ENV['CASCADING_HOME']
|
@@ -40,7 +40,6 @@ module Cascading
|
|
40
40
|
search_me = ::File.expand_path(
|
41
41
|
::File.join(from, '**', '*.jar'))
|
42
42
|
Dir.glob(search_me).sort.each do |jar|
|
43
|
-
#puts "required: #{jar}"
|
44
43
|
require jar
|
45
44
|
end
|
46
45
|
end
|
data/lib/cascading/assembly.rb
CHANGED
@@ -130,13 +130,13 @@ module Cascading
|
|
130
130
|
if declared_fields
|
131
131
|
case joiner
|
132
132
|
when :inner, "inner", nil
|
133
|
-
joiner = Java::
|
133
|
+
joiner = Java::CascadingPipeJoiner::InnerJoin.new
|
134
134
|
when :left, "left"
|
135
|
-
joiner = Java::
|
135
|
+
joiner = Java::CascadingPipeJoiner::LeftJoin.new
|
136
136
|
when :right, "right"
|
137
|
-
joiner = Java::
|
137
|
+
joiner = Java::CascadingPipeJoiner::RightJoin.new
|
138
138
|
when :outer, "outer"
|
139
|
-
joiner = Java::
|
139
|
+
joiner = Java::CascadingPipeJoiner::OuterJoin.new
|
140
140
|
when Array
|
141
141
|
joiner = joiner.map do |t|
|
142
142
|
case t
|
@@ -145,7 +145,7 @@ module Cascading
|
|
145
145
|
else fail "invalid mixed joiner entry: #{t}"
|
146
146
|
end
|
147
147
|
end
|
148
|
-
joiner = Java::
|
148
|
+
joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
|
149
149
|
end
|
150
150
|
end
|
151
151
|
|
data/lib/cascading/cascading.rb
CHANGED
@@ -90,23 +90,36 @@ module Cascading
|
|
90
90
|
Java::CascadingTuple::Fields::RESULTS
|
91
91
|
end
|
92
92
|
|
93
|
-
# Creates a c.s.TextLine scheme
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
93
|
+
# Creates a c.s.h.TextLine scheme. Positional args are used if <tt>:source_fields</tt> is not provided.
|
94
|
+
#
|
95
|
+
# The named options are:
|
96
|
+
# * <tt>:source_fields</tt> a string or array of strings. Specifies the
|
97
|
+
# fields to be read from a source with this scheme. Defaults to ['offset', 'line'].
|
98
|
+
# * <tt>:sink_fields</tt> a string or array of strings. Specifies the fields
|
99
|
+
# to be written to a sink with this scheme. Defaults to all_fields.
|
100
|
+
# * <tt>:compression</tt> a symbol, either <tt>:enable</tt> or
|
101
|
+
# <tt>:disable</tt>, that governs the TextLine scheme's compression. Defaults
|
102
|
+
# to the default TextLine compression.
|
103
|
+
def text_line_scheme(*args)
|
104
|
+
options = args.extract_options!
|
105
|
+
source_fields = fields(options[:source_fields] || (args.empty? ? ['offset', 'line'] : args))
|
106
|
+
sink_fields = fields(options[:sink_fields]) || all_fields
|
107
|
+
sink_compression = case options[:compression]
|
108
|
+
when :enable then Java::CascadingSchemeHadoop::TextLine::Compress::ENABLE
|
109
|
+
when :disable then Java::CascadingSchemeHadoop::TextLine::Compress::DISABLE
|
110
|
+
else Java::CascadingSchemeHadoop::TextLine::Compress::DEFAULT
|
100
111
|
end
|
112
|
+
|
113
|
+
Java::CascadingSchemeHadoop::TextLine.new(source_fields, sink_fields, sink_compression)
|
101
114
|
end
|
102
115
|
|
103
|
-
# Creates a c.s.SequenceFile scheme instance from the specified fields.
|
116
|
+
# Creates a c.s.h.SequenceFile scheme instance from the specified fields.
|
104
117
|
def sequence_file_scheme(*fields)
|
105
118
|
unless fields.empty?
|
106
119
|
fields = fields(fields)
|
107
|
-
return Java::
|
120
|
+
return Java::CascadingSchemeHadoop::SequenceFile.new(fields)
|
108
121
|
else
|
109
|
-
return Java::
|
122
|
+
return Java::CascadingSchemeHadoop::SequenceFile.new(all_fields)
|
110
123
|
end
|
111
124
|
end
|
112
125
|
|
@@ -129,12 +142,36 @@ module Cascading
|
|
129
142
|
end
|
130
143
|
fs = opts[:kind] || :hfs
|
131
144
|
klass = case fs
|
132
|
-
when :hfs, 'hfs' then Java::
|
133
|
-
when :dfs, 'dfs' then Java::
|
134
|
-
when :lfs, 'lfs' then Java::
|
145
|
+
when :hfs, 'hfs' then Java::CascadingTapHadoop::Hfs
|
146
|
+
when :dfs, 'dfs' then Java::CascadingTapHadoop::Dfs
|
147
|
+
when :lfs, 'lfs' then Java::CascadingTapHadoop::Lfs
|
135
148
|
else raise "Unrecognized kind of tap '#{fs}'"
|
136
149
|
end
|
137
150
|
parameters = [scheme, path, sink_mode]
|
138
151
|
klass.new(*parameters)
|
139
152
|
end
|
153
|
+
|
154
|
+
# Constructs properties to be passed to Flow#complete or Cascade#complete
|
155
|
+
# which will locate temporary Hadoop files in base_dir. It is necessary
|
156
|
+
# to pass these properties only when executing local scripts via JRuby's main
|
157
|
+
# method, which confuses Cascading's attempt to find the containing jar.
|
158
|
+
def local_properties(base_dir)
|
159
|
+
dirs = {
|
160
|
+
'test.build.data' => "#{base_dir}/build",
|
161
|
+
'hadoop.tmp.dir' => "#{base_dir}/tmp",
|
162
|
+
'hadoop.log.dir' => "#{base_dir}/log",
|
163
|
+
}
|
164
|
+
dirs.each{ |key, dir| `mkdir -p #{dir}` }
|
165
|
+
|
166
|
+
job_conf = Java::OrgApacheHadoopMapred::JobConf.new
|
167
|
+
job_conf.jar = dirs['test.build.data']
|
168
|
+
dirs.each{ |key, dir| job_conf.set(key, dir) }
|
169
|
+
|
170
|
+
job_conf.num_map_tasks = 1
|
171
|
+
job_conf.num_reduce_tasks = 1
|
172
|
+
|
173
|
+
properties = java.util.HashMap.new
|
174
|
+
Java::CascadingFlowHadoop::HadoopPlanner.copy_job_conf(properties, job_conf)
|
175
|
+
properties
|
176
|
+
end
|
140
177
|
end
|
@@ -5,13 +5,23 @@
|
|
5
5
|
# underlying Janino expression problems.
|
6
6
|
module Cascading
|
7
7
|
class CascadingException < StandardError
|
8
|
+
attr_accessor :ne, :message, :depth
|
9
|
+
|
8
10
|
def initialize(native_exception, message)
|
9
11
|
@ne = native_exception
|
10
|
-
|
12
|
+
@message = message
|
13
|
+
trace, @depth = trace_causes(@ne, 1)
|
14
|
+
super("#{message}\n#{trace}")
|
11
15
|
end
|
12
16
|
|
13
|
-
|
14
|
-
|
17
|
+
# Fetch cause at depth. If depth is not provided, root cause is returned.
|
18
|
+
def cause(depth = @depth)
|
19
|
+
if depth > @depth
|
20
|
+
warn "WARNING: Depth (#{depth}) greater than depth of cause stack (#{@depth}) requested"
|
21
|
+
nil
|
22
|
+
else
|
23
|
+
fetch_cause(@ne, depth)
|
24
|
+
end
|
15
25
|
end
|
16
26
|
|
17
27
|
private
|
@@ -22,14 +32,20 @@ module Cascading
|
|
22
32
|
end
|
23
33
|
|
24
34
|
def trace_causes(ne, depth)
|
25
|
-
return unless ne
|
35
|
+
return ['', depth - 1] unless ne
|
36
|
+
|
37
|
+
warn "ERROR: Exception does not respond to cause: #{ne}" unless ne.respond_to?(:cause)
|
38
|
+
cause_trace, cause_depth = trace_causes(ne.respond_to?(:cause) ? ne.cause : nil, depth + 1)
|
39
|
+
|
26
40
|
trace = "Cause #{depth}: #{ne.respond_to?(:java_class) ? ne.java_class : ne.class}: #{ne}\n"
|
27
41
|
if ne.respond_to?(:stack_trace)
|
28
42
|
trace += "#{ne.stack_trace.map{ |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n")}\n"
|
29
43
|
elsif ne.respond_to?(:backtrace)
|
30
44
|
trace += " #{ne.backtrace.join("\n ")}\n"
|
31
45
|
end
|
32
|
-
trace +=
|
46
|
+
trace += cause_trace
|
47
|
+
|
48
|
+
[trace, cause_depth]
|
33
49
|
end
|
34
50
|
end
|
35
51
|
end
|
data/lib/cascading/flow.rb
CHANGED
@@ -144,7 +144,11 @@ module Cascading
|
|
144
144
|
puts "#{key}=#{properties[key]}"
|
145
145
|
end
|
146
146
|
|
147
|
-
|
147
|
+
# FIXME: why do I have to do this in 2.0 wip-255?
|
148
|
+
Java::CascadingFlow::FlowConnector.setApplicationName(properties, name)
|
149
|
+
Java::CascadingFlow::FlowConnector.setApplicationVersion(properties, '0.0.0')
|
150
|
+
|
151
|
+
Java::CascadingFlowHadoop::HadoopFlowConnector.new(properties).connect(
|
148
152
|
name,
|
149
153
|
make_tap_parameter(@sources),
|
150
154
|
make_tap_parameter(@sinks),
|
data/lib/cascading/scope.rb
CHANGED
@@ -49,17 +49,20 @@ module Cascading
|
|
49
49
|
<<-END
|
50
50
|
Scope name: #{@scope.name}
|
51
51
|
Kind: #{kind}
|
52
|
-
|
53
|
-
Declared fields: #{@scope.declared_fields}
|
54
|
-
Grouping selectors: #{@scope.grouping_selectors}
|
52
|
+
Key selectors: #{@scope.key_selectors}
|
55
53
|
Sorting selectors: #{@scope.sorting_selectors}
|
54
|
+
Remainder fields: #{@scope.remainder_fields}
|
55
|
+
Declared fields: #{@scope.declared_fields}
|
56
|
+
Arguments
|
57
|
+
selector: #{@scope.arguments_selector}
|
58
|
+
declarator: #{@scope.arguments_declarator}
|
56
59
|
Out grouping
|
57
|
-
selector:
|
58
|
-
fields:
|
59
|
-
key fields: #{@grouping_key_fields}
|
60
|
+
selector: #{@scope.out_grouping_selector}
|
61
|
+
fields: #{grouping_fields} (#{@scope.out_grouping_fields})
|
62
|
+
key fields: #{@grouping_key_fields} (#{@scope.key_selectors})
|
60
63
|
Out values
|
61
64
|
selector: #{@scope.out_values_selector}
|
62
|
-
fields:
|
65
|
+
fields: #{values_fields} (#{@scope.out_values_fields})
|
63
66
|
END
|
64
67
|
end
|
65
68
|
|
data/samples/branch.rb
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
4
4
|
|
5
5
|
require 'cascading'
|
6
|
-
require 'samples/cascading'
|
7
6
|
|
8
7
|
cascade 'branch' do
|
9
8
|
flow 'branch' do
|
@@ -28,4 +27,4 @@ cascade 'branch' do
|
|
28
27
|
sink 'branch1', tap('output/branch1', :sink_mode => :replace)
|
29
28
|
sink 'branch2', tap('output/branch2', :sink_mode => :replace)
|
30
29
|
end
|
31
|
-
end.complete(
|
30
|
+
end.complete(local_properties('build/sample'))
|
data/samples/copy.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'copy' do
|
8
7
|
flow 'copy' do
|
@@ -16,4 +15,4 @@ cascade 'copy' do
|
|
16
15
|
|
17
16
|
sink 'input', tap('output/copy', :sink_mode => :replace)
|
18
17
|
end
|
19
|
-
end.complete(
|
18
|
+
end.complete(local_properties('build/sample'))
|
data/samples/join.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'join' do
|
8
7
|
flow 'join' do
|
@@ -29,4 +28,4 @@ cascade 'join' do
|
|
29
28
|
|
30
29
|
sink 'join', tap('output/join', :sink_mode => :replace)
|
31
30
|
end
|
32
|
-
end.complete(
|
31
|
+
end.complete(local_properties('build/sample'))
|
data/samples/logwordcount.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'logwordcount' do
|
8
7
|
flow 'logwordcount' do
|
@@ -19,4 +18,4 @@ cascade 'logwordcount' do
|
|
19
18
|
|
20
19
|
sink 'input', tap('output/logwordcount', :sink_mode => :replace)
|
21
20
|
end
|
22
|
-
end.complete(
|
21
|
+
end.complete(local_properties('build/sample'))
|
data/samples/project.rb
CHANGED
@@ -4,7 +4,6 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
|
|
4
4
|
# History: "project" (verb) used to be known as "restrict"
|
5
5
|
|
6
6
|
require 'cascading'
|
7
|
-
require 'samples/cascading'
|
8
7
|
|
9
8
|
cascade 'project' do
|
10
9
|
flow 'project' do
|
@@ -21,4 +20,4 @@ cascade 'project' do
|
|
21
20
|
|
22
21
|
sink 'input', tap('output/project', :sink_mode => :replace)
|
23
22
|
end
|
24
|
-
end.complete(
|
23
|
+
end.complete(local_properties('build/sample'))
|
data/samples/rename.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'rename' do
|
8
7
|
flow 'rename' do
|
@@ -18,4 +17,4 @@ cascade 'rename' do
|
|
18
17
|
|
19
18
|
sink 'input', tap('output/rename', :sink_mode => :replace)
|
20
19
|
end
|
21
|
-
end.complete(
|
20
|
+
end.complete(local_properties('build/sample'))
|
data/samples/scorenames.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'scorenames' do
|
8
7
|
flow 'scorenames' do
|
@@ -17,4 +16,4 @@ cascade 'scorenames' do
|
|
17
16
|
|
18
17
|
sink 'input', tap('output/scorenames', :sink_mode => :replace)
|
19
18
|
end
|
20
|
-
end.complete(
|
19
|
+
end.complete(local_properties('build/sample'))
|
data/samples/splitter.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'splitter' do
|
8
7
|
flow 'splitter' do
|
@@ -17,4 +16,4 @@ cascade 'splitter' do
|
|
17
16
|
|
18
17
|
sink 'input', tap('output/splitter', :sink_mode => :replace)
|
19
18
|
end
|
20
|
-
end.complete(
|
19
|
+
end.complete(local_properties('build/sample'))
|
data/samples/union.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'union' do
|
8
7
|
flow 'union' do
|
@@ -32,4 +31,4 @@ cascade 'union' do
|
|
32
31
|
|
33
32
|
sink 'union', tap('output/union', :sink_mode => :replace)
|
34
33
|
end
|
35
|
-
end.complete(
|
34
|
+
end.complete(local_properties('build/sample'))
|
data/spec/scope_spec.rb
CHANGED
@@ -27,10 +27,9 @@ context Cascading::Scope do
|
|
27
27
|
|
28
28
|
# Pass that uses our grouping fields instead of all_fields
|
29
29
|
operation = Java::CascadingOperation::Identity.new
|
30
|
-
make_each(
|
31
|
-
Java::CascadingPipe::Each, tail_pipe, fields([0, 'x_sum']), operation)
|
30
|
+
make_each(Java::CascadingPipe::Each, tail_pipe, fields(['x', 'x_sum']), operation)
|
32
31
|
|
33
|
-
check_scope :values_fields => [
|
32
|
+
check_scope :values_fields => ['x', 'x_sum']
|
34
33
|
end
|
35
34
|
end
|
36
35
|
|
@@ -108,11 +107,6 @@ context Cascading::Scope do
|
|
108
107
|
:grouping_fields => ['x', 'x_sum', 'y_sum']
|
109
108
|
assert_size_equals 3
|
110
109
|
|
111
|
-
# No rename service provided unless you use the block form of join!
|
112
|
-
check_scope :values_fields => [0, 'x_sum', 'y_sum']
|
113
|
-
|
114
|
-
# Mimic rename service
|
115
|
-
bind_names ['x', 'x_sum', 'y_sum']
|
116
110
|
check_scope :values_fields => ['x', 'x_sum', 'y_sum']
|
117
111
|
end
|
118
112
|
end
|
@@ -132,7 +126,7 @@ context Cascading::Scope do
|
|
132
126
|
begin
|
133
127
|
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
134
128
|
rescue CascadingException => e
|
135
|
-
raise e.cause
|
129
|
+
raise e.cause
|
136
130
|
end
|
137
131
|
end
|
138
132
|
end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
|
@@ -159,9 +153,9 @@ context Cascading::Scope do
|
|
159
153
|
end
|
160
154
|
end
|
161
155
|
rescue CascadingException => e
|
162
|
-
raise e.cause
|
156
|
+
raise e.cause
|
163
157
|
end
|
164
|
-
end.should raise_error
|
158
|
+
end.should raise_error Java::CascadingFlowPlanner::PlannerException, "[attempt_group][sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)] Every instances may not split after a GroupBy or CoGroup pipe, found: Every(attempt_group)[Sum[decl:'x_sum'][args:1]] after: CoGroup(left*right)[by:left:[{1}:'x']right:[{1}:'x']]"
|
165
159
|
end
|
166
160
|
|
167
161
|
it 'should propagate names through GroupBy' do
|
data/spec/spec_util.rb
CHANGED
@@ -25,7 +25,7 @@ def test_flow(&block)
|
|
25
25
|
cascade = cascade 'test_app' do
|
26
26
|
flow 'test', &block
|
27
27
|
end
|
28
|
-
cascade.complete(
|
28
|
+
cascade.complete(local_properties(BUILD_DIR))
|
29
29
|
end
|
30
30
|
|
31
31
|
def test_assembly(params = {}, &block)
|
@@ -90,38 +90,3 @@ def test_join_assembly(params = {}, &block)
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
|
94
|
-
def cascading_properties
|
95
|
-
build_dir = "#{BUILD_DIR}/build"
|
96
|
-
`mkdir -p #{build_dir}`
|
97
|
-
tmp_dir = "#{BUILD_DIR}/tmp"
|
98
|
-
`mkdir -p #{tmp_dir}`
|
99
|
-
log_dir = "#{BUILD_DIR}/log"
|
100
|
-
`mkdir -p #{log_dir}`
|
101
|
-
|
102
|
-
# Local cluster settings
|
103
|
-
#java.lang.System.set_property("test.build.data", build_dir)
|
104
|
-
#java.lang.System.set_property("hadoop.tmp.dir", tmp_dir)
|
105
|
-
#java.lang.System.set_property("hadoop.log.dir", log_dir)
|
106
|
-
#conf = Java::OrgApacheHadoopConf::Configuration.new
|
107
|
-
#dfs = Java::OrgApacheHadoopDfs::MiniDFSCluster.new(conf, 4, true, nil);
|
108
|
-
#file_sys = dfs.file_system
|
109
|
-
#mr = Java::OrgApacheHadoopMapred::MiniMRCluster.new(4, file_sys.uri.to_string, 1)
|
110
|
-
#job_conf = mr.create_job_conf
|
111
|
-
#job_conf.set("mapred.child.java.opts", "-Xmx512m")
|
112
|
-
#job_conf.set("mapred.map.tasks.speculative.execution", "false")
|
113
|
-
#job_conf.set("mapred.reduce.tasks.speculative.execution", "false")
|
114
|
-
|
115
|
-
job_conf = Java::OrgApacheHadoopMapred::JobConf.new
|
116
|
-
job_conf.jar = build_dir
|
117
|
-
job_conf.set("test.build.data", build_dir)
|
118
|
-
job_conf.set("hadoop.tmp.dir", tmp_dir)
|
119
|
-
job_conf.set("hadoop.log.dir", log_dir)
|
120
|
-
|
121
|
-
job_conf.num_map_tasks = 4
|
122
|
-
job_conf.num_reduce_tasks = 1
|
123
|
-
|
124
|
-
properties = java.util.HashMap.new({})
|
125
|
-
Java::CascadingFlow::MultiMapReducePlanner.set_job_conf(properties, job_conf)
|
126
|
-
properties
|
127
|
-
end
|
data/tags
CHANGED
@@ -36,8 +36,11 @@ Scope lib/cascading/scope.rb /^ class Scope$/;" c class:Cascading
|
|
36
36
|
ScopeTests spec/spec_util.rb /^module ScopeTests$/;" m
|
37
37
|
TC_Assembly test/test_assembly.rb /^class TC_Assembly < Test::Unit::TestCase$/;" c
|
38
38
|
TC_AssemblyScenarii test/test_assembly.rb /^class TC_AssemblyScenarii < Test::Unit::TestCase$/;" c
|
39
|
+
TC_Cascade test/test_cascade.rb /^class TC_Cascade < Test::Unit::TestCase$/;" c
|
39
40
|
TC_Cascading test/test_cascading.rb /^class TC_Cascading < Test::Unit::TestCase$/;" c
|
41
|
+
TC_Exceptions test/test_exceptions.rb /^class TC_Exceptions < Test::Unit::TestCase$/;" c
|
40
42
|
TC_Flow test/test_flow.rb /^class TC_Flow < Test::Unit::TestCase$/;" c
|
43
|
+
TC_Operations test/test_operations.rb /^class TC_Operations < Test::Unit::TestCase$/;" c
|
41
44
|
add lib/cascading/base.rb /^ def add(name, instance)$/;" f class:Cascading.Registerable
|
42
45
|
add_archive_to_distributed_cache lib/cascading/flow.rb /^ def add_archive_to_distributed_cache(file)$/;" f
|
43
46
|
add_child lib/cascading/base.rb /^ def add_child(node)$/;" f class:Cascading.Node
|
@@ -60,9 +63,8 @@ bind_names lib/cascading/assembly.rb /^ def bind_names(*new_names)$/;" f clas
|
|
60
63
|
branch lib/cascading/assembly.rb /^ def branch(name, &block)$/;" f class:Cascading.Assembly
|
61
64
|
cascade lib/cascading/cascading.rb /^ def cascade(name, &block)$/;" f class:Cascading
|
62
65
|
cascading.jruby src/cascading/jruby/Main.java /^package cascading.jruby;$/;" p
|
63
|
-
cascading_properties spec/spec_util.rb /^def cascading_properties$/;" f
|
64
66
|
cast lib/cascading/assembly.rb /^ def cast(type_map)$/;" f class:Cascading.Assembly
|
65
|
-
cause lib/cascading/cascading_exception.rb /^ def cause(depth)$/;" f class:Cascading.CascadingException
|
67
|
+
cause lib/cascading/cascading_exception.rb /^ def cause(depth = @depth)$/;" f class:Cascading.CascadingException
|
66
68
|
check_scope spec/spec_util.rb /^ def check_scope(params = {})$/;" f class:ScopeTests
|
67
69
|
compare_with_references test/test_assembly.rb /^def compare_with_references(test_name)$/;" f
|
68
70
|
compile lib/cascading/expr_stub.rb /^ def compile$/;" f class:Cascading
|
@@ -89,7 +91,6 @@ describe lib/cascading/base.rb /^ def describe(offset = '')$/;" f class:Casca
|
|
89
91
|
describe lib/cascading/cascade.rb /^ def describe(offset = '')$/;" f class:Cascading.Cascade
|
90
92
|
describe lib/cascading/cascading.rb /^ def describe$/;" f class:Cascading
|
91
93
|
describe lib/cascading/flow.rb /^ def describe(offset = '')$/;" f class:Cascading.Flow
|
92
|
-
describe_job spec/spec_util.rb /^def describe_job(job_file, &block)$/;" f
|
93
94
|
difference_fields lib/cascading/cascading.rb /^ def difference_fields(*fields)$/;" f class:Cascading
|
94
95
|
discard lib/cascading/assembly.rb /^ def discard(*args)$/;" f class:Cascading.Assembly
|
95
96
|
distinct lib/cascading/assembly.rb /^ def distinct(*args)$/;" f class:Cascading.Assembly
|
@@ -145,6 +146,7 @@ last_function lib/cascading/operations.rb /^ def last_function(*args)$/;" f c
|
|
145
146
|
last_grouping_fields lib/cascading/cascading.rb /^ def last_grouping_fields$/;" f class:Cascading
|
146
147
|
left_join lib/cascading/assembly.rb /^ def left_join(*args, &block)$/;" f class:Cascading.Assembly
|
147
148
|
libpath lib/cascading.rb /^ def self.libpath( *args )$/;" F class:Cascading
|
149
|
+
local_properties lib/cascading/cascading.rb /^ def local_properties(base_dir)$/;" f
|
148
150
|
main src/cascading/jruby/Main.java /^ public static void main(String[] args) {$/;" m class:Main
|
149
151
|
make_each lib/cascading/assembly.rb /^ def make_each(type, *parameters)$/;" f class:Cascading.Assembly
|
150
152
|
make_every lib/cascading/assembly.rb /^ def make_every(type, *parameters)$/;" f class:Cascading.Assembly
|
@@ -159,7 +161,7 @@ max_function lib/cascading/operations.rb /^ def max_function(*args)$/;" f cla
|
|
159
161
|
min lib/cascading/assembly.rb /^ def min(*args); composite_aggregator(args, :min_function); end$/;" f class:Cascading.Assembly
|
160
162
|
min_function lib/cascading/operations.rb /^ def min_function(*args)$/;" f class:Cascading.Operations
|
161
163
|
mock_assembly test/test_assembly.rb /^ def mock_assembly(&block)$/;" f class:TC_Assembly
|
162
|
-
multi_tap lib/cascading/cascading.rb /^ def multi_tap(*taps)$/;" f
|
164
|
+
multi_tap lib/cascading/cascading.rb /^ def multi_tap(*taps)$/;" f
|
163
165
|
names_and_types lib/cascading/expr_stub.rb /^ def names_and_types$/;" f class:Cascading
|
164
166
|
outer_join lib/cascading/assembly.rb /^ def outer_join(*args, &block)$/;" f class:Cascading.Assembly
|
165
167
|
outgoing_scope lib/cascading/scope.rb /^ def self.outgoing_scope(flow_element, incoming_scopes, grouping_key_fields, every_applied)$/;" F class:Cascading.Scope
|
@@ -171,6 +173,7 @@ parse_date lib/cascading/assembly.rb /^ def parse_date(*args)$/;" f class:Cas
|
|
171
173
|
pass lib/cascading/assembly.rb /^ def pass(*args)$/;" f class:Cascading.Assembly
|
172
174
|
path lib/cascading.rb /^ def self.path( *args )$/;" F class:Cascading
|
173
175
|
project lib/cascading/assembly.rb /^ def project(*args)$/;" f class:Cascading.Assembly
|
176
|
+
qualified_name lib/cascading/base.rb /^ def qualified_name$/;" f class:Cascading.Node
|
174
177
|
quiet tasks/setup.rb /^def quiet( &block )$/;" f
|
175
178
|
regex_filter lib/cascading/operations.rb /^ def regex_filter(*args)$/;" f class:Cascading.Operations.to_java_comparable_array
|
176
179
|
regex_generator lib/cascading/operations.rb /^ def regex_generator(*args)$/;" f class:Cascading.Operations
|
@@ -187,11 +190,12 @@ require_all_jars lib/cascading.rb /^ def self.require_all_jars(from = ::File.jo
|
|
187
190
|
reset lib/cascading/base.rb /^ def reset$/;" f class:Cascading.Registerable
|
188
191
|
results_fields lib/cascading/cascading.rb /^ def results_fields$/;" f class:Cascading
|
189
192
|
right_join lib/cascading/assembly.rb /^ def right_join(*args, &block)$/;" f class:Cascading.Assembly
|
193
|
+
root lib/cascading/base.rb /^ def root$/;" f class:Cascading.Node
|
190
194
|
sample_properties samples/cascading.rb /^ def sample_properties$/;" f class:Cascading
|
191
195
|
scope lib/cascading/assembly.rb /^ def scope$/;" f class:Cascading.Assembly
|
192
196
|
scope lib/cascading/flow.rb /^ def scope(name = nil)$/;" f class:Cascading.Flow
|
193
197
|
search_field_name lib/cascading/cascading.rb /^ def search_field_name(names, candidate)$/;" f class:Cascading
|
194
|
-
sequence_file_scheme lib/cascading/cascading.rb /^ def sequence_file_scheme(*fields)$/;" f
|
198
|
+
sequence_file_scheme lib/cascading/cascading.rb /^ def sequence_file_scheme(*fields)$/;" f
|
195
199
|
set_spill_threshold lib/cascading/flow.rb /^ def set_spill_threshold(threshold)$/;" f
|
196
200
|
sink lib/cascading/flow.rb /^ def sink(*args)$/;" f class:Cascading.Flow
|
197
201
|
sink_metadata lib/cascading/cascade.rb /^ def sink_metadata$/;" f class:Cascading.Cascade
|
@@ -203,13 +207,19 @@ split_hash lib/cascading/expr_stub.rb /^ def split_hash(h)$/;" f class:Cascad
|
|
203
207
|
split_rows lib/cascading/assembly.rb /^ def split_rows(*args)$/;" f class:Cascading.Assembly
|
204
208
|
sum lib/cascading/assembly.rb /^ def sum(*args)$/;" f class:Cascading.Assembly
|
205
209
|
sum_function lib/cascading/operations.rb /^ def sum_function(*args)$/;" f class:Cascading.Operations
|
206
|
-
tap lib/cascading/cascading.rb /^ def tap(*args)$/;" f
|
210
|
+
tap lib/cascading/cascading.rb /^ def tap(*args)$/;" f
|
207
211
|
tap_scope lib/cascading/scope.rb /^ def self.tap_scope(tap, name)$/;" F class:Cascading.Scope
|
212
|
+
test_aggregator_function_ignore_exception test/test_operations.rb /^ def test_aggregator_function_ignore_exception$/;" f class:TC_Operations
|
213
|
+
test_aggregator_function_ignore_tuples test/test_operations.rb /^ def test_aggregator_function_ignore_tuples$/;" f class:TC_Operations
|
214
|
+
test_aggregator_function_ignore_values test/test_operations.rb /^ def test_aggregator_function_ignore_values$/;" f class:TC_Operations
|
215
|
+
test_ambiguous_assembly_names test/test_flow.rb /^ def test_ambiguous_assembly_names$/;" f class:TC_Flow
|
216
|
+
test_ambiguous_branch_names test/test_flow.rb /^ def test_ambiguous_branch_names$/;" f class:TC_Flow
|
208
217
|
test_assembly spec/spec_util.rb /^def test_assembly(params = {}, &block)$/;" f
|
209
|
-
test_assembly test/test_flow.rb /^ def test_assembly$/;" f class:TC_Flow
|
210
218
|
test_branch_empty test/test_assembly.rb /^ def test_branch_empty$/;" f
|
211
219
|
test_branch_single test/test_assembly.rb /^ def test_branch_single$/;" f
|
212
220
|
test_branch_unique test/test_assembly.rb /^ def test_branch_unique$/;" f
|
221
|
+
test_cascade test/test_cascade.rb /^ def test_cascade$/;" f class:TC_Cascade
|
222
|
+
test_cascading_exception test/test_exceptions.rb /^ def test_cascading_exception$/;" f class:TC_Exceptions
|
213
223
|
test_create_assembly_simple test/test_assembly.rb /^ def test_create_assembly_simple$/;" f class:TC_Assembly
|
214
224
|
test_create_each test/test_assembly.rb /^ def test_create_each$/;" f
|
215
225
|
test_create_every test/test_assembly.rb /^ def test_create_every$/;" f
|
@@ -219,18 +229,25 @@ test_create_group_by_reverse test/test_assembly.rb /^ def test_create_group_by_
|
|
219
229
|
test_create_group_by_with_sort test/test_assembly.rb /^ def test_create_group_by_with_sort$/;" f
|
220
230
|
test_create_group_by_with_sort_reverse test/test_assembly.rb /^ def test_create_group_by_with_sort_reverse$/;" f
|
221
231
|
test_each_identity test/test_assembly.rb /^ def test_each_identity$/;" f
|
232
|
+
test_empty_where test/test_assembly.rb /^ def test_empty_where$/;" f
|
222
233
|
test_fields_field test/test_cascading.rb /^ def test_fields_field$/;" f class:TC_Cascading
|
223
234
|
test_fields_multiple test/test_cascading.rb /^ def test_fields_multiple$/;" f class:TC_Cascading
|
224
235
|
test_fields_single test/test_cascading.rb /^ def test_fields_single$/;" f class:TC_Cascading
|
225
236
|
test_flow spec/spec_util.rb /^def test_flow(&block)$/;" f
|
237
|
+
test_flow test/test_flow.rb /^ def test_flow$/;" f class:TC_Flow
|
226
238
|
test_full_assembly test/test_assembly.rb /^ def test_full_assembly$/;" f
|
227
|
-
test_join1 test/test_assembly.rb /^ def test_join1$/;" f
|
239
|
+
test_join1 test/test_assembly.rb /^ def test_join1$/;" f class:TC_AssemblyScenarii
|
228
240
|
test_join2 test/test_assembly.rb /^ def test_join2$/;" f
|
229
241
|
test_join_assembly spec/spec_util.rb /^def test_join_assembly(params = {}, &block)$/;" f
|
230
|
-
|
242
|
+
test_smoke_test_debug_scope test/test_assembly.rb /^ def test_smoke_test_debug_scope$/;" f
|
243
|
+
test_smoke_test_sequence_file_scheme test/test_assembly.rb /^ def test_smoke_test_sequence_file_scheme$/;" f class:TC_AssemblyScenarii
|
244
|
+
test_splitter test/test_assembly.rb /^ def test_splitter$/;" f class:TC_AssemblyScenarii.test_smoke_test_sequence_file_scheme
|
231
245
|
test_tap test/test_cascading.rb /^ def test_tap$/;" f class:TC_Cascading
|
232
246
|
test_values lib/cascading/expr_stub.rb /^ def test_values$/;" f class:Cascading
|
233
|
-
|
247
|
+
test_where test/test_assembly.rb /^ def test_where$/;" f
|
248
|
+
test_where_with_expression test/test_assembly.rb /^ def test_where_with_expression$/;" f
|
249
|
+
test_where_with_import test/test_assembly.rb /^ def test_where_with_import$/;" f
|
250
|
+
text_line_scheme lib/cascading/cascading.rb /^ def text_line_scheme(*args)$/;" f class:Cascading
|
234
251
|
to_java_comparable_array lib/cascading/operations.rb /^ def to_java_comparable_array(arr)$/;" f class:Cascading.Operations
|
235
252
|
to_s lib/cascading/assembly.rb /^ def to_s$/;" f class:Cascading.Assembly
|
236
253
|
to_s lib/cascading/expr_stub.rb /^ def to_s$/;" f class:Cascading
|
@@ -244,7 +261,6 @@ validate lib/cascading/expr_stub.rb /^ def validate(actual_args = {})$/;" f c
|
|
244
261
|
validate_fields lib/cascading/expr_stub.rb /^ def validate_fields(fields)$/;" f class:Cascading
|
245
262
|
validate_scope lib/cascading/expr_stub.rb /^ def validate_scope(scope)$/;" f class:Cascading
|
246
263
|
values_fields lib/cascading/scope.rb /^ def values_fields$/;" f class:Cascading.Scope
|
247
|
-
verify_assembly_output spec/spec_util.rb /^def verify_assembly_output(assembly_name, params, &block)$/;" f
|
248
264
|
version lib/cascading.rb /^ def self.version$/;" F class:Cascading
|
249
265
|
where lib/cascading/assembly.rb /^ def where(*args)$/;" f class:Cascading.Assembly
|
250
266
|
write_sink_metadata lib/cascading/cascade.rb /^ def write_sink_metadata(file_name)$/;" f class:Cascading.Cascade
|
data/test/test_assembly.rb
CHANGED
@@ -45,13 +45,15 @@ class TC_Assembly < Test::Unit::TestCase
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def test_create_each
|
48
|
-
# You can
|
49
|
-
|
50
|
-
|
51
|
-
each(:filter => identity)
|
52
|
-
end
|
53
|
-
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
|
48
|
+
# You can apply an Each to 0 fields
|
49
|
+
assembly = mock_assembly do
|
50
|
+
each(:filter => identity)
|
54
51
|
end
|
52
|
+
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
|
53
|
+
|
54
|
+
# In which case, it has empty argument and output selectors
|
55
|
+
assert_equal 0, assembly.tail_pipe.argument_selector.size
|
56
|
+
assert_equal 0, assembly.tail_pipe.output_selector.size
|
55
57
|
|
56
58
|
assembly = mock_assembly do
|
57
59
|
each('offset', :output => 'offset_copy',
|
@@ -61,8 +63,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
61
63
|
|
62
64
|
assert pipe.is_a? Java::CascadingPipe::Each
|
63
65
|
|
64
|
-
assert_equal 'offset', pipe.
|
65
|
-
assert_equal 'offset_copy', pipe.
|
66
|
+
assert_equal 'offset', pipe.argument_selector.get(0)
|
67
|
+
assert_equal 'offset_copy', pipe.output_selector.get(0)
|
66
68
|
end
|
67
69
|
|
68
70
|
# For now, replaced these tests with the trivial observation that you can't
|
@@ -89,7 +91,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
89
91
|
every("Field1", :aggregator => count_function)
|
90
92
|
end
|
91
93
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
|
92
|
-
assert_equal "Field1", assembly.tail_pipe.
|
94
|
+
assert_equal "Field1", assembly.tail_pipe.argument_selector.get(0)
|
93
95
|
end
|
94
96
|
|
95
97
|
assert_raise CascadingException do
|
@@ -97,8 +99,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
97
99
|
every('line', :aggregator => count_function, :output=>'line_count')
|
98
100
|
end
|
99
101
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
|
100
|
-
assert_equal 'line', assembly.tail_pipe.
|
101
|
-
assert_equal 'line_count', assembly.tail_pipe.
|
102
|
+
assert_equal 'line', assembly.tail_pipe.argument_selector.get(0)
|
103
|
+
assert_equal 'line_count', assembly.tail_pipe.output_selector.get(0)
|
102
104
|
end
|
103
105
|
end
|
104
106
|
|
@@ -108,7 +110,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
108
110
|
end
|
109
111
|
|
110
112
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
111
|
-
grouping_fields = assembly.tail_pipe.
|
113
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
112
114
|
assert_equal 'line', grouping_fields.get(0)
|
113
115
|
|
114
116
|
assembly = mock_assembly do
|
@@ -116,7 +118,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
116
118
|
end
|
117
119
|
|
118
120
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
119
|
-
grouping_fields = assembly.tail_pipe.
|
121
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
120
122
|
assert_equal 'line', grouping_fields.get(0)
|
121
123
|
end
|
122
124
|
|
@@ -126,7 +128,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
126
128
|
end
|
127
129
|
|
128
130
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
129
|
-
grouping_fields = assembly.tail_pipe.
|
131
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
130
132
|
assert_equal 'offset', grouping_fields.get(0)
|
131
133
|
assert_equal 'line', grouping_fields.get(1)
|
132
134
|
end
|
@@ -137,8 +139,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
137
139
|
end
|
138
140
|
|
139
141
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
140
|
-
grouping_fields = assembly.tail_pipe.
|
141
|
-
sorting_fields = assembly.tail_pipe.
|
142
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
143
|
+
sorting_fields = assembly.tail_pipe.sorting_selectors['test']
|
142
144
|
|
143
145
|
assert_equal 2, grouping_fields.size
|
144
146
|
assert_equal 1, sorting_fields.size
|
@@ -156,8 +158,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
156
158
|
end
|
157
159
|
|
158
160
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
159
|
-
grouping_fields = assembly.tail_pipe.
|
160
|
-
sorting_fields = assembly.tail_pipe.
|
161
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
162
|
+
sorting_fields = assembly.tail_pipe.sorting_selectors['test']
|
161
163
|
|
162
164
|
assert_equal 2, grouping_fields.size
|
163
165
|
assert_equal 1, sorting_fields.size
|
@@ -175,8 +177,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
175
177
|
end
|
176
178
|
|
177
179
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
178
|
-
grouping_fields = assembly.tail_pipe.
|
179
|
-
sorting_fields = assembly.tail_pipe.
|
180
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
181
|
+
sorting_fields = assembly.tail_pipe.sorting_selectors['test']
|
180
182
|
|
181
183
|
assert_equal 2, grouping_fields.size
|
182
184
|
assert_equal 2, sorting_fields.size
|
@@ -283,10 +285,34 @@ class TC_Assembly < Test::Unit::TestCase
|
|
283
285
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
|
284
286
|
assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
|
285
287
|
end
|
286
|
-
end
|
287
288
|
|
289
|
+
def test_smoke_test_debug_scope
|
290
|
+
cascade 'smoke' do
|
291
|
+
flow 'smoke' do
|
292
|
+
source 'input', tap('test/data/data1.txt')
|
293
|
+
assembly 'input' do
|
294
|
+
pass
|
295
|
+
debug_scope
|
296
|
+
end
|
297
|
+
sink 'input', tap('output/smoke_test_debug_scope')
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
288
302
|
|
289
303
|
class TC_AssemblyScenarii < Test::Unit::TestCase
|
304
|
+
def test_smoke_test_sequence_file_scheme
|
305
|
+
cascade 'smoke' do
|
306
|
+
flow 'smoke' do
|
307
|
+
source 'input', tap('test/data/data1.txt')
|
308
|
+
assembly 'input' do
|
309
|
+
pass
|
310
|
+
end
|
311
|
+
compress_output :default, :block
|
312
|
+
sink 'input', tap('output/smoke_test_sequence_file_scheme', :scheme => sequence_file_scheme)
|
313
|
+
end
|
314
|
+
end.complete
|
315
|
+
end
|
290
316
|
|
291
317
|
def test_splitter
|
292
318
|
flow = flow "splitter" do
|
data/test/test_cascading.rb
CHANGED
@@ -32,18 +32,18 @@ class TC_Cascading < Test::Unit::TestCase
|
|
32
32
|
def test_tap
|
33
33
|
tap = tap('/temp')
|
34
34
|
assert_equal '/temp', tap.getPath().toString()
|
35
|
-
assert tap.is_a? Java::
|
35
|
+
assert tap.is_a? Java::CascadingTapHadoop::Hfs
|
36
36
|
|
37
37
|
tap = tap('/temp', :kind => :dfs)
|
38
38
|
assert_equal '/temp', tap.getPath().toString()
|
39
|
-
assert tap.is_a? Java::
|
39
|
+
assert tap.is_a? Java::CascadingTapHadoop::Dfs
|
40
40
|
|
41
41
|
tap = tap('/temp', :kind => :lfs)
|
42
42
|
assert_equal '/temp', tap.getPath().toString()
|
43
|
-
assert tap.is_a? Java::
|
43
|
+
assert tap.is_a? Java::CascadingTapHadoop::Lfs
|
44
44
|
|
45
45
|
tap = tap('/temp', :kind => :hfs)
|
46
46
|
assert_equal '/temp', tap.getPath().toString()
|
47
|
-
assert tap.is_a? Java::
|
47
|
+
assert tap.is_a? Java::CascadingTapHadoop::Hfs
|
48
48
|
end
|
49
49
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'cascading'
|
3
|
+
|
4
|
+
class TC_Exceptions < Test::Unit::TestCase
|
5
|
+
def test_cascading_exception
|
6
|
+
ne3 = java.lang.IllegalArgumentException.new('Root cause')
|
7
|
+
ne2 = Java::CascadingPipe::OperatorException.new(Java::CascadingPipe::Pipe.new('dummy'), 'Exception thrown by Cascading', ne3)
|
8
|
+
ne1 = Java::Cascading::CascadingException.new('Exception Cascading hands us', ne2)
|
9
|
+
e = CascadingException.new(ne1, 'cascading.jruby wrapper exception')
|
10
|
+
|
11
|
+
assert_equal ne1, e.ne
|
12
|
+
assert_equal 'cascading.jruby wrapper exception', e.message
|
13
|
+
assert_equal 3, e.depth
|
14
|
+
|
15
|
+
|
16
|
+
assert_equal ne1, e.cause(1)
|
17
|
+
assert_equal 'Exception Cascading hands us', e.cause(1).message
|
18
|
+
|
19
|
+
assert_equal ne2, e.cause(2)
|
20
|
+
# Cascading inserts Operator#to_s, here
|
21
|
+
assert_match /Exception thrown by Cascading/, e.cause(2).message
|
22
|
+
|
23
|
+
assert_equal ne3, e.cause(3)
|
24
|
+
assert_equal 'Root cause', e.cause(3).message
|
25
|
+
|
26
|
+
# Shallower than depth 1 is the first cause
|
27
|
+
(-5..0).each do |i|
|
28
|
+
assert_equal ne1, e.cause(i)
|
29
|
+
assert_equal 'Exception Cascading hands us', e.cause(i).message
|
30
|
+
end
|
31
|
+
|
32
|
+
# Deeper than the root cause is nil
|
33
|
+
(4..10).each do |i|
|
34
|
+
assert_nil e.cause(i)
|
35
|
+
end
|
36
|
+
|
37
|
+
# cause without depth returns root cause
|
38
|
+
assert_equal ne3, e.cause
|
39
|
+
assert_equal 'Root cause', e.cause.message
|
40
|
+
end
|
41
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cascading.jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.
|
5
|
+
version: 0.0.7
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Matt Walker
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-04-02 00:00:00 Z
|
15
15
|
dependencies: []
|
16
16
|
|
17
17
|
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
@@ -52,7 +52,6 @@ files:
|
|
52
52
|
- lib/cascading/operations.rb
|
53
53
|
- lib/cascading/scope.rb
|
54
54
|
- samples/branch.rb
|
55
|
-
- samples/cascading.rb
|
56
55
|
- samples/copy.rb
|
57
56
|
- samples/data/data2.txt
|
58
57
|
- samples/data/data_join1.txt
|
@@ -96,6 +95,7 @@ files:
|
|
96
95
|
- test/test_assembly.rb
|
97
96
|
- test/test_cascade.rb
|
98
97
|
- test/test_cascading.rb
|
98
|
+
- test/test_exceptions.rb
|
99
99
|
- test/test_flow.rb
|
100
100
|
- test/test_operations.rb
|
101
101
|
homepage: http://github.com/etsy/cascading.jruby
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
requirements: []
|
123
123
|
|
124
124
|
rubyforge_project: cascading.jruby
|
125
|
-
rubygems_version: 1.8.
|
125
|
+
rubygems_version: 1.8.21
|
126
126
|
signing_key:
|
127
127
|
specification_version: 3
|
128
128
|
summary: A JRuby DSL for Cascading
|
@@ -130,5 +130,6 @@ test_files:
|
|
130
130
|
- test/test_assembly.rb
|
131
131
|
- test/test_cascade.rb
|
132
132
|
- test/test_cascading.rb
|
133
|
+
- test/test_exceptions.rb
|
133
134
|
- test/test_flow.rb
|
134
135
|
- test/test_operations.rb
|
data/samples/cascading.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
module Cascading
|
2
|
-
# Constructs properties to be passed to Flow#complete or Cascade#complete
|
3
|
-
# which will locate temporary Hadoop files in build/sample. It is necessary
|
4
|
-
# to pass these properties only because the sample apps are invoked using
|
5
|
-
# JRuby's main method, which confuses the JobConf's attempt to find the
|
6
|
-
# containing jar.
|
7
|
-
def sample_properties
|
8
|
-
build_dir = 'build/sample/build'
|
9
|
-
`mkdir -p #{build_dir}`
|
10
|
-
tmp_dir = "build/sample/tmp"
|
11
|
-
`mkdir -p #{tmp_dir}`
|
12
|
-
log_dir = "build/sample/log"
|
13
|
-
`mkdir -p #{log_dir}`
|
14
|
-
|
15
|
-
# Local cluster settings
|
16
|
-
#java.lang.System.set_property("test.build.data", build_dir)
|
17
|
-
#java.lang.System.set_property("hadoop.tmp.dir", tmp_dir)
|
18
|
-
#java.lang.System.set_property("hadoop.log.dir", log_dir)
|
19
|
-
#conf = Java::OrgApacheHadoopConf::Configuration.new
|
20
|
-
#dfs = Java::OrgApacheHadoopDfs::MiniDFSCluster.new(conf, 4, true, nil);
|
21
|
-
#file_sys = dfs.file_system
|
22
|
-
#mr = Java::OrgApacheHadoopMapred::MiniMRCluster.new(4, file_sys.uri.to_string, 1)
|
23
|
-
#job_conf = mr.create_job_conf
|
24
|
-
#job_conf.set("mapred.child.java.opts", "-Xmx512m")
|
25
|
-
#job_conf.set("mapred.map.tasks.speculative.execution", "false")
|
26
|
-
#job_conf.set("mapred.reduce.tasks.speculative.execution", "false")
|
27
|
-
|
28
|
-
job_conf = Java::OrgApacheHadoopMapred::JobConf.new
|
29
|
-
job_conf.jar = build_dir
|
30
|
-
job_conf.set("test.build.data", build_dir)
|
31
|
-
job_conf.set("hadoop.tmp.dir", tmp_dir)
|
32
|
-
job_conf.set("hadoop.log.dir", log_dir)
|
33
|
-
|
34
|
-
job_conf.num_map_tasks = 4
|
35
|
-
job_conf.num_reduce_tasks = 1
|
36
|
-
|
37
|
-
properties = java.util.HashMap.new({})
|
38
|
-
Java::CascadingFlow::MultiMapReducePlanner.set_job_conf(properties, job_conf)
|
39
|
-
properties
|
40
|
-
end
|
41
|
-
end
|