cascading.jruby 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/README.md +1 -1
- data/TODO +0 -1
- data/lib/cascading.rb +1 -2
- data/lib/cascading/assembly.rb +5 -5
- data/lib/cascading/cascading.rb +50 -13
- data/lib/cascading/cascading_exception.rb +21 -5
- data/lib/cascading/flow.rb +5 -1
- data/lib/cascading/scope.rb +10 -7
- data/samples/branch.rb +1 -2
- data/samples/copy.rb +1 -2
- data/samples/join.rb +1 -2
- data/samples/logwordcount.rb +1 -2
- data/samples/project.rb +1 -2
- data/samples/rename.rb +1 -2
- data/samples/scorenames.rb +1 -2
- data/samples/splitter.rb +1 -2
- data/samples/union.rb +1 -2
- data/spec/scope_spec.rb +5 -11
- data/spec/spec_util.rb +1 -36
- data/tags +27 -11
- data/test/test_assembly.rb +47 -21
- data/test/test_cascading.rb +4 -4
- data/test/test_exceptions.rb +41 -0
- metadata +5 -4
- data/samples/cascading.rb +0 -41
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
0.0.7 - Upgrade to Cascading 2.0.0 wip-255
|
2
|
+
|
3
|
+
This release upgrades to Cascading 2.0.0 wip-255, but implements neither local
|
4
|
+
mode nor aggregations through AggregateBy. Local jobs continue to run in Hadoop
|
5
|
+
mode, but we intend to release local mode support soon.
|
6
|
+
|
1
7
|
0.0.6 - Removing primary key
|
2
8
|
|
3
9
|
The primary key feature was a source of great confusion at Etsy, so it's been
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
`cascading.jruby` is a small DSL above [Cascading](http://www.cascading.org/).
|
4
4
|
|
5
|
-
It requires Hadoop (>= 0.
|
5
|
+
It requires Hadoop (>= 0.20.2) and Cascading (>= 2.0.0), with their install locations set via the environment variables `HADOOP_HOME` and `CASCADING_HOME`.
|
6
6
|
|
7
7
|
It has been tested on JRuby versions 1.2.0, 1.4.0, 1.5.3, and 1.6.5.
|
8
8
|
|
data/TODO
CHANGED
@@ -6,7 +6,6 @@ Look into totally eliminating registries
|
|
6
6
|
Bug fixes on github
|
7
7
|
Enforce more runtime rules at composition time
|
8
8
|
Standardize helper contracts
|
9
|
-
Upgrade Cascading (already upgraded JRuby)
|
10
9
|
Possibly combine unit tests...into unit tests because RSpec sucks and swallows stack traces
|
11
10
|
|
12
11
|
Split out runner
|
data/lib/cascading.rb
CHANGED
@@ -6,7 +6,7 @@ require 'java'
|
|
6
6
|
|
7
7
|
module Cascading
|
8
8
|
# :stopdoc:
|
9
|
-
VERSION = '0.0.
|
9
|
+
VERSION = '0.0.7'
|
10
10
|
LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
|
11
11
|
PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
|
12
12
|
CASCADING_HOME = ENV['CASCADING_HOME']
|
@@ -40,7 +40,6 @@ module Cascading
|
|
40
40
|
search_me = ::File.expand_path(
|
41
41
|
::File.join(from, '**', '*.jar'))
|
42
42
|
Dir.glob(search_me).sort.each do |jar|
|
43
|
-
#puts "required: #{jar}"
|
44
43
|
require jar
|
45
44
|
end
|
46
45
|
end
|
data/lib/cascading/assembly.rb
CHANGED
@@ -130,13 +130,13 @@ module Cascading
|
|
130
130
|
if declared_fields
|
131
131
|
case joiner
|
132
132
|
when :inner, "inner", nil
|
133
|
-
joiner = Java::
|
133
|
+
joiner = Java::CascadingPipeJoiner::InnerJoin.new
|
134
134
|
when :left, "left"
|
135
|
-
joiner = Java::
|
135
|
+
joiner = Java::CascadingPipeJoiner::LeftJoin.new
|
136
136
|
when :right, "right"
|
137
|
-
joiner = Java::
|
137
|
+
joiner = Java::CascadingPipeJoiner::RightJoin.new
|
138
138
|
when :outer, "outer"
|
139
|
-
joiner = Java::
|
139
|
+
joiner = Java::CascadingPipeJoiner::OuterJoin.new
|
140
140
|
when Array
|
141
141
|
joiner = joiner.map do |t|
|
142
142
|
case t
|
@@ -145,7 +145,7 @@ module Cascading
|
|
145
145
|
else fail "invalid mixed joiner entry: #{t}"
|
146
146
|
end
|
147
147
|
end
|
148
|
-
joiner = Java::
|
148
|
+
joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
|
149
149
|
end
|
150
150
|
end
|
151
151
|
|
data/lib/cascading/cascading.rb
CHANGED
@@ -90,23 +90,36 @@ module Cascading
|
|
90
90
|
Java::CascadingTuple::Fields::RESULTS
|
91
91
|
end
|
92
92
|
|
93
|
-
# Creates a c.s.TextLine scheme
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
93
|
+
# Creates a c.s.h.TextLine scheme. Positional args are used if <tt>:source_fields</tt> is not provided.
|
94
|
+
#
|
95
|
+
# The named options are:
|
96
|
+
# * <tt>:source_fields</tt> a string or array of strings. Specifies the
|
97
|
+
# fields to be read from a source with this scheme. Defaults to ['offset', 'line'].
|
98
|
+
# * <tt>:sink_fields</tt> a string or array of strings. Specifies the fields
|
99
|
+
# to be written to a sink with this scheme. Defaults to all_fields.
|
100
|
+
# * <tt>:compression</tt> a symbol, either <tt>:enable</tt> or
|
101
|
+
# <tt>:disable</tt>, that governs the TextLine scheme's compression. Defaults
|
102
|
+
# to the default TextLine compression.
|
103
|
+
def text_line_scheme(*args)
|
104
|
+
options = args.extract_options!
|
105
|
+
source_fields = fields(options[:source_fields] || (args.empty? ? ['offset', 'line'] : args))
|
106
|
+
sink_fields = fields(options[:sink_fields]) || all_fields
|
107
|
+
sink_compression = case options[:compression]
|
108
|
+
when :enable then Java::CascadingSchemeHadoop::TextLine::Compress::ENABLE
|
109
|
+
when :disable then Java::CascadingSchemeHadoop::TextLine::Compress::DISABLE
|
110
|
+
else Java::CascadingSchemeHadoop::TextLine::Compress::DEFAULT
|
100
111
|
end
|
112
|
+
|
113
|
+
Java::CascadingSchemeHadoop::TextLine.new(source_fields, sink_fields, sink_compression)
|
101
114
|
end
|
102
115
|
|
103
|
-
# Creates a c.s.SequenceFile scheme instance from the specified fields.
|
116
|
+
# Creates a c.s.h.SequenceFile scheme instance from the specified fields.
|
104
117
|
def sequence_file_scheme(*fields)
|
105
118
|
unless fields.empty?
|
106
119
|
fields = fields(fields)
|
107
|
-
return Java::
|
120
|
+
return Java::CascadingSchemeHadoop::SequenceFile.new(fields)
|
108
121
|
else
|
109
|
-
return Java::
|
122
|
+
return Java::CascadingSchemeHadoop::SequenceFile.new(all_fields)
|
110
123
|
end
|
111
124
|
end
|
112
125
|
|
@@ -129,12 +142,36 @@ module Cascading
|
|
129
142
|
end
|
130
143
|
fs = opts[:kind] || :hfs
|
131
144
|
klass = case fs
|
132
|
-
when :hfs, 'hfs' then Java::
|
133
|
-
when :dfs, 'dfs' then Java::
|
134
|
-
when :lfs, 'lfs' then Java::
|
145
|
+
when :hfs, 'hfs' then Java::CascadingTapHadoop::Hfs
|
146
|
+
when :dfs, 'dfs' then Java::CascadingTapHadoop::Dfs
|
147
|
+
when :lfs, 'lfs' then Java::CascadingTapHadoop::Lfs
|
135
148
|
else raise "Unrecognized kind of tap '#{fs}'"
|
136
149
|
end
|
137
150
|
parameters = [scheme, path, sink_mode]
|
138
151
|
klass.new(*parameters)
|
139
152
|
end
|
153
|
+
|
154
|
+
# Constructs properties to be passed to Flow#complete or Cascade#complete
|
155
|
+
# which will locate temporary Hadoop files in base_dir. It is necessary
|
156
|
+
# to pass these properties only when executing local scripts via JRuby's main
|
157
|
+
# method, which confuses Cascading's attempt to find the containing jar.
|
158
|
+
def local_properties(base_dir)
|
159
|
+
dirs = {
|
160
|
+
'test.build.data' => "#{base_dir}/build",
|
161
|
+
'hadoop.tmp.dir' => "#{base_dir}/tmp",
|
162
|
+
'hadoop.log.dir' => "#{base_dir}/log",
|
163
|
+
}
|
164
|
+
dirs.each{ |key, dir| `mkdir -p #{dir}` }
|
165
|
+
|
166
|
+
job_conf = Java::OrgApacheHadoopMapred::JobConf.new
|
167
|
+
job_conf.jar = dirs['test.build.data']
|
168
|
+
dirs.each{ |key, dir| job_conf.set(key, dir) }
|
169
|
+
|
170
|
+
job_conf.num_map_tasks = 1
|
171
|
+
job_conf.num_reduce_tasks = 1
|
172
|
+
|
173
|
+
properties = java.util.HashMap.new
|
174
|
+
Java::CascadingFlowHadoop::HadoopPlanner.copy_job_conf(properties, job_conf)
|
175
|
+
properties
|
176
|
+
end
|
140
177
|
end
|
@@ -5,13 +5,23 @@
|
|
5
5
|
# underlying Janino expression problems.
|
6
6
|
module Cascading
|
7
7
|
class CascadingException < StandardError
|
8
|
+
attr_accessor :ne, :message, :depth
|
9
|
+
|
8
10
|
def initialize(native_exception, message)
|
9
11
|
@ne = native_exception
|
10
|
-
|
12
|
+
@message = message
|
13
|
+
trace, @depth = trace_causes(@ne, 1)
|
14
|
+
super("#{message}\n#{trace}")
|
11
15
|
end
|
12
16
|
|
13
|
-
|
14
|
-
|
17
|
+
# Fetch cause at depth. If depth is not provided, root cause is returned.
|
18
|
+
def cause(depth = @depth)
|
19
|
+
if depth > @depth
|
20
|
+
warn "WARNING: Depth (#{depth}) greater than depth of cause stack (#{@depth}) requested"
|
21
|
+
nil
|
22
|
+
else
|
23
|
+
fetch_cause(@ne, depth)
|
24
|
+
end
|
15
25
|
end
|
16
26
|
|
17
27
|
private
|
@@ -22,14 +32,20 @@ module Cascading
|
|
22
32
|
end
|
23
33
|
|
24
34
|
def trace_causes(ne, depth)
|
25
|
-
return unless ne
|
35
|
+
return ['', depth - 1] unless ne
|
36
|
+
|
37
|
+
warn "ERROR: Exception does not respond to cause: #{ne}" unless ne.respond_to?(:cause)
|
38
|
+
cause_trace, cause_depth = trace_causes(ne.respond_to?(:cause) ? ne.cause : nil, depth + 1)
|
39
|
+
|
26
40
|
trace = "Cause #{depth}: #{ne.respond_to?(:java_class) ? ne.java_class : ne.class}: #{ne}\n"
|
27
41
|
if ne.respond_to?(:stack_trace)
|
28
42
|
trace += "#{ne.stack_trace.map{ |e| " at #{e.class_name}.#{e.method_name}(#{e.file_name}:#{e.line_number})" }.join("\n")}\n"
|
29
43
|
elsif ne.respond_to?(:backtrace)
|
30
44
|
trace += " #{ne.backtrace.join("\n ")}\n"
|
31
45
|
end
|
32
|
-
trace +=
|
46
|
+
trace += cause_trace
|
47
|
+
|
48
|
+
[trace, cause_depth]
|
33
49
|
end
|
34
50
|
end
|
35
51
|
end
|
data/lib/cascading/flow.rb
CHANGED
@@ -144,7 +144,11 @@ module Cascading
|
|
144
144
|
puts "#{key}=#{properties[key]}"
|
145
145
|
end
|
146
146
|
|
147
|
-
|
147
|
+
# FIXME: why do I have to do this in 2.0 wip-255?
|
148
|
+
Java::CascadingFlow::FlowConnector.setApplicationName(properties, name)
|
149
|
+
Java::CascadingFlow::FlowConnector.setApplicationVersion(properties, '0.0.0')
|
150
|
+
|
151
|
+
Java::CascadingFlowHadoop::HadoopFlowConnector.new(properties).connect(
|
148
152
|
name,
|
149
153
|
make_tap_parameter(@sources),
|
150
154
|
make_tap_parameter(@sinks),
|
data/lib/cascading/scope.rb
CHANGED
@@ -49,17 +49,20 @@ module Cascading
|
|
49
49
|
<<-END
|
50
50
|
Scope name: #{@scope.name}
|
51
51
|
Kind: #{kind}
|
52
|
-
|
53
|
-
Declared fields: #{@scope.declared_fields}
|
54
|
-
Grouping selectors: #{@scope.grouping_selectors}
|
52
|
+
Key selectors: #{@scope.key_selectors}
|
55
53
|
Sorting selectors: #{@scope.sorting_selectors}
|
54
|
+
Remainder fields: #{@scope.remainder_fields}
|
55
|
+
Declared fields: #{@scope.declared_fields}
|
56
|
+
Arguments
|
57
|
+
selector: #{@scope.arguments_selector}
|
58
|
+
declarator: #{@scope.arguments_declarator}
|
56
59
|
Out grouping
|
57
|
-
selector:
|
58
|
-
fields:
|
59
|
-
key fields: #{@grouping_key_fields}
|
60
|
+
selector: #{@scope.out_grouping_selector}
|
61
|
+
fields: #{grouping_fields} (#{@scope.out_grouping_fields})
|
62
|
+
key fields: #{@grouping_key_fields} (#{@scope.key_selectors})
|
60
63
|
Out values
|
61
64
|
selector: #{@scope.out_values_selector}
|
62
|
-
fields:
|
65
|
+
fields: #{values_fields} (#{@scope.out_values_fields})
|
63
66
|
END
|
64
67
|
end
|
65
68
|
|
data/samples/branch.rb
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
4
4
|
|
5
5
|
require 'cascading'
|
6
|
-
require 'samples/cascading'
|
7
6
|
|
8
7
|
cascade 'branch' do
|
9
8
|
flow 'branch' do
|
@@ -28,4 +27,4 @@ cascade 'branch' do
|
|
28
27
|
sink 'branch1', tap('output/branch1', :sink_mode => :replace)
|
29
28
|
sink 'branch2', tap('output/branch2', :sink_mode => :replace)
|
30
29
|
end
|
31
|
-
end.complete(
|
30
|
+
end.complete(local_properties('build/sample'))
|
data/samples/copy.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'copy' do
|
8
7
|
flow 'copy' do
|
@@ -16,4 +15,4 @@ cascade 'copy' do
|
|
16
15
|
|
17
16
|
sink 'input', tap('output/copy', :sink_mode => :replace)
|
18
17
|
end
|
19
|
-
end.complete(
|
18
|
+
end.complete(local_properties('build/sample'))
|
data/samples/join.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'join' do
|
8
7
|
flow 'join' do
|
@@ -29,4 +28,4 @@ cascade 'join' do
|
|
29
28
|
|
30
29
|
sink 'join', tap('output/join', :sink_mode => :replace)
|
31
30
|
end
|
32
|
-
end.complete(
|
31
|
+
end.complete(local_properties('build/sample'))
|
data/samples/logwordcount.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'logwordcount' do
|
8
7
|
flow 'logwordcount' do
|
@@ -19,4 +18,4 @@ cascade 'logwordcount' do
|
|
19
18
|
|
20
19
|
sink 'input', tap('output/logwordcount', :sink_mode => :replace)
|
21
20
|
end
|
22
|
-
end.complete(
|
21
|
+
end.complete(local_properties('build/sample'))
|
data/samples/project.rb
CHANGED
@@ -4,7 +4,6 @@ $: << File.join(File.dirname(__FILE__), '..', 'lib')
|
|
4
4
|
# History: "project" (verb) used to be known as "restrict"
|
5
5
|
|
6
6
|
require 'cascading'
|
7
|
-
require 'samples/cascading'
|
8
7
|
|
9
8
|
cascade 'project' do
|
10
9
|
flow 'project' do
|
@@ -21,4 +20,4 @@ cascade 'project' do
|
|
21
20
|
|
22
21
|
sink 'input', tap('output/project', :sink_mode => :replace)
|
23
22
|
end
|
24
|
-
end.complete(
|
23
|
+
end.complete(local_properties('build/sample'))
|
data/samples/rename.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'rename' do
|
8
7
|
flow 'rename' do
|
@@ -18,4 +17,4 @@ cascade 'rename' do
|
|
18
17
|
|
19
18
|
sink 'input', tap('output/rename', :sink_mode => :replace)
|
20
19
|
end
|
21
|
-
end.complete(
|
20
|
+
end.complete(local_properties('build/sample'))
|
data/samples/scorenames.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'scorenames' do
|
8
7
|
flow 'scorenames' do
|
@@ -17,4 +16,4 @@ cascade 'scorenames' do
|
|
17
16
|
|
18
17
|
sink 'input', tap('output/scorenames', :sink_mode => :replace)
|
19
18
|
end
|
20
|
-
end.complete(
|
19
|
+
end.complete(local_properties('build/sample'))
|
data/samples/splitter.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'splitter' do
|
8
7
|
flow 'splitter' do
|
@@ -17,4 +16,4 @@ cascade 'splitter' do
|
|
17
16
|
|
18
17
|
sink 'input', tap('output/splitter', :sink_mode => :replace)
|
19
18
|
end
|
20
|
-
end.complete(
|
19
|
+
end.complete(local_properties('build/sample'))
|
data/samples/union.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
3
|
|
4
4
|
require 'cascading'
|
5
|
-
require 'samples/cascading'
|
6
5
|
|
7
6
|
cascade 'union' do
|
8
7
|
flow 'union' do
|
@@ -32,4 +31,4 @@ cascade 'union' do
|
|
32
31
|
|
33
32
|
sink 'union', tap('output/union', :sink_mode => :replace)
|
34
33
|
end
|
35
|
-
end.complete(
|
34
|
+
end.complete(local_properties('build/sample'))
|
data/spec/scope_spec.rb
CHANGED
@@ -27,10 +27,9 @@ context Cascading::Scope do
|
|
27
27
|
|
28
28
|
# Pass that uses our grouping fields instead of all_fields
|
29
29
|
operation = Java::CascadingOperation::Identity.new
|
30
|
-
make_each(
|
31
|
-
Java::CascadingPipe::Each, tail_pipe, fields([0, 'x_sum']), operation)
|
30
|
+
make_each(Java::CascadingPipe::Each, tail_pipe, fields(['x', 'x_sum']), operation)
|
32
31
|
|
33
|
-
check_scope :values_fields => [
|
32
|
+
check_scope :values_fields => ['x', 'x_sum']
|
34
33
|
end
|
35
34
|
end
|
36
35
|
|
@@ -108,11 +107,6 @@ context Cascading::Scope do
|
|
108
107
|
:grouping_fields => ['x', 'x_sum', 'y_sum']
|
109
108
|
assert_size_equals 3
|
110
109
|
|
111
|
-
# No rename service provided unless you use the block form of join!
|
112
|
-
check_scope :values_fields => [0, 'x_sum', 'y_sum']
|
113
|
-
|
114
|
-
# Mimic rename service
|
115
|
-
bind_names ['x', 'x_sum', 'y_sum']
|
116
110
|
check_scope :values_fields => ['x', 'x_sum', 'y_sum']
|
117
111
|
end
|
118
112
|
end
|
@@ -132,7 +126,7 @@ context Cascading::Scope do
|
|
132
126
|
begin
|
133
127
|
sum :mapping => {'x' => 'x_sum'}, :type => :int
|
134
128
|
rescue CascadingException => e
|
135
|
-
raise e.cause
|
129
|
+
raise e.cause
|
136
130
|
end
|
137
131
|
end
|
138
132
|
end.should raise_error java.lang.IllegalStateException, 'Every cannot follow a Tap or an Each'
|
@@ -159,9 +153,9 @@ context Cascading::Scope do
|
|
159
153
|
end
|
160
154
|
end
|
161
155
|
rescue CascadingException => e
|
162
|
-
raise e.cause
|
156
|
+
raise e.cause
|
163
157
|
end
|
164
|
-
end.should raise_error
|
158
|
+
end.should raise_error Java::CascadingFlowPlanner::PlannerException, "[attempt_group][sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)] Every instances may not split after a GroupBy or CoGroup pipe, found: Every(attempt_group)[Sum[decl:'x_sum'][args:1]] after: CoGroup(left*right)[by:left:[{1}:'x']right:[{1}:'x']]"
|
165
159
|
end
|
166
160
|
|
167
161
|
it 'should propagate names through GroupBy' do
|
data/spec/spec_util.rb
CHANGED
@@ -25,7 +25,7 @@ def test_flow(&block)
|
|
25
25
|
cascade = cascade 'test_app' do
|
26
26
|
flow 'test', &block
|
27
27
|
end
|
28
|
-
cascade.complete(
|
28
|
+
cascade.complete(local_properties(BUILD_DIR))
|
29
29
|
end
|
30
30
|
|
31
31
|
def test_assembly(params = {}, &block)
|
@@ -90,38 +90,3 @@ def test_join_assembly(params = {}, &block)
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
|
94
|
-
def cascading_properties
|
95
|
-
build_dir = "#{BUILD_DIR}/build"
|
96
|
-
`mkdir -p #{build_dir}`
|
97
|
-
tmp_dir = "#{BUILD_DIR}/tmp"
|
98
|
-
`mkdir -p #{tmp_dir}`
|
99
|
-
log_dir = "#{BUILD_DIR}/log"
|
100
|
-
`mkdir -p #{log_dir}`
|
101
|
-
|
102
|
-
# Local cluster settings
|
103
|
-
#java.lang.System.set_property("test.build.data", build_dir)
|
104
|
-
#java.lang.System.set_property("hadoop.tmp.dir", tmp_dir)
|
105
|
-
#java.lang.System.set_property("hadoop.log.dir", log_dir)
|
106
|
-
#conf = Java::OrgApacheHadoopConf::Configuration.new
|
107
|
-
#dfs = Java::OrgApacheHadoopDfs::MiniDFSCluster.new(conf, 4, true, nil);
|
108
|
-
#file_sys = dfs.file_system
|
109
|
-
#mr = Java::OrgApacheHadoopMapred::MiniMRCluster.new(4, file_sys.uri.to_string, 1)
|
110
|
-
#job_conf = mr.create_job_conf
|
111
|
-
#job_conf.set("mapred.child.java.opts", "-Xmx512m")
|
112
|
-
#job_conf.set("mapred.map.tasks.speculative.execution", "false")
|
113
|
-
#job_conf.set("mapred.reduce.tasks.speculative.execution", "false")
|
114
|
-
|
115
|
-
job_conf = Java::OrgApacheHadoopMapred::JobConf.new
|
116
|
-
job_conf.jar = build_dir
|
117
|
-
job_conf.set("test.build.data", build_dir)
|
118
|
-
job_conf.set("hadoop.tmp.dir", tmp_dir)
|
119
|
-
job_conf.set("hadoop.log.dir", log_dir)
|
120
|
-
|
121
|
-
job_conf.num_map_tasks = 4
|
122
|
-
job_conf.num_reduce_tasks = 1
|
123
|
-
|
124
|
-
properties = java.util.HashMap.new({})
|
125
|
-
Java::CascadingFlow::MultiMapReducePlanner.set_job_conf(properties, job_conf)
|
126
|
-
properties
|
127
|
-
end
|
data/tags
CHANGED
@@ -36,8 +36,11 @@ Scope lib/cascading/scope.rb /^ class Scope$/;" c class:Cascading
|
|
36
36
|
ScopeTests spec/spec_util.rb /^module ScopeTests$/;" m
|
37
37
|
TC_Assembly test/test_assembly.rb /^class TC_Assembly < Test::Unit::TestCase$/;" c
|
38
38
|
TC_AssemblyScenarii test/test_assembly.rb /^class TC_AssemblyScenarii < Test::Unit::TestCase$/;" c
|
39
|
+
TC_Cascade test/test_cascade.rb /^class TC_Cascade < Test::Unit::TestCase$/;" c
|
39
40
|
TC_Cascading test/test_cascading.rb /^class TC_Cascading < Test::Unit::TestCase$/;" c
|
41
|
+
TC_Exceptions test/test_exceptions.rb /^class TC_Exceptions < Test::Unit::TestCase$/;" c
|
40
42
|
TC_Flow test/test_flow.rb /^class TC_Flow < Test::Unit::TestCase$/;" c
|
43
|
+
TC_Operations test/test_operations.rb /^class TC_Operations < Test::Unit::TestCase$/;" c
|
41
44
|
add lib/cascading/base.rb /^ def add(name, instance)$/;" f class:Cascading.Registerable
|
42
45
|
add_archive_to_distributed_cache lib/cascading/flow.rb /^ def add_archive_to_distributed_cache(file)$/;" f
|
43
46
|
add_child lib/cascading/base.rb /^ def add_child(node)$/;" f class:Cascading.Node
|
@@ -60,9 +63,8 @@ bind_names lib/cascading/assembly.rb /^ def bind_names(*new_names)$/;" f clas
|
|
60
63
|
branch lib/cascading/assembly.rb /^ def branch(name, &block)$/;" f class:Cascading.Assembly
|
61
64
|
cascade lib/cascading/cascading.rb /^ def cascade(name, &block)$/;" f class:Cascading
|
62
65
|
cascading.jruby src/cascading/jruby/Main.java /^package cascading.jruby;$/;" p
|
63
|
-
cascading_properties spec/spec_util.rb /^def cascading_properties$/;" f
|
64
66
|
cast lib/cascading/assembly.rb /^ def cast(type_map)$/;" f class:Cascading.Assembly
|
65
|
-
cause lib/cascading/cascading_exception.rb /^ def cause(depth)$/;" f class:Cascading.CascadingException
|
67
|
+
cause lib/cascading/cascading_exception.rb /^ def cause(depth = @depth)$/;" f class:Cascading.CascadingException
|
66
68
|
check_scope spec/spec_util.rb /^ def check_scope(params = {})$/;" f class:ScopeTests
|
67
69
|
compare_with_references test/test_assembly.rb /^def compare_with_references(test_name)$/;" f
|
68
70
|
compile lib/cascading/expr_stub.rb /^ def compile$/;" f class:Cascading
|
@@ -89,7 +91,6 @@ describe lib/cascading/base.rb /^ def describe(offset = '')$/;" f class:Casca
|
|
89
91
|
describe lib/cascading/cascade.rb /^ def describe(offset = '')$/;" f class:Cascading.Cascade
|
90
92
|
describe lib/cascading/cascading.rb /^ def describe$/;" f class:Cascading
|
91
93
|
describe lib/cascading/flow.rb /^ def describe(offset = '')$/;" f class:Cascading.Flow
|
92
|
-
describe_job spec/spec_util.rb /^def describe_job(job_file, &block)$/;" f
|
93
94
|
difference_fields lib/cascading/cascading.rb /^ def difference_fields(*fields)$/;" f class:Cascading
|
94
95
|
discard lib/cascading/assembly.rb /^ def discard(*args)$/;" f class:Cascading.Assembly
|
95
96
|
distinct lib/cascading/assembly.rb /^ def distinct(*args)$/;" f class:Cascading.Assembly
|
@@ -145,6 +146,7 @@ last_function lib/cascading/operations.rb /^ def last_function(*args)$/;" f c
|
|
145
146
|
last_grouping_fields lib/cascading/cascading.rb /^ def last_grouping_fields$/;" f class:Cascading
|
146
147
|
left_join lib/cascading/assembly.rb /^ def left_join(*args, &block)$/;" f class:Cascading.Assembly
|
147
148
|
libpath lib/cascading.rb /^ def self.libpath( *args )$/;" F class:Cascading
|
149
|
+
local_properties lib/cascading/cascading.rb /^ def local_properties(base_dir)$/;" f
|
148
150
|
main src/cascading/jruby/Main.java /^ public static void main(String[] args) {$/;" m class:Main
|
149
151
|
make_each lib/cascading/assembly.rb /^ def make_each(type, *parameters)$/;" f class:Cascading.Assembly
|
150
152
|
make_every lib/cascading/assembly.rb /^ def make_every(type, *parameters)$/;" f class:Cascading.Assembly
|
@@ -159,7 +161,7 @@ max_function lib/cascading/operations.rb /^ def max_function(*args)$/;" f cla
|
|
159
161
|
min lib/cascading/assembly.rb /^ def min(*args); composite_aggregator(args, :min_function); end$/;" f class:Cascading.Assembly
|
160
162
|
min_function lib/cascading/operations.rb /^ def min_function(*args)$/;" f class:Cascading.Operations
|
161
163
|
mock_assembly test/test_assembly.rb /^ def mock_assembly(&block)$/;" f class:TC_Assembly
|
162
|
-
multi_tap lib/cascading/cascading.rb /^ def multi_tap(*taps)$/;" f
|
164
|
+
multi_tap lib/cascading/cascading.rb /^ def multi_tap(*taps)$/;" f
|
163
165
|
names_and_types lib/cascading/expr_stub.rb /^ def names_and_types$/;" f class:Cascading
|
164
166
|
outer_join lib/cascading/assembly.rb /^ def outer_join(*args, &block)$/;" f class:Cascading.Assembly
|
165
167
|
outgoing_scope lib/cascading/scope.rb /^ def self.outgoing_scope(flow_element, incoming_scopes, grouping_key_fields, every_applied)$/;" F class:Cascading.Scope
|
@@ -171,6 +173,7 @@ parse_date lib/cascading/assembly.rb /^ def parse_date(*args)$/;" f class:Cas
|
|
171
173
|
pass lib/cascading/assembly.rb /^ def pass(*args)$/;" f class:Cascading.Assembly
|
172
174
|
path lib/cascading.rb /^ def self.path( *args )$/;" F class:Cascading
|
173
175
|
project lib/cascading/assembly.rb /^ def project(*args)$/;" f class:Cascading.Assembly
|
176
|
+
qualified_name lib/cascading/base.rb /^ def qualified_name$/;" f class:Cascading.Node
|
174
177
|
quiet tasks/setup.rb /^def quiet( &block )$/;" f
|
175
178
|
regex_filter lib/cascading/operations.rb /^ def regex_filter(*args)$/;" f class:Cascading.Operations.to_java_comparable_array
|
176
179
|
regex_generator lib/cascading/operations.rb /^ def regex_generator(*args)$/;" f class:Cascading.Operations
|
@@ -187,11 +190,12 @@ require_all_jars lib/cascading.rb /^ def self.require_all_jars(from = ::File.jo
|
|
187
190
|
reset lib/cascading/base.rb /^ def reset$/;" f class:Cascading.Registerable
|
188
191
|
results_fields lib/cascading/cascading.rb /^ def results_fields$/;" f class:Cascading
|
189
192
|
right_join lib/cascading/assembly.rb /^ def right_join(*args, &block)$/;" f class:Cascading.Assembly
|
193
|
+
root lib/cascading/base.rb /^ def root$/;" f class:Cascading.Node
|
190
194
|
sample_properties samples/cascading.rb /^ def sample_properties$/;" f class:Cascading
|
191
195
|
scope lib/cascading/assembly.rb /^ def scope$/;" f class:Cascading.Assembly
|
192
196
|
scope lib/cascading/flow.rb /^ def scope(name = nil)$/;" f class:Cascading.Flow
|
193
197
|
search_field_name lib/cascading/cascading.rb /^ def search_field_name(names, candidate)$/;" f class:Cascading
|
194
|
-
sequence_file_scheme lib/cascading/cascading.rb /^ def sequence_file_scheme(*fields)$/;" f
|
198
|
+
sequence_file_scheme lib/cascading/cascading.rb /^ def sequence_file_scheme(*fields)$/;" f
|
195
199
|
set_spill_threshold lib/cascading/flow.rb /^ def set_spill_threshold(threshold)$/;" f
|
196
200
|
sink lib/cascading/flow.rb /^ def sink(*args)$/;" f class:Cascading.Flow
|
197
201
|
sink_metadata lib/cascading/cascade.rb /^ def sink_metadata$/;" f class:Cascading.Cascade
|
@@ -203,13 +207,19 @@ split_hash lib/cascading/expr_stub.rb /^ def split_hash(h)$/;" f class:Cascad
|
|
203
207
|
split_rows lib/cascading/assembly.rb /^ def split_rows(*args)$/;" f class:Cascading.Assembly
|
204
208
|
sum lib/cascading/assembly.rb /^ def sum(*args)$/;" f class:Cascading.Assembly
|
205
209
|
sum_function lib/cascading/operations.rb /^ def sum_function(*args)$/;" f class:Cascading.Operations
|
206
|
-
tap lib/cascading/cascading.rb /^ def tap(*args)$/;" f
|
210
|
+
tap lib/cascading/cascading.rb /^ def tap(*args)$/;" f
|
207
211
|
tap_scope lib/cascading/scope.rb /^ def self.tap_scope(tap, name)$/;" F class:Cascading.Scope
|
212
|
+
test_aggregator_function_ignore_exception test/test_operations.rb /^ def test_aggregator_function_ignore_exception$/;" f class:TC_Operations
|
213
|
+
test_aggregator_function_ignore_tuples test/test_operations.rb /^ def test_aggregator_function_ignore_tuples$/;" f class:TC_Operations
|
214
|
+
test_aggregator_function_ignore_values test/test_operations.rb /^ def test_aggregator_function_ignore_values$/;" f class:TC_Operations
|
215
|
+
test_ambiguous_assembly_names test/test_flow.rb /^ def test_ambiguous_assembly_names$/;" f class:TC_Flow
|
216
|
+
test_ambiguous_branch_names test/test_flow.rb /^ def test_ambiguous_branch_names$/;" f class:TC_Flow
|
208
217
|
test_assembly spec/spec_util.rb /^def test_assembly(params = {}, &block)$/;" f
|
209
|
-
test_assembly test/test_flow.rb /^ def test_assembly$/;" f class:TC_Flow
|
210
218
|
test_branch_empty test/test_assembly.rb /^ def test_branch_empty$/;" f
|
211
219
|
test_branch_single test/test_assembly.rb /^ def test_branch_single$/;" f
|
212
220
|
test_branch_unique test/test_assembly.rb /^ def test_branch_unique$/;" f
|
221
|
+
test_cascade test/test_cascade.rb /^ def test_cascade$/;" f class:TC_Cascade
|
222
|
+
test_cascading_exception test/test_exceptions.rb /^ def test_cascading_exception$/;" f class:TC_Exceptions
|
213
223
|
test_create_assembly_simple test/test_assembly.rb /^ def test_create_assembly_simple$/;" f class:TC_Assembly
|
214
224
|
test_create_each test/test_assembly.rb /^ def test_create_each$/;" f
|
215
225
|
test_create_every test/test_assembly.rb /^ def test_create_every$/;" f
|
@@ -219,18 +229,25 @@ test_create_group_by_reverse test/test_assembly.rb /^ def test_create_group_by_
|
|
219
229
|
test_create_group_by_with_sort test/test_assembly.rb /^ def test_create_group_by_with_sort$/;" f
|
220
230
|
test_create_group_by_with_sort_reverse test/test_assembly.rb /^ def test_create_group_by_with_sort_reverse$/;" f
|
221
231
|
test_each_identity test/test_assembly.rb /^ def test_each_identity$/;" f
|
232
|
+
test_empty_where test/test_assembly.rb /^ def test_empty_where$/;" f
|
222
233
|
test_fields_field test/test_cascading.rb /^ def test_fields_field$/;" f class:TC_Cascading
|
223
234
|
test_fields_multiple test/test_cascading.rb /^ def test_fields_multiple$/;" f class:TC_Cascading
|
224
235
|
test_fields_single test/test_cascading.rb /^ def test_fields_single$/;" f class:TC_Cascading
|
225
236
|
test_flow spec/spec_util.rb /^def test_flow(&block)$/;" f
|
237
|
+
test_flow test/test_flow.rb /^ def test_flow$/;" f class:TC_Flow
|
226
238
|
test_full_assembly test/test_assembly.rb /^ def test_full_assembly$/;" f
|
227
|
-
test_join1 test/test_assembly.rb /^ def test_join1$/;" f
|
239
|
+
test_join1 test/test_assembly.rb /^ def test_join1$/;" f class:TC_AssemblyScenarii
|
228
240
|
test_join2 test/test_assembly.rb /^ def test_join2$/;" f
|
229
241
|
test_join_assembly spec/spec_util.rb /^def test_join_assembly(params = {}, &block)$/;" f
|
230
|
-
|
242
|
+
test_smoke_test_debug_scope test/test_assembly.rb /^ def test_smoke_test_debug_scope$/;" f
|
243
|
+
test_smoke_test_sequence_file_scheme test/test_assembly.rb /^ def test_smoke_test_sequence_file_scheme$/;" f class:TC_AssemblyScenarii
|
244
|
+
test_splitter test/test_assembly.rb /^ def test_splitter$/;" f class:TC_AssemblyScenarii.test_smoke_test_sequence_file_scheme
|
231
245
|
test_tap test/test_cascading.rb /^ def test_tap$/;" f class:TC_Cascading
|
232
246
|
test_values lib/cascading/expr_stub.rb /^ def test_values$/;" f class:Cascading
|
233
|
-
|
247
|
+
test_where test/test_assembly.rb /^ def test_where$/;" f
|
248
|
+
test_where_with_expression test/test_assembly.rb /^ def test_where_with_expression$/;" f
|
249
|
+
test_where_with_import test/test_assembly.rb /^ def test_where_with_import$/;" f
|
250
|
+
text_line_scheme lib/cascading/cascading.rb /^ def text_line_scheme(*args)$/;" f class:Cascading
|
234
251
|
to_java_comparable_array lib/cascading/operations.rb /^ def to_java_comparable_array(arr)$/;" f class:Cascading.Operations
|
235
252
|
to_s lib/cascading/assembly.rb /^ def to_s$/;" f class:Cascading.Assembly
|
236
253
|
to_s lib/cascading/expr_stub.rb /^ def to_s$/;" f class:Cascading
|
@@ -244,7 +261,6 @@ validate lib/cascading/expr_stub.rb /^ def validate(actual_args = {})$/;" f c
|
|
244
261
|
validate_fields lib/cascading/expr_stub.rb /^ def validate_fields(fields)$/;" f class:Cascading
|
245
262
|
validate_scope lib/cascading/expr_stub.rb /^ def validate_scope(scope)$/;" f class:Cascading
|
246
263
|
values_fields lib/cascading/scope.rb /^ def values_fields$/;" f class:Cascading.Scope
|
247
|
-
verify_assembly_output spec/spec_util.rb /^def verify_assembly_output(assembly_name, params, &block)$/;" f
|
248
264
|
version lib/cascading.rb /^ def self.version$/;" F class:Cascading
|
249
265
|
where lib/cascading/assembly.rb /^ def where(*args)$/;" f class:Cascading.Assembly
|
250
266
|
write_sink_metadata lib/cascading/cascade.rb /^ def write_sink_metadata(file_name)$/;" f class:Cascading.Cascade
|
data/test/test_assembly.rb
CHANGED
@@ -45,13 +45,15 @@ class TC_Assembly < Test::Unit::TestCase
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def test_create_each
|
48
|
-
# You can
|
49
|
-
|
50
|
-
|
51
|
-
each(:filter => identity)
|
52
|
-
end
|
53
|
-
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
|
48
|
+
# You can apply an Each to 0 fields
|
49
|
+
assembly = mock_assembly do
|
50
|
+
each(:filter => identity)
|
54
51
|
end
|
52
|
+
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
|
53
|
+
|
54
|
+
# In which case, it has empty argument and output selectors
|
55
|
+
assert_equal 0, assembly.tail_pipe.argument_selector.size
|
56
|
+
assert_equal 0, assembly.tail_pipe.output_selector.size
|
55
57
|
|
56
58
|
assembly = mock_assembly do
|
57
59
|
each('offset', :output => 'offset_copy',
|
@@ -61,8 +63,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
61
63
|
|
62
64
|
assert pipe.is_a? Java::CascadingPipe::Each
|
63
65
|
|
64
|
-
assert_equal 'offset', pipe.
|
65
|
-
assert_equal 'offset_copy', pipe.
|
66
|
+
assert_equal 'offset', pipe.argument_selector.get(0)
|
67
|
+
assert_equal 'offset_copy', pipe.output_selector.get(0)
|
66
68
|
end
|
67
69
|
|
68
70
|
# For now, replaced these tests with the trivial observation that you can't
|
@@ -89,7 +91,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
89
91
|
every("Field1", :aggregator => count_function)
|
90
92
|
end
|
91
93
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
|
92
|
-
assert_equal "Field1", assembly.tail_pipe.
|
94
|
+
assert_equal "Field1", assembly.tail_pipe.argument_selector.get(0)
|
93
95
|
end
|
94
96
|
|
95
97
|
assert_raise CascadingException do
|
@@ -97,8 +99,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
97
99
|
every('line', :aggregator => count_function, :output=>'line_count')
|
98
100
|
end
|
99
101
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Every
|
100
|
-
assert_equal 'line', assembly.tail_pipe.
|
101
|
-
assert_equal 'line_count', assembly.tail_pipe.
|
102
|
+
assert_equal 'line', assembly.tail_pipe.argument_selector.get(0)
|
103
|
+
assert_equal 'line_count', assembly.tail_pipe.output_selector.get(0)
|
102
104
|
end
|
103
105
|
end
|
104
106
|
|
@@ -108,7 +110,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
108
110
|
end
|
109
111
|
|
110
112
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
111
|
-
grouping_fields = assembly.tail_pipe.
|
113
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
112
114
|
assert_equal 'line', grouping_fields.get(0)
|
113
115
|
|
114
116
|
assembly = mock_assembly do
|
@@ -116,7 +118,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
116
118
|
end
|
117
119
|
|
118
120
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
119
|
-
grouping_fields = assembly.tail_pipe.
|
121
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
120
122
|
assert_equal 'line', grouping_fields.get(0)
|
121
123
|
end
|
122
124
|
|
@@ -126,7 +128,7 @@ class TC_Assembly < Test::Unit::TestCase
|
|
126
128
|
end
|
127
129
|
|
128
130
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
129
|
-
grouping_fields = assembly.tail_pipe.
|
131
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
130
132
|
assert_equal 'offset', grouping_fields.get(0)
|
131
133
|
assert_equal 'line', grouping_fields.get(1)
|
132
134
|
end
|
@@ -137,8 +139,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
137
139
|
end
|
138
140
|
|
139
141
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
140
|
-
grouping_fields = assembly.tail_pipe.
|
141
|
-
sorting_fields = assembly.tail_pipe.
|
142
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
143
|
+
sorting_fields = assembly.tail_pipe.sorting_selectors['test']
|
142
144
|
|
143
145
|
assert_equal 2, grouping_fields.size
|
144
146
|
assert_equal 1, sorting_fields.size
|
@@ -156,8 +158,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
156
158
|
end
|
157
159
|
|
158
160
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
159
|
-
grouping_fields = assembly.tail_pipe.
|
160
|
-
sorting_fields = assembly.tail_pipe.
|
161
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
162
|
+
sorting_fields = assembly.tail_pipe.sorting_selectors['test']
|
161
163
|
|
162
164
|
assert_equal 2, grouping_fields.size
|
163
165
|
assert_equal 1, sorting_fields.size
|
@@ -175,8 +177,8 @@ class TC_Assembly < Test::Unit::TestCase
|
|
175
177
|
end
|
176
178
|
|
177
179
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::GroupBy
|
178
|
-
grouping_fields = assembly.tail_pipe.
|
179
|
-
sorting_fields = assembly.tail_pipe.
|
180
|
+
grouping_fields = assembly.tail_pipe.key_selectors['test']
|
181
|
+
sorting_fields = assembly.tail_pipe.sorting_selectors['test']
|
180
182
|
|
181
183
|
assert_equal 2, grouping_fields.size
|
182
184
|
assert_equal 2, sorting_fields.size
|
@@ -283,10 +285,34 @@ class TC_Assembly < Test::Unit::TestCase
|
|
283
285
|
assert assembly.tail_pipe.is_a? Java::CascadingPipe::Each
|
284
286
|
assert assembly.tail_pipe.operation.is_a? Java::CascadingOperationExpression::ExpressionFilter
|
285
287
|
end
|
286
|
-
end
|
287
288
|
|
289
|
+
def test_smoke_test_debug_scope
|
290
|
+
cascade 'smoke' do
|
291
|
+
flow 'smoke' do
|
292
|
+
source 'input', tap('test/data/data1.txt')
|
293
|
+
assembly 'input' do
|
294
|
+
pass
|
295
|
+
debug_scope
|
296
|
+
end
|
297
|
+
sink 'input', tap('output/smoke_test_debug_scope')
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
288
302
|
|
289
303
|
class TC_AssemblyScenarii < Test::Unit::TestCase
|
304
|
+
def test_smoke_test_sequence_file_scheme
|
305
|
+
cascade 'smoke' do
|
306
|
+
flow 'smoke' do
|
307
|
+
source 'input', tap('test/data/data1.txt')
|
308
|
+
assembly 'input' do
|
309
|
+
pass
|
310
|
+
end
|
311
|
+
compress_output :default, :block
|
312
|
+
sink 'input', tap('output/smoke_test_sequence_file_scheme', :scheme => sequence_file_scheme)
|
313
|
+
end
|
314
|
+
end.complete
|
315
|
+
end
|
290
316
|
|
291
317
|
def test_splitter
|
292
318
|
flow = flow "splitter" do
|
data/test/test_cascading.rb
CHANGED
@@ -32,18 +32,18 @@ class TC_Cascading < Test::Unit::TestCase
|
|
32
32
|
def test_tap
|
33
33
|
tap = tap('/temp')
|
34
34
|
assert_equal '/temp', tap.getPath().toString()
|
35
|
-
assert tap.is_a? Java::
|
35
|
+
assert tap.is_a? Java::CascadingTapHadoop::Hfs
|
36
36
|
|
37
37
|
tap = tap('/temp', :kind => :dfs)
|
38
38
|
assert_equal '/temp', tap.getPath().toString()
|
39
|
-
assert tap.is_a? Java::
|
39
|
+
assert tap.is_a? Java::CascadingTapHadoop::Dfs
|
40
40
|
|
41
41
|
tap = tap('/temp', :kind => :lfs)
|
42
42
|
assert_equal '/temp', tap.getPath().toString()
|
43
|
-
assert tap.is_a? Java::
|
43
|
+
assert tap.is_a? Java::CascadingTapHadoop::Lfs
|
44
44
|
|
45
45
|
tap = tap('/temp', :kind => :hfs)
|
46
46
|
assert_equal '/temp', tap.getPath().toString()
|
47
|
-
assert tap.is_a? Java::
|
47
|
+
assert tap.is_a? Java::CascadingTapHadoop::Hfs
|
48
48
|
end
|
49
49
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'cascading'
|
3
|
+
|
4
|
+
class TC_Exceptions < Test::Unit::TestCase
|
5
|
+
def test_cascading_exception
|
6
|
+
ne3 = java.lang.IllegalArgumentException.new('Root cause')
|
7
|
+
ne2 = Java::CascadingPipe::OperatorException.new(Java::CascadingPipe::Pipe.new('dummy'), 'Exception thrown by Cascading', ne3)
|
8
|
+
ne1 = Java::Cascading::CascadingException.new('Exception Cascading hands us', ne2)
|
9
|
+
e = CascadingException.new(ne1, 'cascading.jruby wrapper exception')
|
10
|
+
|
11
|
+
assert_equal ne1, e.ne
|
12
|
+
assert_equal 'cascading.jruby wrapper exception', e.message
|
13
|
+
assert_equal 3, e.depth
|
14
|
+
|
15
|
+
|
16
|
+
assert_equal ne1, e.cause(1)
|
17
|
+
assert_equal 'Exception Cascading hands us', e.cause(1).message
|
18
|
+
|
19
|
+
assert_equal ne2, e.cause(2)
|
20
|
+
# Cascading inserts Operator#to_s, here
|
21
|
+
assert_match /Exception thrown by Cascading/, e.cause(2).message
|
22
|
+
|
23
|
+
assert_equal ne3, e.cause(3)
|
24
|
+
assert_equal 'Root cause', e.cause(3).message
|
25
|
+
|
26
|
+
# Shallower than depth 1 is the first cause
|
27
|
+
(-5..0).each do |i|
|
28
|
+
assert_equal ne1, e.cause(i)
|
29
|
+
assert_equal 'Exception Cascading hands us', e.cause(i).message
|
30
|
+
end
|
31
|
+
|
32
|
+
# Deeper than the root cause is nil
|
33
|
+
(4..10).each do |i|
|
34
|
+
assert_nil e.cause(i)
|
35
|
+
end
|
36
|
+
|
37
|
+
# cause without depth returns root cause
|
38
|
+
assert_equal ne3, e.cause
|
39
|
+
assert_equal 'Root cause', e.cause.message
|
40
|
+
end
|
41
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cascading.jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.
|
5
|
+
version: 0.0.7
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Matt Walker
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-04-02 00:00:00 Z
|
15
15
|
dependencies: []
|
16
16
|
|
17
17
|
description: cascading.jruby is a small DSL above Cascading, written in JRuby
|
@@ -52,7 +52,6 @@ files:
|
|
52
52
|
- lib/cascading/operations.rb
|
53
53
|
- lib/cascading/scope.rb
|
54
54
|
- samples/branch.rb
|
55
|
-
- samples/cascading.rb
|
56
55
|
- samples/copy.rb
|
57
56
|
- samples/data/data2.txt
|
58
57
|
- samples/data/data_join1.txt
|
@@ -96,6 +95,7 @@ files:
|
|
96
95
|
- test/test_assembly.rb
|
97
96
|
- test/test_cascade.rb
|
98
97
|
- test/test_cascading.rb
|
98
|
+
- test/test_exceptions.rb
|
99
99
|
- test/test_flow.rb
|
100
100
|
- test/test_operations.rb
|
101
101
|
homepage: http://github.com/etsy/cascading.jruby
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
requirements: []
|
123
123
|
|
124
124
|
rubyforge_project: cascading.jruby
|
125
|
-
rubygems_version: 1.8.
|
125
|
+
rubygems_version: 1.8.21
|
126
126
|
signing_key:
|
127
127
|
specification_version: 3
|
128
128
|
summary: A JRuby DSL for Cascading
|
@@ -130,5 +130,6 @@ test_files:
|
|
130
130
|
- test/test_assembly.rb
|
131
131
|
- test/test_cascade.rb
|
132
132
|
- test/test_cascading.rb
|
133
|
+
- test/test_exceptions.rb
|
133
134
|
- test/test_flow.rb
|
134
135
|
- test/test_operations.rb
|
data/samples/cascading.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
module Cascading
|
2
|
-
# Constructs properties to be passed to Flow#complete or Cascade#complete
|
3
|
-
# which will locate temporary Hadoop files in build/sample. It is necessary
|
4
|
-
# to pass these properties only because the sample apps are invoked using
|
5
|
-
# JRuby's main method, which confuses the JobConf's attempt to find the
|
6
|
-
# containing jar.
|
7
|
-
def sample_properties
|
8
|
-
build_dir = 'build/sample/build'
|
9
|
-
`mkdir -p #{build_dir}`
|
10
|
-
tmp_dir = "build/sample/tmp"
|
11
|
-
`mkdir -p #{tmp_dir}`
|
12
|
-
log_dir = "build/sample/log"
|
13
|
-
`mkdir -p #{log_dir}`
|
14
|
-
|
15
|
-
# Local cluster settings
|
16
|
-
#java.lang.System.set_property("test.build.data", build_dir)
|
17
|
-
#java.lang.System.set_property("hadoop.tmp.dir", tmp_dir)
|
18
|
-
#java.lang.System.set_property("hadoop.log.dir", log_dir)
|
19
|
-
#conf = Java::OrgApacheHadoopConf::Configuration.new
|
20
|
-
#dfs = Java::OrgApacheHadoopDfs::MiniDFSCluster.new(conf, 4, true, nil);
|
21
|
-
#file_sys = dfs.file_system
|
22
|
-
#mr = Java::OrgApacheHadoopMapred::MiniMRCluster.new(4, file_sys.uri.to_string, 1)
|
23
|
-
#job_conf = mr.create_job_conf
|
24
|
-
#job_conf.set("mapred.child.java.opts", "-Xmx512m")
|
25
|
-
#job_conf.set("mapred.map.tasks.speculative.execution", "false")
|
26
|
-
#job_conf.set("mapred.reduce.tasks.speculative.execution", "false")
|
27
|
-
|
28
|
-
job_conf = Java::OrgApacheHadoopMapred::JobConf.new
|
29
|
-
job_conf.jar = build_dir
|
30
|
-
job_conf.set("test.build.data", build_dir)
|
31
|
-
job_conf.set("hadoop.tmp.dir", tmp_dir)
|
32
|
-
job_conf.set("hadoop.log.dir", log_dir)
|
33
|
-
|
34
|
-
job_conf.num_map_tasks = 4
|
35
|
-
job_conf.num_reduce_tasks = 1
|
36
|
-
|
37
|
-
properties = java.util.HashMap.new({})
|
38
|
-
Java::CascadingFlow::MultiMapReducePlanner.set_job_conf(properties, job_conf)
|
39
|
-
properties
|
40
|
-
end
|
41
|
-
end
|