jruby-on-hadoop 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +16 -4
- data/VERSION +1 -1
- data/jruby-on-hadoop.gemspec +5 -3
- data/lib/hadoop-ruby.jar +0 -0
- data/lib/jruby-on-hadoop/client.rb +15 -4
- data/spec/jruby-on-hadoop/client_spec.rb +79 -0
- data/spec/jruby-on-hadoop_spec.rb +0 -37
- data/spec/ruby_wrapper_spec.rb +1 -1
- metadata +4 -2
data/README.rdoc
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
= JRuby on Hadoop
|
2
2
|
|
3
3
|
JRuby on Hadoop is a thin wrapper for Hadoop Mapper / Reducer by JRuby.
|
4
|
+
We recommend to use this with hadoop-rubydsl on the github / gemcutter.
|
5
|
+
|
6
|
+
== Description
|
4
7
|
|
5
8
|
== Install
|
6
9
|
|
@@ -10,7 +13,7 @@ Required gems are all on GemCutter.
|
|
10
13
|
2. Install gems
|
11
14
|
$ gem install jruby-on-hadoop
|
12
15
|
|
13
|
-
==
|
16
|
+
== Usage
|
14
17
|
|
15
18
|
1. Run Hadoop cluster on your machines and set HADOOP_HOME env variable.
|
16
19
|
2. put files into your hdfs. ex) test/inputs/file1
|
@@ -18,18 +21,27 @@ Required gems are all on GemCutter.
|
|
18
21
|
$ joh examples/wordcount.rb test/inputs test/outputs
|
19
22
|
You can get Hadoop job results in your hdfs test/outputs/part-*
|
20
23
|
|
21
|
-
|
24
|
+
== Example
|
25
|
+
see also examples/wordcount.rb
|
22
26
|
|
23
27
|
def setup(conf)
|
24
28
|
# setup jobconf
|
25
29
|
end
|
26
30
|
|
27
|
-
def map(
|
31
|
+
def map(key, value, output, reporter)
|
28
32
|
# mapper process
|
33
|
+
# (wordcount example)
|
34
|
+
value.split.each do |word|
|
35
|
+
output.collect(word, 1)
|
36
|
+
end
|
29
37
|
end
|
30
38
|
|
31
|
-
def reduce(
|
39
|
+
def reduce(key, values, output, reporter)
|
32
40
|
# reducer process
|
41
|
+
# (wordcount example)
|
42
|
+
sum = 0
|
43
|
+
values.each {|v| sum += v }
|
44
|
+
output.collect(key, sum)
|
33
45
|
end
|
34
46
|
|
35
47
|
== Build
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.3
|
1
|
+
0.0.4
|
data/jruby-on-hadoop.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{jruby-on-hadoop}
|
8
|
-
s.version = "0.0.3"
|
8
|
+
s.version = "0.0.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Koichi Fujikawa"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-15}
|
13
13
|
s.default_executable = %q{joh}
|
14
14
|
s.description = %q{JRuby on Hadoop}
|
15
15
|
s.email = %q{fujibee@gmail.com}
|
@@ -29,6 +29,7 @@ Gem::Specification.new do |s|
|
|
29
29
|
"lib/jruby-on-hadoop.rb",
|
30
30
|
"lib/jruby-on-hadoop/client.rb",
|
31
31
|
"lib/ruby_wrapper.rb",
|
32
|
+
"spec/jruby-on-hadoop/client_spec.rb",
|
32
33
|
"spec/jruby-on-hadoop_spec.rb",
|
33
34
|
"spec/ruby_wrapper_spec.rb",
|
34
35
|
"test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java",
|
@@ -41,7 +42,8 @@ Gem::Specification.new do |s|
|
|
41
42
|
s.rubygems_version = %q{1.3.5}
|
42
43
|
s.summary = %q{JRuby on Hadoop}
|
43
44
|
s.test_files = [
|
44
|
-
"spec/jruby-on-
|
45
|
+
"spec/jruby-on-hadoop/client_spec.rb",
|
46
|
+
"spec/jruby-on-hadoop_spec.rb",
|
45
47
|
"spec/ruby_wrapper_spec.rb",
|
46
48
|
"examples/wordcount.rb"
|
47
49
|
]
|
data/lib/hadoop-ruby.jar
CHANGED
Binary file
|
@@ -9,18 +9,29 @@ module JRubyOnHadoop
|
|
9
9
|
parse_args
|
10
10
|
|
11
11
|
# env check
|
12
|
-
hadoop_home
|
13
|
-
raise 'HADOOP_HOME is not set' unless hadoop_home
|
14
|
-
@hadoop_cmd = "#{hadoop_home}/bin/hadoop"
|
12
|
+
hadoop_home and hadoop_cmd
|
15
13
|
ENV['HADOOP_CLASSPATH'] = "#{lib_path}:#{File.dirname(@script_path)}"
|
16
14
|
end
|
17
15
|
|
16
|
+
def hadoop_home
|
17
|
+
home = ENV['HADOOP_HOME']
|
18
|
+
raise 'HADOOP_HOME is not set' if home.nil? or home.empty?
|
19
|
+
home
|
20
|
+
end
|
21
|
+
|
22
|
+
def hadoop_cmd
|
23
|
+
hadoop = `which hadoop 2>/dev/null`
|
24
|
+
hadoop = "#{hadoop_home}/bin/hadoop" if hadoop.nil? or hadoop.empty?
|
25
|
+
raise 'cannot find hadoop command' unless hadoop
|
26
|
+
hadoop.chomp
|
27
|
+
end
|
28
|
+
|
18
29
|
def run
|
19
30
|
exec cmd
|
20
31
|
end
|
21
32
|
|
22
33
|
def cmd
|
23
|
-
"#{
|
34
|
+
"#{hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS}" +
|
24
35
|
" -libjars #{opt_libjars} -files #{opt_files} #{mapred_args}"
|
25
36
|
end
|
26
37
|
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'jruby-on-hadoop'
|
2
|
+
|
3
|
+
describe JRubyOnHadoop::Client do
|
4
|
+
before do
|
5
|
+
@client = JRubyOnHadoop::Client.new
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'gather necessary jar paths' do
|
9
|
+
version_pattern = '[\d\.]*'
|
10
|
+
@client.main_jar_path.should include 'hadoop-ruby.jar'
|
11
|
+
|
12
|
+
@client.opt_libjars.should match /jruby\-core\-#{version_pattern}\.jar/
|
13
|
+
@client.opt_libjars.should match /jruby\-stdlib\-#{version_pattern}\.jar/
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'gather necessary ruby files' do
|
17
|
+
@client.opt_files.split(",").should include "mapred.rb"
|
18
|
+
@client.opt_files.should match /ruby_wrapper\.rb/
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'construct command for running hadoop' do
|
22
|
+
path_pattern = '[\w/\-\.,]*'
|
23
|
+
@client.cmd.should match /hadoop jar #{path_pattern}hadoop-ruby.jar org.apache.hadoop.ruby.JRubyJobRunner -libjars #{path_pattern}.jar -files mapred.rb/
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'can get mapred args' do
|
27
|
+
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "inputs", "outputs"])
|
28
|
+
client.mapred_args.should == "--script mapred.rb inputs outputs"
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'can parse args' do
|
32
|
+
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "in", "out"])
|
33
|
+
client.script.should == 'mapred.rb'
|
34
|
+
client.inputs.should == 'in'
|
35
|
+
client.outputs.should == 'out'
|
36
|
+
client.files.should include 'examples/mapred.rb'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should raise error if HADOOP_HOME env is not set' do
|
40
|
+
saved = ENV['HADOOP_HOME']
|
41
|
+
ENV['HADOOP_HOME'] = ''
|
42
|
+
begin
|
43
|
+
lambda { JRubyOnHadoop::Client.new }.should raise_error
|
44
|
+
ensure
|
45
|
+
ENV['HADOOP_HOME'] = saved
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'can determin bin/hadoop path' do
|
50
|
+
@client.hadoop_cmd.should match /hadoop$/
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'can determin bin/hadoop path if even no in PATH env var' do
|
54
|
+
saved = ENV['PATH']
|
55
|
+
begin
|
56
|
+
ENV['PATH'] = ''
|
57
|
+
ENV['HADOOP_HOME'].should_not be_empty
|
58
|
+
client = JRubyOnHadoop::Client.new
|
59
|
+
client.hadoop_cmd.should match ENV['HADOOP_HOME']
|
60
|
+
client.hadoop_cmd.should match /hadoop$/
|
61
|
+
ensure
|
62
|
+
ENV['PATH'] = saved
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'should raise error if cannot determin bin/hadoop path' do
|
67
|
+
saved_path = ENV['PATH']
|
68
|
+
saved_home = ENV['HADOOP_HOME']
|
69
|
+
begin
|
70
|
+
ENV['PATH'] = ''
|
71
|
+
lambda { JRubyOnHadoop::Client.new }.should_not raise_error
|
72
|
+
ENV['HADOOP_HOME'] = ''
|
73
|
+
lambda { JRubyOnHadoop::Client.new }.should raise_error
|
74
|
+
ensure
|
75
|
+
ENV['PATH'] = saved_path
|
76
|
+
ENV['HADOOP_HOME'] = saved_home
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -18,40 +18,3 @@ describe JRubyOnHadoop do
|
|
18
18
|
JRubyOnHadoop.wrapper_ruby_file.should == path
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
22
|
-
describe JRubyOnHadoop::Client do
|
23
|
-
before do
|
24
|
-
@client = JRubyOnHadoop::Client.new
|
25
|
-
end
|
26
|
-
|
27
|
-
it 'gather necessary jar paths' do
|
28
|
-
version_pattern = '[\d\.]*'
|
29
|
-
@client.main_jar_path.should include 'hadoop-ruby.jar'
|
30
|
-
|
31
|
-
@client.opt_libjars.should match /jruby\-core\-#{version_pattern}\.jar/
|
32
|
-
@client.opt_libjars.should match /jruby\-stdlib\-#{version_pattern}\.jar/
|
33
|
-
end
|
34
|
-
|
35
|
-
it 'gather necessary ruby files' do
|
36
|
-
@client.opt_files.split(",").should include "mapred.rb"
|
37
|
-
@client.opt_files.should match /ruby_wrapper\.rb/
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'construct command for running hadoop' do
|
41
|
-
path_pattern = '[\w/\-\.,]*'
|
42
|
-
@client.cmd.should match /hadoop jar #{path_pattern}hadoop-ruby.jar org.apache.hadoop.ruby.JRubyJobRunner -libjars #{path_pattern}.jar -files mapred.rb/
|
43
|
-
end
|
44
|
-
|
45
|
-
it 'can get mapred args' do
|
46
|
-
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "inputs", "outputs"])
|
47
|
-
client.mapred_args.should == "--script mapred.rb inputs outputs"
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'can parse args' do
|
51
|
-
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "in", "out"])
|
52
|
-
client.script.should == 'mapred.rb'
|
53
|
-
client.inputs.should == 'in'
|
54
|
-
client.outputs.should == 'out'
|
55
|
-
client.files.should include 'examples/mapred.rb'
|
56
|
-
end
|
57
|
-
end
|
data/spec/ruby_wrapper_spec.rb
CHANGED
@@ -5,7 +5,7 @@ describe 'wrapper' do
|
|
5
5
|
examples_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'examples'))
|
6
6
|
$: << examples_dir
|
7
7
|
|
8
|
-
@script = '
|
8
|
+
@script = 'wordcount.rb'
|
9
9
|
@output, @repoter = mock('output'), mock('repoter')
|
10
10
|
|
11
11
|
@key, @value = Text.new, Text.new
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby-on-hadoop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Koichi Fujikawa
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-15 00:00:00 +09:00
|
13
13
|
default_executable: joh
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- lib/jruby-on-hadoop.rb
|
43
43
|
- lib/jruby-on-hadoop/client.rb
|
44
44
|
- lib/ruby_wrapper.rb
|
45
|
+
- spec/jruby-on-hadoop/client_spec.rb
|
45
46
|
- spec/jruby-on-hadoop_spec.rb
|
46
47
|
- spec/ruby_wrapper_spec.rb
|
47
48
|
- test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java
|
@@ -76,6 +77,7 @@ signing_key:
|
|
76
77
|
specification_version: 3
|
77
78
|
summary: JRuby on Hadoop
|
78
79
|
test_files:
|
80
|
+
- spec/jruby-on-hadoop/client_spec.rb
|
79
81
|
- spec/jruby-on-hadoop_spec.rb
|
80
82
|
- spec/ruby_wrapper_spec.rb
|
81
83
|
- examples/wordcount.rb
|