jruby-on-hadoop 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +16 -4
- data/VERSION +1 -1
- data/jruby-on-hadoop.gemspec +5 -3
- data/lib/hadoop-ruby.jar +0 -0
- data/lib/jruby-on-hadoop/client.rb +15 -4
- data/spec/jruby-on-hadoop/client_spec.rb +79 -0
- data/spec/jruby-on-hadoop_spec.rb +0 -37
- data/spec/ruby_wrapper_spec.rb +1 -1
- metadata +4 -2
data/README.rdoc
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
= JRuby on Hadoop
|
2
2
|
|
3
3
|
JRuby on Hadoop is a thin wrapper for Hadoop Mapper / Reducer by JRuby.
|
4
|
+
We recommend to use this with hadoop-rubydsl on the github / gemcutter.
|
5
|
+
|
6
|
+
== Description
|
4
7
|
|
5
8
|
== Install
|
6
9
|
|
@@ -10,7 +13,7 @@ Required gems are all on GemCutter.
|
|
10
13
|
2. Install gems
|
11
14
|
$ gem install jruby-on-hadoop
|
12
15
|
|
13
|
-
==
|
16
|
+
== Usage
|
14
17
|
|
15
18
|
1. Run Hadoop cluster on your machines and set HADOOP_HOME env variable.
|
16
19
|
2. put files into your hdfs. ex) test/inputs/file1
|
@@ -18,18 +21,27 @@ Required gems are all on GemCutter.
|
|
18
21
|
$ joh examples/wordcount.rb test/inputs test/outputs
|
19
22
|
You can get Hadoop job results in your hdfs test/outputs/part-*
|
20
23
|
|
21
|
-
|
24
|
+
== Example
|
25
|
+
see also examples/wordcount.rb
|
22
26
|
|
23
27
|
def setup(conf)
|
24
28
|
# setup jobconf
|
25
29
|
end
|
26
30
|
|
27
|
-
def map(
|
31
|
+
def map(key, value, output, reporter)
|
28
32
|
# mapper process
|
33
|
+
# (wordcount example)
|
34
|
+
value.split.each do |word|
|
35
|
+
output.collect(word, 1)
|
36
|
+
end
|
29
37
|
end
|
30
38
|
|
31
|
-
def reduce(
|
39
|
+
def reduce(key, values, output, reporter)
|
32
40
|
# reducer process
|
41
|
+
# (wordcount example)
|
42
|
+
sum = 0
|
43
|
+
values.each {|v| sum += v }
|
44
|
+
output.collect(key, sum)
|
33
45
|
end
|
34
46
|
|
35
47
|
== Build
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.3
|
1
|
+
0.0.4
|
data/jruby-on-hadoop.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{jruby-on-hadoop}
|
8
|
-
s.version = "0.0.3"
|
8
|
+
s.version = "0.0.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Koichi Fujikawa"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-15}
|
13
13
|
s.default_executable = %q{joh}
|
14
14
|
s.description = %q{JRuby on Hadoop}
|
15
15
|
s.email = %q{fujibee@gmail.com}
|
@@ -29,6 +29,7 @@ Gem::Specification.new do |s|
|
|
29
29
|
"lib/jruby-on-hadoop.rb",
|
30
30
|
"lib/jruby-on-hadoop/client.rb",
|
31
31
|
"lib/ruby_wrapper.rb",
|
32
|
+
"spec/jruby-on-hadoop/client_spec.rb",
|
32
33
|
"spec/jruby-on-hadoop_spec.rb",
|
33
34
|
"spec/ruby_wrapper_spec.rb",
|
34
35
|
"test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java",
|
@@ -41,7 +42,8 @@ Gem::Specification.new do |s|
|
|
41
42
|
s.rubygems_version = %q{1.3.5}
|
42
43
|
s.summary = %q{JRuby on Hadoop}
|
43
44
|
s.test_files = [
|
44
|
-
"spec/jruby-on-hadoop_spec.rb",
|
45
|
+
"spec/jruby-on-hadoop/client_spec.rb",
|
46
|
+
"spec/jruby-on-hadoop_spec.rb",
|
45
47
|
"spec/ruby_wrapper_spec.rb",
|
46
48
|
"examples/wordcount.rb"
|
47
49
|
]
|
data/lib/hadoop-ruby.jar
CHANGED
Binary file
|
data/lib/jruby-on-hadoop/client.rb
CHANGED
@@ -9,18 +9,29 @@ module JRubyOnHadoop
|
|
9
9
|
parse_args
|
10
10
|
|
11
11
|
# env check
|
12
|
-
hadoop_home
|
13
|
-
raise 'HADOOP_HOME is not set' unless hadoop_home
|
14
|
-
@hadoop_cmd = "#{hadoop_home}/bin/hadoop"
|
12
|
+
hadoop_home and hadoop_cmd
|
15
13
|
ENV['HADOOP_CLASSPATH'] = "#{lib_path}:#{File.dirname(@script_path)}"
|
16
14
|
end
|
17
15
|
|
16
|
+
def hadoop_home
|
17
|
+
home = ENV['HADOOP_HOME']
|
18
|
+
raise 'HADOOP_HOME is not set' if home.nil? or home.empty?
|
19
|
+
home
|
20
|
+
end
|
21
|
+
|
22
|
+
def hadoop_cmd
|
23
|
+
hadoop = `which hadoop 2>/dev/null`
|
24
|
+
hadoop = "#{hadoop_home}/bin/hadoop" if hadoop.nil? or hadoop.empty?
|
25
|
+
raise 'cannot find hadoop command' unless hadoop
|
26
|
+
hadoop.chomp
|
27
|
+
end
|
28
|
+
|
18
29
|
def run
|
19
30
|
exec cmd
|
20
31
|
end
|
21
32
|
|
22
33
|
def cmd
|
23
|
-
"#{@hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS}" +
|
34
|
+
"#{hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS}" +
|
24
35
|
" -libjars #{opt_libjars} -files #{opt_files} #{mapred_args}"
|
25
36
|
end
|
26
37
|
|
data/spec/jruby-on-hadoop/client_spec.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'jruby-on-hadoop'
|
2
|
+
|
3
|
+
describe JRubyOnHadoop::Client do
|
4
|
+
before do
|
5
|
+
@client = JRubyOnHadoop::Client.new
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'gather necessary jar paths' do
|
9
|
+
version_pattern = '[\d\.]*'
|
10
|
+
@client.main_jar_path.should include 'hadoop-ruby.jar'
|
11
|
+
|
12
|
+
@client.opt_libjars.should match /jruby\-core\-#{version_pattern}\.jar/
|
13
|
+
@client.opt_libjars.should match /jruby\-stdlib\-#{version_pattern}\.jar/
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'gather necessary ruby files' do
|
17
|
+
@client.opt_files.split(",").should include "mapred.rb"
|
18
|
+
@client.opt_files.should match /ruby_wrapper\.rb/
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'construct command for running hadoop' do
|
22
|
+
path_pattern = '[\w/\-\.,]*'
|
23
|
+
@client.cmd.should match /hadoop jar #{path_pattern}hadoop-ruby.jar org.apache.hadoop.ruby.JRubyJobRunner -libjars #{path_pattern}.jar -files mapred.rb/
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'can get mapred args' do
|
27
|
+
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "inputs", "outputs"])
|
28
|
+
client.mapred_args.should == "--script mapred.rb inputs outputs"
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'can parse args' do
|
32
|
+
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "in", "out"])
|
33
|
+
client.script.should == 'mapred.rb'
|
34
|
+
client.inputs.should == 'in'
|
35
|
+
client.outputs.should == 'out'
|
36
|
+
client.files.should include 'examples/mapred.rb'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should raise error if HADOOP_HOME env is not set' do
|
40
|
+
saved = ENV['HADOOP_HOME']
|
41
|
+
ENV['HADOOP_HOME'] = ''
|
42
|
+
begin
|
43
|
+
lambda { JRubyOnHadoop::Client.new }.should raise_error
|
44
|
+
ensure
|
45
|
+
ENV['HADOOP_HOME'] = saved
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'can determin bin/hadoop path' do
|
50
|
+
@client.hadoop_cmd.should match /hadoop$/
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'can determin bin/hadoop path if even no in PATH env var' do
|
54
|
+
saved = ENV['PATH']
|
55
|
+
begin
|
56
|
+
ENV['PATH'] = ''
|
57
|
+
ENV['HADOOP_HOME'].should_not be_empty
|
58
|
+
client = JRubyOnHadoop::Client.new
|
59
|
+
client.hadoop_cmd.should match ENV['HADOOP_HOME']
|
60
|
+
client.hadoop_cmd.should match /hadoop$/
|
61
|
+
ensure
|
62
|
+
ENV['PATH'] = saved
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'should raise error if cannot determin bin/hadoop path' do
|
67
|
+
saved_path = ENV['PATH']
|
68
|
+
saved_home = ENV['HADOOP_HOME']
|
69
|
+
begin
|
70
|
+
ENV['PATH'] = ''
|
71
|
+
lambda { JRubyOnHadoop::Client.new }.should_not raise_error
|
72
|
+
ENV['HADOOP_HOME'] = ''
|
73
|
+
lambda { JRubyOnHadoop::Client.new }.should raise_error
|
74
|
+
ensure
|
75
|
+
ENV['PATH'] = saved_path
|
76
|
+
ENV['HADOOP_HOME'] = saved_home
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
data/spec/jruby-on-hadoop_spec.rb
CHANGED
@@ -18,40 +18,3 @@ describe JRubyOnHadoop do
|
|
18
18
|
JRubyOnHadoop.wrapper_ruby_file.should == path
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
22
|
-
describe JRubyOnHadoop::Client do
|
23
|
-
before do
|
24
|
-
@client = JRubyOnHadoop::Client.new
|
25
|
-
end
|
26
|
-
|
27
|
-
it 'gather necessary jar paths' do
|
28
|
-
version_pattern = '[\d\.]*'
|
29
|
-
@client.main_jar_path.should include 'hadoop-ruby.jar'
|
30
|
-
|
31
|
-
@client.opt_libjars.should match /jruby\-core\-#{version_pattern}\.jar/
|
32
|
-
@client.opt_libjars.should match /jruby\-stdlib\-#{version_pattern}\.jar/
|
33
|
-
end
|
34
|
-
|
35
|
-
it 'gather necessary ruby files' do
|
36
|
-
@client.opt_files.split(",").should include "mapred.rb"
|
37
|
-
@client.opt_files.should match /ruby_wrapper\.rb/
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'construct command for running hadoop' do
|
41
|
-
path_pattern = '[\w/\-\.,]*'
|
42
|
-
@client.cmd.should match /hadoop jar #{path_pattern}hadoop-ruby.jar org.apache.hadoop.ruby.JRubyJobRunner -libjars #{path_pattern}.jar -files mapred.rb/
|
43
|
-
end
|
44
|
-
|
45
|
-
it 'can get mapred args' do
|
46
|
-
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "inputs", "outputs"])
|
47
|
-
client.mapred_args.should == "--script mapred.rb inputs outputs"
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'can parse args' do
|
51
|
-
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "in", "out"])
|
52
|
-
client.script.should == 'mapred.rb'
|
53
|
-
client.inputs.should == 'in'
|
54
|
-
client.outputs.should == 'out'
|
55
|
-
client.files.should include 'examples/mapred.rb'
|
56
|
-
end
|
57
|
-
end
|
data/spec/ruby_wrapper_spec.rb
CHANGED
@@ -5,7 +5,7 @@ describe 'wrapper' do
|
|
5
5
|
examples_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'examples'))
|
6
6
|
$: << examples_dir
|
7
7
|
|
8
|
-
@script = '
|
8
|
+
@script = 'wordcount.rb'
|
9
9
|
@output, @repoter = mock('output'), mock('repoter')
|
10
10
|
|
11
11
|
@key, @value = Text.new, Text.new
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby-on-hadoop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Koichi Fujikawa
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-15 00:00:00 +09:00
|
13
13
|
default_executable: joh
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- lib/jruby-on-hadoop.rb
|
43
43
|
- lib/jruby-on-hadoop/client.rb
|
44
44
|
- lib/ruby_wrapper.rb
|
45
|
+
- spec/jruby-on-hadoop/client_spec.rb
|
45
46
|
- spec/jruby-on-hadoop_spec.rb
|
46
47
|
- spec/ruby_wrapper_spec.rb
|
47
48
|
- test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java
|
@@ -76,6 +77,7 @@ signing_key:
|
|
76
77
|
specification_version: 3
|
77
78
|
summary: JRuby on Hadoop
|
78
79
|
test_files:
|
80
|
+
- spec/jruby-on-hadoop/client_spec.rb
|
79
81
|
- spec/jruby-on-hadoop_spec.rb
|
80
82
|
- spec/ruby_wrapper_spec.rb
|
81
83
|
- examples/wordcount.rb
|