jruby-on-hadoop 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +47 -0
- data/VERSION +1 -1
- data/build.xml +21 -9
- data/examples/wordcount.rb +18 -0
- data/jruby-on-hadoop.gemspec +15 -6
- data/lib/hadoop-ruby.jar +0 -0
- data/lib/jruby-on-hadoop.rb +10 -1
- data/lib/jruby-on-hadoop/client.rb +39 -7
- data/lib/ruby_wrapper.rb +59 -0
- data/spec/jruby-on-hadoop_spec.rb +37 -6
- data/spec/ruby_wrapper_spec.rb +30 -0
- data/test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java +18 -0
- data/test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java +31 -0
- data/test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java +36 -0
- metadata +13 -4
- data/README +0 -30
data/README.rdoc
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
= JRuby on Hadoop
|
2
|
+
|
3
|
+
JRuby on Hadoop is a thin JRuby wrapper that lets you write Hadoop Mappers and Reducers in Ruby.
|
4
|
+
|
5
|
+
== Install
|
6
|
+
|
7
|
+
Required gems are all on GemCutter.
|
8
|
+
|
9
|
+
1. Upgrade RubyGems to 1.3.5
|
10
|
+
2. Install gems
|
11
|
+
$ gem install jruby-on-hadoop
|
12
|
+
|
13
|
+
== Description
|
14
|
+
|
15
|
+
1. Run a Hadoop cluster on your machines and set the HADOOP_HOME environment variable.
|
16
|
+
2. Put your input files into HDFS, e.g. test/inputs/file1
|
17
|
+
3. Now you can run 'joh' as shown below:
|
18
|
+
$ joh examples/wordcount.rb test/inputs test/outputs
|
19
|
+
The Hadoop job results are written to test/outputs/part-* in your HDFS.
|
20
|
+
|
21
|
+
Script example. (see also examples/wordcount.rb)
|
22
|
+
|
23
|
+
def setup(conf)
|
24
|
+
# setup jobconf
|
25
|
+
end
|
26
|
+
|
27
|
+
def map(key, value, output, reporter)
|
28
|
+
# mapper process
|
29
|
+
end
|
30
|
+
|
31
|
+
def reduce(key, values, output, reporter)
|
32
|
+
# reducer process
|
33
|
+
end
|
34
|
+
|
35
|
+
== Build
|
36
|
+
|
37
|
+
You can build hadoop-ruby.jar with "ant".
|
38
|
+
ant
|
39
|
+
|
40
|
+
The HADOOP_HOME environment variable must be set on your system.
|
41
|
+
The build assumes Hadoop version 0.19.2.
|
42
|
+
|
43
|
+
== Author
|
44
|
+
Koichi Fujikawa <fujibee@gmail.com>
|
45
|
+
|
46
|
+
== Copyright
|
47
|
+
License: Apache License
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
0.0.3
|
data/build.xml
CHANGED
@@ -9,10 +9,10 @@
|
|
9
9
|
====================================================================== -->
|
10
10
|
<project name="build JRuby map reduce" default="jar">
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
<property name="version" value="0.0.
|
12
|
+
<property environment="env" />
|
13
|
+
<property name="hadoop.home" value="${env.HADOOP_HOME}" />
|
14
|
+
|
15
|
+
<property name="version" value="0.0.2" />
|
16
16
|
<property name="src.dir" value="${basedir}/src/java" />
|
17
17
|
<property name="build.dir" value="${basedir}/build" />
|
18
18
|
<property name="dist.dir" value="${basedir}/dist" />
|
@@ -20,7 +20,7 @@
|
|
20
20
|
<property name="hadoop.version" value="0.19.2" />
|
21
21
|
<property name="hadoop.jar" value="${hadoop.home}/hadoop-${hadoop.version}-core.jar" />
|
22
22
|
|
23
|
-
<target name="jar" depends="compile">
|
23
|
+
<target name="jar" depends="clean,compile">
|
24
24
|
<mkdir dir="${dist.dir}" />
|
25
25
|
<jar jarfile="${dist.dir}/hadoop-ruby.jar" basedir="${build.dir}">
|
26
26
|
<manifest>
|
@@ -33,10 +33,22 @@
|
|
33
33
|
</jar>
|
34
34
|
</target>
|
35
35
|
|
36
|
-
|
37
|
-
|
36
|
+
<target name="compile">
|
37
|
+
<echo message="${hadoop.jar}" />
|
38
38
|
<mkdir dir="${build.dir}" />
|
39
|
-
|
40
|
-
|
39
|
+
<javac srcdir="${src.dir}" destdir="${build.dir}">
|
40
|
+
<classpath>
|
41
|
+
<fileset dir="${hadoop.home}">
|
42
|
+
<include name="*.jar" />
|
43
|
+
<include name="lib/*.jar" />
|
44
|
+
</fileset>
|
45
|
+
</classpath>
|
46
|
+
</javac>
|
47
|
+
</target>
|
48
|
+
|
49
|
+
<target name="clean">
|
50
|
+
<delete dir="${build.dir}" />
|
51
|
+
<delete dir="${dist.dir}" />
|
52
|
+
</target>
|
41
53
|
|
42
54
|
</project>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# wordcount example
|
2
|
+
|
3
|
+
# not necessary
|
4
|
+
def setup(conf)
|
5
|
+
['test/inputs', 'test/outputs']
|
6
|
+
end
|
7
|
+
|
8
|
+
def map(key, value, output, reporter)
|
9
|
+
value.split.each do |word|
|
10
|
+
output.collect(word, 1)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def reduce(key, values, output, reporter)
|
15
|
+
sum = 0
|
16
|
+
values.each {|v| sum += v }
|
17
|
+
output.collect(key, sum)
|
18
|
+
end
|
data/jruby-on-hadoop.gemspec
CHANGED
@@ -5,28 +5,35 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{jruby-on-hadoop}
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Koichi Fujikawa"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-01-03}
|
13
13
|
s.default_executable = %q{joh}
|
14
14
|
s.description = %q{JRuby on Hadoop}
|
15
15
|
s.email = %q{fujibee@gmail.com}
|
16
16
|
s.executables = ["joh"]
|
17
17
|
s.extra_rdoc_files = [
|
18
|
-
"README"
|
18
|
+
"README.rdoc"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
|
-
"README",
|
21
|
+
"README.rdoc",
|
22
22
|
"Rakefile",
|
23
23
|
"VERSION",
|
24
|
+
"bin/joh",
|
24
25
|
"build.xml",
|
26
|
+
"examples/wordcount.rb",
|
25
27
|
"jruby-on-hadoop.gemspec",
|
26
28
|
"lib/hadoop-ruby.jar",
|
27
29
|
"lib/jruby-on-hadoop.rb",
|
28
30
|
"lib/jruby-on-hadoop/client.rb",
|
29
|
-
"
|
31
|
+
"lib/ruby_wrapper.rb",
|
32
|
+
"spec/jruby-on-hadoop_spec.rb",
|
33
|
+
"spec/ruby_wrapper_spec.rb",
|
34
|
+
"test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java",
|
35
|
+
"test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java",
|
36
|
+
"test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java"
|
30
37
|
]
|
31
38
|
s.homepage = %q{http://github.com/fujibee/jruby-on-hadoop}
|
32
39
|
s.rdoc_options = ["--charset=UTF-8"]
|
@@ -34,7 +41,9 @@ Gem::Specification.new do |s|
|
|
34
41
|
s.rubygems_version = %q{1.3.5}
|
35
42
|
s.summary = %q{JRuby on Hadoop}
|
36
43
|
s.test_files = [
|
37
|
-
"spec/jruby-on-hadoop_spec.rb"
|
44
|
+
"spec/jruby-on-hadoop_spec.rb",
|
45
|
+
"spec/ruby_wrapper_spec.rb",
|
46
|
+
"examples/wordcount.rb"
|
38
47
|
]
|
39
48
|
|
40
49
|
if s.respond_to? :specification_version then
|
data/lib/hadoop-ruby.jar
CHANGED
Binary file
|
data/lib/jruby-on-hadoop.rb
CHANGED
@@ -2,7 +2,16 @@ require 'jruby-jars'
|
|
2
2
|
require 'jruby-on-hadoop/client'
|
3
3
|
|
4
4
|
module JRubyOnHadoop
|
5
|
+
|
6
|
+
def self.lib_path
|
7
|
+
File.expand_path(File.dirname(__FILE__))
|
8
|
+
end
|
9
|
+
|
5
10
|
def self.jar_path
|
6
|
-
File.join(
|
11
|
+
File.join(lib_path, "hadoop-ruby.jar")
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.wrapper_ruby_file
|
15
|
+
File.join(lib_path, "ruby_wrapper.rb")
|
7
16
|
end
|
8
17
|
end
|
@@ -2,9 +2,17 @@ module JRubyOnHadoop
|
|
2
2
|
JAVA_MAIN_CLASS = 'org.apache.hadoop.ruby.JRubyJobRunner'
|
3
3
|
|
4
4
|
class Client
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
attr_reader :script, :inputs, :outputs, :files
|
6
|
+
|
7
|
+
def initialize(args=[])
|
8
|
+
@args = args
|
9
|
+
parse_args
|
10
|
+
|
11
|
+
# env check
|
12
|
+
hadoop_home = ENV['HADOOP_HOME']
|
13
|
+
raise 'HADOOP_HOME is not set' unless hadoop_home
|
14
|
+
@hadoop_cmd = "#{hadoop_home}/bin/hadoop"
|
15
|
+
ENV['HADOOP_CLASSPATH'] = "#{lib_path}:#{File.dirname(@script_path)}"
|
8
16
|
end
|
9
17
|
|
10
18
|
def run
|
@@ -12,16 +20,40 @@ module JRubyOnHadoop
|
|
12
20
|
end
|
13
21
|
|
14
22
|
def cmd
|
15
|
-
"
|
16
|
-
" -libjars #{
|
23
|
+
"#{@hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS}" +
|
24
|
+
" -libjars #{opt_libjars} -files #{opt_files} #{mapred_args}"
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_args
|
28
|
+
@script_path = @args.size > 0 ? @args[0] : 'mapred.rb'
|
29
|
+
@script = File.basename(@script_path)
|
30
|
+
@inputs = @args[1] if @args.size == 3
|
31
|
+
@outputs = @args[2] if @args.size == 3
|
32
|
+
@files = [@script_path, JRubyOnHadoop.wrapper_ruby_file]
|
33
|
+
end
|
34
|
+
|
35
|
+
def mapred_args
|
36
|
+
args = "--script #{@script} "
|
37
|
+
args += "#{@inputs} " if @inputs
|
38
|
+
args += "#{@outputs}" if @outputs
|
39
|
+
args
|
40
|
+
end
|
41
|
+
|
42
|
+
def opt_libjars
|
43
|
+
# jruby jars
|
44
|
+
[JRubyJars.core_jar_path, JRubyJars.stdlib_jar_path].join(',')
|
45
|
+
end
|
46
|
+
|
47
|
+
def opt_files
|
48
|
+
@files.join(',')
|
17
49
|
end
|
18
50
|
|
19
51
|
def main_jar_path
|
20
52
|
JRubyOnHadoop.jar_path
|
21
53
|
end
|
22
54
|
|
23
|
-
def
|
24
|
-
|
55
|
+
def lib_path
|
56
|
+
JRubyOnHadoop.lib_path
|
25
57
|
end
|
26
58
|
end
|
27
59
|
end
|
data/lib/ruby_wrapper.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'java'
|
2
|
+
|
3
|
+
import 'org.apache.hadoop.io.IntWritable'
|
4
|
+
import 'org.apache.hadoop.io.LongWritable'
|
5
|
+
import 'org.apache.hadoop.io.Text'
|
6
|
+
|
7
|
+
def wrap_setup(conf, script, dslfile)
|
8
|
+
require script
|
9
|
+
paths = dslfile ? setup(conf, dslfile) : setup(conf)
|
10
|
+
paths.to_java if paths
|
11
|
+
end
|
12
|
+
|
13
|
+
def wrap_map(key, value, output, reporter, script, dslfile)
|
14
|
+
require script
|
15
|
+
output_wrapper = OutputWrapper.new(output)
|
16
|
+
dslfile ?
|
17
|
+
map(to_ruby(key), to_ruby(value), output_wrapper, reporter, dslfile) :
|
18
|
+
map(to_ruby(key), to_ruby(value), output_wrapper, reporter)
|
19
|
+
end
|
20
|
+
|
21
|
+
def wrap_reduce(key, values, output, reporter, script, dslfile)
|
22
|
+
require script
|
23
|
+
output_wrapper = OutputWrapper.new(output)
|
24
|
+
dslfile ?
|
25
|
+
reduce(to_ruby(key), to_ruby(values), output_wrapper, reporter, dslfile) :
|
26
|
+
reduce(to_ruby(key), to_ruby(values), output_wrapper, reporter)
|
27
|
+
end
|
28
|
+
|
29
|
+
class OutputWrapper
|
30
|
+
def initialize(output)
|
31
|
+
@output = output
|
32
|
+
end
|
33
|
+
|
34
|
+
def collect(key, value)
|
35
|
+
@output.collect(to_java(key), to_java(value))
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def to_ruby(value)
|
40
|
+
case value
|
41
|
+
when IntWritable, LongWritable then value.get
|
42
|
+
when Text then value.to_string
|
43
|
+
else
|
44
|
+
# for Java array
|
45
|
+
if value.respond_to? :map
|
46
|
+
value.map {|v| to_ruby(v)}
|
47
|
+
else value # as is
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_java(value)
|
53
|
+
case value
|
54
|
+
when Integer then IntWritable.new(value)
|
55
|
+
when String then t = Text.new; t.set(value); t
|
56
|
+
when Array then value.to_java
|
57
|
+
else raise "no match class: #{value.class}"
|
58
|
+
end
|
59
|
+
end
|
@@ -6,21 +6,52 @@ describe JRubyOnHadoop do
|
|
6
6
|
jar_path = File.join(File.expand_path(jar_dir), 'hadoop-ruby.jar')
|
7
7
|
JRubyOnHadoop.jar_path.should == jar_path
|
8
8
|
end
|
9
|
+
|
10
|
+
it 'should return lib path' do
|
11
|
+
lib_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
12
|
+
JRubyOnHadoop.lib_path.should == lib_dir
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should return wrapper ruby file' do
|
16
|
+
dir = File.join(File.dirname(__FILE__), '..', 'lib')
|
17
|
+
path = File.join(File.expand_path(dir), 'ruby_wrapper.rb')
|
18
|
+
JRubyOnHadoop.wrapper_ruby_file.should == path
|
19
|
+
end
|
9
20
|
end
|
10
21
|
|
11
22
|
describe JRubyOnHadoop::Client do
|
23
|
+
before do
|
24
|
+
@client = JRubyOnHadoop::Client.new
|
25
|
+
end
|
26
|
+
|
12
27
|
it 'gather necessary jar paths' do
|
13
28
|
version_pattern = '[\d\.]*'
|
14
|
-
client
|
15
|
-
client.main_jar_path.should include 'hadoop-ruby.jar'
|
29
|
+
@client.main_jar_path.should include 'hadoop-ruby.jar'
|
16
30
|
|
17
|
-
client.
|
18
|
-
client.
|
31
|
+
@client.opt_libjars.should match /jruby\-core\-#{version_pattern}\.jar/
|
32
|
+
@client.opt_libjars.should match /jruby\-stdlib\-#{version_pattern}\.jar/
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'gather necessary ruby files' do
|
36
|
+
@client.opt_files.split(",").should include "mapred.rb"
|
37
|
+
@client.opt_files.should match /ruby_wrapper\.rb/
|
19
38
|
end
|
20
39
|
|
21
40
|
it 'construct command for running hadoop' do
|
22
41
|
path_pattern = '[\w/\-\.,]*'
|
23
|
-
client
|
24
|
-
|
42
|
+
@client.cmd.should match /hadoop jar #{path_pattern}hadoop-ruby.jar org.apache.hadoop.ruby.JRubyJobRunner -libjars #{path_pattern}.jar -files mapred.rb/
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'can get mapred args' do
|
46
|
+
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "inputs", "outputs"])
|
47
|
+
client.mapred_args.should == "--script mapred.rb inputs outputs"
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'can parse args' do
|
51
|
+
client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "in", "out"])
|
52
|
+
client.script.should == 'mapred.rb'
|
53
|
+
client.inputs.should == 'in'
|
54
|
+
client.outputs.should == 'out'
|
55
|
+
client.files.should include 'examples/mapred.rb'
|
25
56
|
end
|
26
57
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'ruby_wrapper'
|
2
|
+
|
3
|
+
describe 'wrapper' do
|
4
|
+
before do
|
5
|
+
examples_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'examples'))
|
6
|
+
$: << examples_dir
|
7
|
+
|
8
|
+
@script = 'mapred.rb'
|
9
|
+
@output, @repoter = mock('output'), mock('repoter')
|
10
|
+
|
11
|
+
@key, @value = Text.new, Text.new
|
12
|
+
@key.set('key')
|
13
|
+
@value.set('value')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'can wrap setup' do
|
17
|
+
wrap_setup(mock('conf'), @script, nil)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'can wrap mapper' do
|
21
|
+
@output.should_receive(:collect).once
|
22
|
+
wrap_map(@key, @value, @output, @reporter, @script, nil)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'can wrap reducer' do
|
26
|
+
@output.should_receive(:collect).once
|
27
|
+
values = [1, 2, 3].map {|v| IntWritable.new(v)}.to_java
|
28
|
+
wrap_reduce(@key, values, @output, @reporter, @script, nil)
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
package org.apache.hadoop.ruby;
|
2
|
+
|
3
|
+
import org.apache.hadoop.conf.Configuration;
|
4
|
+
import org.junit.Test;
|
5
|
+
|
6
|
+
public class JRubyJobRunnerTest {
|
7
|
+
|
8
|
+
@Test
|
9
|
+
public void testRun() throws Exception {
|
10
|
+
JRubyJobRunner runner = new JRubyJobRunner();
|
11
|
+
Configuration conf = new Configuration();
|
12
|
+
runner.setConf(conf);
|
13
|
+
String[] args = { "--script", "mapred.rb", "inputs", "outputs" };
|
14
|
+
try {
|
15
|
+
runner.run(args);
|
16
|
+
} catch (Throwable t) { /* ignore */ }
|
17
|
+
}
|
18
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
package org.apache.hadoop.ruby.mapred;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
|
5
|
+
import org.apache.hadoop.io.LongWritable;
|
6
|
+
import org.apache.hadoop.io.Text;
|
7
|
+
import org.apache.hadoop.mapred.JobConf;
|
8
|
+
import org.junit.Test;
|
9
|
+
|
10
|
+
public class JRubyMapperTest {
|
11
|
+
|
12
|
+
@Test
|
13
|
+
public void testMap() throws IOException {
|
14
|
+
LongWritable key = new LongWritable(0L);
|
15
|
+
Text value = new Text();
|
16
|
+
value.set("value");
|
17
|
+
|
18
|
+
JRubyMapper mapper = new JRubyMapper();
|
19
|
+
JobConf conf = new JobConf();
|
20
|
+
conf.set("mapred.ruby.script", "mapred.rb");
|
21
|
+
mapper.configure(conf);
|
22
|
+
|
23
|
+
try {
|
24
|
+
mapper.map(key, value, null, null);
|
25
|
+
} catch (Throwable t) {
|
26
|
+
// ignore
|
27
|
+
// TODO mock check
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
package org.apache.hadoop.ruby.mapred;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.List;
|
6
|
+
|
7
|
+
import org.apache.hadoop.io.IntWritable;
|
8
|
+
import org.apache.hadoop.io.Text;
|
9
|
+
import org.apache.hadoop.mapred.JobConf;
|
10
|
+
import org.junit.Test;
|
11
|
+
|
12
|
+
public class JRubyReducerTest {
|
13
|
+
|
14
|
+
@Test
|
15
|
+
public void testReduce() throws IOException {
|
16
|
+
Text key = new Text();
|
17
|
+
key.set("key");
|
18
|
+
List<IntWritable> values = new ArrayList<IntWritable>();
|
19
|
+
values.add(new IntWritable(1));
|
20
|
+
values.add(new IntWritable(2));
|
21
|
+
values.add(new IntWritable(3));
|
22
|
+
|
23
|
+
JRubyReducer reducer = new JRubyReducer();
|
24
|
+
JobConf conf = new JobConf();
|
25
|
+
conf.set("mapred.ruby.script", "mapred.rb");
|
26
|
+
reducer.configure(conf);
|
27
|
+
|
28
|
+
try {
|
29
|
+
reducer.reduce(key, values.iterator(), null, null);
|
30
|
+
} catch (Throwable t) {
|
31
|
+
// ignore
|
32
|
+
// TODO mock check
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby-on-hadoop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Koichi Fujikawa
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-03 00:00:00 +09:00
|
13
13
|
default_executable: joh
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -29,17 +29,24 @@ executables:
|
|
29
29
|
extensions: []
|
30
30
|
|
31
31
|
extra_rdoc_files:
|
32
|
-
- README
|
32
|
+
- README.rdoc
|
33
33
|
files:
|
34
|
-
- README
|
34
|
+
- README.rdoc
|
35
35
|
- Rakefile
|
36
36
|
- VERSION
|
37
|
+
- bin/joh
|
37
38
|
- build.xml
|
39
|
+
- examples/wordcount.rb
|
38
40
|
- jruby-on-hadoop.gemspec
|
39
41
|
- lib/hadoop-ruby.jar
|
40
42
|
- lib/jruby-on-hadoop.rb
|
41
43
|
- lib/jruby-on-hadoop/client.rb
|
44
|
+
- lib/ruby_wrapper.rb
|
42
45
|
- spec/jruby-on-hadoop_spec.rb
|
46
|
+
- spec/ruby_wrapper_spec.rb
|
47
|
+
- test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java
|
48
|
+
- test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java
|
49
|
+
- test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java
|
43
50
|
has_rdoc: true
|
44
51
|
homepage: http://github.com/fujibee/jruby-on-hadoop
|
45
52
|
licenses: []
|
@@ -70,3 +77,5 @@ specification_version: 3
|
|
70
77
|
summary: JRuby on Hadoop
|
71
78
|
test_files:
|
72
79
|
- spec/jruby-on-hadoop_spec.rb
|
80
|
+
- spec/ruby_wrapper_spec.rb
|
81
|
+
- examples/wordcount.rb
|
data/README
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
= hadoop-ruby
|
2
|
-
|
3
|
-
== Description
|
4
|
-
HadoopのMapper/ReducerをRubyで記述することができます。
|
5
|
-
hadoop-rubydslのためのJRubyラッパーです。
|
6
|
-
|
7
|
-
例)
|
8
|
-
init.rb
|
9
|
-
|
10
|
-
def map(script, key, value, output, reporter)
|
11
|
-
# map処理
|
12
|
-
end
|
13
|
-
|
14
|
-
def reduce(script, key, values, output, reporter)
|
15
|
-
# reduce処理
|
16
|
-
end
|
17
|
-
|
18
|
-
== Build
|
19
|
-
|
20
|
-
ant
|
21
|
-
|
22
|
-
を実行します。
|
23
|
-
環境変数HADOOP_HOMEを設定する必要があります。
|
24
|
-
想定しているHadoopのバージョンは0.19.2です。
|
25
|
-
|
26
|
-
== Author
|
27
|
-
Koichi Fujikawa <fujibee@gmail.com>
|
28
|
-
|
29
|
-
== Copyright
|
30
|
-
License: Apache License
|