jruby-on-hadoop 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +47 -0
- data/VERSION +1 -1
- data/build.xml +21 -9
- data/examples/wordcount.rb +18 -0
- data/jruby-on-hadoop.gemspec +15 -6
- data/lib/hadoop-ruby.jar +0 -0
- data/lib/jruby-on-hadoop.rb +10 -1
- data/lib/jruby-on-hadoop/client.rb +39 -7
- data/lib/ruby_wrapper.rb +59 -0
- data/spec/jruby-on-hadoop_spec.rb +37 -6
- data/spec/ruby_wrapper_spec.rb +30 -0
- data/test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java +18 -0
- data/test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java +31 -0
- data/test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java +36 -0
- metadata +13 -4
- data/README +0 -30
data/README.rdoc
ADDED
@@ -0,0 +1,47 @@
+= JRuby on Hadoop
+
+JRuby on Hadoop is a thin wrapper for Hadoop Mapper / Reducer by JRuby.
+
+== Install
+
+Required gems are all on GemCutter.
+
+1. Upgrade your rubygem to 1.3.5
+2. Install gems
+   $ gem install jruby-on-hadoop
+
+== Description
+
+1. Run Hadoop cluster on your machines and set HADOOP_HOME env variable.
+2. put files into your hdfs. ex) test/inputs/file1
+3. Now you can run 'joh' like below:
+   $ joh examples/wordcount.rb test/inputs test/outputs
+   You can get Hadoop job results in your hdfs test/outputs/part-*
+
+Script example. (see also examples/wordcount.rb)
+
+  def setup(conf)
+    # setup jobconf
+  end
+
+  def map(script, key, value, output, reporter)
+    # mapper process
+  end
+
+  def reduce(script, key, values, output, reporter)
+    # reducer process
+  end
+
+== Build
+
+You can build hadoop-ruby.jar by "ant".
+  ant
+
+Required to set env HADOOP_HOME for your system.
+Assumed Hadoop version is 0.19.2.
+
+== Author
+Koichi Fujikawa <fujibee@gmail.com>
+
+== Copyright
+License: Apache License
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.2
+0.0.3
data/build.xml
CHANGED
@@ -9,10 +9,10 @@
 ====================================================================== -->
 <project name="build JRuby map reduce" default="jar">
 
-
-
-
-	<property name="version" value="0.0.
+	<property environment="env" />
+	<property name="hadoop.home" value="${env.HADOOP_HOME}" />
+
+	<property name="version" value="0.0.2" />
 	<property name="src.dir" value="${basedir}/src/java" />
 	<property name="build.dir" value="${basedir}/build" />
 	<property name="dist.dir" value="${basedir}/dist" />
@@ -20,7 +20,7 @@
 	<property name="hadoop.version" value="0.19.2" />
 	<property name="hadoop.jar" value="${hadoop.home}/hadoop-${hadoop.version}-core.jar" />
 
-	<target name="jar" depends="compile">
+	<target name="jar" depends="clean,compile">
 		<mkdir dir="${dist.dir}" />
 		<jar jarfile="${dist.dir}/hadoop-ruby.jar" basedir="${build.dir}">
 			<manifest>
@@ -33,10 +33,22 @@
 		</jar>
 	</target>
 
-
-
+	<target name="compile">
+		<echo message="${hadoop.jar}" />
 		<mkdir dir="${build.dir}" />
-
-
+		<javac srcdir="${src.dir}" destdir="${build.dir}">
+			<classpath>
+				<fileset dir="${hadoop.home}">
+					<include name="*.jar" />
+					<include name="lib/*.jar" />
+				</fileset>
+			</classpath>
+		</javac>
+	</target>
+
+	<target name="clean">
+		<delete dir="${build.dir}" />
+		<delete dir="${dist.dir}" />
+	</target>
 
 </project>
data/examples/wordcount.rb
ADDED
@@ -0,0 +1,18 @@
+# wordcount example
+
+# not necessary
+def setup(conf)
+  ['test/inputs', 'test/outputs']
+end
+
+def map(key, value, output, reporter)
+  value.split.each do |word|
+    output.collect(word, 1)
+  end
+end
+
+def reduce(key, values, output, reporter)
+  sum = 0
+  values.each {|v| sum += v }
+  output.collect(key, sum)
+end
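Since the new examples/wordcount.rb defines map and reduce as plain top-level Ruby methods, it can be smoke-tested locally without a cluster. A minimal sketch, assuming the gem checkout as the working directory; the FakeCollector class and sample input are illustrative stand-ins for Hadoop's OutputCollector, not part of the gem:

  # Local smoke test for the wordcount example (illustrative only;
  # FakeCollector stands in for Hadoop's OutputCollector).
  require './examples/wordcount'

  class FakeCollector
    attr_reader :pairs
    def initialize; @pairs = []; end
    def collect(key, value); @pairs << [key, value]; end
  end

  out = FakeCollector.new
  map(0, 'hello hadoop hello jruby', out, nil)
  out.pairs  # => [["hello", 1], ["hadoop", 1], ["hello", 1], ["jruby", 1]]

  out = FakeCollector.new
  reduce('hello', [1, 1], out, nil)
  out.pairs  # => [["hello", 2]]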
data/jruby-on-hadoop.gemspec
CHANGED
@@ -5,28 +5,35 @@
 
 Gem::Specification.new do |s|
   s.name = %q{jruby-on-hadoop}
-  s.version = "0.0.2"
+  s.version = "0.0.3"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Koichi Fujikawa"]
-  s.date = %q{
+  s.date = %q{2010-01-03}
   s.default_executable = %q{joh}
   s.description = %q{JRuby on Hadoop}
   s.email = %q{fujibee@gmail.com}
   s.executables = ["joh"]
   s.extra_rdoc_files = [
-    "README"
+    "README.rdoc"
   ]
   s.files = [
-    "README",
+    "README.rdoc",
     "Rakefile",
     "VERSION",
+    "bin/joh",
     "build.xml",
+    "examples/wordcount.rb",
     "jruby-on-hadoop.gemspec",
     "lib/hadoop-ruby.jar",
    "lib/jruby-on-hadoop.rb",
     "lib/jruby-on-hadoop/client.rb",
-    "
+    "lib/ruby_wrapper.rb",
+    "spec/jruby-on-hadoop_spec.rb",
+    "spec/ruby_wrapper_spec.rb",
+    "test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java",
+    "test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java",
+    "test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java"
   ]
   s.homepage = %q{http://github.com/fujibee/jruby-on-hadoop}
   s.rdoc_options = ["--charset=UTF-8"]
@@ -34,7 +41,9 @@ Gem::Specification.new do |s|
   s.rubygems_version = %q{1.3.5}
   s.summary = %q{JRuby on Hadoop}
   s.test_files = [
-    "spec/jruby-on-hadoop_spec.rb"
+    "spec/jruby-on-hadoop_spec.rb",
+    "spec/ruby_wrapper_spec.rb",
+    "examples/wordcount.rb"
   ]
 
   if s.respond_to? :specification_version then
data/lib/hadoop-ruby.jar
CHANGED
Binary file
data/lib/jruby-on-hadoop.rb
CHANGED
@@ -2,7 +2,16 @@ require 'jruby-jars'
 require 'jruby-on-hadoop/client'
 
 module JRubyOnHadoop
+
+  def self.lib_path
+    File.expand_path(File.dirname(__FILE__))
+  end
+
   def self.jar_path
-    File.join(
+    File.join(lib_path, "hadoop-ruby.jar")
+  end
+
+  def self.wrapper_ruby_file
+    File.join(lib_path, "ruby_wrapper.rb")
   end
 end
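For reference, the three new module helpers all resolve relative to the installed lib directory. A minimal sketch; the gem prefix shown is hypothetical and depends on the local RubyGems installation:

  # Illustrative return values of the new path helpers (prefix hypothetical).
  require 'jruby-on-hadoop'

  JRubyOnHadoop.lib_path
  # => ".../gems/jruby-on-hadoop-0.0.3/lib"
  JRubyOnHadoop.jar_path
  # => ".../gems/jruby-on-hadoop-0.0.3/lib/hadoop-ruby.jar"
  JRubyOnHadoop.wrapper_ruby_file
  # => ".../gems/jruby-on-hadoop-0.0.3/lib/ruby_wrapper.rb"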
data/lib/jruby-on-hadoop/client.rb
CHANGED
@@ -2,9 +2,17 @@ module JRubyOnHadoop
   JAVA_MAIN_CLASS = 'org.apache.hadoop.ruby.JRubyJobRunner'
 
   class Client
-
-
-
+    attr_reader :script, :inputs, :outputs, :files
+
+    def initialize(args=[])
+      @args = args
+      parse_args
+
+      # env check
+      hadoop_home = ENV['HADOOP_HOME']
+      raise 'HADOOP_HOME is not set' unless hadoop_home
+      @hadoop_cmd = "#{hadoop_home}/bin/hadoop"
+      ENV['HADOOP_CLASSPATH'] = "#{lib_path}:#{File.dirname(@script_path)}"
     end
 
     def run
@@ -12,16 +20,40 @@ module JRubyOnHadoop
     end
 
     def cmd
-      "
-      " -libjars #{
+      "#{@hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS}" +
+        " -libjars #{opt_libjars} -files #{opt_files} #{mapred_args}"
+    end
+
+    def parse_args
+      @script_path = @args.size > 0 ? @args[0] : 'mapred.rb'
+      @script = File.basename(@script_path)
+      @inputs = @args[1] if @args.size == 3
+      @outputs = @args[2] if @args.size == 3
+      @files = [@script_path, JRubyOnHadoop.wrapper_ruby_file]
+    end
+
+    def mapred_args
+      args = "--script #{@script} "
+      args += "#{@inputs} " if @inputs
+      args += "#{@outputs}" if @outputs
+      args
+    end
+
+    def opt_libjars
+      # jruby jars
+      [JRubyJars.core_jar_path, JRubyJars.stdlib_jar_path].join(',')
+    end
+
+    def opt_files
+      @files.join(',')
     end
 
     def main_jar_path
       JRubyOnHadoop.jar_path
     end
 
-    def
-
+    def lib_path
+      JRubyOnHadoop.lib_path
     end
   end
 end
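Taken together, the reworked Client assembles a single hadoop invocation. A sketch of roughly what Client#cmd returns for the README's wordcount run, assuming HADOOP_HOME=/opt/hadoop; the jruby-jars paths and versions shown are illustrative:

  # What Client#cmd assembles (paths illustrative; line breaks added for
  # readability -- the real value is one string).
  require 'jruby-on-hadoop'

  client = JRubyOnHadoop::Client.new(
    ['examples/wordcount.rb', 'test/inputs', 'test/outputs'])
  client.cmd
  # => "/opt/hadoop/bin/hadoop jar .../lib/hadoop-ruby.jar
  #       org.apache.hadoop.ruby.JRubyJobRunner
  #       -libjars .../jruby-core-1.4.0.jar,.../jruby-stdlib-1.4.0.jar
  #       -files examples/wordcount.rb,.../lib/ruby_wrapper.rb
  #       --script wordcount.rb test/inputs test/outputs"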
data/lib/ruby_wrapper.rb
ADDED
@@ -0,0 +1,59 @@
+require 'java'
+
+import 'org.apache.hadoop.io.IntWritable'
+import 'org.apache.hadoop.io.LongWritable'
+import 'org.apache.hadoop.io.Text'
+
+def wrap_setup(conf, script, dslfile)
+  require script
+  paths = dslfile ? setup(conf, dslfile) : setup(conf)
+  paths.to_java if paths
+end
+
+def wrap_map(key, value, output, reporter, script, dslfile)
+  require script
+  output_wrapper = OutputWrapper.new(output)
+  dslfile ?
+    map(to_ruby(key), to_ruby(value), output_wrapper, reporter, dslfile) :
+    map(to_ruby(key), to_ruby(value), output_wrapper, reporter)
+end
+
+def wrap_reduce(key, values, output, reporter, script, dslfile)
+  require script
+  output_wrapper = OutputWrapper.new(output)
+  dslfile ?
+    reduce(to_ruby(key), to_ruby(values), output_wrapper, reporter, dslfile) :
+    reduce(to_ruby(key), to_ruby(values), output_wrapper, reporter)
+end
+
+class OutputWrapper
+  def initialize(output)
+    @output = output
+  end
+
+  def collect(key, value)
+    @output.collect(to_java(key), to_java(value))
+  end
+end
+
+def to_ruby(value)
+  case value
+  when IntWritable, LongWritable then value.get
+  when Text then value.to_string
+  else
+    # for Java array
+    if value.respond_to? :map
+      value.map {|v| to_ruby(v)}
+    else value # as is
+    end
+  end
+end
+
+def to_java(value)
+  case value
+  when Integer then IntWritable.new(value)
+  when String then t = Text.new; t.set(value); t
+  when Array then value.to_java
+  else raise "no match class: #{value.class}"
+  end
+end
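The to_ruby / to_java pair is what lets user scripts see plain Ruby values while Hadoop sees Writables. A sketch of the conversions, assuming JRuby with hadoop-0.19.2-core.jar on the classpath so the imported classes resolve:

  # Round-trip behaviour of the new converters (illustrative).
  require 'ruby_wrapper'

  to_ruby(IntWritable.new(5))   # => 5
  text = Text.new
  text.set('word')
  to_ruby(text)                 # => "word"

  to_java(7)                    # => IntWritable wrapping 7
  to_java('word')               # => Text wrapping "word"
  to_java(3.14)                 # raises "no match class: Float"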
data/spec/jruby-on-hadoop_spec.rb
CHANGED
@@ -6,21 +6,52 @@ describe JRubyOnHadoop do
     jar_path = File.join(File.expand_path(jar_dir), 'hadoop-ruby.jar')
     JRubyOnHadoop.jar_path.should == jar_path
   end
+
+  it 'should return lib path' do
+    lib_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
+    JRubyOnHadoop.lib_path.should == lib_dir
+  end
+
+  it 'should return wrapper ruby file' do
+    dir = File.join(File.dirname(__FILE__), '..', 'lib')
+    path = File.join(File.expand_path(dir), 'ruby_wrapper.rb')
+    JRubyOnHadoop.wrapper_ruby_file.should == path
+  end
 end
 
 describe JRubyOnHadoop::Client do
+  before do
+    @client = JRubyOnHadoop::Client.new
+  end
+
   it 'gather necessary jar paths' do
     version_pattern = '[\d\.]*'
-    client
-    client.main_jar_path.should include 'hadoop-ruby.jar'
+    @client.main_jar_path.should include 'hadoop-ruby.jar'
 
-    client.
-    client.
+    @client.opt_libjars.should match /jruby\-core\-#{version_pattern}\.jar/
+    @client.opt_libjars.should match /jruby\-stdlib\-#{version_pattern}\.jar/
+  end
+
+  it 'gather necessary ruby files' do
+    @client.opt_files.split(",").should include "mapred.rb"
+    @client.opt_files.should match /ruby_wrapper\.rb/
   end
 
   it 'construct command for running hadoop' do
     path_pattern = '[\w/\-\.,]*'
-    client
-
+    @client.cmd.should match /hadoop jar #{path_pattern}hadoop-ruby.jar org.apache.hadoop.ruby.JRubyJobRunner -libjars #{path_pattern}.jar -files mapred.rb/
+  end
+
+  it 'can get mapred args' do
+    client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "inputs", "outputs"])
+    client.mapred_args.should == "--script mapred.rb inputs outputs"
+  end
+
+  it 'can parse args' do
+    client = JRubyOnHadoop::Client.new(["examples/mapred.rb", "in", "out"])
+    client.script.should == 'mapred.rb'
+    client.inputs.should == 'in'
+    client.outputs.should == 'out'
+    client.files.should include 'examples/mapred.rb'
   end
 end
data/spec/ruby_wrapper_spec.rb
ADDED
@@ -0,0 +1,30 @@
+require 'ruby_wrapper'
+
+describe 'wrapper' do
+  before do
+    examples_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'examples'))
+    $: << examples_dir
+
+    @script = 'mapred.rb'
+    @output, @repoter = mock('output'), mock('repoter')
+
+    @key, @value = Text.new, Text.new
+    @key.set('key')
+    @value.set('value')
+  end
+
+  it 'can wrap setup' do
+    wrap_setup(mock('conf'), @script, nil)
+  end
+
+  it 'can wrap mapper' do
+    @output.should_receive(:collect).once
+    wrap_map(@key, @value, @output, @reporter, @script, nil)
+  end
+
+  it 'can wrap reducer' do
+    @output.should_receive(:collect).once
+    values = [1, 2, 3].map {|v| IntWritable.new(v)}.to_java
+    wrap_reduce(@key, values, @output, @reporter, @script, nil)
+  end
+end
data/test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java
ADDED
@@ -0,0 +1,18 @@
+package org.apache.hadoop.ruby;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+public class JRubyJobRunnerTest {
+
+  @Test
+  public void testRun() throws Exception {
+    JRubyJobRunner runner = new JRubyJobRunner();
+    Configuration conf = new Configuration();
+    runner.setConf(conf);
+    String[] args = { "--script", "mapred.rb", "inputs", "outputs" };
+    try {
+      runner.run(args);
+    } catch (Throwable t) { /* ignore */ }
+  }
+}
data/test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java
ADDED
@@ -0,0 +1,31 @@
+package org.apache.hadoop.ruby.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.Test;
+
+public class JRubyMapperTest {
+
+  @Test
+  public void testMap() throws IOException {
+    LongWritable key = new LongWritable(0L);
+    Text value = new Text();
+    value.set("value");
+
+    JRubyMapper mapper = new JRubyMapper();
+    JobConf conf = new JobConf();
+    conf.set("mapred.ruby.script", "mapred.rb");
+    mapper.configure(conf);
+
+    try {
+      mapper.map(key, value, null, null);
+    } catch (Throwable t) {
+      // ignore
+      // TODO mock check
+    }
+  }
+
+}
data/test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java
ADDED
@@ -0,0 +1,36 @@
+package org.apache.hadoop.ruby.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.Test;
+
+public class JRubyReducerTest {
+
+  @Test
+  public void testReduce() throws IOException {
+    Text key = new Text();
+    key.set("key");
+    List<IntWritable> values = new ArrayList<IntWritable>();
+    values.add(new IntWritable(1));
+    values.add(new IntWritable(2));
+    values.add(new IntWritable(3));
+
+    JRubyReducer reducer = new JRubyReducer();
+    JobConf conf = new JobConf();
+    conf.set("mapred.ruby.script", "mapred.rb");
+    reducer.configure(conf);
+
+    try {
+      reducer.reduce(key, values.iterator(), null, null);
+    } catch (Throwable t) {
+      // ignore
+      // TODO mock check
+    }
+  }
+
+}
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: jruby-on-hadoop
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Koichi Fujikawa
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date:
+date: 2010-01-03 00:00:00 +09:00
 default_executable: joh
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -29,17 +29,24 @@ executables:
 extensions: []
 
 extra_rdoc_files:
-- README
+- README.rdoc
 files:
-- README
+- README.rdoc
 - Rakefile
 - VERSION
+- bin/joh
 - build.xml
+- examples/wordcount.rb
 - jruby-on-hadoop.gemspec
 - lib/hadoop-ruby.jar
 - lib/jruby-on-hadoop.rb
 - lib/jruby-on-hadoop/client.rb
+- lib/ruby_wrapper.rb
 - spec/jruby-on-hadoop_spec.rb
+- spec/ruby_wrapper_spec.rb
+- test/java/org/apache/hadoop/ruby/JRubyJobRunnerTest.java
+- test/java/org/apache/hadoop/ruby/mapred/JRubyMapperTest.java
+- test/java/org/apache/hadoop/ruby/mapred/JRubyReducerTest.java
 has_rdoc: true
 homepage: http://github.com/fujibee/jruby-on-hadoop
 licenses: []
@@ -70,3 +77,5 @@ specification_version: 3
 summary: JRuby on Hadoop
 test_files:
 - spec/jruby-on-hadoop_spec.rb
+- spec/ruby_wrapper_spec.rb
+- examples/wordcount.rb
data/README
DELETED
@@ -1,30 +0,0 @@
-= hadoop-ruby
-
-== Description
-You can write Hadoop Mappers / Reducers in Ruby.
-This is a JRuby wrapper for hadoop-rubydsl.
-
-Example)
-init.rb
-
-def map(script, key, value, output, reporter)
-  # map process
-end
-
-def reduce(script, key, values, output, reporter)
-  # reduce process
-end
-
-== Build
-
-  ant
-
-Run the command above.
-You need to set the HADOOP_HOME environment variable.
-The assumed Hadoop version is 0.19.2.
-
-== Author
-Koichi Fujikawa <fujibee@gmail.com>
-
-== Copyright
-License: Apache License