mapleroad 0.0.1.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +18 -0
- data/Jarfile +2 -0
- data/Jarfile.lock +136 -0
- data/LICENSE.txt +22 -0
- data/README.md +65 -0
- data/Rakefile +17 -0
- data/bin/mapleroad +33 -0
- data/ext/mapleroad/MapleRoadCollector.java +32 -0
- data/ext/mapleroad/MapleRoadCollectorTextInt.java +37 -0
- data/ext/mapleroad/MapleRoadJob.java +64 -0
- data/ext/mapleroad/MapleRoadMapper.java +43 -0
- data/ext/mapleroad/MapleRoadMapperTextInt.java +44 -0
- data/ext/mapleroad/MapleRoadReducer.java +43 -0
- data/ext/mapleroad/MapleRoadReducerTextInt.java +44 -0
- data/lib/mapleroad/version.rb +3 -0
- data/lib/mapleroad_service.jar +0 -0
- data/mapleroad.gemspec +24 -0
- data/samples/anagram/mapper.rb +8 -0
- data/samples/anagram/reducer.rb +11 -0
- data/samples/wordcount/mapper.rb +8 -0
- data/samples/wordcount/reducer.rb +9 -0
- metadata +98 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 798e4df11da3783b2f5d91a163a58424da52c0f1
|
4
|
+
data.tar.gz: b467639d0217b7bd312ccf5effb6da7df8903e1d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 88ff6c7173883e7bda6a1991f2f822702742a68f4c7e89223cfad35b6b8d630303b007835a3340b7134c84d503970b6f2414883c9d6b2875ef831c44f2b892d3
|
7
|
+
data.tar.gz: 96987a08a197239c1d3aab27f41ac1932357f91877f7b19ab1ccb09cb5828645e9a1dc489f06aaa7b772657da437b3db9bd7e46b40c49c326e192d31ed2fbc9a
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
lock_jar (0.7.5)
|
5
|
+
naether (~> 0.10.1)
|
6
|
+
thor (~> 0.18.1)
|
7
|
+
naether (0.10.1-java)
|
8
|
+
rake (10.3.2)
|
9
|
+
rake-compiler (0.9.2)
|
10
|
+
rake
|
11
|
+
thor (0.18.1)
|
12
|
+
|
13
|
+
PLATFORMS
|
14
|
+
java
|
15
|
+
|
16
|
+
DEPENDENCIES
|
17
|
+
lock_jar (~> 0.7.5)
|
18
|
+
rake-compiler (~> 0.9.1)
|
data/Jarfile
ADDED
data/Jarfile.lock
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
---
|
2
|
+
version: 0.7.5
|
3
|
+
groups:
|
4
|
+
default:
|
5
|
+
dependencies:
|
6
|
+
- aopalliance:aopalliance:jar:1.0
|
7
|
+
- asm:asm:jar:3.1
|
8
|
+
- com.google.code.findbugs:jsr305:jar:1.3.9
|
9
|
+
- com.google.guava:guava:jar:11.0.2
|
10
|
+
- com.google.inject.extensions:guice-servlet:jar:3.0
|
11
|
+
- com.google.inject:guice:jar:3.0
|
12
|
+
- com.google.protobuf:protobuf-java:jar:2.5.0
|
13
|
+
- com.jamesmurty.utils:java-xmlbuilder:jar:0.4
|
14
|
+
- com.jcraft:jsch:jar:0.1.42
|
15
|
+
- com.sun.jersey.contribs:jersey-guice:jar:1.9
|
16
|
+
- com.sun.jersey:jersey-core:jar:1.9
|
17
|
+
- com.sun.jersey:jersey-json:jar:1.9
|
18
|
+
- com.sun.jersey:jersey-server:jar:1.9
|
19
|
+
- com.sun.xml.bind:jaxb-impl:jar:2.2.3-1
|
20
|
+
- com.thoughtworks.paranamer:paranamer:jar:2.3
|
21
|
+
- commons-beanutils:commons-beanutils-core:jar:1.8.0
|
22
|
+
- commons-beanutils:commons-beanutils:jar:1.7.0
|
23
|
+
- commons-cli:commons-cli:jar:1.2
|
24
|
+
- commons-codec:commons-codec:jar:1.4
|
25
|
+
- commons-collections:commons-collections:jar:3.2.1
|
26
|
+
- commons-configuration:commons-configuration:jar:1.6
|
27
|
+
- commons-digester:commons-digester:jar:1.8
|
28
|
+
- commons-el:commons-el:jar:1.0
|
29
|
+
- commons-httpclient:commons-httpclient:jar:3.1
|
30
|
+
- commons-io:commons-io:jar:2.4
|
31
|
+
- commons-lang:commons-lang:jar:2.6
|
32
|
+
- commons-logging:commons-logging:jar:1.1.3
|
33
|
+
- commons-net:commons-net:jar:3.1
|
34
|
+
- io.netty:netty:jar:3.6.2.Final
|
35
|
+
- javax.activation:activation:jar:1.1
|
36
|
+
- javax.inject:javax.inject:jar:1
|
37
|
+
- javax.servlet.jsp:jsp-api:jar:2.1
|
38
|
+
- javax.servlet:servlet-api:jar:2.5
|
39
|
+
- javax.xml.bind:jaxb-api:jar:2.2.2
|
40
|
+
- javax.xml.stream:stax-api:jar:1.0-2
|
41
|
+
- jdk.tools:jdk.tools:jar:1.7
|
42
|
+
- log4j:log4j:jar:1.2.17
|
43
|
+
- net.java.dev.jets3t:jets3t:jar:0.9.0
|
44
|
+
- org.apache.avro:avro:jar:1.7.4
|
45
|
+
- org.apache.commons:commons-compress:jar:1.4.1
|
46
|
+
- org.apache.commons:commons-math3:jar:3.1.1
|
47
|
+
- org.apache.hadoop:hadoop-annotations:jar:2.4.0
|
48
|
+
- org.apache.hadoop:hadoop-auth:jar:2.4.0
|
49
|
+
- org.apache.hadoop:hadoop-common:jar:2.4.0
|
50
|
+
- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.4.0
|
51
|
+
- org.apache.hadoop:hadoop-yarn-api:jar:2.4.0
|
52
|
+
- org.apache.hadoop:hadoop-yarn-common:jar:2.4.0
|
53
|
+
- org.apache.httpcomponents:httpclient:jar:4.1.2
|
54
|
+
- org.apache.httpcomponents:httpcore:jar:4.1.2
|
55
|
+
- org.apache.zookeeper:zookeeper:jar:3.4.5
|
56
|
+
- org.codehaus.jackson:jackson-core-asl:jar:1.8.8
|
57
|
+
- org.codehaus.jackson:jackson-jaxrs:jar:1.8.3
|
58
|
+
- org.codehaus.jackson:jackson-mapper-asl:jar:1.8.8
|
59
|
+
- org.codehaus.jackson:jackson-xc:jar:1.8.3
|
60
|
+
- org.codehaus.jettison:jettison:jar:1.1
|
61
|
+
- org.mortbay.jetty:jetty-util:jar:6.1.26
|
62
|
+
- org.mortbay.jetty:jetty:jar:6.1.26
|
63
|
+
- org.slf4j:slf4j-api:jar:1.7.5
|
64
|
+
- org.slf4j:slf4j-log4j12:jar:1.7.5
|
65
|
+
- org.tukaani:xz:jar:1.0
|
66
|
+
- org.xerial.snappy:snappy-java:jar:1.0.4.1
|
67
|
+
- tomcat:jasper-compiler:jar:5.5.23
|
68
|
+
- tomcat:jasper-runtime:jar:5.5.23
|
69
|
+
- xmlenc:xmlenc:jar:0.52
|
70
|
+
artifacts:
|
71
|
+
- jar:org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.4.0:
|
72
|
+
transitive:
|
73
|
+
com.google.inject.extensions:guice-servlet:jar:3.0: {}
|
74
|
+
org.apache.hadoop:hadoop-yarn-common:jar:2.4.0:
|
75
|
+
javax.xml.bind:jaxb-api:jar:2.2.2:
|
76
|
+
javax.xml.stream:stax-api:jar:1.0-2: {}
|
77
|
+
javax.activation:activation:jar:1.1: {}
|
78
|
+
com.google.inject:guice:jar:3.0:
|
79
|
+
aopalliance:aopalliance:jar:1.0: {}
|
80
|
+
javax.inject:javax.inject:jar:1: {}
|
81
|
+
com.sun.jersey.contribs:jersey-guice:jar:1.9: {}
|
82
|
+
org.apache.hadoop:hadoop-yarn-api:jar:2.4.0: {}
|
83
|
+
io.netty:netty:jar:3.6.2.Final: {}
|
84
|
+
- jar:org.apache.hadoop:hadoop-common:jar:2.4.0:
|
85
|
+
transitive:
|
86
|
+
org.apache.hadoop:hadoop-auth:jar:2.4.0: {}
|
87
|
+
com.jcraft:jsch:jar:0.1.42: {}
|
88
|
+
com.sun.jersey:jersey-server:jar:1.9:
|
89
|
+
asm:asm:jar:3.1: {}
|
90
|
+
org.apache.commons:commons-math3:jar:3.1.1: {}
|
91
|
+
org.apache.zookeeper:zookeeper:jar:3.4.5: {}
|
92
|
+
com.sun.jersey:jersey-core:jar:1.9: {}
|
93
|
+
org.slf4j:slf4j-log4j12:jar:1.7.5: {}
|
94
|
+
com.google.guava:guava:jar:11.0.2: {}
|
95
|
+
org.apache.avro:avro:jar:1.7.4:
|
96
|
+
org.xerial.snappy:snappy-java:jar:1.0.4.1: {}
|
97
|
+
com.thoughtworks.paranamer:paranamer:jar:2.3: {}
|
98
|
+
javax.servlet.jsp:jsp-api:jar:2.1: {}
|
99
|
+
org.codehaus.jackson:jackson-core-asl:jar:1.8.8: {}
|
100
|
+
commons-cli:commons-cli:jar:1.2: {}
|
101
|
+
com.google.code.findbugs:jsr305:jar:1.3.9: {}
|
102
|
+
org.mortbay.jetty:jetty-util:jar:6.1.26: {}
|
103
|
+
xmlenc:xmlenc:jar:0.52: {}
|
104
|
+
org.apache.hadoop:hadoop-annotations:jar:2.4.0:
|
105
|
+
jdk.tools:jdk.tools:jar:1.7: {}
|
106
|
+
commons-el:commons-el:jar:1.0: {}
|
107
|
+
org.apache.commons:commons-compress:jar:1.4.1:
|
108
|
+
org.tukaani:xz:jar:1.0: {}
|
109
|
+
javax.servlet:servlet-api:jar:2.5: {}
|
110
|
+
com.sun.jersey:jersey-json:jar:1.9:
|
111
|
+
org.codehaus.jackson:jackson-jaxrs:jar:1.8.3: {}
|
112
|
+
com.sun.xml.bind:jaxb-impl:jar:2.2.3-1: {}
|
113
|
+
org.codehaus.jackson:jackson-xc:jar:1.8.3: {}
|
114
|
+
org.codehaus.jettison:jettison:jar:1.1: {}
|
115
|
+
net.java.dev.jets3t:jets3t:jar:0.9.0:
|
116
|
+
com.jamesmurty.utils:java-xmlbuilder:jar:0.4: {}
|
117
|
+
org.apache.httpcomponents:httpclient:jar:4.1.2: {}
|
118
|
+
org.apache.httpcomponents:httpcore:jar:4.1.2: {}
|
119
|
+
commons-lang:commons-lang:jar:2.6: {}
|
120
|
+
tomcat:jasper-compiler:jar:5.5.23: {}
|
121
|
+
commons-collections:commons-collections:jar:3.2.1: {}
|
122
|
+
org.codehaus.jackson:jackson-mapper-asl:jar:1.8.8: {}
|
123
|
+
tomcat:jasper-runtime:jar:5.5.23: {}
|
124
|
+
commons-logging:commons-logging:jar:1.1.3: {}
|
125
|
+
com.google.protobuf:protobuf-java:jar:2.5.0: {}
|
126
|
+
org.mortbay.jetty:jetty:jar:6.1.26: {}
|
127
|
+
commons-httpclient:commons-httpclient:jar:3.1: {}
|
128
|
+
org.slf4j:slf4j-api:jar:1.7.5: {}
|
129
|
+
commons-net:commons-net:jar:3.1: {}
|
130
|
+
commons-io:commons-io:jar:2.4: {}
|
131
|
+
commons-codec:commons-codec:jar:1.4: {}
|
132
|
+
commons-configuration:commons-configuration:jar:1.6:
|
133
|
+
commons-beanutils:commons-beanutils-core:jar:1.8.0: {}
|
134
|
+
commons-digester:commons-digester:jar:1.8:
|
135
|
+
commons-beanutils:commons-beanutils:jar:1.7.0: {}
|
136
|
+
log4j:log4j:jar:1.2.17: {}
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 TODO: Write your name
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# MapleRoad
|
2
|
+
|
3
|
+
This gem is a proof of concept to write Hadoop's mapreduce code in Ruby.
|
4
|
+
|
5
|
+
This gem works only on JRuby.
|
6
|
+
Also, this gem assumes the `hadoop` command is on the PATH, so that it can be run just by typing `hadoop`.
|
7
|
+
Prior to trying this gem, set up your Hadoop cluster(s) and check that a Hadoop sample job works fine.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
gem 'mapleroad'
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install mapleroad
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
1. Write a mapper class. Make sure the mapper class file returns an instance of the mapper class when the file is evaluated. The mapper class must have a `map` method whose arguments are key, value, and collector.
|
26
|
+
|
27
|
+
2. Write a reducer class. Like the mapper class file, make sure the reducer class file returns an instance of the reducer class. The reducer class must have a `reduce` method whose arguments are key, values, and collector.
|
28
|
+
|
29
|
+
3. Put input file(s) to the hadoop filesystem
|
30
|
+
|
31
|
+
4. Run bin/mapleroad command with arguments
|
32
|
+
|
33
|
+
Example: bin/mapleroad mapper.rb reducer.rb my-dir/input my-dir/output Text:Int
|
34
|
+
|
35
|
+
- The first arg: mapper file name
|
36
|
+
- The second arg: reducer filename
|
37
|
+
- The third arg: input
|
38
|
+
- The fourth arg: output
|
39
|
+
- The fifth arg: types of output's key/value. Text:Text or Text:Int only for now
|
40
|
+
|
41
|
+
## Samples
|
42
|
+
|
43
|
+
This gem includes two samples, anagram and wordcount.
|
44
|
+
|
45
|
+
The original anagram example is found at [https://code.google.com/p/hadoop-map-reduce-examples/wiki/Anagram_Example](https://code.google.com/p/hadoop-map-reduce-examples/wiki/Anagram_Example).
|
46
|
+
The input data can be downloaded from that wiki page.
|
47
|
+
|
48
|
+
Suppose you are in the top directory of this gem,
|
49
|
+
|
50
|
+
`bin/mapleroad samples/anagram/mapper.rb samples/anagram/reducer.rb anagram/input anagram/output Text:Text`
|
51
|
+
|
52
|
+
|
53
|
+
The wordcount example is a famous Hadoop example.
|
54
|
+
As it is explained at [http://www.cloudera.com/content/cloudera-content/cloudera-docs/HadoopTutorial/CDH4/Hadoop-Tutorial/ht_usage.html](http://www.cloudera.com/content/cloudera-content/cloudera-docs/HadoopTutorial/CDH4/Hadoop-Tutorial/ht_usage.html), create two input files.
|
55
|
+
|
56
|
+
`bin/mapleroad samples/wordcount/mapper.rb samples/wordcount/reducer.rb wordcount/input wordcount/output Text:Int`
|
57
|
+
|
58
|
+
|
59
|
+
## Contributing
|
60
|
+
|
61
|
+
1. Fork it ( https://github.com/yokolet/mapleroad/fork )
|
62
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
63
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
64
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
65
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake/javaextensiontask'
|
5
|
+
|
6
|
+
Rake::JavaExtensionTask.new('mapleroad') do |ext|
|
7
|
+
require 'lock_jar'
|
8
|
+
LockJar.lock
|
9
|
+
locked_jars = LockJar.load
|
10
|
+
|
11
|
+
jruby_home = ENV['MY_RUBY_HOME'] # this is available on rvm
|
12
|
+
jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar'] + locked_jars
|
13
|
+
ext.classpath = jars.map {|x| File.expand_path x}.join ':'
|
14
|
+
ext.source_version = '1.7'
|
15
|
+
ext.target_version = '1.7'
|
16
|
+
ext.name = 'mapleroad_service'
|
17
|
+
end
|
data/bin/mapleroad
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'lock_jar'
|
3
|
+
require 'optparse'
|
4
|
+
require 'jruby-jars'
|
5
|
+
|
6
|
+
opts = OptionParser.new do |opts|
|
7
|
+
opts.banner = "MapleRoad: proof of concept, hadoop by Ruby"
|
8
|
+
opts.define_head "Usage: mapleroad <mapper filename> <reducer filename> <input> <output> <type>"
|
9
|
+
opts.separator ""
|
10
|
+
opts.separator "Example:"
|
11
|
+
opts.separator " mapleroad samples/anagram/mapper.rb samples/anagram/reducer.rb anagram/input anagram/output Text:Text"
|
12
|
+
# TODO: options
|
13
|
+
end
|
14
|
+
opts.parse!
|
15
|
+
|
16
|
+
MAPPER_FILENAME = File.expand_path(ARGV.shift)
|
17
|
+
REDUCER_FILENAME = File.expand_path(ARGV.shift)
|
18
|
+
INPUT = ARGV.shift
|
19
|
+
OUTPUT = ARGV.shift
|
20
|
+
TYPE = ARGV.shift
|
21
|
+
TYPE ||= "Text:Text"
|
22
|
+
|
23
|
+
LockJar.lock
|
24
|
+
locked_jars = LockJar.load
|
25
|
+
classpath = locked_jars.map {|x| File.expand_path(x)}.join(":")
|
26
|
+
libjarpath = locked_jars.map {|x| File.expand_path(x)}.join(",")
|
27
|
+
|
28
|
+
JAVA_CLASS = 'mapleroad.MapleRoadJob'
|
29
|
+
command = %Q(export HADOOP_CLASSPATH=#{classpath}; hadoop jar lib/mapleroad_service.jar #{JAVA_CLASS} #{MAPPER_FILENAME} #{REDUCER_FILENAME} #{INPUT} #{OUTPUT} #{TYPE})
|
30
|
+
puts "COMMAND: #{command}"
|
31
|
+
|
32
|
+
|
33
|
+
exec(command)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
|
5
|
+
import org.apache.hadoop.io.Text;
|
6
|
+
import org.apache.hadoop.mapred.OutputCollector;
|
7
|
+
|
8
|
+
public class MapleRoadCollector {
|
9
|
+
private OutputCollector<Text, Text> collector = null;
|
10
|
+
private Text key = null;
|
11
|
+
private Text value = null;
|
12
|
+
|
13
|
+
MapleRoadCollector(Text key, Text value, OutputCollector<Text, Text> collector) {
|
14
|
+
this.key = key;
|
15
|
+
this.value = value;
|
16
|
+
this.collector = collector;
|
17
|
+
}
|
18
|
+
|
19
|
+
public void collect(Object k, Object v) throws IOException {
|
20
|
+
if (k instanceof String) {
|
21
|
+
key.set((String)k);
|
22
|
+
} else if (k instanceof Text) {
|
23
|
+
key = (Text)k;
|
24
|
+
}
|
25
|
+
if (v instanceof String) {
|
26
|
+
value.set((String)v);
|
27
|
+
} else if (v instanceof Text) {
|
28
|
+
value.set((Text)v);
|
29
|
+
}
|
30
|
+
collector.collect(key, value);
|
31
|
+
}
|
32
|
+
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
|
5
|
+
import org.apache.hadoop.io.IntWritable;
|
6
|
+
import org.apache.hadoop.io.Text;
|
7
|
+
import org.apache.hadoop.mapred.OutputCollector;
|
8
|
+
import org.jruby.RubyFixnum;
|
9
|
+
|
10
|
+
public class MapleRoadCollectorTextInt {
|
11
|
+
private OutputCollector<Text, IntWritable> collector = null;
|
12
|
+
private Text key = null;
|
13
|
+
private IntWritable value = null;
|
14
|
+
|
15
|
+
MapleRoadCollectorTextInt(Text key, IntWritable value, OutputCollector<Text, IntWritable> collector) {
|
16
|
+
this.key = key;
|
17
|
+
this.value = value;
|
18
|
+
this.collector = collector;
|
19
|
+
}
|
20
|
+
|
21
|
+
public void collect(Object k, Object v) throws IOException {
|
22
|
+
if (k instanceof String) {
|
23
|
+
key.set((String)k);
|
24
|
+
} else if (k instanceof Text) {
|
25
|
+
key.set((Text)k);
|
26
|
+
}
|
27
|
+
if (v instanceof RubyFixnum) {
|
28
|
+
Long longValue = ((RubyFixnum)v).getLongValue();
|
29
|
+
value.set(longValue.intValue());
|
30
|
+
} else if (v instanceof Long) {
|
31
|
+
value.set(((Long)v).intValue());
|
32
|
+
} else if (v instanceof Integer) {
|
33
|
+
value.set(((Integer)v).intValue());
|
34
|
+
}
|
35
|
+
collector.collect(key, value);
|
36
|
+
}
|
37
|
+
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.nio.charset.Charset;
|
5
|
+
import java.nio.charset.StandardCharsets;
|
6
|
+
import java.nio.file.Files;
|
7
|
+
import java.nio.file.Paths;
|
8
|
+
|
9
|
+
import org.apache.hadoop.fs.Path;
|
10
|
+
import org.apache.hadoop.io.IntWritable;
|
11
|
+
import org.apache.hadoop.io.Text;
|
12
|
+
import org.apache.hadoop.mapred.FileInputFormat;
|
13
|
+
import org.apache.hadoop.mapred.FileOutputFormat;
|
14
|
+
import org.apache.hadoop.mapred.JobClient;
|
15
|
+
import org.apache.hadoop.mapred.JobConf;
|
16
|
+
import org.apache.hadoop.mapred.TextInputFormat;
|
17
|
+
import org.apache.hadoop.mapred.TextOutputFormat;
|
18
|
+
|
19
|
+
public class MapleRoadJob {
|
20
|
+
|
21
|
+
/**
|
22
|
+
* @throws IOException
|
23
|
+
* @param args args[0]: mapper filename,
|
24
|
+
* args[1]: reducer filename,
|
25
|
+
* args[2]: input,
|
26
|
+
* args[3]: output,
|
27
|
+
* args[4]: output key/value types
|
28
|
+
* @throws
|
29
|
+
*/
|
30
|
+
public static void main(String[] args) throws IOException {
|
31
|
+
JobConf conf = new JobConf(MapleRoadJob.class);
|
32
|
+
conf.set("mapper.file.name", args[0]);
|
33
|
+
conf.set("reducer.file.name", args[1]);
|
34
|
+
conf.set("mapper.code", getCodeAsString(args[0], StandardCharsets.UTF_8));
|
35
|
+
System.out.println(conf.get("mapper.code"));
|
36
|
+
conf.set("reducer.code", getCodeAsString(args[1], StandardCharsets.UTF_8));
|
37
|
+
System.out.println(conf.get("reducer.code"));
|
38
|
+
conf.setJobName("maplpleroad_job");
|
39
|
+
conf.setInputFormat(TextInputFormat.class);
|
40
|
+
conf.setOutputFormat(TextOutputFormat.class);
|
41
|
+
FileInputFormat.setInputPaths(conf, new Path(args[2]));
|
42
|
+
FileOutputFormat.setOutputPath(conf, new Path(args[3]));
|
43
|
+
|
44
|
+
if (args[4].equals("Text:Text")) {
|
45
|
+
conf.setOutputKeyClass(Text.class);
|
46
|
+
conf.setOutputValueClass(Text.class);
|
47
|
+
conf.setMapperClass(MapleRoadMapper.class);
|
48
|
+
conf.setReducerClass(MapleRoadReducer.class);
|
49
|
+
} else if (args[4].equals("Text:Int")) {
|
50
|
+
conf.setOutputKeyClass(Text.class);
|
51
|
+
conf.setOutputValueClass(IntWritable.class);
|
52
|
+
conf.setMapperClass(MapleRoadMapperTextInt.class);
|
53
|
+
conf.setReducerClass(MapleRoadReducerTextInt.class);
|
54
|
+
}
|
55
|
+
|
56
|
+
JobClient.runJob(conf);
|
57
|
+
}
|
58
|
+
|
59
|
+
private static String getCodeAsString(String filename, Charset encoding) throws IOException {
|
60
|
+
byte[] encoded = Files.readAllBytes(Paths.get(filename).toAbsolutePath());
|
61
|
+
return new String(encoded, encoding);
|
62
|
+
}
|
63
|
+
|
64
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
|
5
|
+
import org.apache.hadoop.io.LongWritable;
|
6
|
+
import org.apache.hadoop.io.Text;
|
7
|
+
import org.apache.hadoop.mapred.JobConf;
|
8
|
+
import org.apache.hadoop.mapred.MapReduceBase;
|
9
|
+
import org.apache.hadoop.mapred.Mapper;
|
10
|
+
import org.apache.hadoop.mapred.OutputCollector;
|
11
|
+
import org.apache.hadoop.mapred.Reporter;
|
12
|
+
import org.jruby.Ruby;
|
13
|
+
import org.jruby.RubyRuntimeAdapter;
|
14
|
+
import org.jruby.javasupport.JavaEmbedUtils;
|
15
|
+
import org.jruby.javasupport.JavaUtil;
|
16
|
+
import org.jruby.runtime.Helpers;
|
17
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
18
|
+
|
19
|
+
public class MapleRoadMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
|
20
|
+
private String mapper_code = null;
|
21
|
+
private Text keyText = new Text();
|
22
|
+
private Text valueText = new Text();
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void configure(JobConf job) {
|
26
|
+
mapper_code = job.get("mapper.code");
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public void map(LongWritable key, Text value,
|
31
|
+
OutputCollector<Text, Text> outputCollector, Reporter arg3) throws IOException {
|
32
|
+
Ruby runtime = Ruby.getGlobalRuntime();
|
33
|
+
RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
|
34
|
+
IRubyObject receiver = adapter.eval(runtime, mapper_code);
|
35
|
+
IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
|
36
|
+
IRubyObject rubyValue = JavaUtil.convertJavaToRuby(runtime, value);
|
37
|
+
MapleRoadCollector collector =
|
38
|
+
new MapleRoadCollector(keyText, valueText, outputCollector);
|
39
|
+
IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
|
40
|
+
Helpers.invoke(runtime.getCurrentContext(), receiver, "map", rubyKey, rubyValue, rubyCollector);
|
41
|
+
}
|
42
|
+
|
43
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
|
5
|
+
import org.apache.hadoop.io.IntWritable;
|
6
|
+
import org.apache.hadoop.io.LongWritable;
|
7
|
+
import org.apache.hadoop.io.Text;
|
8
|
+
import org.apache.hadoop.mapred.JobConf;
|
9
|
+
import org.apache.hadoop.mapred.MapReduceBase;
|
10
|
+
import org.apache.hadoop.mapred.Mapper;
|
11
|
+
import org.apache.hadoop.mapred.OutputCollector;
|
12
|
+
import org.apache.hadoop.mapred.Reporter;
|
13
|
+
import org.jruby.Ruby;
|
14
|
+
import org.jruby.RubyRuntimeAdapter;
|
15
|
+
import org.jruby.javasupport.JavaEmbedUtils;
|
16
|
+
import org.jruby.javasupport.JavaUtil;
|
17
|
+
import org.jruby.runtime.Helpers;
|
18
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
19
|
+
|
20
|
+
public class MapleRoadMapperTextInt extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
|
21
|
+
private String mapper_code = null;
|
22
|
+
private Text keyText = new Text();
|
23
|
+
private IntWritable valueInt = new IntWritable();
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public void configure(JobConf job) {
|
27
|
+
mapper_code = job.get("mapper.code");
|
28
|
+
}
|
29
|
+
|
30
|
+
@Override
|
31
|
+
public void map(LongWritable key, Text value,
|
32
|
+
OutputCollector<Text, IntWritable> outputCollector, Reporter arg3) throws IOException {
|
33
|
+
Ruby runtime = Ruby.getGlobalRuntime();
|
34
|
+
RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
|
35
|
+
IRubyObject receiver = adapter.eval(runtime, mapper_code);
|
36
|
+
IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
|
37
|
+
IRubyObject rubyValue = JavaUtil.convertJavaToRuby(runtime, value);
|
38
|
+
MapleRoadCollectorTextInt collector =
|
39
|
+
new MapleRoadCollectorTextInt(keyText, valueInt, outputCollector);
|
40
|
+
IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
|
41
|
+
Helpers.invoke(runtime.getCurrentContext(), receiver, "map", rubyKey, rubyValue, rubyCollector);
|
42
|
+
}
|
43
|
+
|
44
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.util.Iterator;
|
5
|
+
|
6
|
+
import org.apache.hadoop.io.Text;
|
7
|
+
import org.apache.hadoop.mapred.JobConf;
|
8
|
+
import org.apache.hadoop.mapred.MapReduceBase;
|
9
|
+
import org.apache.hadoop.mapred.OutputCollector;
|
10
|
+
import org.apache.hadoop.mapred.Reducer;
|
11
|
+
import org.apache.hadoop.mapred.Reporter;
|
12
|
+
import org.jruby.Ruby;
|
13
|
+
import org.jruby.RubyRuntimeAdapter;
|
14
|
+
import org.jruby.javasupport.JavaEmbedUtils;
|
15
|
+
import org.jruby.javasupport.JavaUtil;
|
16
|
+
import org.jruby.runtime.Helpers;
|
17
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
18
|
+
|
19
|
+
public class MapleRoadReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
|
20
|
+
private String reducer_code = null;
|
21
|
+
private Text keyText = new Text();
|
22
|
+
private Text valueText = new Text();
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void configure(JobConf job) {
|
26
|
+
reducer_code = job.get("reducer.code");
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public void reduce(Text key, Iterator<Text> values,
|
31
|
+
OutputCollector<Text, Text> outputCollector, Reporter arg3) throws IOException {;
|
32
|
+
Ruby runtime = Ruby.getGlobalRuntime();
|
33
|
+
RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
|
34
|
+
IRubyObject receiver = adapter.eval(runtime, reducer_code);
|
35
|
+
IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
|
36
|
+
IRubyObject rubyValues = JavaUtil.convertJavaToRuby(runtime, values);
|
37
|
+
MapleRoadCollector collector =
|
38
|
+
new MapleRoadCollector(keyText, valueText, outputCollector);
|
39
|
+
IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
|
40
|
+
Helpers.invoke(runtime.getCurrentContext(), receiver, "reduce", rubyKey, rubyValues, rubyCollector);
|
41
|
+
}
|
42
|
+
|
43
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package mapleroad;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.util.Iterator;
|
5
|
+
|
6
|
+
import org.apache.hadoop.io.IntWritable;
|
7
|
+
import org.apache.hadoop.io.Text;
|
8
|
+
import org.apache.hadoop.mapred.JobConf;
|
9
|
+
import org.apache.hadoop.mapred.MapReduceBase;
|
10
|
+
import org.apache.hadoop.mapred.OutputCollector;
|
11
|
+
import org.apache.hadoop.mapred.Reducer;
|
12
|
+
import org.apache.hadoop.mapred.Reporter;
|
13
|
+
import org.jruby.Ruby;
|
14
|
+
import org.jruby.RubyRuntimeAdapter;
|
15
|
+
import org.jruby.javasupport.JavaEmbedUtils;
|
16
|
+
import org.jruby.javasupport.JavaUtil;
|
17
|
+
import org.jruby.runtime.Helpers;
|
18
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
19
|
+
|
20
|
+
public class MapleRoadReducerTextInt extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
|
21
|
+
private String reducer_code = null;
|
22
|
+
private Text keyText = new Text();
|
23
|
+
private IntWritable valueInt = new IntWritable();
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public void configure(JobConf job) {
|
27
|
+
reducer_code = job.get("reducer.code");
|
28
|
+
}
|
29
|
+
|
30
|
+
@Override
|
31
|
+
public void reduce(Text key, Iterator<IntWritable> values,
|
32
|
+
OutputCollector<Text, IntWritable> outputCollector, Reporter arg3) throws IOException {;
|
33
|
+
Ruby runtime = Ruby.getGlobalRuntime();
|
34
|
+
RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
|
35
|
+
IRubyObject receiver = adapter.eval(runtime, reducer_code);
|
36
|
+
IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
|
37
|
+
IRubyObject rubyValues = JavaUtil.convertJavaToRuby(runtime, values);
|
38
|
+
MapleRoadCollectorTextInt collector =
|
39
|
+
new MapleRoadCollectorTextInt(keyText, valueInt, outputCollector);
|
40
|
+
IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
|
41
|
+
Helpers.invoke(runtime.getCurrentContext(), receiver, "reduce", rubyKey, rubyValues, rubyCollector);
|
42
|
+
}
|
43
|
+
|
44
|
+
}
|
Binary file
|
data/mapleroad.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'mapleroad/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "mapleroad"
|
8
|
+
spec.version = MapleRoad::VERSION
|
9
|
+
spec.authors = ["Yoko Harada"]
|
10
|
+
spec.email = ["yokolet@gmail.com"]
|
11
|
+
spec.summary = %q{Proof of concept gem for hadoop's mapreduce}
|
12
|
+
spec.description = %q{This gem is a proof of concept that mapper and reducer
|
13
|
+
of mapreduce can be written in Ruby.}
|
14
|
+
spec.homepage = ""
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files -z`.split("\x0")
|
18
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
19
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
23
|
+
spec.add_development_dependency "rake"
|
24
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class AnagramReducer
|
2
|
+
def reduce(key, values, collector)
|
3
|
+
output = values.inject("") do |memo, v|
|
4
|
+
memo = memo + v.to_s + "~"
|
5
|
+
end
|
6
|
+
if (output.split("~").length >= 2)
|
7
|
+
collector.collect(key.to_s, output.gsub("~", ","))
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
AnagramReducer.new
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mapleroad
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.pre
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yoko Harada
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-06-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '1.6'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
description: |-
|
42
|
+
This gem is a proof of concept that mapper and reducer
|
43
|
+
of mapreduce can be written in Ruby.
|
44
|
+
email:
|
45
|
+
- yokolet@gmail.com
|
46
|
+
executables:
|
47
|
+
- mapleroad
|
48
|
+
extensions: []
|
49
|
+
extra_rdoc_files: []
|
50
|
+
files:
|
51
|
+
- .gitignore
|
52
|
+
- Gemfile
|
53
|
+
- Gemfile.lock
|
54
|
+
- Jarfile
|
55
|
+
- Jarfile.lock
|
56
|
+
- LICENSE.txt
|
57
|
+
- README.md
|
58
|
+
- Rakefile
|
59
|
+
- bin/mapleroad
|
60
|
+
- ext/mapleroad/MapleRoadCollector.java
|
61
|
+
- ext/mapleroad/MapleRoadCollectorTextInt.java
|
62
|
+
- ext/mapleroad/MapleRoadJob.java
|
63
|
+
- ext/mapleroad/MapleRoadMapper.java
|
64
|
+
- ext/mapleroad/MapleRoadMapperTextInt.java
|
65
|
+
- ext/mapleroad/MapleRoadReducer.java
|
66
|
+
- ext/mapleroad/MapleRoadReducerTextInt.java
|
67
|
+
- lib/mapleroad/version.rb
|
68
|
+
- lib/mapleroad_service.jar
|
69
|
+
- mapleroad.gemspec
|
70
|
+
- samples/anagram/mapper.rb
|
71
|
+
- samples/anagram/reducer.rb
|
72
|
+
- samples/wordcount/mapper.rb
|
73
|
+
- samples/wordcount/reducer.rb
|
74
|
+
homepage: ''
|
75
|
+
licenses:
|
76
|
+
- MIT
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options: []
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - '>='
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - '>'
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: 1.3.1
|
92
|
+
requirements: []
|
93
|
+
rubyforge_project:
|
94
|
+
rubygems_version: 2.2.2
|
95
|
+
signing_key:
|
96
|
+
specification_version: 4
|
97
|
+
summary: Proof of concept gem for hadoop's mapreduce
|
98
|
+
test_files: []
|