mapleroad 0.0.1.pre

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 798e4df11da3783b2f5d91a163a58424da52c0f1
+   data.tar.gz: b467639d0217b7bd312ccf5effb6da7df8903e1d
+ SHA512:
+   metadata.gz: 88ff6c7173883e7bda6a1991f2f822702742a68f4c7e89223cfad35b6b8d630303b007835a3340b7134c84d503970b6f2414883c9d6b2875ef831c44f2b892d3
+   data.tar.gz: 96987a08a197239c1d3aab27f41ac1932357f91877f7b19ab1ccb09cb5828645e9a1dc489f06aaa7b772657da437b3db9bd7e46b40c49c326e192d31ed2fbc9a
data/.gitignore ADDED
@@ -0,0 +1,11 @@
+ .classpath
+ .project
+ .settings
+ .idea
+ .rspec
+ .ruby-version
+ .ruby-gemset
+ *~
+ lib/.idea
+ build
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,9 @@
+ # -*- ruby -*-
+
+ source "https://rubygems.org"
+
+ gem 'lock_jar', '~> 0.7.5', :platform => :jruby
+
+ group :test, :development do
+   gem 'rake-compiler', '~> 0.9.1', :platform => :jruby
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,18 @@
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     lock_jar (0.7.5)
+       naether (~> 0.10.1)
+       thor (~> 0.18.1)
+     naether (0.10.1-java)
+     rake (10.3.2)
+     rake-compiler (0.9.2)
+       rake
+     thor (0.18.1)
+
+ PLATFORMS
+   java
+
+ DEPENDENCIES
+   lock_jar (~> 0.7.5)
+   rake-compiler (~> 0.9.1)
data/Jarfile ADDED
@@ -0,0 +1,2 @@
+ jar "org.apache.hadoop:hadoop-mapreduce-client-core:2.4.0"
+ jar "org.apache.hadoop:hadoop-common:2.4.0"
data/Jarfile.lock ADDED
@@ -0,0 +1,136 @@
+ ---
+ version: 0.7.5
+ groups:
+ default:
+ dependencies:
+ - aopalliance:aopalliance:jar:1.0
+ - asm:asm:jar:3.1
+ - com.google.code.findbugs:jsr305:jar:1.3.9
+ - com.google.guava:guava:jar:11.0.2
+ - com.google.inject.extensions:guice-servlet:jar:3.0
+ - com.google.inject:guice:jar:3.0
+ - com.google.protobuf:protobuf-java:jar:2.5.0
+ - com.jamesmurty.utils:java-xmlbuilder:jar:0.4
+ - com.jcraft:jsch:jar:0.1.42
+ - com.sun.jersey.contribs:jersey-guice:jar:1.9
+ - com.sun.jersey:jersey-core:jar:1.9
+ - com.sun.jersey:jersey-json:jar:1.9
+ - com.sun.jersey:jersey-server:jar:1.9
+ - com.sun.xml.bind:jaxb-impl:jar:2.2.3-1
+ - com.thoughtworks.paranamer:paranamer:jar:2.3
+ - commons-beanutils:commons-beanutils-core:jar:1.8.0
+ - commons-beanutils:commons-beanutils:jar:1.7.0
+ - commons-cli:commons-cli:jar:1.2
+ - commons-codec:commons-codec:jar:1.4
+ - commons-collections:commons-collections:jar:3.2.1
+ - commons-configuration:commons-configuration:jar:1.6
+ - commons-digester:commons-digester:jar:1.8
+ - commons-el:commons-el:jar:1.0
+ - commons-httpclient:commons-httpclient:jar:3.1
+ - commons-io:commons-io:jar:2.4
+ - commons-lang:commons-lang:jar:2.6
+ - commons-logging:commons-logging:jar:1.1.3
+ - commons-net:commons-net:jar:3.1
+ - io.netty:netty:jar:3.6.2.Final
+ - javax.activation:activation:jar:1.1
+ - javax.inject:javax.inject:jar:1
+ - javax.servlet.jsp:jsp-api:jar:2.1
+ - javax.servlet:servlet-api:jar:2.5
+ - javax.xml.bind:jaxb-api:jar:2.2.2
+ - javax.xml.stream:stax-api:jar:1.0-2
+ - jdk.tools:jdk.tools:jar:1.7
+ - log4j:log4j:jar:1.2.17
+ - net.java.dev.jets3t:jets3t:jar:0.9.0
+ - org.apache.avro:avro:jar:1.7.4
+ - org.apache.commons:commons-compress:jar:1.4.1
+ - org.apache.commons:commons-math3:jar:3.1.1
+ - org.apache.hadoop:hadoop-annotations:jar:2.4.0
+ - org.apache.hadoop:hadoop-auth:jar:2.4.0
+ - org.apache.hadoop:hadoop-common:jar:2.4.0
+ - org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.4.0
+ - org.apache.hadoop:hadoop-yarn-api:jar:2.4.0
+ - org.apache.hadoop:hadoop-yarn-common:jar:2.4.0
+ - org.apache.httpcomponents:httpclient:jar:4.1.2
+ - org.apache.httpcomponents:httpcore:jar:4.1.2
+ - org.apache.zookeeper:zookeeper:jar:3.4.5
+ - org.codehaus.jackson:jackson-core-asl:jar:1.8.8
+ - org.codehaus.jackson:jackson-jaxrs:jar:1.8.3
+ - org.codehaus.jackson:jackson-mapper-asl:jar:1.8.8
+ - org.codehaus.jackson:jackson-xc:jar:1.8.3
+ - org.codehaus.jettison:jettison:jar:1.1
+ - org.mortbay.jetty:jetty-util:jar:6.1.26
+ - org.mortbay.jetty:jetty:jar:6.1.26
+ - org.slf4j:slf4j-api:jar:1.7.5
+ - org.slf4j:slf4j-log4j12:jar:1.7.5
+ - org.tukaani:xz:jar:1.0
+ - org.xerial.snappy:snappy-java:jar:1.0.4.1
+ - tomcat:jasper-compiler:jar:5.5.23
+ - tomcat:jasper-runtime:jar:5.5.23
+ - xmlenc:xmlenc:jar:0.52
+ artifacts:
+ - jar:org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.4.0:
+ transitive:
+ com.google.inject.extensions:guice-servlet:jar:3.0: {}
+ org.apache.hadoop:hadoop-yarn-common:jar:2.4.0:
+ javax.xml.bind:jaxb-api:jar:2.2.2:
+ javax.xml.stream:stax-api:jar:1.0-2: {}
+ javax.activation:activation:jar:1.1: {}
+ com.google.inject:guice:jar:3.0:
+ aopalliance:aopalliance:jar:1.0: {}
+ javax.inject:javax.inject:jar:1: {}
+ com.sun.jersey.contribs:jersey-guice:jar:1.9: {}
+ org.apache.hadoop:hadoop-yarn-api:jar:2.4.0: {}
+ io.netty:netty:jar:3.6.2.Final: {}
+ - jar:org.apache.hadoop:hadoop-common:jar:2.4.0:
+ transitive:
+ org.apache.hadoop:hadoop-auth:jar:2.4.0: {}
+ com.jcraft:jsch:jar:0.1.42: {}
+ com.sun.jersey:jersey-server:jar:1.9:
+ asm:asm:jar:3.1: {}
+ org.apache.commons:commons-math3:jar:3.1.1: {}
+ org.apache.zookeeper:zookeeper:jar:3.4.5: {}
+ com.sun.jersey:jersey-core:jar:1.9: {}
+ org.slf4j:slf4j-log4j12:jar:1.7.5: {}
+ com.google.guava:guava:jar:11.0.2: {}
+ org.apache.avro:avro:jar:1.7.4:
+ org.xerial.snappy:snappy-java:jar:1.0.4.1: {}
+ com.thoughtworks.paranamer:paranamer:jar:2.3: {}
+ javax.servlet.jsp:jsp-api:jar:2.1: {}
+ org.codehaus.jackson:jackson-core-asl:jar:1.8.8: {}
+ commons-cli:commons-cli:jar:1.2: {}
+ com.google.code.findbugs:jsr305:jar:1.3.9: {}
+ org.mortbay.jetty:jetty-util:jar:6.1.26: {}
+ xmlenc:xmlenc:jar:0.52: {}
+ org.apache.hadoop:hadoop-annotations:jar:2.4.0:
+ jdk.tools:jdk.tools:jar:1.7: {}
+ commons-el:commons-el:jar:1.0: {}
+ org.apache.commons:commons-compress:jar:1.4.1:
+ org.tukaani:xz:jar:1.0: {}
+ javax.servlet:servlet-api:jar:2.5: {}
+ com.sun.jersey:jersey-json:jar:1.9:
+ org.codehaus.jackson:jackson-jaxrs:jar:1.8.3: {}
+ com.sun.xml.bind:jaxb-impl:jar:2.2.3-1: {}
+ org.codehaus.jackson:jackson-xc:jar:1.8.3: {}
+ org.codehaus.jettison:jettison:jar:1.1: {}
+ net.java.dev.jets3t:jets3t:jar:0.9.0:
+ com.jamesmurty.utils:java-xmlbuilder:jar:0.4: {}
+ org.apache.httpcomponents:httpclient:jar:4.1.2: {}
+ org.apache.httpcomponents:httpcore:jar:4.1.2: {}
+ commons-lang:commons-lang:jar:2.6: {}
+ tomcat:jasper-compiler:jar:5.5.23: {}
+ commons-collections:commons-collections:jar:3.2.1: {}
+ org.codehaus.jackson:jackson-mapper-asl:jar:1.8.8: {}
+ tomcat:jasper-runtime:jar:5.5.23: {}
+ commons-logging:commons-logging:jar:1.1.3: {}
+ com.google.protobuf:protobuf-java:jar:2.5.0: {}
+ org.mortbay.jetty:jetty:jar:6.1.26: {}
+ commons-httpclient:commons-httpclient:jar:3.1: {}
+ org.slf4j:slf4j-api:jar:1.7.5: {}
+ commons-net:commons-net:jar:3.1: {}
+ commons-io:commons-io:jar:2.4: {}
+ commons-codec:commons-codec:jar:1.4: {}
+ commons-configuration:commons-configuration:jar:1.6:
+ commons-beanutils:commons-beanutils-core:jar:1.8.0: {}
+ commons-digester:commons-digester:jar:1.8:
+ commons-beanutils:commons-beanutils:jar:1.7.0: {}
+ log4j:log4j:jar:1.2.17: {}
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2014 Yoko Harada
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,65 @@
+ # MapleRoad
+
+ This gem is a proof of concept for writing Hadoop MapReduce code in Ruby.
+
+ This gem works only on JRuby.
+ It also assumes the hadoop command is on the PATH and can be invoked simply by typing `hadoop`.
+ Prior to trying this gem, set up a Hadoop cluster and check that a sample Hadoop job runs fine.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'mapleroad'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install mapleroad
+
+ ## Usage
+
+ 1. Write a mapper class. Make sure the mapper file returns an instance of the mapper class when the file is evaluated. The mapper class must have a `map` method whose arguments are key, value, and collector.
+
+ 2. Write a reducer class. Like the mapper, make sure the file returns an instance of the reducer class. The reducer class must have a `reduce` method whose arguments are key, values, and collector (see the sketch below).
+
+ 3. Put input file(s) on the Hadoop filesystem.
+
+ 4. Run the bin/mapleroad command with arguments.
+
+ Example: `bin/mapleroad mapper.rb reducer.rb my-dir/input my-dir/output Text:Int`
+
+ - The first arg: mapper file name
+ - The second arg: reducer file name
+ - The third arg: input
+ - The fourth arg: output
+ - The fifth arg: types of the output key/value. Only Text:Text or Text:Int for now
+
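+ A minimal mapper and reducer pair, adapted from the bundled wordcount sample (samples/wordcount), could look like this:
+
+     # mapper.rb -- the file must end with an expression returning a mapper instance
+     class WordCounterMapper
+       def map(key, value, collector)
+         value.to_s.split.each { |word| collector.collect(word, 1) }
+       end
+     end
+     WordCounterMapper.new
+
+     # reducer.rb -- likewise, the file must return a reducer instance
+     class WordCounterReducer
+       def reduce(key, values, collector)
+         sum = values.inject(0) { |memo, v| memo + v.to_string.to_i }
+         collector.collect(key, sum)
+       end
+     end
+     WordCounterReducer.new
+
+ The trailing `WordCounterMapper.new` / `WordCounterReducer.new` lines matter: mapleroad evaluates each file and invokes `map` or `reduce` on the object the file returns.
+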
+ ## Samples
+
+ This gem includes two samples, anagram and wordcount.
+
+ The original anagram example can be found at [https://code.google.com/p/hadoop-map-reduce-examples/wiki/Anagram_Example](https://code.google.com/p/hadoop-map-reduce-examples/wiki/Anagram_Example).
+ The input data can be downloaded from that wiki page.
+
+ Assuming you are in the top directory of this gem, run:
+
+ `bin/mapleroad samples/anagram/mapper.rb samples/anagram/reducer.rb anagram/input anagram/output Text:Text`
+
+
+ The wordcount example is the famous Hadoop example.
+ As explained at [http://www.cloudera.com/content/cloudera-content/cloudera-docs/HadoopTutorial/CDH4/Hadoop-Tutorial/ht_usage.html](http://www.cloudera.com/content/cloudera-content/cloudera-docs/HadoopTutorial/CDH4/Hadoop-Tutorial/ht_usage.html), create two input files and put them on the Hadoop filesystem.
+
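+ For example, with two placeholder local files named file01 and file02 (adjust the paths to your own HDFS setup), something like the following prepares the input directory:
+
+ `hadoop fs -mkdir -p wordcount/input`
+
+ `hadoop fs -put file01 file02 wordcount/input`
+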
+ Then run:
+
+ `bin/mapleroad samples/wordcount/mapper.rb samples/wordcount/reducer.rb wordcount/input wordcount/output Text:Int`
+
+
+ ## Contributing
+
+ 1. Fork it ( https://github.com/yokolet/mapleroad/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,17 @@
+ # -*- ruby -*-
+
+ require 'rubygems'
+ require 'rake/javaextensiontask'
+
+ Rake::JavaExtensionTask.new('mapleroad') do |ext|
+   require 'lock_jar'
+   LockJar.lock
+   locked_jars = LockJar.load
+
+   jruby_home = ENV['MY_RUBY_HOME'] # this is available on rvm
+   jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar'] + locked_jars
+   ext.classpath = jars.map {|x| File.expand_path x}.join ':'
+   ext.source_version = '1.7'
+   ext.target_version = '1.7'
+   ext.name = 'mapleroad_service'
+ end
data/bin/mapleroad ADDED
@@ -0,0 +1,33 @@
+ #!/usr/bin/env ruby
+ require 'lock_jar'
+ require 'optparse'
+ require 'jruby-jars'
+
+ opts = OptionParser.new do |opts|
+   opts.banner = "MapleRoad: proof of concept, hadoop by Ruby"
+   opts.define_head "Usage: mapleroad <mapper filename> <reducer filename> <input> <output> <type>"
+   opts.separator ""
+   opts.separator "Example:"
+   opts.separator "    mapleroad samples/anagram/mapper.rb samples/anagram/reducer.rb anagram/input anagram/output Text:Text"
+   # TODO: options
+ end
+ opts.parse!
+
+ MAPPER_FILENAME = File.expand_path(ARGV.shift)
+ REDUCER_FILENAME = File.expand_path(ARGV.shift)
+ INPUT = ARGV.shift
+ OUTPUT = ARGV.shift
+ TYPE = ARGV.shift
+ TYPE ||= "Text:Text"
+
+ LockJar.lock
+ locked_jars = LockJar.load
+ classpath = locked_jars.map {|x| File.expand_path(x)}.join(":")
+ libjarpath = locked_jars.map {|x| File.expand_path(x)}.join(",")
+
+ JAVA_CLASS = 'mapleroad.MapleRoadJob'
+ command = %Q(export HADOOP_CLASSPATH=#{classpath}; hadoop jar lib/mapleroad_service.jar #{JAVA_CLASS} #{MAPPER_FILENAME} #{REDUCER_FILENAME} #{INPUT} #{OUTPUT} #{TYPE})
+ puts "COMMAND: #{command}"
+
+
+ exec(command)
data/ext/mapleroad/MapleRoadCollector.java ADDED
@@ -0,0 +1,32 @@
+ package mapleroad;
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.OutputCollector;
+
+ public class MapleRoadCollector {
+     private OutputCollector<Text, Text> collector = null;
+     private Text key = null;
+     private Text value = null;
+
+     MapleRoadCollector(Text key, Text value, OutputCollector<Text, Text> collector) {
+         this.key = key;
+         this.value = value;
+         this.collector = collector;
+     }
+
+     public void collect(Object k, Object v) throws IOException {
+         if (k instanceof String) {
+             key.set((String)k);
+         } else if (k instanceof Text) {
+             key = (Text)k;
+         }
+         if (v instanceof String) {
+             value.set((String)v);
+         } else if (v instanceof Text) {
+             value.set((Text)v);
+         }
+         collector.collect(key, value);
+     }
+ }
data/ext/mapleroad/MapleRoadCollectorTextInt.java ADDED
@@ -0,0 +1,37 @@
+ package mapleroad;
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.OutputCollector;
+ import org.jruby.RubyFixnum;
+
+ public class MapleRoadCollectorTextInt {
+     private OutputCollector<Text, IntWritable> collector = null;
+     private Text key = null;
+     private IntWritable value = null;
+
+     MapleRoadCollectorTextInt(Text key, IntWritable value, OutputCollector<Text, IntWritable> collector) {
+         this.key = key;
+         this.value = value;
+         this.collector = collector;
+     }
+
+     public void collect(Object k, Object v) throws IOException {
+         if (k instanceof String) {
+             key.set((String)k);
+         } else if (k instanceof Text) {
+             key.set((Text)k);
+         }
+         if (v instanceof RubyFixnum) {
+             Long longValue = ((RubyFixnum)v).getLongValue();
+             value.set(longValue.intValue());
+         } else if (v instanceof Long) {
+             value.set(((Long)v).intValue());
+         } else if (v instanceof Integer) {
+             value.set(((Integer)v).intValue());
+         }
+         collector.collect(key, value);
+     }
+ }
data/ext/mapleroad/MapleRoadJob.java ADDED
@@ -0,0 +1,64 @@
+ package mapleroad;
+
+ import java.io.IOException;
+ import java.nio.charset.Charset;
+ import java.nio.charset.StandardCharsets;
+ import java.nio.file.Files;
+ import java.nio.file.Paths;
+
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.FileInputFormat;
+ import org.apache.hadoop.mapred.FileOutputFormat;
+ import org.apache.hadoop.mapred.JobClient;
+ import org.apache.hadoop.mapred.JobConf;
+ import org.apache.hadoop.mapred.TextInputFormat;
+ import org.apache.hadoop.mapred.TextOutputFormat;
+
+ public class MapleRoadJob {
+
+     /**
+      * @param args args[0]: mapper filename,
+      *             args[1]: reducer filename,
+      *             args[2]: input,
+      *             args[3]: output,
+      *             args[4]: output key/value types
+      * @throws IOException
+      */
+     public static void main(String[] args) throws IOException {
+         JobConf conf = new JobConf(MapleRoadJob.class);
+         conf.set("mapper.file.name", args[0]);
+         conf.set("reducer.file.name", args[1]);
+         conf.set("mapper.code", getCodeAsString(args[0], StandardCharsets.UTF_8));
+         System.out.println(conf.get("mapper.code"));
+         conf.set("reducer.code", getCodeAsString(args[1], StandardCharsets.UTF_8));
+         System.out.println(conf.get("reducer.code"));
+         conf.setJobName("mapleroad_job");
+         conf.setInputFormat(TextInputFormat.class);
+         conf.setOutputFormat(TextOutputFormat.class);
+         FileInputFormat.setInputPaths(conf, new Path(args[2]));
+         FileOutputFormat.setOutputPath(conf, new Path(args[3]));
+
+         if (args[4].equals("Text:Text")) {
+             conf.setOutputKeyClass(Text.class);
+             conf.setOutputValueClass(Text.class);
+             conf.setMapperClass(MapleRoadMapper.class);
+             conf.setReducerClass(MapleRoadReducer.class);
+         } else if (args[4].equals("Text:Int")) {
+             conf.setOutputKeyClass(Text.class);
+             conf.setOutputValueClass(IntWritable.class);
+             conf.setMapperClass(MapleRoadMapperTextInt.class);
+             conf.setReducerClass(MapleRoadReducerTextInt.class);
+         }
+
+         JobClient.runJob(conf);
+     }
+
+     private static String getCodeAsString(String filename, Charset encoding) throws IOException {
+         byte[] encoded = Files.readAllBytes(Paths.get(filename).toAbsolutePath());
+         return new String(encoded, encoding);
+     }
+
+ }
data/ext/mapleroad/MapleRoadMapper.java ADDED
@@ -0,0 +1,43 @@
+ package mapleroad;
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.io.LongWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.JobConf;
+ import org.apache.hadoop.mapred.MapReduceBase;
+ import org.apache.hadoop.mapred.Mapper;
+ import org.apache.hadoop.mapred.OutputCollector;
+ import org.apache.hadoop.mapred.Reporter;
+ import org.jruby.Ruby;
+ import org.jruby.RubyRuntimeAdapter;
+ import org.jruby.javasupport.JavaEmbedUtils;
+ import org.jruby.javasupport.JavaUtil;
+ import org.jruby.runtime.Helpers;
+ import org.jruby.runtime.builtin.IRubyObject;
+
+ public class MapleRoadMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
+     private String mapper_code = null;
+     private Text keyText = new Text();
+     private Text valueText = new Text();
+
+     @Override
+     public void configure(JobConf job) {
+         mapper_code = job.get("mapper.code");
+     }
+
+     @Override
+     public void map(LongWritable key, Text value,
+                     OutputCollector<Text, Text> outputCollector, Reporter arg3) throws IOException {
+         Ruby runtime = Ruby.getGlobalRuntime();
+         RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
+         IRubyObject receiver = adapter.eval(runtime, mapper_code);
+         IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
+         IRubyObject rubyValue = JavaUtil.convertJavaToRuby(runtime, value);
+         MapleRoadCollector collector =
+                 new MapleRoadCollector(keyText, valueText, outputCollector);
+         IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
+         Helpers.invoke(runtime.getCurrentContext(), receiver, "map", rubyKey, rubyValue, rubyCollector);
+     }
+
+ }
data/ext/mapleroad/MapleRoadMapperTextInt.java ADDED
@@ -0,0 +1,44 @@
+ package mapleroad;
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.LongWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.JobConf;
+ import org.apache.hadoop.mapred.MapReduceBase;
+ import org.apache.hadoop.mapred.Mapper;
+ import org.apache.hadoop.mapred.OutputCollector;
+ import org.apache.hadoop.mapred.Reporter;
+ import org.jruby.Ruby;
+ import org.jruby.RubyRuntimeAdapter;
+ import org.jruby.javasupport.JavaEmbedUtils;
+ import org.jruby.javasupport.JavaUtil;
+ import org.jruby.runtime.Helpers;
+ import org.jruby.runtime.builtin.IRubyObject;
+
+ public class MapleRoadMapperTextInt extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
+     private String mapper_code = null;
+     private Text keyText = new Text();
+     private IntWritable valueInt = new IntWritable();
+
+     @Override
+     public void configure(JobConf job) {
+         mapper_code = job.get("mapper.code");
+     }
+
+     @Override
+     public void map(LongWritable key, Text value,
+                     OutputCollector<Text, IntWritable> outputCollector, Reporter arg3) throws IOException {
+         Ruby runtime = Ruby.getGlobalRuntime();
+         RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
+         IRubyObject receiver = adapter.eval(runtime, mapper_code);
+         IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
+         IRubyObject rubyValue = JavaUtil.convertJavaToRuby(runtime, value);
+         MapleRoadCollectorTextInt collector =
+                 new MapleRoadCollectorTextInt(keyText, valueInt, outputCollector);
+         IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
+         Helpers.invoke(runtime.getCurrentContext(), receiver, "map", rubyKey, rubyValue, rubyCollector);
+     }
+
+ }
data/ext/mapleroad/MapleRoadReducer.java ADDED
@@ -0,0 +1,43 @@
+ package mapleroad;
+
+ import java.io.IOException;
+ import java.util.Iterator;
+
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.JobConf;
+ import org.apache.hadoop.mapred.MapReduceBase;
+ import org.apache.hadoop.mapred.OutputCollector;
+ import org.apache.hadoop.mapred.Reducer;
+ import org.apache.hadoop.mapred.Reporter;
+ import org.jruby.Ruby;
+ import org.jruby.RubyRuntimeAdapter;
+ import org.jruby.javasupport.JavaEmbedUtils;
+ import org.jruby.javasupport.JavaUtil;
+ import org.jruby.runtime.Helpers;
+ import org.jruby.runtime.builtin.IRubyObject;
+
+ public class MapleRoadReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
+     private String reducer_code = null;
+     private Text keyText = new Text();
+     private Text valueText = new Text();
+
+     @Override
+     public void configure(JobConf job) {
+         reducer_code = job.get("reducer.code");
+     }
+
+     @Override
+     public void reduce(Text key, Iterator<Text> values,
+                        OutputCollector<Text, Text> outputCollector, Reporter arg3) throws IOException {
+         Ruby runtime = Ruby.getGlobalRuntime();
+         RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
+         IRubyObject receiver = adapter.eval(runtime, reducer_code);
+         IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
+         IRubyObject rubyValues = JavaUtil.convertJavaToRuby(runtime, values);
+         MapleRoadCollector collector =
+                 new MapleRoadCollector(keyText, valueText, outputCollector);
+         IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
+         Helpers.invoke(runtime.getCurrentContext(), receiver, "reduce", rubyKey, rubyValues, rubyCollector);
+     }
+
+ }
data/ext/mapleroad/MapleRoadReducerTextInt.java ADDED
@@ -0,0 +1,44 @@
+ package mapleroad;
+
+ import java.io.IOException;
+ import java.util.Iterator;
+
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.mapred.JobConf;
+ import org.apache.hadoop.mapred.MapReduceBase;
+ import org.apache.hadoop.mapred.OutputCollector;
+ import org.apache.hadoop.mapred.Reducer;
+ import org.apache.hadoop.mapred.Reporter;
+ import org.jruby.Ruby;
+ import org.jruby.RubyRuntimeAdapter;
+ import org.jruby.javasupport.JavaEmbedUtils;
+ import org.jruby.javasupport.JavaUtil;
+ import org.jruby.runtime.Helpers;
+ import org.jruby.runtime.builtin.IRubyObject;
+
+ public class MapleRoadReducerTextInt extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
+     private String reducer_code = null;
+     private Text keyText = new Text();
+     private IntWritable valueInt = new IntWritable();
+
+     @Override
+     public void configure(JobConf job) {
+         reducer_code = job.get("reducer.code");
+     }
+
+     @Override
+     public void reduce(Text key, Iterator<IntWritable> values,
+                        OutputCollector<Text, IntWritable> outputCollector, Reporter arg3) throws IOException {
+         Ruby runtime = Ruby.getGlobalRuntime();
+         RubyRuntimeAdapter adapter = JavaEmbedUtils.newRuntimeAdapter();
+         IRubyObject receiver = adapter.eval(runtime, reducer_code);
+         IRubyObject rubyKey = JavaUtil.convertJavaToRuby(runtime, key);
+         IRubyObject rubyValues = JavaUtil.convertJavaToRuby(runtime, values);
+         MapleRoadCollectorTextInt collector =
+                 new MapleRoadCollectorTextInt(keyText, valueInt, outputCollector);
+         IRubyObject rubyCollector = JavaUtil.convertJavaToRuby(runtime, collector);
+         Helpers.invoke(runtime.getCurrentContext(), receiver, "reduce", rubyKey, rubyValues, rubyCollector);
+     }
+
+ }
data/lib/mapleroad/version.rb ADDED
@@ -0,0 +1,3 @@
+ module MapleRoad
+   VERSION = '0.0.1.pre'
+ end
data/lib/mapleroad_service.jar ADDED
Binary file
data/mapleroad.gemspec ADDED
@@ -0,0 +1,24 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'mapleroad/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = "mapleroad"
+   spec.version = MapleRoad::VERSION
+   spec.authors = ["Yoko Harada"]
+   spec.email = ["yokolet@gmail.com"]
+   spec.summary = %q{Proof of concept gem for hadoop's mapreduce}
+   spec.description = %q{This gem is a proof of concept that mapper and reducer
+ of mapreduce can be written in Ruby.}
+   spec.homepage = ""
+   spec.license = "MIT"
+
+   spec.files = `git ls-files -z`.split("\x0")
+   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.6"
+   spec.add_development_dependency "rake"
+ end
data/samples/anagram/mapper.rb ADDED
@@ -0,0 +1,8 @@
+ class AnagramMapper
+   def map(key, value, collector)
+     word = value.to_s
+     sorted_word = word.scan(/./).sort.join
+     collector.collect(sorted_word, word)
+   end
+ end
+ AnagramMapper.new
data/samples/anagram/reducer.rb ADDED
@@ -0,0 +1,11 @@
+ class AnagramReducer
+   def reduce(key, values, collector)
+     output = values.inject("") do |memo, v|
+       memo = memo + v.to_s + "~"
+     end
+     if (output.split("~").length >= 2)
+       collector.collect(key.to_s, output.gsub("~", ","))
+     end
+   end
+ end
+ AnagramReducer.new
data/samples/wordcount/mapper.rb ADDED
@@ -0,0 +1,8 @@
+ class WordCounterMapper
+   def map(key, value, collector)
+     value.to_s.split.each do |word|
+       collector.collect(word, 1)
+     end
+   end
+ end
+ WordCounterMapper.new
data/samples/wordcount/reducer.rb ADDED
@@ -0,0 +1,9 @@
+ class WordCounterReducer
+   def reduce(key, values, collector)
+     sum = values.inject(0) do |memo, v|
+       memo = memo + v.to_string.to_i
+     end
+     collector.collect(key, sum)
+   end
+ end
+ WordCounterReducer.new
metadata ADDED
@@ -0,0 +1,98 @@
+ --- !ruby/object:Gem::Specification
+ name: mapleroad
+ version: !ruby/object:Gem::Version
+ version: 0.0.1.pre
+ platform: ruby
+ authors:
+ - Yoko Harada
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-06-01 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+ name: bundler
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ version: '1.6'
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ~>
+ - !ruby/object:Gem::Version
+ version: '1.6'
+ prerelease: false
+ type: :development
+ - !ruby/object:Gem::Dependency
+ name: rake
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ prerelease: false
+ type: :development
+ description: |-
+ This gem is a proof of concept that mapper and reducer
+ of mapreduce can be written in Ruby.
+ email:
+ - yokolet@gmail.com
+ executables:
+ - mapleroad
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Gemfile.lock
+ - Jarfile
+ - Jarfile.lock
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - bin/mapleroad
+ - ext/mapleroad/MapleRoadCollector.java
+ - ext/mapleroad/MapleRoadCollectorTextInt.java
+ - ext/mapleroad/MapleRoadJob.java
+ - ext/mapleroad/MapleRoadMapper.java
+ - ext/mapleroad/MapleRoadMapperTextInt.java
+ - ext/mapleroad/MapleRoadReducer.java
+ - ext/mapleroad/MapleRoadReducerTextInt.java
+ - lib/mapleroad/version.rb
+ - lib/mapleroad_service.jar
+ - mapleroad.gemspec
+ - samples/anagram/mapper.rb
+ - samples/anagram/reducer.rb
+ - samples/wordcount/mapper.rb
+ - samples/wordcount/reducer.rb
+ homepage: ''
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>'
+ - !ruby/object:Gem::Version
+ version: 1.3.1
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: Proof of concept gem for hadoop's mapreduce
+ test_files: []