jmapreduce 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +70 -0
- data/bin/jmapreduce +62 -0
- data/examples/alice.txt +3736 -0
- data/examples/wordcount.rb +42 -0
- data/lib/jmapreduce/runner.rb +110 -0
- data/release/jmapreduce.jar +0 -0
- data/vendors/gson.jar +0 -0
- data/vendors/javassist.jar +0 -0
- data/vendors/msgpack.jar +0 -0
- metadata +86 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
import org.fingertap.jmapreduce.JMapReduce
|
2
|
+
|
3
|
+
# Job "Count": tallies how often each whitespace-separated token occurs.
JMapReduce.job 'Count' do
  reduce_tasks 1

  # Emit a (word, 1) pair for every token in the input value.
  map do |key, value|
    value.split.each { |word| emit(word, 1) }
  end

  # Add up the ones emitted for a word and emit the total under 'sum'.
  reduce do |key, values|
    total = values.inject(0) { |acc, v| acc + v }
    emit(key, {'sum' => total})
  end
end

# Job "Histogram": buckets words by their 'sum' and draws one bar per bucket.
# NOTE(review): the map block reads count['sum'], so it presumably consumes
# the output of the "Count" job -- confirm how JMapReduce chains jobs.
JMapReduce.job "Histogram" do
  setup do
    RANGES = [0..1, 2..3, 4..5, 6..10, 11..20, 21..30, 31..40, 41..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999]
  end

  # Emit (bucket label, 1); bounds are zero-padded to five digits so the
  # labels sort in numeric order when compared as strings.
  map do |word, count|
    bucket = RANGES.find { |candidate| candidate.include?(count['sum']) }
    lower = bucket.first.to_s.rjust(5, '0')
    upper = bucket.last.to_s.rjust(5, '0')
    emit("#{lower}-#{upper}", 1)
  end

  # Draw one '|' for every 20 words that fell into the bucket.
  reduce do |range, counts|
    total = 0
    counts.each { |c| total += c }
    emit(range, '|' * (total / 20))
  end
end

# This job is just a pass-through which takes advantage of the map/reduce
# shuffle to get ordered keys.
JMapReduce.job "Sort" do
  reduce_tasks 1
end

__END__

./bin/jmapreduce examples/wordcount.rb examples/alice.txt /tmp/output
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# Assembles and executes the `hadoop jar ...` command line that runs a
# JMapReduce script.
#
# Options are read from the +opts+ hash by symbol key:
#   :conf, :namenode, :jobtracker  - forwarded as -conf / -fs / -jt
#   :dirs                          - comma-separated directories, shipped via -archives
#   :files                         - comma-separated extra files, shipped via -files
#   :libjars                       - comma-separated extra jars, shipped via -libjars
#   :properties, :json             - appended (comma-joined) after the job arguments
class Runner
  JAVA_MAIN_CLASS = 'org.fingertap.jmapreduce.JMapReduce'

  # script - path of the job script; input / output - passed through verbatim
  # as the job's trailing arguments.
  #
  # Raises RuntimeError when no hadoop executable can be located.
  def initialize(script, input, output, opts={})
    @script = script
    @input = input
    @output = output
    @opts = opts

    # env get / set and check
    # Both steps run unconditionally so that HADOOP_CLASSPATH is exported even
    # when HADOOP_HOME is unset and hadoop is only reachable through the PATH.
    hadoop_cmd
    hadoop_classpath
  end

  # Value of the HADOOP_HOME environment variable, or nil when it is unset.
  def hadoop_home
    ENV['HADOOP_HOME']
  end

  # Path of the hadoop executable: whatever `which hadoop` reports, falling
  # back to "$HADOOP_HOME/bin/hadoop".
  #
  # Raises RuntimeError ('cannot find hadoop command') when neither is available.
  def hadoop_cmd
    hadoop = `which hadoop 2>/dev/null`.chomp
    home = hadoop_home.to_s # nil-safe: HADOOP_HOME may be unset
    hadoop = "#{home}/bin/hadoop" if hadoop.empty? && !home.empty?
    raise 'cannot find hadoop command' if hadoop.empty?
    hadoop
  end

  # Exports HADOOP_CLASSPATH (replacing any previous value) and returns it.
  def hadoop_classpath
    ENV['HADOOP_CLASSPATH'] = ([lib_path] + dirnames + lib_jars).join(':')
  end

  # Prints the command line and replaces the current process with it.
  def run
    # Build the command exactly once: cmd creates the -archives tarballs as a
    # side effect, so a second call would print one command and run another.
    command = cmd
    puts command
    exec command
  end

  # The complete command line as a single string.
  def cmd
    "#{hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS} #{jars_args} #{file_args} #{conf_args} #{archived_args} #{mapred_args} #{properties_args}"
  end

  def jars_args
    "-libjars #{lib_jars.join(',')}"
  end

  def file_args
    "-files #{files.join(',')}"
  end

  # -conf / -fs / -jt switches for the options that are set; each switch is
  # followed by a single space, and the result is '' when none is set.
  def conf_args
    switches = [[:conf, '-conf'], [:namenode, '-fs'], [:jobtracker, '-jt']]
    switches.map { |key, flag| @opts[key] ? "#{flag} #{@opts[key]} " : '' }.join
  end

  # Packs every existing directory listed in opts[:dirs] into a tarball under
  # /tmp and returns the matching "-archives tgz#basename,..." switch.
  #
  # Returns nil when there is nothing to ship, so that no dangling "-archives"
  # switch ends up swallowing the next argument on the command line.
  # Raises RuntimeError when tar fails.
  def archived_args
    return unless @opts[:dirs]

    archived_files = []
    @opts[:dirs].split(',').each do |dir|
      next unless File.directory?(dir)

      # Same selection as the shell glob `*`: top-level entries, no dotfiles.
      entries = Dir.chdir(dir) { Dir.glob('*').sort }
      next if entries.empty? # tar refuses to create an empty archive

      tgz = "/tmp/jmapreduce-#{Process.pid}-#{Time.now.to_i}-#{rand(1000)}.tgz"
      # Argument-list form: no shell is involved, so directory names containing
      # spaces or shell metacharacters are passed through safely.
      unless system('tar', '-czf', tgz, '-C', dir, '--', *entries)
        raise "failed to archive directory #{dir}"
      end
      archived_files << "#{tgz}\##{File.basename(dir)}"
    end
    return if archived_files.empty?

    "-archives #{archived_files.join(',')}"
  end

  # Trailing job arguments: script base name, input and output.
  def mapred_args
    "#{File.basename(@script)} #{@input} #{@output}"
  end

  # opts[:properties] and opts[:json], comma-joined; '' when neither is set.
  def properties_args
    [@opts[:properties], @opts[:json]].select { |value| value }.join(',')
  end

  # The job script followed by any extra files from opts[:files].
  def files
    ret = [@script]
    ret += @opts[:files].split(',') if @opts[:files]
    ret
  end

  # Directory of every shipped file (added to the classpath).
  def dirnames
    files.map { |f| File.dirname(f) }
  end

  # JRuby jars, the JMapReduce jar, the vendored jars, then opts[:libjars].
  def lib_jars
    jars = [
      JRubyJars.core_jar_path,
      JRubyJars.stdlib_jar_path,
      main_jar_path,
      gem_path('..', 'vendors', 'gson.jar'),
      gem_path('..', 'vendors', 'javassist.jar'),
      gem_path('..', 'vendors', 'msgpack.jar')
    ]
    jars += @opts[:libjars].split(',') if @opts[:libjars]
    jars
  end

  def main_jar_path
    gem_path('..', 'release', 'jmapreduce.jar')
  end

  def lib_path
    gem_path
  end

  private

  # Absolute path built from +parts+, relative to the parent directory of the
  # directory containing this file.
  def gem_path(*parts)
    File.expand_path(File.join(File.dirname(__FILE__), '..', *parts))
  end
end
|
Binary file
|
data/vendors/gson.jar
ADDED
Binary file
|
Binary file
|
data/vendors/msgpack.jar
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jmapreduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 9
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: "0.1"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Abhinay Mehta
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-09-12 00:00:00 Z
|
18
|
+
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: jruby-jars
|
21
|
+
prerelease: false
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ~>
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 6
|
31
|
+
version: "1.6"
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
description: JMapReduce is JRuby Map/Reduce Framework built on top of the Hadoop Distributed computing platform.
|
35
|
+
email: abhinay.mehta@gmail.com
|
36
|
+
executables:
|
37
|
+
- jmapreduce
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files: []
|
41
|
+
|
42
|
+
files:
|
43
|
+
- bin/jmapreduce
|
44
|
+
- README.md
|
45
|
+
- lib/jmapreduce/runner.rb
|
46
|
+
- release/jmapreduce.jar
|
47
|
+
- vendors/gson.jar
|
48
|
+
- vendors/javassist.jar
|
49
|
+
- vendors/msgpack.jar
|
50
|
+
- examples/alice.txt
|
51
|
+
- examples/wordcount.rb
|
52
|
+
homepage: https://bitbucket.org/abhinaymehta/jmapreduce
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
hash: 3
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.7.2
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: Map/Reduce Framework
|
85
|
+
test_files: []
|
86
|
+
|