jmapreduce 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +70 -0
- data/bin/jmapreduce +62 -0
- data/examples/alice.txt +3736 -0
- data/examples/wordcount.rb +42 -0
- data/lib/jmapreduce/runner.rb +110 -0
- data/release/jmapreduce.jar +0 -0
- data/vendors/gson.jar +0 -0
- data/vendors/javassist.jar +0 -0
- data/vendors/msgpack.jar +0 -0
- metadata +86 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
import org.fingertap.jmapreduce.JMapReduce
|
2
|
+
|
3
|
+
# Word-count stage: the mapper emits (word, 1) for every whitespace-separated
# token of each input value, and the single reducer adds the ones up per word,
# emitting the total wrapped in a hash under the 'sum' key.
JMapReduce.job 'Count' do
  reduce_tasks 1

  map do |key, value|
    value.split.each { |word| emit(word, 1) }
  end

  reduce do |key, values|
    total = values.inject(0) { |running, v| running + v }
    emit(key, {'sum' => total})
  end
end
|
18
|
+
|
19
|
+
# Histogram stage: reads (word, {'sum' => n}) pairs -- presumably the output of
# the Count job defined earlier in this file -- assigns each total to one of
# the fixed RANGES buckets, counts the words per bucket, and emits one bar of
# '|' characters per bucket (one character per 20 words).
JMapReduce.job "Histogram" do
  setup do
    RANGES = [0..1, 2..3, 4..5, 6..10, 11..20, 21..30, 31..40, 41..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999].freeze
  end

  map do |word, count|
    total = count['sum']
    bucket = RANGES.find { |candidate| candidate.include?(total) }
    # A total outside every range (e.g. above 99_999) has no bucket. Skip it
    # instead of calling #first on nil and failing the whole task.
    if bucket
      # Zero-pad both bounds to five digits so the keys sort numerically.
      emit("#{bucket.first.to_s.rjust(5,'0')}-#{bucket.last.to_s.rjust(5,'0')}", 1)
    end
  end

  reduce do |range, counts|
    total = counts.inject(0) { |acc, n| acc + n }
    emit(range, '|'*(total/20))
  end
end
|
34
|
+
|
35
|
+
# This job is just a pass-through: it defines no map or reduce step and relies
# on the map/reduce shuffle to deliver the keys in sorted order.
JMapReduce.job("Sort") { reduce_tasks 1 }
|
39
|
+
|
40
|
+
__END__
|
41
|
+
|
42
|
+
./bin/jmapreduce examples/wordcount.rb examples/alice.txt /tmp/output
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# Builds and executes the `hadoop jar ...` command line that launches a
# JMapReduce script.
#
# Recognised keys in +opts+ (all optional):
#   :conf        passed as "-conf <value>"
#   :namenode    passed as "-fs <value>"
#   :jobtracker  passed as "-jt <value>"
#   :dirs        comma-separated directories, each tarred and passed via "-archives"
#   :files       comma-separated extra files appended to "-files"
#   :libjars     comma-separated extra jars appended to "-libjars"
#   :properties, :json  appended (comma-joined) at the end of the command line
class Runner
  JAVA_MAIN_CLASS = 'org.fingertap.jmapreduce.JMapReduce'

  # script - path of the JMapReduce script to run
  # input  - job input path, passed through to the job unchanged
  # output - job output path, passed through to the job unchanged
  # opts   - option hash, see the class comment
  #
  # Raises RuntimeError when no hadoop executable can be located.
  def initialize(script, input, output, opts={})
    @script = script
    @input = input
    @output = output
    @opts = opts

    # Check the environment up front and export HADOOP_CLASSPATH. These used to
    # be chained with `and`, so an unset HADOOP_HOME silently skipped both.
    hadoop_cmd
    hadoop_classpath
  end

  # Value of the HADOOP_HOME environment variable, or nil when it is unset.
  def hadoop_home
    ENV['HADOOP_HOME']
  end

  # Path of the hadoop executable: whatever `which hadoop` reports, otherwise
  # $HADOOP_HOME/bin/hadoop.
  #
  # Raises RuntimeError when neither source yields a path.
  def hadoop_cmd
    hadoop = `which hadoop 2>/dev/null`.chomp
    home = hadoop_home
    # HADOOP_HOME may be unset (nil) as well as empty; both mean "no fallback".
    hadoop = "#{home}/bin/hadoop" if hadoop.empty? && home && !home.empty?
    raise 'cannot find hadoop command' if hadoop.empty?
    hadoop
  end

  # Overwrites ENV['HADOOP_CLASSPATH'] with the gem's lib directory, the
  # directories of all shipped files and all library jars, ':'-joined.
  # Returns the new value.
  def hadoop_classpath
    ENV['HADOOP_CLASSPATH'] = ([lib_path] + dirnames + lib_jars).join(':')
  end

  # Prints the hadoop command line and replaces the current process with it.
  def run
    # Build the command once: cmd has side effects (archived_args creates
    # tarballs with fresh names), so the printed and executed command lines
    # must come from the same evaluation.
    command = cmd
    puts command
    exec command
  end

  # The complete hadoop command line as a single string.
  def cmd
    "#{hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS} #{jars_args} #{file_args} #{conf_args} #{archived_args} #{mapred_args} #{properties_args}"
  end

  # "-libjars" argument listing every jar from lib_jars.
  def jars_args
    "-libjars #{lib_jars.join(',')}"
  end

  # "-files" argument listing the script plus any extra files.
  def file_args
    "-files #{files.join(',')}"
  end

  # Optional "-conf", "-fs" and "-jt" arguments, each followed by a space;
  # an empty string when none of the options is set.
  def conf_args
    args = ''
    args += @opts[:conf] ? "-conf #{@opts[:conf]} " : ''
    args += @opts[:namenode] ? "-fs #{@opts[:namenode]} " : ''
    args += @opts[:jobtracker] ? "-jt #{@opts[:jobtracker]} " : ''
    args
  end

  # Tars every existing directory listed in opts[:dirs] into /tmp and returns
  # the matching "-archives" argument ("<tgz>#<dir basename>" per directory).
  #
  # Returns nil when :dirs is not set or none of the listed paths is a
  # directory, so no dangling "-archives " reaches the command line.
  # Raises RuntimeError when tar fails for a directory.
  def archived_args
    return unless @opts[:dirs]

    archived_files = []
    @opts[:dirs].split(',').each do |dir|
      next unless File.directory?(dir)
      tgz = "/tmp/jmapreduce-#{Process.pid}-#{Time.now.to_i}-#{rand(1000)}.tgz"
      # Argument-list form of system bypasses the shell, so directory names
      # with spaces or metacharacters are safe. Dir.glob('*') mirrors the
      # shell's `*` (top-level entries, dotfiles excluded).
      archived = Dir.chdir(dir) { system('tar', '-czf', tgz, '--', *Dir.glob('*').sort) }
      raise "failed to archive directory #{dir}" unless archived
      archived_files << "#{tgz}\##{File.basename(dir)}"
    end
    return if archived_files.empty?

    "-archives #{archived_files.join(',')}"
  end

  # Positional job arguments: script basename, input path, output path.
  def mapred_args
    "#{File.basename(@script)} #{@input} #{@output}"
  end

  # opts[:properties] and opts[:json] joined by ','; '' when neither is set.
  def properties_args
    [@opts[:properties], @opts[:json]].select { |value| value }.join(',')
  end

  # The script followed by any comma-separated entries of opts[:files].
  def files
    ret = [@script]
    ret += @opts[:files].split(',') if @opts[:files]
    ret
  end

  # Directory part of every entry in files.
  def dirnames
    files.map { |f| File.dirname(f) }
  end

  # JRuby jars, the JMapReduce jar, the bundled vendor jars, then any
  # comma-separated entries of opts[:libjars].
  def lib_jars
    jars = [
      JRubyJars.core_jar_path,
      JRubyJars.stdlib_jar_path,
      main_jar_path,
      vendor_jar('gson.jar'),
      vendor_jar('javassist.jar'),
      vendor_jar('msgpack.jar')
    ]
    jars += @opts[:libjars].split(',') if @opts[:libjars]
    jars
  end

  # Absolute path of release/jmapreduce.jar, two levels above this file.
  def main_jar_path
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'release', 'jmapreduce.jar'))
  end

  # Absolute path of the directory one level above this file.
  def lib_path
    File.expand_path(File.join(File.dirname(__FILE__), '..'))
  end

  private

  # Absolute path of a jar in the vendors directory, two levels above this file.
  def vendor_jar(name)
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'vendors', name))
  end
end
|
Binary file
|
data/vendors/gson.jar
ADDED
Binary file
|
Binary file
|
data/vendors/msgpack.jar
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jmapreduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 9
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: "0.1"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Abhinay Mehta
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-09-12 00:00:00 Z
|
18
|
+
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: jruby-jars
|
21
|
+
prerelease: false
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ~>
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 6
|
31
|
+
version: "1.6"
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
description: JMapReduce is a JRuby Map/Reduce framework built on top of the Hadoop distributed computing platform.
|
35
|
+
email: abhinay.mehta@gmail.com
|
36
|
+
executables:
|
37
|
+
- jmapreduce
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files: []
|
41
|
+
|
42
|
+
files:
|
43
|
+
- bin/jmapreduce
|
44
|
+
- README.md
|
45
|
+
- lib/jmapreduce/runner.rb
|
46
|
+
- release/jmapreduce.jar
|
47
|
+
- vendors/gson.jar
|
48
|
+
- vendors/javassist.jar
|
49
|
+
- vendors/msgpack.jar
|
50
|
+
- examples/alice.txt
|
51
|
+
- examples/wordcount.rb
|
52
|
+
homepage: https://bitbucket.org/abhinaymehta/jmapreduce
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
hash: 3
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.7.2
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: Map/Reduce Framework
|
85
|
+
test_files: []
|
86
|
+
|