jmapreduce 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,42 @@
1
+ import org.fingertap.jmapreduce.JMapReduce
2
+
3
# Word-count job: emits one record per distinct word with its number of
# occurrences wrapped in a {'sum' => n} hash.
JMapReduce.job 'Count' do
  reduce_tasks 1

  # Split each input value on whitespace and emit (word, 1) per token.
  map do |key, value|
    value.split.each { |word| emit(word, 1) }
  end

  # Add up the ones emitted for this word.
  reduce do |key, values|
    occurrences = values.inject(0) { |running, v| running + v }
    emit(key, {'sum' => occurrences})
  end
end
18
+
19
# Histogram job: consumes the (word, {'sum' => n}) records produced by the
# 'Count' job, buckets each word by its occurrence count and renders one bar
# per bucket.
JMapReduce.job "Histogram" do
  setup do
    # Define the buckets only once; assigning the constant again on a repeated
    # setup call would trigger an "already initialized constant" warning.
    unless defined?(RANGES)
      RANGES = [0..1, 2..3, 4..5, 6..10, 11..20, 21..30, 31..40, 41..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999]
    end
  end

  # Emit (zero-padded "first-last" bucket label, 1) for the bucket containing
  # this word's count. The padding keeps the labels in numeric order when they
  # are sorted as strings.
  map do |word, count|
    bucket = RANGES.find { |candidate| candidate.include?(count['sum']) }
    # Counts outside every bucket (above 99_999) have no label; skip them
    # instead of failing the task with a NoMethodError on nil.
    if bucket
      emit("#{bucket.first.to_s.rjust(5,'0')}-#{bucket.last.to_s.rjust(5,'0')}", 1)
    end
  end

  # Draw one '|' per 20 words that fell into the bucket (integer division, so
  # buckets with fewer than 20 words get an empty bar).
  reduce do |range, counts|
    total = counts.inject(0) { |sum, count| sum + count }
    emit(range, '|' * (total / 20))
  end
end
34
+
35
+ # this job is just a pass-through that takes advantage of the map/reduce shuffle to get ordered keys
36
# No map or reduce block is declared here; the job only configures a single
# reduce task.
JMapReduce.job('Sort') { reduce_tasks 1 }
39
+
40
+ __END__
41
+
42
+ ./bin/jmapreduce examples/wordcount.rb examples/alice.txt /tmp/output
@@ -0,0 +1,110 @@
1
# Builds and executes the `hadoop jar` command line that runs a JMapReduce
# script: locates the hadoop binary, exports HADOOP_CLASSPATH and assembles
# the -libjars / -files / -conf / -archives arguments from the given options.
class Runner
  JAVA_MAIN_CLASS = 'org.fingertap.jmapreduce.JMapReduce'

  # script - path of the JMapReduce job script to run
  # input  - job input path, passed through to the script arguments
  # output - job output path, passed through to the script arguments
  # opts   - optional settings read by this class: :conf, :namenode,
  #          :jobtracker, :dirs, :files, :libjars, :properties, :json
  #
  # Raises RuntimeError when no hadoop command can be located.
  def initialize(script, input, output, opts={})
    @script = script
    @input = input
    @output = output
    @opts = opts

    # env get / set and check. Each step runs unconditionally: chaining them
    # with `and` skipped the check and the classpath export whenever
    # HADOOP_HOME was unset, even if hadoop was available on PATH.
    hadoop_cmd
    hadoop_classpath
  end

  # Value of the HADOOP_HOME environment variable, or nil when unset.
  def hadoop_home
    ENV['HADOOP_HOME']
  end

  # Path of the hadoop executable: whatever `which hadoop` reports, else
  # "$HADOOP_HOME/bin/hadoop" when HADOOP_HOME is set to a non-empty value.
  #
  # Raises RuntimeError ('cannot find hadoop command') when neither is available.
  def hadoop_cmd
    hadoop = `which hadoop 2>/dev/null`.chomp
    home = hadoop_home
    # home may be nil, so test for presence before calling empty? on it.
    hadoop = "#{home}/bin/hadoop" if hadoop.empty? && home && !home.empty?
    raise 'cannot find hadoop command' if hadoop.empty?
    hadoop
  end

  # Sets HADOOP_CLASSPATH to the lib directory, the directories of all shipped
  # files and every jar from lib_jars, joined with ':'. Returns the new value.
  def hadoop_classpath
    ENV['HADOOP_CLASSPATH'] = ([lib_path] + dirnames + lib_jars).join(':')
  end

  # Prints the assembled command and replaces the current process with it.
  def run
    puts cmd
    exec cmd
  end

  # Full hadoop command line as a single string.
  def cmd
    "#{hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS} #{jars_args} #{file_args} #{conf_args} #{archived_args} #{mapred_args} #{properties_args}"
  end

  # "-libjars" option listing every jar from lib_jars.
  def jars_args
    "-libjars #{lib_jars.join(',')}"
  end

  # "-files" option listing the script plus any extra files.
  def file_args
    "-files #{files.join(',')}"
  end

  # "-conf", "-fs" and "-jt" options for whichever of :conf, :namenode and
  # :jobtracker were supplied. Each emitted option ends with a space; the
  # result is '' when none are set.
  def conf_args
    flags = [['-conf', :conf], ['-fs', :namenode], ['-jt', :jobtracker]]
    flags.map { |flag, key| @opts[key] ? "#{flag} #{@opts[key]} " : '' }.join
  end

  # "-archives" option for the comma-separated directories in opts[:dirs].
  # Each existing directory is packed into a tarball under /tmp and listed as
  # "<tarball>#<directory basename>"; entries that are not directories are
  # ignored.
  #
  # Returns '' when :dirs is not given or none of the entries is a directory,
  # so that a bare "-archives" never swallows the following argument.
  # Raises RuntimeError when tar fails to create an archive.
  def archived_args
    return '' unless @opts[:dirs]

    directories = @opts[:dirs].split(',').select { |dir| File.directory?(dir) }
    return '' if directories.empty?

    archived_files = directories.map do |dir|
      tgz = "/tmp/jmapreduce-#{Process.pid}-#{Time.now.to_i}-#{rand(1000)}.tgz"
      archive_directory(dir, tgz)
      "#{tgz}\##{File.basename(dir)}"
    end

    "-archives #{archived_files.join(',')}"
  end

  # Arguments handed to the job itself: script basename, input and output.
  def mapred_args
    "#{File.basename(@script)} #{@input} #{@output}"
  end

  # The :properties and :json options that are set, joined with ','
  # ('' when neither is set).
  def properties_args
    [@opts[:properties], @opts[:json]].select { |value| value }.join(',')
  end

  # The script followed by the comma-separated entries of opts[:files].
  def files
    ret = [@script]
    ret += @opts[:files].split(',') if @opts[:files]
    ret
  end

  # Directory of every entry returned by files.
  def dirnames
    files.map { |f| File.dirname(f) }
  end

  # JRuby jars, the JMapReduce jar, the bundled vendor jars and any jars from
  # the comma-separated opts[:libjars].
  def lib_jars
    jars = [
      JRubyJars.core_jar_path,
      JRubyJars.stdlib_jar_path,
      main_jar_path,
      vendor_jar('gson.jar'),
      vendor_jar('javassist.jar'),
      vendor_jar('msgpack.jar')
    ]
    jars += @opts[:libjars].split(',') if @opts[:libjars]
    jars
  end

  # Absolute path of release/jmapreduce.jar, two levels above this file.
  def main_jar_path
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'release', 'jmapreduce.jar'))
  end

  # Absolute path of the directory one level above this file.
  def lib_path
    File.expand_path(File.join(File.dirname(__FILE__), '..'))
  end

  private

  # Absolute path of a jar in the vendors directory, two levels above this file.
  def vendor_jar(name)
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'vendors', name))
  end

  # Packs the non-hidden top-level entries of dir into the gzipped tarball tgz.
  # tar is invoked with an argument list rather than a shell string, so
  # directory and file names containing spaces or shell metacharacters are
  # passed through untouched.
  def archive_directory(dir, tgz)
    created = Dir.chdir(dir) do
      system('tar', '-czf', tgz, '--', *Dir.glob('*').sort)
    end
    raise "failed to archive directory #{dir}" unless created
  end
end
Binary file
data/vendors/gson.jar ADDED
Binary file
Binary file
Binary file
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jmapreduce
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Abhinay Mehta
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-09-12 00:00:00 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: jruby-jars
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 1
30
+ - 6
31
+ version: "1.6"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: JMapReduce is a JRuby Map/Reduce framework built on top of the Hadoop distributed computing platform.
35
+ email: abhinay.mehta@gmail.com
36
+ executables:
37
+ - jmapreduce
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - bin/jmapreduce
44
+ - README.md
45
+ - lib/jmapreduce/runner.rb
46
+ - release/jmapreduce.jar
47
+ - vendors/gson.jar
48
+ - vendors/javassist.jar
49
+ - vendors/msgpack.jar
50
+ - examples/alice.txt
51
+ - examples/wordcount.rb
52
+ homepage: https://bitbucket.org/abhinaymehta/jmapreduce
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ requirements: []
79
+
80
+ rubyforge_project:
81
+ rubygems_version: 1.7.2
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: Map/Reduce Framework
85
+ test_files: []
86
+