jruby-mapreduce 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ import org.fingertap.jmapreduce.JMapReduce
2
+
3
# Word-count job: emits one record per word carrying the number of times the
# word occurred in the input, wrapped in a hash under the 'sum' key.
JMapReduce.job('Count') do
  reduce_tasks 1

  # Split each input value on whitespace and emit (word, 1) per token.
  map do |_key, value|
    value.split.each { |word| emit(word, 1) }
  end

  # Add up the per-word 1s and emit the total as {'sum' => total}.
  reduce do |key, values|
    total = values.inject(0) { |acc, v| acc + v }
    emit(key, {'sum' => total})
  end
end
18
+
19
# Histogram job: buckets every word by its count and renders one text bar per
# bucket. The map input value is read as count['sum'] -- presumably the hash
# emitted by the 'Count' job; confirm against how the jobs are chained.
JMapReduce.job "Histogram" do
  setup do
    # Inclusive count buckets; frozen because the list is never modified.
    RANGES = [0..1, 2..3, 4..5, 6..10, 11..20, 21..30, 31..40, 41..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999].freeze
  end

  # Emit (bucket label, 1) for the bucket that contains this word's count.
  # Labels are the zero-padded bounds, e.g. "00006-00010".
  map do |word, count|
    bucket = RANGES.find { |candidate| candidate.include?(count['sum']) }
    # A count outside every bucket (e.g. above 99_999) has no label; skip it
    # rather than failing with NoMethodError on nil.
    if bucket
      label = [bucket.first, bucket.last].map { |bound| bound.to_s.rjust(5, '0') }.join('-')
      emit(label, 1)
    end
  end

  # Count the words per bucket and draw one '|' per 20 words (integer division).
  reduce do |range, counts|
    total = counts.inject(0) { |sum, n| sum + n }
    emit(range, '|' * (total / 20))
  end
end
34
+
35
# This job is just a pass-through: it defines no map or reduce step and takes
# advantage of the map/reduce shuffle to get ordered keys.
JMapReduce.job('Sort') { reduce_tasks 1 }
39
+
40
+ __END__
41
+
42
+ To run:
43
+
44
+ ./bin/jmapreduce examples/wordcount.rb examples/alice.txt /tmp/output
@@ -0,0 +1,111 @@
1
require 'shellwords'

# Builds and executes the `hadoop jar` command line that submits a JMapReduce
# script to Hadoop.
#
# Option keys read from +opts+ (all optional):
#   :conf, :namenode, :jobtracker - passed through as -conf / -fs / -jt
#   :files, :libjars, :dirs       - comma-separated lists
#   :properties, :json            - joined with ',' into the final argument
class Runner
  JAVA_MAIN_CLASS = 'org.fingertap.jmapreduce.JMapReduce'

  # script - path of the job script (also shipped via -files)
  # input  - input path handed to the job
  # output - output path handed to the job
  # opts   - option hash, see the class comment
  #
  # Raises RuntimeError when HADOOP_HOME is unset or no hadoop command can be
  # located. Also sets ENV['HADOOP_CLASSPATH'] as a side effect.
  def initialize(script, input, output, opts={})
    @script = script
    @input = input
    @output = output
    @opts = opts

    # env get / set and check
    hadoop_home
    hadoop_cmd
    hadoop_classpath
  end

  # Returns ENV['HADOOP_HOME']; raises RuntimeError when it is not set.
  def hadoop_home
    raise 'Please set HADOOP_HOME' unless ENV['HADOOP_HOME']
    ENV['HADOOP_HOME']
  end

  # Returns the hadoop executable: the one found by `which`, otherwise
  # "$HADOOP_HOME/bin/hadoop". Raises RuntimeError when neither is available.
  def hadoop_cmd
    hadoop = `which hadoop 2>/dev/null`.chomp
    hadoop = "#{hadoop_home}/bin/hadoop" if hadoop.empty? && !hadoop_home.empty?
    raise 'Cannot find hadoop command' if hadoop.empty?
    hadoop
  end

  # Overwrites ENV['HADOOP_CLASSPATH'] with the lib directory, the directories
  # of all shipped files and every library jar, joined with ':'.
  def hadoop_classpath
    ENV['HADOOP_CLASSPATH'] = ([lib_path] + dirnames + lib_jars).join(':')
  end

  # Prints the command line and then replaces the current process with it.
  def run
    # Build the command exactly once: building it creates the -archives
    # tarballs, so a second call would re-archive every directory and the
    # printed command would differ from the executed one.
    command = cmd
    puts command
    exec command
  end

  # Returns the complete shell command line as a single string.
  def cmd
    parts = [
      hadoop_cmd, 'jar', main_jar_path, JAVA_MAIN_CLASS,
      file_args, jars_args, conf_args, archived_args, mapred_args
    ].map { |part| part.to_s.strip }.reject { |part| part.empty? }

    # The properties argument is always passed (as '' when empty) and is
    # shell-escaped so that JSON containing double quotes reaches the Java
    # side intact.
    parts << Shellwords.escape(properties_args)
    parts.join(' ')
  end

  # Returns the -libjars option listing every library jar.
  def jars_args
    "-libjars #{lib_jars.join(',')}"
  end

  # Returns the -files option listing the script and any extra files.
  def file_args
    "-files #{files.join(',')}"
  end

  # Returns the -conf / -fs / -jt options for the opts that are set, each
  # followed by a single space, or '' when none is set.
  def conf_args
    [['-conf', :conf], ['-fs', :namenode], ['-jt', :jobtracker]].map do |flag, key|
      @opts[key] ? "#{flag} #{@opts[key]} " : ''
    end.join
  end

  # Tars every existing directory named in opts[:dirs] into /tmp and returns
  # the matching -archives option ("<tgz>#<dir basename>" per directory).
  # Returns nil when :dirs is not given or none of its entries is a directory.
  def archived_args
    return unless @opts[:dirs]

    archived_files = []
    @opts[:dirs].split(',').each_with_index do |dir, index|
      next unless File.directory?(dir)
      # The index keeps names distinct for directories archived within the
      # same second.
      tgz = "/tmp/jmapreduce-#{Process.pid}-#{Time.now.to_i}-#{index}-#{rand(1000)}.tgz"
      # `*` is intentionally left to the shell; only the paths are escaped.
      packed = system("cd #{Shellwords.escape(dir)} && tar -czf #{Shellwords.escape(tgz)} *")
      warn "jmapreduce: tar reported an error while archiving #{dir}" unless packed
      archived_files << "#{tgz}\##{File.basename(dir)}"
    end
    # An empty "-archives " would swallow the next token of the command line.
    return if archived_files.empty?

    "-archives #{archived_files.join(',')}"
  end

  # Returns the job arguments: script basename, input path and output path.
  def mapred_args
    "#{File.basename(@script)} #{@input} #{@output}"
  end

  # Returns opts[:properties] and opts[:json] (those that are set) joined with
  # ',', or '' when neither is set.
  def properties_args
    [@opts[:properties], @opts[:json]].select { |value| value }.join(',')
  end

  # Returns the script followed by the comma-separated entries of opts[:files].
  def files
    ret = [@script]
    ret += @opts[:files].split(',') if @opts[:files]
    ret
  end

  # Returns the directory name of every entry of #files.
  def dirnames
    files.map { |f| File.dirname(f) }
  end

  # Returns the JRuby jars, the main jar, the bundled vendor jars and the
  # comma-separated entries of opts[:libjars].
  def lib_jars
    jars = [
      JRubyJars.core_jar_path,
      JRubyJars.stdlib_jar_path,
      main_jar_path,
      vendor_jar('gson.jar'),
      vendor_jar('javassist.jar'),
      vendor_jar('msgpack.jar')
    ]
    jars += @opts[:libjars].split(',') if @opts[:libjars]
    jars
  end

  # Returns the absolute path of release/jmapreduce.jar, two levels above this
  # file's directory.
  def main_jar_path
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'release', 'jmapreduce.jar'))
  end

  # Returns the absolute path of the directory above this file's directory.
  def lib_path
    File.expand_path(File.join(File.dirname(__FILE__), '..'))
  end

  private

  # Returns the absolute path of a jar in the vendors directory, two levels
  # above this file's directory.
  def vendor_jar(name)
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'vendors', name))
  end
end
Binary file
data/vendors/gson.jar ADDED
Binary file
Binary file
Binary file
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jruby-mapreduce
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.5'
5
+ platform: ruby
6
+ authors:
7
+ - Shinji Ikeda
8
+ - Abhinay Mehta
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-06-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: jruby-jars
16
+ version_requirements: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ~>
19
+ - !ruby/object:Gem::Version
20
+ version: '1.7'
21
+ requirement: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ~>
24
+ - !ruby/object:Gem::Version
25
+ version: '1.7'
26
+ prerelease: false
27
+ type: :runtime
28
+ description: JMapReduce is JRuby Map/Reduce Framework built on top of the Hadoop Distributed computing platform.
29
+ email: gm.ikeda@gmail.com
30
+ executables:
31
+ - jmapreduce
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - README.md
36
+ - bin/jmapreduce
37
+ - examples/alice.txt
38
+ - examples/wordcount.rb
39
+ - lib/jmapreduce/runner.rb
40
+ - release/jmapreduce.jar
41
+ - vendors/gson.jar
42
+ - vendors/javassist.jar
43
+ - vendors/msgpack.jar
44
+ homepage: https://github.com/shinjiikeda/jmapreduce
45
+ licenses: []
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.2.2
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Map/Reduce Framework
67
+ test_files: []