jmapreduce 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ import org.fingertap.jmapreduce.JMapReduce
2
+
3
# Word count job: the mapper emits (word, 1) for every whitespace-separated
# token of the input value; the single reducer adds those ones up and emits
# the total for each word under the 'sum' key.
JMapReduce.job 'Count' do
  reduce_tasks 1

  map do |_key, line|
    line.split.each { |word| emit(word, 1) }
  end

  reduce do |word, ones|
    total = ones.inject(0) { |acc, one| acc + one }
    emit(word, { 'sum' => total })
  end
end
18
+
19
# Histogram job: buckets each word by its occurrence count and draws one bar
# per bucket. The mapper reads count['sum'], so its input is presumably the
# output of the 'Count' job above (confirm against the job chaining in
# JMapReduce).
JMapReduce.job "Histogram" do
  setup do
    RANGES = [0..1, 2..3, 4..5, 6..10, 11..20, 21..30, 31..40, 41..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999]
  end

  map do |word, count|
    # Counts beyond the largest range fall into the last bucket instead of
    # leaving the lookup nil, which would crash the map task on `nil.first`.
    # The block parameter is named differently from the outer local so it does
    # not shadow (or, on Ruby 1.8 semantics, overwrite) it.
    bucket = RANGES.find { |candidate| candidate.include?(count['sum']) } || RANGES.last
    # Zero-padded bounds keep the keys in numeric order when sorted as strings.
    emit("#{bucket.first.to_s.rjust(5,'0')}-#{bucket.last.to_s.rjust(5,'0')}", 1)
  end

  reduce do |range, counts|
    total = counts.inject(0) { |sum, count| sum + count }
    # One bar character per 20 words in the bucket (integer division).
    emit(range, '|' * (total / 20))
  end
end
34
+
35
# This job is just a pass-through: it defines no map or reduce logic and
# takes advantage of the map/reduce shuffle to get ordered keys. A single
# reduce task is requested.
JMapReduce.job("Sort") { reduce_tasks 1 }
39
+
40
+ __END__
41
+
42
+ ./bin/jmapreduce examples/wordcount.rb examples/alice.txt /tmp/output
@@ -0,0 +1,110 @@
1
require 'shellwords'

# Builds and launches the `hadoop jar ...` command line that runs a JMapReduce
# script.
#
# Option keys read from +opts+ (all optional):
#   :conf, :namenode, :jobtracker  forwarded as -conf / -fs / -jt
#   :files, :libjars, :dirs        comma-separated lists
#   :properties, :json             appended after the job arguments
class Runner
  JAVA_MAIN_CLASS = 'org.fingertap.jmapreduce.JMapReduce'

  # script - path of the job script; it is shipped via -files and its basename
  #          is passed to the Java main class
  # input  - input path handed to the job
  # output - output path handed to the job
  # opts   - Hash of the option keys listed on the class (nil is treated as {})
  #
  # Raises RuntimeError when no hadoop executable can be located.
  def initialize(script, input, output, opts={})
    @script = script
    @input = input
    @output = output
    @opts = opts || {}

    # Check the environment up front. These calls are deliberately not chained
    # with `and`: an unset HADOOP_HOME must not skip exporting
    # HADOOP_CLASSPATH when hadoop is available on PATH.
    hadoop_cmd
    hadoop_classpath
  end

  # Value of the HADOOP_HOME environment variable, or nil when unset.
  def hadoop_home
    ENV['HADOOP_HOME']
  end

  # Path of the hadoop executable: the one found on PATH, otherwise
  # "$HADOOP_HOME/bin/hadoop".
  #
  # Raises RuntimeError when neither is available.
  def hadoop_cmd
    hadoop = `which hadoop 2>/dev/null`.chomp
    # Ignore whatever a failed lookup may have printed on stdout.
    hadoop = '' unless $?.success?
    home = hadoop_home.to_s # nil-safe: HADOOP_HOME may be unset
    hadoop = "#{home}/bin/hadoop" if hadoop.empty? && !home.empty?
    raise 'cannot find hadoop command' if hadoop.empty?
    hadoop
  end

  # Exports HADOOP_CLASSPATH (lib directory, directories of the shipped files,
  # and every jar from lib_jars) and returns the assigned string.
  def hadoop_classpath
    ENV['HADOOP_CLASSPATH'] = ([lib_path] + dirnames + lib_jars).join(':')
  end

  # Prints the command line and replaces the current process with it.
  def run
    # Build the command once: cmd creates archives with random names as a side
    # effect, so a second evaluation would repeat the tar work and execute a
    # different command from the one printed.
    command = cmd
    puts command
    exec command
  end

  # Full shell command line as a single String.
  def cmd
    "#{hadoop_cmd} jar #{main_jar_path} #{JAVA_MAIN_CLASS} #{jars_args} #{file_args} #{conf_args} #{archived_args} #{mapred_args} #{properties_args}"
  end

  # "-libjars" option listing every jar from lib_jars.
  def jars_args
    "-libjars #{lib_jars.join(',')}"
  end

  # "-files" option listing the script and any extra :files.
  def file_args
    "-files #{files.join(',')}"
  end

  # Concatenated -conf / -fs / -jt options for the keys present in opts; each
  # emitted option ends with a space. Returns '' when none is set.
  def conf_args
    flags = [['-conf', :conf], ['-fs', :namenode], ['-jt', :jobtracker]]
    flags.map { |flag, key| @opts[key] ? "#{flag} #{@opts[key]} " : '' }.join
  end

  # Tars every existing directory listed in opts[:dirs] into a .tgz under /tmp
  # and returns the matching "-archives" option, each entry being
  # "<tgz>#<directory basename>".
  #
  # Returns '' when :dirs is unset or none of the listed directories exists,
  # so the command line never contains a dangling "-archives".
  def archived_args
    return '' unless @opts[:dirs]

    archives = []
    @opts[:dirs].split(',').each do |dir|
      next unless File.directory?(dir)
      tgz = "/tmp/jmapreduce-#{Process.pid}-#{Time.now.to_i}-#{rand(1000)}.tgz"
      # Escape both paths so spaces or shell metacharacters cannot break the
      # command; the trailing * is left for the shell to expand inside dir.
      packed = system("cd #{Shellwords.escape(dir)} && tar -czf #{Shellwords.escape(tgz)} *")
      warn "jmapreduce: tar reported a failure while archiving #{dir}" unless packed
      archives << "#{tgz}\##{File.basename(dir)}"
    end

    return '' if archives.empty?
    "-archives #{archives.join(',')}"
  end

  # Positional job arguments: script basename, input path, output path.
  def mapred_args
    "#{File.basename(@script)} #{@input} #{@output}"
  end

  # The :properties and :json options that are set, joined with a comma;
  # '' when neither is set.
  def properties_args
    [@opts[:properties], @opts[:json]].select { |value| value }.join(',')
  end

  # The job script followed by the entries of the comma-separated :files option.
  def files
    shipped = [@script]
    shipped += @opts[:files].split(',') if @opts[:files]
    shipped
  end

  # Directory of every shipped file, in the same order as files.
  def dirnames
    files.map { |file| File.dirname(file) }
  end

  # JRuby jars, the JMapReduce jar, the bundled vendor jars and any jars from
  # the comma-separated :libjars option.
  def lib_jars
    jars = [
      JRubyJars.core_jar_path,
      JRubyJars.stdlib_jar_path,
      main_jar_path,
      vendor_jar_path('gson.jar'),
      vendor_jar_path('javassist.jar'),
      vendor_jar_path('msgpack.jar')
    ]
    jars += @opts[:libjars].split(',') if @opts[:libjars]
    jars
  end

  # Absolute path of release/jmapreduce.jar, two levels above this file.
  def main_jar_path
    package_path('release', 'jmapreduce.jar')
  end

  # Absolute path of the directory one level above this file's directory.
  def lib_path
    File.expand_path(File.join(File.dirname(__FILE__), '..'))
  end

  private

  # Absolute path of a jar inside the vendors directory.
  def vendor_jar_path(jar)
    package_path('vendors', jar)
  end

  # Absolute path built from the directory two levels above this file.
  def package_path(*parts)
    File.expand_path(File.join(File.dirname(__FILE__), '..', '..', *parts))
  end
end
Binary file
data/vendors/gson.jar ADDED
Binary file
Binary file
Binary file
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jmapreduce
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Abhinay Mehta
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-09-12 00:00:00 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: jruby-jars
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 1
30
+ - 6
31
+ version: "1.6"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: JMapReduce is JRuby Map/Reduce Framework built on top of the Hadoop Distributed computing platform.
35
+ email: abhinay.mehta@gmail.com
36
+ executables:
37
+ - jmapreduce
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - bin/jmapreduce
44
+ - README.md
45
+ - lib/jmapreduce/runner.rb
46
+ - release/jmapreduce.jar
47
+ - vendors/gson.jar
48
+ - vendors/javassist.jar
49
+ - vendors/msgpack.jar
50
+ - examples/alice.txt
51
+ - examples/wordcount.rb
52
+ homepage: https://bitbucket.org/abhinaymehta/jmapreduce
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ requirements: []
79
+
80
+ rubyforge_project:
81
+ rubygems_version: 1.7.2
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: Map/Reduce Framework
85
+ test_files: []
86
+