jmapreduce 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +70 -0
- data/bin/jmapreduce +62 -0
- data/examples/alice.txt +3736 -0
- data/examples/wordcount.rb +42 -0
- data/lib/jmapreduce/runner.rb +110 -0
- data/release/jmapreduce.jar +0 -0
- data/vendors/gson.jar +0 -0
- data/vendors/javassist.jar +0 -0
- data/vendors/msgpack.jar +0 -0
- metadata +86 -0
data/README.md
ADDED
@@ -0,0 +1,70 @@
+JMapReduce
+==========
+
+JMapReduce is a JRuby Map/Reduce framework built on top of the Hadoop distributed computing platform.
+Inspired by [mandy](http://github.com/forward/mandy "Mandy"), but it runs the map/reduce jobs on the JVM.
+
+Install
+-------
+
+    gem install jmapreduce
+
+Usage
+-----
+
+1. Run a Hadoop cluster on your machines and set the HADOOP_HOME env variable.
+2. Put your files into HDFS, e.g. test/inputs/file1
+
+3. Now you can run 'jmapreduce' as shown below:
+> $ jmapreduce examples/wordcount.rb test/inputs/file1 test/output
+4. You can also chain map/reduce jobs as in the example below; the output of one map/reduce job becomes the input of the next job.
+
+5. For a full list of options, run:
+> $ jmapreduce -h
+
+Example
+-------
+
+    import org.fingertap.jmapreduce.JMapReduce
+
+    JMapReduce.job 'Count' do
+      reduce_tasks 1
+
+      map do |key, value|
+        value.split.each do |word|
+          emit(word, 1)
+        end
+      end
+
+      reduce do |key, values|
+        sum = 0
+        values.each {|v| sum += v.to_i }
+        emit(key, sum)
+      end
+    end
+
+    JMapReduce.job "Histogram" do
+      setup do
+        RANGES = [0..10, 11..20, 21..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999]
+      end
+
+      map do |word, count|
+        range = RANGES.find {|range| range.include?(count.to_i) }
+        emit("#{range.first.to_s}-#{range.last.to_s}", 1)
+      end
+
+      reduce do |range, counts|
+        total = counts.inject(0) {|sum, count| sum + count.to_i }
+        emit(range, '|'*(total/20))
+      end
+    end
+
+Author
+-------
+
+Abhinay Mehta <abhinay.mehta@gmail.com>
+
+Copyright
+---------
+
+License: Apache License
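The two DSL blocks in the 'Count' job above do nothing Hadoop-specific themselves. For readers who want to see what the job computes without a cluster, here is a plain-Ruby sketch that mirrors the same map/reduce logic outside the framework; the emit collector and output formatting are illustrative only, and the input path assumes the bundled examples/alice.txt:

```ruby
# Plain-Ruby illustration of the 'Count' job's logic (not part of the gem).
# emit(key, value) is imitated by appending values to a hash of arrays.
mapped = Hash.new { |h, k| h[k] = [] }

# "map" phase: one (word, 1) pair per word in each input line
File.foreach('examples/alice.txt') do |value|
  value.split.each { |word| mapped[word] << 1 }
end

# "reduce" phase: sum the values collected for each word
counts = {}
mapped.each do |word, values|
  sum = 0
  values.each { |v| sum += v.to_i }
  counts[word] = sum
end

# Print the ten most frequent words
counts.sort_by { |_, c| -c }.first(10).each { |word, c| puts "#{word}\t#{c}" }
```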
data/bin/jmapreduce
ADDED
@@ -0,0 +1,62 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'jruby-jars'
+require 'optparse'
+require 'ostruct'
+require 'cgi'
+
+require File.join(File.dirname(__FILE__), '..', 'lib', 'jmapreduce', 'runner')
+
+options = OpenStruct.new
+
+OptionParser.new do |opts|
+  opts.banner = "USAGE: jmapreduce script input output [options]"
+
+  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file") do |config|
+    options.config = config
+  end
+
+  opts.on("-n", "--namenode namenode:port", "Specify a namenode") do |namenode|
+    options.namenode = namenode
+  end
+
+  opts.on("-t", "--jobtracker jobtracker:port", "Specify a job tracker") do |jobtracker|
+    options.jobtracker = jobtracker
+  end
+
+  opts.on("-l", "--libjars jar1,jar2", "comma-separated jar files to include in the classpath") do |libjars|
+    options.libjars = libjars
+  end
+
+  opts.on("-f", "--files file1,file2", "comma separated files to be copied to the map reduce cluster") do |files|
+    options.files = files
+  end
+
+  opts.on("-d", "--dirs dir1,dir2", "comma separated directories to be copied to the map reduce cluster") do |dirs|
+    options.dirs = dirs
+  end
+
+  opts.on("-v", '--variables k1=v1,k2=v2', "Pass additional parameters to jobs") do |params|
+    options.properties = params
+  end
+
+  opts.on("-j", '--json {"key":"1 value"}', "Pass JSON encoded parameters to jobs") do |json|
+    options.json = "json=#{CGI.escape(json)}"
+  end
+
+  opts.on_tail("-h", "--help", "Show this message") do
+    puts opts
+    exit
+  end
+end.parse!
+
+Runner.new(ARGV[0], ARGV[1], ARGV[2],
+  :conf => options.config,
+  :namenode => options.namenode,
+  :jobtracker => options.jobtracker,
+  :properties => options.properties,
+  :json => options.json,
+  :libjars => options.libjars,
+  :files => options.files,
+  :dirs => options.dirs
+).run
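The executable does no work itself: it parses the flags above into an options hash and hands everything to Runner from data/lib/jmapreduce/runner.rb (whose contents are not shown in this diff). Below is a minimal sketch of driving that same entry point from Ruby, assuming only the constructor shape visible in the script and that the gem's lib directory is on the load path; every path and value is a placeholder:

```ruby
require 'rubygems'
require 'jmapreduce/runner'   # assumption: the gem exposes lib/jmapreduce/runner.rb on the load path

# Mirrors what bin/jmapreduce builds from its CLI flags. All values below are
# placeholders for illustration; flags the user does not pass arrive as nil.
Runner.new('examples/wordcount.rb', 'test/inputs/file1', 'test/output',
  :conf       => nil,
  :namenode   => 'localhost:9000',    # -n / --namenode
  :jobtracker => 'localhost:9001',    # -t / --jobtracker
  :properties => 'min_count=5',       # -v / --variables
  :json       => nil,
  :libjars    => nil,
  :files      => nil,
  :dirs       => nil
).run
```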