hadoop-jruby-connector 0.0.1.2010122601 → 0.0.3

data/VERSION CHANGED
@@ -1 +1 @@
- 0.0.1.2010122601
+ 0.0.3
data/lib/hjc/fs_shell.rb CHANGED
@@ -22,6 +22,12 @@ module Hjc
        run
      end

+     def rmr(remote)
+       @cmd = :rmr
+       @params = [remote]
+       run
+     end
+
      def run
        java_shell = ::FsShell.new(Configuration.new)
        java_shell.run(build_args)
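
The new rmr command wraps the FsShell `-rmr` action, which deletes a path in HDFS recursively. A minimal usage sketch, grounded in the fs_shell and job_monitor specs below; the target path here is purely illustrative:

    require 'hjc'

    shell = Hjc::FsShell.new
    shell.rmr('tmp/old_output')  # recursive delete, like `hadoop fs -rmr tmp/old_output`
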
data/lib/hjc/hadoop_streaming.rb CHANGED
@@ -1,13 +1,13 @@
  module Hjc
    class HadoopStreaming
-     attr_accessor :input_path, :output_path, :mapper_path, :reducer_path
-     attr_accessor :local
-     attr_reader :options
+     attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
+     attr_accessor :local, :debug

      def initialize
        @files = {}
-       @options = {}
+       @jobconf = {}
        @local = false
+       @debug = false
      end

      def run
@@ -47,16 +47,17 @@ module Hjc
        concated_args.concat ['-reducer', @reducer_path] if @reducer_path
        concated_args.concat ['-dfs', 'file:///'] if @local
        concated_args.concat ['-jt', 'local'] if @local # no use?
+       concated_args.concat ['-debug'] if @debug

-       @options.each do |k, v|
-         concated_args.concat ["-#{k}", v]
+       @jobconf.each do |k, v|
+         concated_args += ['-jobconf', "#{k}=#{v}"]
        end

        @files.each do |k, v|
          concated_args.concat ["-file", v.path]
        end

-       puts "args: #{concated_args.join(' ')}"
+       puts "args: #{concated_args.join(' ')}" if @debug
        concated_args
      end
    end
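
The free-form `options` hash is replaced by a dedicated `jobconf` hash, which the arg builder expands into `-jobconf key=value` pairs, plus a `debug` flag that both appends `-debug` and gates the argument echo. A sketch of the new interface, following the 'sets conf params' spec below; the conf key and paths are illustrative:

    require 'hjc'

    job = Hjc::HadoopStreaming.new
    job.input_path  = 'input'
    job.output_path = 'outdir'
    job.jobconf['mapred.map.tasks'] = '1'  # expanded to: -jobconf mapred.map.tasks=1
    job.debug = true                       # appends -debug and prints the built args
    job.args.join(' ')
    # => "-input input -output outdir -debug -jobconf mapred.map.tasks=1"
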
data/lib/hjc/jar_job.rb ADDED
@@ -0,0 +1,31 @@
+ module Hjc
+   class JarJob
+     attr_accessor :jar_file, :main_class, :jar_args, :args
+
+     def initialize
+     end
+
+     def run
+       # jar_urls = Util.hadoop_jars.map {|jar| URL.new('file://' + jar)}
+       # cl = URLClassLoader.new(jar_urls.to_java(URL))
+       # java.lang.Thread.current_thread.set_context_class_loader(cl)
+       # org.apache.hadoop.examples.ExampleDriver.main(['pi', '1', '10'])
+       target_class = eval(@main_class)
+       @ret = target_class.main(@jar_args)
+     end
+
+     def success?
+       @ret == 0
+     end
+
+     def args
+       concated_args = []
+       concated_args << @jar_file if @jar_file
+       concated_args << @main_class if @main_class
+       concated_args.concat @jar_args if @jar_args
+
+       puts "args: #{concated_args.join(' ')}"
+       concated_args
+     end
+   end
+ end
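
Note that run does not shell out to `hadoop jar`: it resolves `main_class` with eval and calls that class's `main` directly inside the current JRuby process (the commented-out lines are a leftover URLClassLoader experiment). A sketch along the lines of the jar_job spec below; the examples-jar glob assumes a stock Hadoop 0.20-era layout under $HADOOP_HOME:

    require 'hjc'

    job = Hjc::JarJob.new
    job.jar_file = Dir.glob(File.join(Hjc::Util.hadoop_home, 'hadoop-*-examples.jar')).first
    job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
    job.jar_args = %w!pi 1 10!  # estimate pi with 1 map and 10 samples per map
    job.run
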
data/lib/hjc/job_monitor.rb ADDED
@@ -0,0 +1,24 @@
+ module Hjc
+   class JobMonitor
+     attr_reader :jt # mainly for debug..
+
+     def initialize
+       conf = Configuration.new
+       address, port = *conf.get("mapred.job.tracker").split(":")
+       addr = InetSocketAddress.new(address, port.to_i)
+       @jt = RPC.get_proxy(JobSubmissionProtocol.java_class,
+                           JobSubmissionProtocol.versionID, addr, conf)
+     end
+
+     def running_jobs
+       @jt.all_jobs.select do |j|
+         [JobStatus::RUNNING, JobStatus::PREP].include? j.run_state
+       end
+     end
+
+     def job_status(job_id_str)
+       job_id = JobID.for_name(job_id_str)
+       @jt.get_job_status(job_id)
+     end
+   end
+ end
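
JobMonitor opens a Hadoop IPC proxy for JobSubmissionProtocol, so it needs `mapred.job.tracker` set to a `host:port` value in the configuration under $HADOOP_HOME/conf (it will fail if the key is unset). A sketch, assuming a job is already running on the cluster; `map_progress` is the JRuby spelling of JobStatus#mapProgress:

    require 'hjc'

    monitor = Hjc::JobMonitor.new
    monitor.running_jobs.each do |job|  # JobStatus objects in RUNNING or PREP state
      status = monitor.job_status(job.job_id.to_s)
      puts "#{job.job_id}: #{(status.map_progress * 100).round}% map"
    end
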
data/lib/hjc/util.rb CHANGED
@@ -4,6 +4,26 @@ module Hjc
    class Util
      TMP_DIR = 'tmp'

+     def self.setup
+       hadoop_jars.each do |jar|
+         require jar # for JRuby
+       end
+
+       $CLASSPATH << "#{hadoop_home}/conf"
+     end
+
+     def self.hadoop_jars
+       jars = []
+       ['', 'lib', 'contrib/streaming'].each do |path|
+         jars.concat Dir.glob(File.join(hadoop_home, path, "*.jar"))
+       end
+       jars
+     end
+
+     def self.hadoop_home
+       ENV['HADOOP_HOME']
+     end
+
      def self.to_temp_file(filename, body, options={})
        file = Tempfile.new(filename, TMP_DIR)
        file.print body
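
The jar discovery and classpath wiring that previously lived inline in lib/hjc.rb is now factored into class methods on Util, keyed off the HADOOP_HOME environment variable, so it can also be invoked on demand. For example (the install path is hypothetical):

    ENV['HADOOP_HOME'] ||= '/usr/lib/hadoop'  # must point at a Hadoop install

    require 'java'
    require 'hjc/util'
    Hjc::Util.hadoop_jars  # every *.jar in $HADOOP_HOME, lib/ and contrib/streaming/
    Hjc::Util.setup        # requires each jar, adds $HADOOP_HOME/conf to $CLASSPATH
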
data/lib/hjc.rb CHANGED
@@ -2,14 +2,23 @@ require 'java'

  require 'hjc/util'
  require 'hjc/hadoop_streaming'
+ require 'hjc/jar_job'
  require 'hjc/fs_shell'
+ require 'hjc/job_monitor'

- home = ENV['HADOOP_HOME']
- ['', 'lib', 'contrib/streaming'].each do |path|
-   Dir.glob(File.join(home, path, "*.jar")) {|jar| require jar }
- end
- $CLASSPATH << "#{home}/conf"
+ Hjc::Util.setup

+ java_import java.net.URL
+ java_import java.net.URLClassLoader
  java_import org.apache.hadoop.streaming.StreamJob
  java_import org.apache.hadoop.fs.FsShell
  java_import org.apache.hadoop.conf.Configuration
+ java_import org.apache.hadoop.util.RunJar
+
+ java_import org.apache.hadoop.ipc.RPC
+ java_import org.apache.hadoop.mapred.JobID
+ java_import org.apache.hadoop.mapred.JobTracker
+ java_import org.apache.hadoop.mapred.JobStatus
+ java_import org.apache.hadoop.mapred.JobSubmissionProtocol
+ java_import java.net.InetSocketAddress
+
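
With the setup moved into Util, `require 'hjc'` is the single entry point: it loads the Hadoop jars, then the java_import calls above expose the Hadoop classes (plus the java.net helpers) as top-level Ruby constants. For example:

    require 'hjc'  # runs Hjc::Util.setup, then the java_imports above

    conf = Configuration.new             # org.apache.hadoop.conf.Configuration
    puts conf.get('mapred.job.tracker')  # nil unless set under $HADOOP_HOME/conf
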
data/spec/hjc/hadoop_streaming_spec.rb CHANGED
@@ -22,14 +22,6 @@ module Hjc
        job.args.join(" ").should == "-input input -output outdir -mapper mapper -reducer reducer"
      end

-     it 'create args for hadoop streaming with options' do
-       job = HadoopStreaming.new
-       job.options["dfs"] = "local"
-       job.options["jobconf"] = "mapred.map.tasks=1"
-
-       job.args.join(" ").should == %!-dfs local -jobconf mapred.map.tasks=1!
-     end
-
      it 'create args for hadoop streaming with files' do
        job = HadoopStreaming.new
        job.input = 'input'
@@ -59,6 +51,7 @@ module Hjc
      end

      it 'can run Hadoop streaming job with string' do
+       pending
        job = HadoopStreaming.new
        job.input = TEST_DATA
        job.output_path = @output_dir
@@ -72,6 +65,15 @@ module Hjc

        clean_output
      end
+
+     it 'sets conf params' do
+       job = HadoopStreaming.new
+       job.input_path = "input"
+       job.output_path = "outdir"
+       job.jobconf['hoge'] = "fuga"
+
+       job.args.join(" ").should == %Q!-input input -output outdir -jobconf hoge=fuga!
+     end

      def assert_result
        File.open(File.join(@output_dir, 'part-00000')) do |f|
data/spec/hjc/jar_job_spec.rb ADDED
@@ -0,0 +1,25 @@
+ require 'hjc'
+
+ module Hjc
+   describe JarJob do
+
+     before :all do
+       @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+     end
+
+     it 'creates hadoop jar job args' do
+       job = JarJob.new
+       job.jar_args = %w!pi 1 10!
+       job.args.join(" ").should == 'pi 1 10'
+     end
+
+     it 'can run hadoop jar job' do
+       job = JarJob.new
+       job.jar_file = @example_jar
+       job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+       job.jar_args = %w!pi 1 10!
+       p job.args
+       job.run
+     end
+   end
+ end
data/spec/hjc/job_monitor_spec.rb ADDED
@@ -0,0 +1,40 @@
+ require 'hjc'
+
+ module Hjc
+   describe JobMonitor do
+
+     before :all do
+       @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+       s = Hjc::FsShell.new
+       s.rmr('PiEstimator_TMP_3_141592654')
+     end
+
+     it 'can monitor hadoop job' do
+       run_job_async
+       sleep 5
+       monitor = JobMonitor.new
+       jobs = monitor.running_jobs
+       jobs.size.should > 0
+
+       job = jobs.first
+       job_id_str = job.job_id.to_s
+       job_id_str.should match(/^job_/)
+
+       monitor.job_status(job_id_str).class.should == JobStatus
+     end
+
+     def run_job_async
+       begin
+         Thread.new do
+           job = JarJob.new
+           job.jar_file = @example_jar
+           job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+           job.jar_args = %w!pi 1 10!
+           job.run
+         end
+       rescue => e
+         #p e
+       end
+     end
+   end
+ end
metadata CHANGED
@@ -5,9 +5,8 @@ version: !ruby/object:Gem::Version
    segments:
    - 0
    - 0
-   - 1
-   - 2010122601
-   version: 0.0.1.2010122601
+   - 3
+   version: 0.0.3
  platform: ruby
  authors:
  - Koichi Fujikawa
@@ -15,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []

- date: 2010-12-26 00:00:00 +09:00
+ date: 2011-04-12 00:00:00 +09:00
  default_executable:
  dependencies: []

@@ -34,6 +33,8 @@ files:
  - lib/hjc.rb
  - lib/hjc/fs_shell.rb
  - lib/hjc/hadoop_streaming.rb
+ - lib/hjc/jar_job.rb
+ - lib/hjc/job_monitor.rb
  - lib/hjc/util.rb
  has_rdoc: true
  homepage: http://github.com/hapyrus/hadoop-jruby-connector
@@ -68,4 +69,6 @@ summary: Hadoop connector by JRuby
  test_files:
  - spec/hjc/fs_shell_spec.rb
  - spec/hjc/hadoop_streaming_spec.rb
+ - spec/hjc/jar_job_spec.rb
+ - spec/hjc/job_monitor_spec.rb
  - spec/hjc/util_spec.rb