hadoop-jruby-connector 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/hjc/hadoop_streaming.rb +18 -11
- metadata +8 -18
- data/spec/hjc/fs_shell_spec.rb +0 -19
- data/spec/hjc/hadoop_streaming_spec.rb +0 -132
- data/spec/hjc/jar_job_spec.rb +0 -25
- data/spec/hjc/job_monitor_spec.rb +0 -55
- data/spec/hjc/util_spec.rb +0 -25
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.8
+0.0.9
data/lib/hjc/hadoop_streaming.rb
CHANGED
@@ -1,6 +1,6 @@
 module Hjc
   class HadoopStreaming
-    attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
+    attr_accessor :input_paths, :output_path, :mapper_path, :reducer_path, :jobconf
     attr_accessor :local, :debug
 
     def initialize
@@ -19,13 +19,20 @@ module Hjc
       @ret == 0 # success if job returned 0
     end
 
-    def
-
-
-
-
-
-
+    def input_path=(input) # compatibility
+      self.input = input
+    end
+
+    def input=(*inputs)
+      @input_paths = []
+      inputs.each do |input|
+        # input param seems to explain exact path on Hadoop streaming..
+        file = Util.to_temp_file('input', input)
+        @input_paths << Util.rel_path(file)
+        unless @local # path seems on HDFS
+          sh = FsShell.new
+          sh.put(file.path, Util.rel_path(file))
+        end
       end
     end
 
@@ -45,7 +52,7 @@ module Hjc
 
     def args
       concated_args = []
-      concated_args.concat ['-input', @input_path] if @input_path
+      concated_args.concat @input_paths.collect{|e| ['-input', e]}.flatten if @input_paths
       concated_args.concat ['-output' ,@output_path] if @output_path
       concated_args.concat ['-mapper', @mapper_path] if @mapper_path
      concated_args.concat ['-reducer', @reducer_path] if @reducer_path
@@ -54,11 +61,11 @@ module Hjc
       concated_args.concat ['-debug'] if @debug
 
       @jobconf.each do |k, v|
-
+        concated_args += ['-jobconf', "#{k}=#{v}"]
       end
 
       @files.each do |k, v|
-
+        concated_args.concat ["-file", v.path]
       end
 
       puts "args: #{concated_args.join(' ')}" if @debug
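Net effect of the hadoop_streaming.rb changes: input= now accepts one or more strings, writes each to a temp file via Util.to_temp_file, collects the relative paths in input_paths (args then emits one -input pair per path), and copies each file to HDFS with FsShell#put unless local is set; input_path= survives only as a compatibility alias. A minimal usage sketch under those assumptions — the script paths and jobconf key below are illustrative, not taken from the diff:

  require 'hjc'

  job = Hjc::HadoopStreaming.new
  job.local = true                          # skip the FsShell#put copy to HDFS
  job.input = "hoge fuga foo\n"             # written to a temp file; its relative path lands in input_paths
  job.output_path = 'tmp/out'
  job.mapper_path = 'map.rb'                # hypothetical scripts, passed through as -mapper / -reducer
  job.reducer_path = 'reduce.rb'
  job.jobconf['mapred.reduce.tasks'] = '1'  # emitted as -jobconf mapred.reduce.tasks=1

  p job.args  # inspect the generated hadoop-streaming argument list
  job.run     # returns true when the streaming job exits with 0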
metadata
CHANGED
@@ -1,12 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: hadoop-jruby-connector
 version: !ruby/object:Gem::Version
-  prerelease:
-  segments:
-  - 0
-  - 0
-  - 8
-  version: 0.0.8
+  prerelease:
+  version: 0.0.9
 platform: ruby
 authors:
 - Koichi Fujikawa
@@ -14,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2011-
+date: 2011-10-27 00:00:00 -07:00
 default_executable:
 dependencies: []
 
@@ -46,29 +42,23 @@ rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      segments:
-      - 0
       version: "0"
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      segments:
-      - 0
       version: "0"
 requirements: []
 
 rubyforge_project:
-rubygems_version: 1.
+rubygems_version: 1.5.1
 signing_key:
 specification_version: 3
 summary: Hadoop connector by JRuby
-test_files:
-- spec/hjc/fs_shell_spec.rb
-- spec/hjc/hadoop_streaming_spec.rb
-- spec/hjc/jar_job_spec.rb
-- spec/hjc/job_monitor_spec.rb
-- spec/hjc/util_spec.rb
+test_files: []
+
data/spec/hjc/fs_shell_spec.rb
DELETED
@@ -1,19 +0,0 @@
-require 'hjc'
-
-module Hjc
-  describe FsShell do
-    it 'put and get file to hdfs, and delete' do
-      localfile = Util.to_temp_file('localfile', 'fs shell test')
-
-      shell = FsShell.new
-
-      shell.put(localfile.path, 'remotefile')
-      shell.get('remotefile', 'tmp/returnedfile')
-
-      File.open('tmp/returnedfile').read.should == 'fs shell test'
-
-      shell.rm('remotefile')
-      File.delete('tmp/returnedfile')
-    end
-  end
-end
data/spec/hjc/hadoop_streaming_spec.rb
DELETED
@@ -1,132 +0,0 @@
-require 'hjc'
-require 'fileutils'
-
-module Hjc
-  describe HadoopStreaming do
-    TMP_DIR = 'tmp'
-
-    before :all do
-      @map_script = Util.to_temp_file('map.rb', MAPPER, :mod => 0700)
-      @reduce_script = Util.to_temp_file('reduce.rb', REDUCER, :mod => 0700)
-      @data_file = Util.to_temp_file('testdata', TEST_DATA)
-      @output_dir = TMP_DIR + '/out' + Time.new.to_i.to_s
-    end
-
-    it 'create args for hadoop streaming' do
-      job = HadoopStreaming.new
-      job.input_path = "input"
-      job.output_path = "outdir"
-      job.mapper_path = "mapper"
-      job.reducer_path = "reducer"
-
-      job.args.join(" ").should == "-input input -output outdir -mapper mapper -reducer reducer"
-    end
-
-    it 'create args for hadoop streaming with files' do
-      job = HadoopStreaming.new
-      job.input = 'input'
-      job.mapper = 'mapper'
-      job.reducer = 'reducer'
-
-      job.args.should include('-file')
-      job.args.join(" ").should match(/input/)
-      job.args.join(" ").should match(/mapper/)
-      job.args.join(" ").should match(/reducer/)
-    end
-
-    it 'can run Hadoop streaming job with path' do
-      pending 'path does not work'
-      job = HadoopStreaming.new
-      job.input_path = "file://" + File.expand_path(@data_file.path)
-      job.output_path = @output_dir
-      job.mapper_path = @map_script.path
-      job.reducer_path = @reduce_script.path
-      job.local = true
-
-      job.run
-
-      assert_result
-
-      clean_output
-    end
-
-    it 'can run Hadoop streaming job with string' do
-      pending
-      job = HadoopStreaming.new
-      job.input = TEST_DATA
-      job.output_path = @output_dir
-      job.mapper = MAPPER
-      job.reducer = REDUCER
-      job.local = true
-
-      job.run
-
-      assert_result
-
-      clean_output
-    end
-
-    it 'sets conf params' do
-      job = HadoopStreaming.new
-      job.input_path = "input"
-      job.output_path = "outdir"
-      job.jobconf['hoge'] = "fuga"
-
-      job.args.join(" ").should == %Q!-input input -output outdir -jobconf hoge=fuga!
-    end
-
-    describe '.add_file' do
-      it 'adds -file option' do
-        job = HadoopStreaming.new
-        file = Tempfile.new('additional.txt', 'tmp')
-        job.add_file(file)
-
-        job.args.join(" ").should match("-file")
-        job.args.join(" ").should match("tmp/additional.txt")
-      end
-    end
-
-    def assert_result
-      File.open(File.join(@output_dir, 'part-00000')) do |f|
-        h = {}
-        f.readlines.each do |line|
-          a = line.split /\t/
-          h[a[0]] = a[1].chomp
-        end
-        h.should == {'bar' => '1', 'foo' => '3', 'fuga' => '2', 'hoge' => '4'}
-      end
-    end
-
-    def clean_output
-      FileUtils.rm_r([@output_dir])
-    end
-
-    MAPPER = <<-'EOF'
-#!/usr/bin/env ruby
-ARGF.each do |line|
-  line.chomp!
-  line.split.each do |word|
-    puts "#{word}\t1"
-  end
-end
-EOF
-
-    REDUCER = <<-'EOF'
-#!/usr/bin/env ruby
-count = Hash.new {|h,k| h[k] = 0}
-ARGF.each do |line|
-  line.chomp!
-  key, value = line.split(/\t/)
-  count[key] += 1
-end
-count.each do |k,v|
-  puts "#{k}\t#{v}"
-end
-EOF
-
-    TEST_DATA = <<-'EOF'
-hoge fuga foo hoge foo
-foo bar hoge hoge fuga
-EOF
-  end
-end
data/spec/hjc/jar_job_spec.rb
DELETED
@@ -1,25 +0,0 @@
-require 'hjc'
-
-module Hjc
-  describe JarJob do
-
-    before :all do
-      @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
-    end
-
-    it 'creates hadoop jar job args' do
-      job = JarJob.new
-      job.jar_args = %w!pi 1 10!
-      job.args.join(" ").should == 'pi 1 10'
-    end
-
-    it 'can run hadoop jar job' do
-      job = JarJob.new
-      job.jar_file = @example_jar
-      job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
-      job.jar_args = %w!pi 1 10!
-      p job.args
-      job.run
-    end
-  end
-end
data/spec/hjc/job_monitor_spec.rb
DELETED
@@ -1,55 +0,0 @@
-require 'hjc'
-
-module Hjc
-  describe JobMonitor do
-
-    before :each do
-      @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
-      s = Hjc::FsShell.new
-      s.rmr('PiEstimator_TMP_3_141592654')
-    end
-
-    it 'monitors hadoop job' do
-      pending
-      run_job_async
-      sleep 5
-      monitor = JobMonitor.new
-      jobs = monitor.running_jobs
-      jobs.size.should > 0
-
-      job = jobs.first
-      job_id_str = job.job_id.to_s
-      job_id_str.should match(/^job_/)
-
-      monitor.job_status(job_id_str).class.should == JobStatus
-    end
-
-    it 'kills job' do
-      run_job_async
-      sleep 5
-      monitor = JobMonitor.new
-      job = monitor.running_jobs.first
-      job.should_not be_nil
-
-      job_id_str = job.job_id.to_s
-      monitor.kill_job(job_id_str)
-
-      sleep 60
-      monitor.running_jobs.size.should == 0
-    end
-
-    def run_job_async
-      begin
-        Thread.new do
-          job = JarJob.new
-          job.jar_file = @example_jar
-          job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
-          job.jar_args = %w!pi 1 10!
-          job.run
-        end
-      rescue => e
-        p e
-      end
-    end
-  end
-end
data/spec/hjc/util_spec.rb
DELETED
@@ -1,25 +0,0 @@
-require 'hjc'
-
-module Hjc
-  describe 'Hjc::util' do
-    it 'can convert string to file' do
-      f = Util.to_temp_file('map.rb', 'mapscript')
-
-      FileTest.exist?(f.path).should be_true
-      File.open(f.path).read.should == 'mapscript'
-    end
-
-    it 'can convert string to file with exec flag' do
-      f = Util.to_temp_file('map.rb', 'mapscript', :mod => 0700)
-
-      FileTest.executable?(f.path).should be_true
-    end
-
-    it 'returns relative path from tempfile' do
-      f = Util.to_temp_file('map.rb', 'mapscript', :mod => 0700)
-
-      filename = File.basename(f.path)
-      Util.rel_path(f).should == "tmp/#{filename}"
-    end
-  end
-end