hadoop-jruby-connector 0.0.1.2010122601 → 0.0.3

data/VERSION CHANGED
@@ -1 +1 @@
- 0.0.1.2010122601
+ 0.0.3
data/lib/hjc/fs_shell.rb CHANGED
@@ -22,6 +22,12 @@ module Hjc
        run
      end

+     def rmr(remote)
+       @cmd = :rmr
+       @params = [remote]
+       run
+     end
+
      def run
        java_shell = ::FsShell.new(Configuration.new)
        java_shell.run(build_args)
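
The new rmr command wraps the FsShell `-rmr` action, which deletes a path in HDFS recursively. A minimal usage sketch, grounded in the fs_shell and job_monitor specs below; the target path here is purely illustrative:

    require 'hjc'

    shell = Hjc::FsShell.new
    shell.rmr('tmp/old_output')  # recursive delete, like `hadoop fs -rmr tmp/old_output`
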
data/lib/hjc/hadoop_streaming.rb CHANGED
@@ -1,13 +1,13 @@
  module Hjc
    class HadoopStreaming
-     attr_accessor :input_path, :output_path, :mapper_path, :reducer_path
-     attr_accessor :local
-     attr_reader :options
+     attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
+     attr_accessor :local, :debug

      def initialize
        @files = {}
-       @options = {}
+       @jobconf = {}
        @local = false
+       @debug = false
      end

      def run
@@ -47,16 +47,17 @@ module Hjc
        concated_args.concat ['-reducer', @reducer_path] if @reducer_path
        concated_args.concat ['-dfs', 'file:///'] if @local
        concated_args.concat ['-jt', 'local'] if @local # no use?
+       concated_args.concat ['-debug'] if @debug

-       @options.each do |k, v|
-         concated_args.concat ["-#{k}", v]
+       @jobconf.each do |k, v|
+         concated_args += ['-jobconf', "#{k}=#{v}"]
        end

        @files.each do |k, v|
          concated_args.concat ["-file", v.path]
        end

-       puts "args: #{concated_args.join(' ')}"
+       puts "args: #{concated_args.join(' ')}" if @debug
        concated_args
      end
    end
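
The free-form `options` hash is replaced by a dedicated `jobconf` hash, which the arg builder expands into `-jobconf key=value` pairs, plus a `debug` flag that both appends `-debug` and gates the argument echo. A sketch of the new interface, following the 'sets conf params' spec below; the conf key and paths are illustrative:

    require 'hjc'

    job = Hjc::HadoopStreaming.new
    job.input_path  = 'input'
    job.output_path = 'outdir'
    job.jobconf['mapred.map.tasks'] = '1'  # expanded to: -jobconf mapred.map.tasks=1
    job.debug = true                       # appends -debug and prints the built args
    job.args.join(' ')
    # => "-input input -output outdir -debug -jobconf mapred.map.tasks=1"
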
data/lib/hjc/jar_job.rb ADDED
@@ -0,0 +1,31 @@
+ module Hjc
+   class JarJob
+     attr_accessor :jar_file, :main_class, :jar_args, :args
+
+     def initialize
+     end
+
+     def run
+       # jar_urls = Util.hadoop_jars.map {|jar| URL.new('file://' + jar)}
+       # cl = URLClassLoader.new(jar_urls.to_java(URL))
+       # java.lang.Thread.current_thread.set_context_class_loader(cl)
+       # org.apache.hadoop.examples.ExampleDriver.main(['pi', '1', '10'])
+       target_class = eval(@main_class)
+       @ret = target_class.main(@jar_args)
+     end
+
+     def success?
+       @ret == 0
+     end
+
+     def args
+       concated_args = []
+       concated_args << @jar_file if @jar_file
+       concated_args << @main_class if @main_class
+       concated_args.concat @jar_args if @jar_args
+
+       puts "args: #{concated_args.join(' ')}"
+       concated_args
+     end
+   end
+ end
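
Note that run does not shell out to `hadoop jar`: it resolves `main_class` with eval and calls that class's `main` directly inside the current JRuby process (the commented-out lines are a leftover URLClassLoader experiment). A sketch along the lines of the jar_job spec below; the examples-jar glob assumes a stock Hadoop 0.20-era layout under $HADOOP_HOME:

    require 'hjc'

    job = Hjc::JarJob.new
    job.jar_file = Dir.glob(File.join(Hjc::Util.hadoop_home, 'hadoop-*-examples.jar')).first
    job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
    job.jar_args = %w!pi 1 10!  # estimate pi with 1 map and 10 samples per map
    job.run
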
data/lib/hjc/job_monitor.rb ADDED
@@ -0,0 +1,24 @@
+ module Hjc
+   class JobMonitor
+     attr_reader :jt # mainly for debug..
+
+     def initialize
+       conf = Configuration.new
+       address, port = *conf.get("mapred.job.tracker").split(":")
+       addr = InetSocketAddress.new(address, port.to_i)
+       @jt = RPC.get_proxy(JobSubmissionProtocol.java_class,
+                           JobSubmissionProtocol.versionID, addr, conf)
+     end
+
+     def running_jobs
+       @jt.all_jobs.select do |j|
+         [JobStatus::RUNNING, JobStatus::PREP].include? j.run_state
+       end
+     end
+
+     def job_status(job_id_str)
+       job_id = JobID.for_name(job_id_str)
+       @jt.get_job_status(job_id)
+     end
+   end
+ end
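
JobMonitor opens a Hadoop IPC proxy for JobSubmissionProtocol, so it needs `mapred.job.tracker` set to a `host:port` value in the configuration under $HADOOP_HOME/conf (it will fail if the key is unset). A sketch, assuming a job is already running on the cluster; `map_progress` is the JRuby spelling of JobStatus#mapProgress:

    require 'hjc'

    monitor = Hjc::JobMonitor.new
    monitor.running_jobs.each do |job|  # JobStatus objects in RUNNING or PREP state
      status = monitor.job_status(job.job_id.to_s)
      puts "#{job.job_id}: #{(status.map_progress * 100).round}% map"
    end
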
data/lib/hjc/util.rb CHANGED
@@ -4,6 +4,26 @@ module Hjc
    class Util
      TMP_DIR = 'tmp'

+     def self.setup
+       hadoop_jars.each do |jar|
+         require jar # for JRuby
+       end
+
+       $CLASSPATH << "#{hadoop_home}/conf"
+     end
+
+     def self.hadoop_jars
+       jars = []
+       ['', 'lib', 'contrib/streaming'].each do |path|
+         jars.concat Dir.glob(File.join(hadoop_home, path, "*.jar"))
+       end
+       jars
+     end
+
+     def self.hadoop_home
+       ENV['HADOOP_HOME']
+     end
+
      def self.to_temp_file(filename, body, options={})
        file = Tempfile.new(filename, TMP_DIR)
        file.print body
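
The jar discovery and classpath wiring that previously lived inline in lib/hjc.rb is now factored into class methods on Util, keyed off the HADOOP_HOME environment variable, so it can also be invoked on demand. For example (the install path is hypothetical):

    ENV['HADOOP_HOME'] ||= '/usr/lib/hadoop'  # must point at a Hadoop install

    require 'java'
    require 'hjc/util'
    Hjc::Util.hadoop_jars  # every *.jar in $HADOOP_HOME, lib/ and contrib/streaming/
    Hjc::Util.setup        # requires each jar, adds $HADOOP_HOME/conf to $CLASSPATH
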
data/lib/hjc.rb CHANGED
@@ -2,14 +2,23 @@ require 'java'

  require 'hjc/util'
  require 'hjc/hadoop_streaming'
+ require 'hjc/jar_job'
  require 'hjc/fs_shell'
+ require 'hjc/job_monitor'

- home = ENV['HADOOP_HOME']
- ['', 'lib', 'contrib/streaming'].each do |path|
-   Dir.glob(File.join(home, path, "*.jar")) {|jar| require jar }
- end
- $CLASSPATH << "#{home}/conf"
+ Hjc::Util.setup

+ java_import java.net.URL
+ java_import java.net.URLClassLoader
  java_import org.apache.hadoop.streaming.StreamJob
  java_import org.apache.hadoop.fs.FsShell
  java_import org.apache.hadoop.conf.Configuration
+ java_import org.apache.hadoop.util.RunJar
+
+ java_import org.apache.hadoop.ipc.RPC
+ java_import org.apache.hadoop.mapred.JobID
+ java_import org.apache.hadoop.mapred.JobTracker
+ java_import org.apache.hadoop.mapred.JobStatus
+ java_import org.apache.hadoop.mapred.JobSubmissionProtocol
+ java_import java.net.InetSocketAddress
+
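
With the setup moved into Util, `require 'hjc'` is the single entry point: it loads the Hadoop jars, then the java_import calls above expose the Hadoop classes (plus the java.net helpers) as top-level Ruby constants. For example:

    require 'hjc'  # runs Hjc::Util.setup, then the java_imports above

    conf = Configuration.new             # org.apache.hadoop.conf.Configuration
    puts conf.get('mapred.job.tracker')  # nil unless set under $HADOOP_HOME/conf
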
data/spec/hjc/hadoop_streaming_spec.rb CHANGED
@@ -22,14 +22,6 @@ module Hjc
        job.args.join(" ").should == "-input input -output outdir -mapper mapper -reducer reducer"
      end

-     it 'create args for hadoop streaming with options' do
-       job = HadoopStreaming.new
-       job.options["dfs"] = "local"
-       job.options["jobconf"] = "mapred.map.tasks=1"
-
-       job.args.join(" ").should == %!-dfs local -jobconf mapred.map.tasks=1!
-     end
-
      it 'create args for hadoop streaming with files' do
        job = HadoopStreaming.new
        job.input = 'input'
@@ -59,6 +51,7 @@ module Hjc
      end

      it 'can run Hadoop streaming job with string' do
+       pending
        job = HadoopStreaming.new
        job.input = TEST_DATA
        job.output_path = @output_dir
@@ -72,6 +65,15 @@ module Hjc

        clean_output
      end
+
+     it 'sets conf params' do
+       job = HadoopStreaming.new
+       job.input_path = "input"
+       job.output_path = "outdir"
+       job.jobconf['hoge'] = "fuga"
+
+       job.args.join(" ").should == %Q!-input input -output outdir -jobconf hoge=fuga!
+     end

      def assert_result
        File.open(File.join(@output_dir, 'part-00000')) do |f|
data/spec/hjc/jar_job_spec.rb ADDED
@@ -0,0 +1,25 @@
+ require 'hjc'
+
+ module Hjc
+   describe JarJob do
+
+     before :all do
+       @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+     end
+
+     it 'creates hadoop jar job args' do
+       job = JarJob.new
+       job.jar_args = %w!pi 1 10!
+       job.args.join(" ").should == 'pi 1 10'
+     end
+
+     it 'can run hadoop jar job' do
+       job = JarJob.new
+       job.jar_file = @example_jar
+       job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+       job.jar_args = %w!pi 1 10!
+       p job.args
+       job.run
+     end
+   end
+ end
data/spec/hjc/job_monitor_spec.rb ADDED
@@ -0,0 +1,40 @@
+ require 'hjc'
+
+ module Hjc
+   describe JobMonitor do
+
+     before :all do
+       @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+       s = Hjc::FsShell.new
+       s.rmr('PiEstimator_TMP_3_141592654')
+     end
+
+     it 'can monitor hadoop job' do
+       run_job_async
+       sleep 5
+       monitor = JobMonitor.new
+       jobs = monitor.running_jobs
+       jobs.size.should > 0
+
+       job = jobs.first
+       job_id_str = job.job_id.to_s
+       job_id_str.should match(/^job_/)
+
+       monitor.job_status(job_id_str).class.should == JobStatus
+     end
+
+     def run_job_async
+       begin
+         Thread.new do
+           job = JarJob.new
+           job.jar_file = @example_jar
+           job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+           job.jar_args = %w!pi 1 10!
+           job.run
+         end
+       rescue => e
+         #p e
+       end
+     end
+   end
+ end
metadata CHANGED
@@ -5,9 +5,8 @@ version: !ruby/object:Gem::Version
    segments:
    - 0
    - 0
-   - 1
-   - 2010122601
-   version: 0.0.1.2010122601
+   - 3
+   version: 0.0.3
  platform: ruby
  authors:
  - Koichi Fujikawa
@@ -15,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []

- date: 2010-12-26 00:00:00 +09:00
+ date: 2011-04-12 00:00:00 +09:00
  default_executable:
  dependencies: []

@@ -34,6 +33,8 @@ files:
  - lib/hjc.rb
  - lib/hjc/fs_shell.rb
  - lib/hjc/hadoop_streaming.rb
+ - lib/hjc/jar_job.rb
+ - lib/hjc/job_monitor.rb
  - lib/hjc/util.rb
  has_rdoc: true
  homepage: http://github.com/hapyrus/hadoop-jruby-connector
@@ -68,4 +69,6 @@ summary: Hadoop connector by JRuby
  test_files:
  - spec/hjc/fs_shell_spec.rb
  - spec/hjc/hadoop_streaming_spec.rb
+ - spec/hjc/jar_job_spec.rb
+ - spec/hjc/job_monitor_spec.rb
  - spec/hjc/util_spec.rb