hadoop-jruby-connector 0.0.1.2010122601 → 0.0.3

data/VERSION CHANGED
@@ -1 +1 @@
- 0.0.1.2010122601
+ 0.0.3
data/lib/hjc/fs_shell.rb CHANGED
@@ -22,6 +22,12 @@ module Hjc
      run
    end

+   def rmr(remote)
+     @cmd = :rmr
+     @params = [remote]
+     run
+   end
+
    def run
      java_shell = ::FsShell.new(Configuration.new)
      java_shell.run(build_args)
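The new `rmr` helper recursively deletes a DFS path through Hadoop's FsShell. A minimal sketch, assuming a JRuby session with HADOOP_HOME set (the target path below is hypothetical):

```ruby
require 'hjc'

# Recursively remove a scratch directory on the DFS (path is hypothetical).
shell = Hjc::FsShell.new
shell.rmr('tmp/scratch-output')
```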
data/lib/hjc/hadoop_streaming.rb CHANGED
@@ -1,13 +1,13 @@
  module Hjc
    class HadoopStreaming
-     attr_accessor :input_path, :output_path, :mapper_path, :reducer_path
-     attr_accessor :local
-     attr_reader :options
+     attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
+     attr_accessor :local, :debug

      def initialize
        @files = {}
-       @options = {}
+       @jobconf = {}
        @local = false
+       @debug = false
      end

      def run
@@ -47,16 +47,17 @@ module Hjc
      concated_args.concat ['-reducer', @reducer_path] if @reducer_path
      concated_args.concat ['-dfs', 'file:///'] if @local
      concated_args.concat ['-jt', 'local'] if @local # no use?
+     concated_args.concat ['-debug'] if @debug

-     @options.each do |k, v|
-       concated_args.concat ["-#{k}", v]
+     @jobconf.each do |k, v|
+       concated_args += ['-jobconf', "#{k}=#{v}"]
      end

      @files.each do |k, v|
        concated_args.concat ["-file", v.path]
      end

-     puts "args: #{concated_args.join(' ')}"
+     puts "args: #{concated_args.join(' ')}" if @debug
      concated_args
    end
  end
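The generic `options` hash is gone: job configuration now goes through `jobconf`, which expands each pair into a `-jobconf key=value` argument, and the new `debug` flag gates both `-debug` and the arg-list printout. A minimal sketch with hypothetical paths:

```ruby
require 'hjc'

job = Hjc::HadoopStreaming.new
job.input_path   = 'input'        # hypothetical DFS paths
job.output_path  = 'outdir'
job.mapper_path  = 'mapper.rb'
job.reducer_path = 'reducer.rb'
job.jobconf['mapred.map.tasks'] = '1'  # becomes: -jobconf mapred.map.tasks=1
job.debug = true                       # adds -debug and echoes the full arg list
job.run
```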
data/lib/hjc/jar_job.rb ADDED
@@ -0,0 +1,31 @@
+ module Hjc
+   class JarJob
+     attr_accessor :jar_file, :main_class, :jar_args, :args
+
+     def initialize
+     end
+
+     def run
+       # jar_urls = Util.hadoop_jars.map {|jar| URL.new('file://' + jar)}
+       # cl = URLClassLoader.new(jar_urls.to_java(URL))
+       # java.lang.Thread.current_thread.set_context_class_loader(cl)
+       # org.apache.hadoop.examples.ExampleDriver.main(['pi', '1', '10'])
+       target_class = eval(@main_class)
+       @ret = target_class.main(@jar_args)
+     end
+
+     def success?
+       @ret == 0
+     end
+
+     def args
+       concated_args = []
+       concated_args << @jar_file if @jar_file
+       concated_args << @main_class if @main_class
+       concated_args.concat @jar_args if @jar_args
+
+       puts "args: #{concated_args.join(' ')}"
+       concated_args
+     end
+   end
+ end
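JarJob resolves the driver class with `eval` and invokes its static `main`, so the job runs in-process under JRuby. A minimal sketch using the examples jar bundled with a Hadoop distribution, mirroring the spec below:

```ruby
require 'hjc'

# Run the pi estimator from Hadoop's bundled examples jar.
job = Hjc::JarJob.new
job.jar_file   = Dir.glob(File.join(Hjc::Util.hadoop_home, 'hadoop-*-examples.jar')).first
job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
job.jar_args   = %w!pi 1 10!
job.run
puts job.success?  # true only when main returned 0
```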
data/lib/hjc/job_monitor.rb ADDED
@@ -0,0 +1,24 @@
+ module Hjc
+   class JobMonitor
+     attr_reader :jt # mainly for debugging
+
+     def initialize
+       conf = Configuration.new
+       address, port = *conf.get("mapred.job.tracker").split(":")
+       addr = InetSocketAddress.new(address, port.to_i)
+       @jt = RPC.get_proxy(JobSubmissionProtocol.java_class,
+                           JobSubmissionProtocol.versionID, addr, conf)
+     end
+
+     def running_jobs
+       @jt.all_jobs.select do |j|
+         [JobStatus::RUNNING, JobStatus::PREP].include? j.run_state
+       end
+     end
+
+     def job_status(job_id_str)
+       job_id = JobID.for_name(job_id_str)
+       @jt.get_job_status(job_id)
+     end
+   end
+ end
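JobMonitor reads `mapred.job.tracker` from the loaded configuration and opens an RPC proxy to the JobTracker, so it needs nothing beyond a reachable cluster. A minimal sketch:

```ruby
require 'hjc'

# List jobs that are currently running or queued, then print each status.
monitor = Hjc::JobMonitor.new
monitor.running_jobs.each do |j|
  puts monitor.job_status(j.job_id.to_s)
end
```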
data/lib/hjc/util.rb CHANGED
@@ -4,6 +4,26 @@ module Hjc
    class Util
      TMP_DIR = 'tmp'

+     def self.setup
+       hadoop_jars.each do |jar|
+         require jar # for JRuby
+       end
+
+       $CLASSPATH << "#{hadoop_home}/conf"
+     end
+
+     def self.hadoop_jars
+       jars = []
+       ['', 'lib', 'contrib/streaming'].each do |path|
+         jars.concat Dir.glob(File.join(hadoop_home, path, "*.jar"))
+       end
+       jars
+     end
+
+     def self.hadoop_home
+       ENV['HADOOP_HOME']
+     end
+
      def self.to_temp_file(filename, body, options={})
        file = Tempfile.new(filename, TMP_DIR)
        file.print body
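`Util.setup` centralizes the classpath bootstrap that previously lived inline in hjc.rb: it requires every jar under HADOOP_HOME (plus lib/ and contrib/streaming/) and appends the conf/ directory to `$CLASSPATH`. A minimal sketch, assuming a hypothetical install path:

```ruby
require 'java'
require 'hjc/util'

ENV['HADOOP_HOME'] ||= '/usr/local/hadoop'  # hypothetical install location

puts Hjc::Util.hadoop_jars.length  # jars that setup will require
Hjc::Util.setup                    # load them and put conf/ on $CLASSPATH
```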
data/lib/hjc.rb CHANGED
@@ -2,14 +2,23 @@ require 'java'

  require 'hjc/util'
  require 'hjc/hadoop_streaming'
+ require 'hjc/jar_job'
  require 'hjc/fs_shell'
+ require 'hjc/job_monitor'

- home = ENV['HADOOP_HOME']
- ['', 'lib', 'contrib/streaming'].each do |path|
-   Dir.glob(File.join(home, path, "*.jar")) {|jar| require jar }
- end
- $CLASSPATH << "#{home}/conf"
+ Hjc::Util.setup

+ java_import java.net.URL
+ java_import java.net.URLClassLoader
  java_import org.apache.hadoop.streaming.StreamJob
  java_import org.apache.hadoop.fs.FsShell
  java_import org.apache.hadoop.conf.Configuration
+ java_import org.apache.hadoop.util.RunJar
+
+ java_import org.apache.hadoop.ipc.RPC
+ java_import org.apache.hadoop.mapred.JobID
+ java_import org.apache.hadoop.mapred.JobTracker
+ java_import org.apache.hadoop.mapred.JobStatus
+ java_import org.apache.hadoop.mapred.JobSubmissionProtocol
+ java_import java.net.InetSocketAddress
+
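With the bootstrap folded into the library, a single require now wires up the classpath and all of the Java imports above. A hypothetical session:

```ruby
require 'hjc'  # runs Hjc::Util.setup and the java_imports above

shell   = Hjc::FsShell.new
monitor = Hjc::JobMonitor.new
puts monitor.running_jobs.length
```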
data/spec/hjc/hadoop_streaming_spec.rb CHANGED
@@ -22,14 +22,6 @@ module Hjc
      job.args.join(" ").should == "-input input -output outdir -mapper mapper -reducer reducer"
    end

-   it 'create args for hadoop streaming with options' do
-     job = HadoopStreaming.new
-     job.options["dfs"] = "local"
-     job.options["jobconf"] = "mapred.map.tasks=1"
-
-     job.args.join(" ").should == %!-dfs local -jobconf mapred.map.tasks=1!
-   end
-
    it 'create args for hadoop streaming with files' do
      job = HadoopStreaming.new
      job.input = 'input'
@@ -59,6 +51,7 @@ module Hjc
    end

    it 'can run Hadoop streaming job with string' do
+     pending
      job = HadoopStreaming.new
      job.input = TEST_DATA
      job.output_path = @output_dir
@@ -72,6 +65,15 @@ module Hjc

      clean_output
    end
+
+   it 'sets conf params' do
+     job = HadoopStreaming.new
+     job.input_path = "input"
+     job.output_path = "outdir"
+     job.jobconf['hoge'] = "fuga"
+
+     job.args.join(" ").should == %Q!-input input -output outdir -jobconf hoge=fuga!
+   end

    def assert_result
      File.open(File.join(@output_dir, 'part-00000')) do |f|
data/spec/hjc/jar_job_spec.rb ADDED
@@ -0,0 +1,25 @@
+ require 'hjc'
+
+ module Hjc
+   describe JarJob do
+
+     before :all do
+       @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+     end
+
+     it 'creates hadoop jar job args' do
+       job = JarJob.new
+       job.jar_args = %w!pi 1 10!
+       job.args.join(" ").should == 'pi 1 10'
+     end
+
+     it 'can run hadoop jar job' do
+       job = JarJob.new
+       job.jar_file = @example_jar
+       job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+       job.jar_args = %w!pi 1 10!
+       p job.args
+       job.run
+     end
+   end
+ end
data/spec/hjc/job_monitor_spec.rb ADDED
@@ -0,0 +1,40 @@
+ require 'hjc'
+
+ module Hjc
+   describe JobMonitor do
+
+     before :all do
+       @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+       s = Hjc::FsShell.new
+       s.rmr('PiEstimator_TMP_3_141592654')
+     end
+
+     it 'can monitor hadoop job' do
+       run_job_async
+       sleep 5
+       monitor = JobMonitor.new
+       jobs = monitor.running_jobs
+       jobs.size.should > 0
+
+       job = jobs.first
+       job_id_str = job.job_id.to_s
+       job_id_str.should match(/^job_/)
+
+       monitor.job_status(job_id_str).class.should == JobStatus
+     end
+
+     def run_job_async
+       begin
+         Thread.new do
+           job = JarJob.new
+           job.jar_file = @example_jar
+           job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+           job.jar_args = %w!pi 1 10!
+           job.run
+         end
+       rescue => e
+         #p e
+       end
+     end
+   end
+ end
metadata CHANGED
@@ -5,9 +5,8 @@ version: !ruby/object:Gem::Version
    segments:
    - 0
    - 0
-   - 1
-   - 2010122601
-   version: 0.0.1.2010122601
+   - 3
+   version: 0.0.3
    platform: ruby
    authors:
    - Koichi Fujikawa
@@ -15,7 +14,7 @@ autorequire:
    bindir: bin
    cert_chain: []

-   date: 2010-12-26 00:00:00 +09:00
+   date: 2011-04-12 00:00:00 +09:00
    default_executable:
    dependencies: []

@@ -34,6 +33,8 @@ files:
    - lib/hjc.rb
    - lib/hjc/fs_shell.rb
    - lib/hjc/hadoop_streaming.rb
+   - lib/hjc/jar_job.rb
+   - lib/hjc/job_monitor.rb
    - lib/hjc/util.rb
    has_rdoc: true
    homepage: http://github.com/hapyrus/hadoop-jruby-connector
@@ -68,4 +69,6 @@ summary: Hadoop connector by JRuby
    test_files:
    - spec/hjc/fs_shell_spec.rb
    - spec/hjc/hadoop_streaming_spec.rb
+   - spec/hjc/jar_job_spec.rb
+   - spec/hjc/job_monitor_spec.rb
    - spec/hjc/util_spec.rb