hadoop-jruby-connector 0.0.1.2010122601 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/hjc/fs_shell.rb +6 -0
- data/lib/hjc/hadoop_streaming.rb +8 -7
- data/lib/hjc/jar_job.rb +31 -0
- data/lib/hjc/job_monitor.rb +24 -0
- data/lib/hjc/util.rb +20 -0
- data/lib/hjc.rb +14 -5
- data/spec/hjc/hadoop_streaming_spec.rb +10 -8
- data/spec/hjc/jar_job_spec.rb +25 -0
- data/spec/hjc/job_monitor_spec.rb +40 -0
- metadata +7 -4
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.1.2010122601
+0.0.3
data/lib/hjc/fs_shell.rb
CHANGED
data/lib/hjc/hadoop_streaming.rb
CHANGED
@@ -1,13 +1,13 @@
 module Hjc
   class HadoopStreaming
-    attr_accessor :input_path, :output_path, :mapper_path, :reducer_path
-    attr_accessor :local
-    attr_reader :options
+    attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
+    attr_accessor :local, :debug

     def initialize
       @files = {}
-      @options = {}
+      @jobconf = {}
       @local = false
+      @debug = false
     end

     def run
@@ -47,16 +47,17 @@ module Hjc
       concated_args.concat ['-reducer', @reducer_path] if @reducer_path
       concated_args.concat ['-dfs', 'file:///'] if @local
       concated_args.concat ['-jt', 'local'] if @local # no use?
+      concated_args.concat ['-debug'] if @debug

-      @options.each do |k, v|
-        concated_args.concat ["-#{k}", v]
+      @jobconf.each do |k, v|
+        concated_args += ['-jobconf', "#{k}=#{v}"]
       end

       @files.each do |k, v|
         concated_args.concat ["-file", v.path]
       end

-      puts "args: #{concated_args.join(' ')}"
+      puts "args: #{concated_args.join(' ')}" if @debug
       concated_args
     end
   end
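The jobconf hash and debug flag replace the old free-form options hash: each jobconf pair is emitted as a -jobconf k=v argument, and the args echo now prints only when debug is on. A minimal usage sketch based on the updated spec (the paths and the mapred.map.tasks value are illustrative, not taken from the diff):

    job = Hjc::HadoopStreaming.new
    job.input_path = 'input'                # illustrative input path
    job.output_path = 'outdir'              # illustrative output path
    job.jobconf['mapred.map.tasks'] = '1'   # emitted as: -jobconf mapred.map.tasks=1
    job.debug = true                        # appends -debug and prints the args line
    job.args.join(' ')
    # => "-input input -output outdir -debug -jobconf mapred.map.tasks=1"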
data/lib/hjc/jar_job.rb
ADDED
@@ -0,0 +1,31 @@
+module Hjc
+  class JarJob
+    attr_accessor :jar_file, :main_class, :jar_args, :args
+
+    def initialize
+    end
+
+    def run
+      # jar_urls = Util.hadoop_jars.map {|jar| URL.new('file://' + jar)}
+      # cl = URLClassLoader.new(jar_urls.to_java(URL))
+      # java.lang.Thread.current_thread.set_context_class_loader(cl)
+      # org.apache.hadoop.examples.ExampleDriver.main(['pi', '1', '10'])
+      target_class = eval(@main_class)
+      @ret = target_class.main(@jar_args)
+    end
+
+    def success?
+      @ret == 0
+    end
+
+    def args
+      concated_args = []
+      concated_args << @jar_file if @jar_file
+      concated_args << @main_class if @main_class
+      concated_args.concat @jar_args if @jar_args
+
+      puts "args: #{concated_args.join(' ')}"
+      concated_args
+    end
+  end
+end
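JarJob runs a jar's entry point in-process: it resolves main_class with eval and calls main on the resulting class reference, instead of shelling out to `hadoop jar` (the commented-out lines sketch an abandoned URLClassLoader approach). A usage sketch mirroring the spec added below (the jar path is illustrative; the spec globs hadoop-*-examples.jar under HADOOP_HOME):

    job = Hjc::JarJob.new
    job.jar_file = '/opt/hadoop/hadoop-0.20.2-examples.jar'  # illustrative path
    job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
    job.jar_args = %w!pi 1 10!
    job.run          # evals the class name and invokes its main()
    job.success?     # compares the stored return value against 0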
data/lib/hjc/job_monitor.rb
ADDED
@@ -0,0 +1,24 @@
+module Hjc
+  class JobMonitor
+    attr_reader :jt # mainly for debug..
+
+    def initialize
+      conf = Configuration.new
+      address, port = *conf.get("mapred.job.tracker").split(":")
+      addr = InetSocketAddress.new(address, port.to_i)
+      @jt = RPC.get_proxy(JobSubmissionProtocol.java_class,
+        JobSubmissionProtocol.versionID, addr, conf)
+    end
+
+    def running_jobs
+      @jt.all_jobs.select do |j|
+        [JobStatus::RUNNING, JobStatus::PREP].include? j.run_state
+      end
+    end
+
+    def job_status(job_id_str)
+      job_id = JobID.for_name(job_id_str)
+      @jt.get_job_status(job_id)
+    end
+  end
+end
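JobMonitor opens an RPC proxy straight to the JobTracker named by mapred.job.tracker, so it needs a live cluster and the conf directory on the classpath. A polling sketch matching the spec added below (the job id string is hypothetical):

    monitor = Hjc::JobMonitor.new
    monitor.running_jobs.each do |job|    # JobStatus objects in RUNNING or PREP state
      puts job.job_id.to_s                # ids look like job_<timestamp>_<seq>
    end
    status = monitor.job_status('job_201104120001_0001')  # hypothetical id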
data/lib/hjc/util.rb
CHANGED
@@ -4,6 +4,26 @@ module Hjc
   class Util
     TMP_DIR = 'tmp'

+    def self.setup
+      hadoop_jars.each do |jar|
+        require jar # for JRuby
+      end
+
+      $CLASSPATH << "#{hadoop_home}/conf"
+    end
+
+    def self.hadoop_jars
+      jars = []
+      ['', 'lib', 'contrib/streaming'].each do |path|
+        jars.concat Dir.glob(File.join(hadoop_home, path, "*.jar"))
+      end
+      jars
+    end
+
+    def self.hadoop_home
+      ENV['HADOOP_HOME']
+    end
+
     def self.to_temp_file(filename, body, options={})
       file = Tempfile.new(filename, TMP_DIR)
       file.print body
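Util.setup centralizes the jar loading that lib/hjc.rb previously did inline: every jar under HADOOP_HOME, HADOOP_HOME/lib, and HADOOP_HOME/contrib/streaming is required into JRuby, then the conf directory is appended to $CLASSPATH. A sketch of the effect, assuming a stock Hadoop 0.20-style layout (paths and jar names illustrative):

    ENV['HADOOP_HOME'] = '/opt/hadoop'   # must be set before setup runs
    Hjc::Util.hadoop_jars
    # => ["/opt/hadoop/hadoop-0.20.2-core.jar",
    #     "/opt/hadoop/lib/commons-cli-1.2.jar",
    #     "/opt/hadoop/contrib/streaming/hadoop-0.20.2-streaming.jar", ...]
    Hjc::Util.setup                      # requires each jar; $CLASSPATH << "/opt/hadoop/conf"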
data/lib/hjc.rb
CHANGED
@@ -2,14 +2,23 @@ require 'java'

 require 'hjc/util'
 require 'hjc/hadoop_streaming'
+require 'hjc/jar_job'
 require 'hjc/fs_shell'
+require 'hjc/job_monitor'

-home = ENV['HADOOP_HOME']
-['', 'lib', 'contrib/streaming'].each do |path|
-  Dir.glob(File.join(home, path, "*.jar")) {|jar| require jar }
-end
-$CLASSPATH << "#{home}/conf"
+Hjc::Util.setup

+java_import java.net.URL
+java_import java.net.URLClassLoader
 java_import org.apache.hadoop.streaming.StreamJob
 java_import org.apache.hadoop.fs.FsShell
 java_import org.apache.hadoop.conf.Configuration
+java_import org.apache.hadoop.util.RunJar
+
+java_import org.apache.hadoop.ipc.RPC
+java_import org.apache.hadoop.mapred.JobID
+java_import org.apache.hadoop.mapred.JobTracker
+java_import org.apache.hadoop.mapred.JobStatus
+java_import org.apache.hadoop.mapred.JobSubmissionProtocol
+java_import java.net.InetSocketAddress
+
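With setup factored out, requiring the gem is the whole bootstrap, and the new java_import lines make the JobTracker RPC classes (RPC, JobID, JobStatus, JobSubmissionProtocol) available to JobMonitor. For example (install path assumed):

    ENV['HADOOP_HOME'] = '/opt/hadoop'   # assumed install path; read by Hjc::Util.setup
    require 'hjc'                        # loads the jars, conf dir, and imports above
    Hjc::JobMonitor                      # the mapred classes it wraps are now resolvable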
data/spec/hjc/hadoop_streaming_spec.rb
CHANGED
@@ -22,14 +22,6 @@ module Hjc
       job.args.join(" ").should == "-input input -output outdir -mapper mapper -reducer reducer"
     end

-    it 'create args for hadoop streaming with options' do
-      job = HadoopStreaming.new
-      job.options["dfs"] = "local"
-      job.options["jobconf"] = "mapred.map.tasks=1"
-
-      job.args.join(" ").should == %!-dfs local -jobconf mapred.map.tasks=1!
-    end
-
     it 'create args for hadoop streaming with files' do
       job = HadoopStreaming.new
       job.input = 'input'
@@ -59,6 +51,7 @@ module Hjc
     end

     it 'can run Hadoop streaming job with string' do
+      pending
       job = HadoopStreaming.new
       job.input = TEST_DATA
       job.output_path = @output_dir
@@ -72,6 +65,15 @@ module Hjc

       clean_output
     end
+
+    it 'sets conf params' do
+      job = HadoopStreaming.new
+      job.input_path = "input"
+      job.output_path = "outdir"
+      job.jobconf['hoge'] = "fuga"
+
+      job.args.join(" ").should == %Q!-input input -output outdir -jobconf hoge=fuga!
+    end

     def assert_result
       File.open(File.join(@output_dir, 'part-00000')) do |f|
data/spec/hjc/jar_job_spec.rb
ADDED
@@ -0,0 +1,25 @@
+require 'hjc'
+
+module Hjc
+  describe JarJob do
+
+    before :all do
+      @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+    end
+
+    it 'creates hadoop jar job args' do
+      job = JarJob.new
+      job.jar_args = %w!pi 1 10!
+      job.args.join(" ").should == 'pi 1 10'
+    end
+
+    it 'can run hadoop jar job' do
+      job = JarJob.new
+      job.jar_file = @example_jar
+      job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+      job.jar_args = %w!pi 1 10!
+      p job.args
+      job.run
+    end
+  end
+end
data/spec/hjc/job_monitor_spec.rb
ADDED
@@ -0,0 +1,40 @@
+require 'hjc'
+
+module Hjc
+  describe JobMonitor do
+
+    before :all do
+      @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+      s = Hjc::FsShell.new
+      s.rmr('PiEstimator_TMP_3_141592654')
+    end
+
+    it 'can monitor hadoop job' do
+      run_job_async
+      sleep 5
+      monitor = JobMonitor.new
+      jobs = monitor.running_jobs
+      jobs.size.should > 0
+
+      job = jobs.first
+      job_id_str = job.job_id.to_s
+      job_id_str.should match(/^job_/)
+
+      monitor.job_status(job_id_str).class.should == JobStatus
+    end
+
+    def run_job_async
+      begin
+        Thread.new do
+          job = JarJob.new
+          job.jar_file = @example_jar
+          job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+          job.jar_args = %w!pi 1 10!
+          job.run
+        end
+      rescue => e
+        #p e
+      end
+    end
+  end
+end
metadata
CHANGED
@@ -5,9 +5,8 @@ version: !ruby/object:Gem::Version
 segments:
 - 0
 - 0
-- 1
-- 2010122601
-version: 0.0.1.2010122601
+- 3
+version: 0.0.3
 platform: ruby
 authors:
 - Koichi Fujikawa
@@ -15,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []

-date:
+date: 2011-04-12 00:00:00 +09:00
 default_executable:
 dependencies: []

@@ -34,6 +33,8 @@ files:
 - lib/hjc.rb
 - lib/hjc/fs_shell.rb
 - lib/hjc/hadoop_streaming.rb
+- lib/hjc/jar_job.rb
+- lib/hjc/job_monitor.rb
 - lib/hjc/util.rb
 has_rdoc: true
 homepage: http://github.com/hapyrus/hadoop-jruby-connector
@@ -68,4 +69,6 @@ summary: Hadoop connector by JRuby
 test_files:
 - spec/hjc/fs_shell_spec.rb
 - spec/hjc/hadoop_streaming_spec.rb
+- spec/hjc/jar_job_spec.rb
+- spec/hjc/job_monitor_spec.rb
 - spec/hjc/util_spec.rb