hadoop-jruby-connector 0.0.1.2010122601 → 0.0.3
- data/VERSION +1 -1
- data/lib/hjc/fs_shell.rb +6 -0
- data/lib/hjc/hadoop_streaming.rb +8 -7
- data/lib/hjc/jar_job.rb +31 -0
- data/lib/hjc/job_monitor.rb +24 -0
- data/lib/hjc/util.rb +20 -0
- data/lib/hjc.rb +14 -5
- data/spec/hjc/hadoop_streaming_spec.rb +10 -8
- data/spec/hjc/jar_job_spec.rb +25 -0
- data/spec/hjc/job_monitor_spec.rb +40 -0
- metadata +7 -4
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.1.2010122601
+0.0.3
data/lib/hjc/fs_shell.rb
CHANGED
data/lib/hjc/hadoop_streaming.rb
CHANGED
@@ -1,13 +1,13 @@
 module Hjc
   class HadoopStreaming
-    attr_accessor :input_path, :output_path, :mapper_path, :reducer_path
-    attr_accessor :local
-    attr_reader :options
+    attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
+    attr_accessor :local, :debug

     def initialize
       @files = {}
-      @options = {}
+      @jobconf = {}
       @local = false
+      @debug = false
     end

     def run
@@ -47,16 +47,17 @@ module Hjc
       concated_args.concat ['-reducer', @reducer_path] if @reducer_path
       concated_args.concat ['-dfs', 'file:///'] if @local
       concated_args.concat ['-jt', 'local'] if @local # no use?
+      concated_args.concat ['-debug'] if @debug

-      @options.each do |k, v|
-        concated_args.concat ["-#{k}", v]
+      @jobconf.each do |k, v|
+        concated_args += ['-jobconf', "#{k}=#{v}"]
       end

       @files.each do |k, v|
         concated_args.concat ["-file", v.path]
       end

-      puts "args: #{concated_args.join(' ')}"
+      puts "args: #{concated_args.join(' ')}" if @debug
       concated_args
     end
   end
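Taken together, the HadoopStreaming changes swap the free-form options hash for a dedicated jobconf hash plus a debug flag, and silence the argument echo unless debug is on. A minimal usage sketch against the accessors in this diff (the mapred.map.tasks value is illustrative, not from this changeset):

    require 'hjc'

    job = Hjc::HadoopStreaming.new
    job.input_path  = 'input'
    job.output_path = 'outdir'
    job.jobconf['mapred.map.tasks'] = '1'  # each pair becomes: -jobconf key=value
    job.debug = true                       # appends -debug and echoes the args line

    # With these settings, #args yields something like:
    #   -input input -output outdir -debug -jobconf mapred.map.tasks=1
    puts job.args.join(' ')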
data/lib/hjc/jar_job.rb
ADDED
@@ -0,0 +1,31 @@
+module Hjc
+  class JarJob
+    attr_accessor :jar_file, :main_class, :jar_args, :args
+
+    def initialize
+    end
+
+    def run
+      # jar_urls = Util.hadoop_jars.map {|jar| URL.new('file://' + jar)}
+      # cl = URLClassLoader.new(jar_urls.to_java(URL))
+      # java.lang.Thread.current_thread.set_context_class_loader(cl)
+      # org.apache.hadoop.examples.ExampleDriver.main(['pi', '1', '10'])
+      target_class = eval(@main_class)
+      @ret = target_class.main(@jar_args)
+    end
+
+    def success?
+      @ret == 0
+    end
+
+    def args
+      concated_args = []
+      concated_args << @jar_file if @jar_file
+      concated_args << @main_class if @main_class
+      concated_args.concat @jar_args if @jar_args
+
+      puts "args: #{concated_args.join(' ')}"
+      concated_args
+    end
+  end
+end
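JarJob runs a jar's driver class in-process instead of shelling out to hadoop jar: run turns the @main_class string into the JRuby proxy for the Java class via eval and invokes its static main with @jar_args. The commented-out lines record an earlier URLClassLoader approach, now unnecessary since Util.setup already requires the jars at load time. Note that attr_accessor :args is immediately shadowed by the explicit args method. A sketch of the call pattern, mirroring the spec added below (the jar path is hypothetical):

    require 'hjc'

    job = Hjc::JarJob.new
    job.jar_file   = '/path/to/hadoop-examples.jar'  # hypothetical path
    job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
    job.jar_args   = %w!pi 1 10!
    job.run
    puts job.success?  # true only when main returned 0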
data/lib/hjc/job_monitor.rb
ADDED

@@ -0,0 +1,24 @@
+module Hjc
+  class JobMonitor
+    attr_reader :jt # mainly for debug..
+
+    def initialize
+      conf = Configuration.new
+      address, port = *conf.get("mapred.job.tracker").split(":")
+      addr = InetSocketAddress.new(address, port.to_i)
+      @jt = RPC.get_proxy(JobSubmissionProtocol.java_class,
+        JobSubmissionProtocol.versionID, addr, conf)
+    end
+
+    def running_jobs
+      @jt.all_jobs.select do |j|
+        [JobStatus::RUNNING, JobStatus::PREP].include? j.run_state
+      end
+    end
+
+    def job_status(job_id_str)
+      job_id = JobID.for_name(job_id_str)
+      @jt.get_job_status(job_id)
+    end
+  end
+end
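JobMonitor bypasses the client API and talks to the JobTracker directly over Hadoop IPC: it reads mapred.job.tracker from the Configuration on the classpath and builds an RPC proxy for JobSubmissionProtocol. A polling sketch, assuming a running 0.20-era cluster and the java_import lines from hjc.rb (map_progress/reduce_progress are the JobStatus accessors on that API):

    require 'hjc'

    monitor = Hjc::JobMonitor.new
    monitor.running_jobs.each do |j|
      status = monitor.job_status(j.job_id.to_s)
      puts "#{j.job_id}: map #{status.map_progress}, reduce #{status.reduce_progress}"
    end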
data/lib/hjc/util.rb
CHANGED
@@ -4,6 +4,26 @@ module Hjc
   class Util
     TMP_DIR = 'tmp'

+    def self.setup
+      hadoop_jars.each do |jar|
+        require jar # for JRuby
+      end
+
+      $CLASSPATH << "#{hadoop_home}/conf"
+    end
+
+    def self.hadoop_jars
+      jars = []
+      ['', 'lib', 'contrib/streaming'].each do |path|
+        jars.concat Dir.glob(File.join(hadoop_home, path, "*.jar"))
+      end
+      jars
+    end
+
+    def self.hadoop_home
+      ENV['HADOOP_HOME']
+    end
+
     def self.to_temp_file(filename, body, options={})
       file = Tempfile.new(filename, TMP_DIR)
       file.print body
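The classpath bootstrap that previously sat at the top of hjc.rb is now reusable as Util.setup: it requires every jar under HADOOP_HOME (and its lib/ and contrib/streaming/ subdirectories) into the JRuby runtime, then adds conf/ to $CLASSPATH. A quick way to inspect what it would load, assuming HADOOP_HOME is set:

    require 'hjc'  # hjc.rb already calls Hjc::Util.setup on load

    puts Hjc::Util.hadoop_home
    puts Hjc::Util.hadoop_jars  # e.g. hadoop-*-core.jar, lib/*.jar, contrib/streaming/*.jar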
data/lib/hjc.rb
CHANGED
@@ -2,14 +2,23 @@ require 'java'

 require 'hjc/util'
 require 'hjc/hadoop_streaming'
+require 'hjc/jar_job'
 require 'hjc/fs_shell'
+require 'hjc/job_monitor'

-home = ENV['HADOOP_HOME']
-['', 'lib', 'contrib/streaming'].each do |path|
-  Dir.glob(File.join(home, path, "*.jar")) {|jar| require jar }
-end
-$CLASSPATH << "#{home}/conf"
+Hjc::Util.setup

+java_import java.net.URL
+java_import java.net.URLClassLoader
 java_import org.apache.hadoop.streaming.StreamJob
 java_import org.apache.hadoop.fs.FsShell
 java_import org.apache.hadoop.conf.Configuration
+java_import org.apache.hadoop.util.RunJar
+
+java_import org.apache.hadoop.ipc.RPC
+java_import org.apache.hadoop.mapred.JobID
+java_import org.apache.hadoop.mapred.JobTracker
+java_import org.apache.hadoop.mapred.JobStatus
+java_import org.apache.hadoop.mapred.JobSubmissionProtocol
+java_import java.net.InetSocketAddress
+
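After this refactor a single require 'hjc' performs the whole bootstrap: Hjc::Util.setup loads the jars, and the java_import lines expose the RPC/mapred classes that JarJob and JobMonitor need. One consequence is that HADOOP_HOME must be set before the require, since Util.setup globs jars from it at load time. For example (the install path is hypothetical):

    ENV['HADOOP_HOME'] ||= '/usr/lib/hadoop'  # must point at a Hadoop install first
    require 'hjc'

    conf = Configuration.new  # imported at top level above; picks up conf/ from the classpath
    puts conf.get('mapred.job.tracker')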
data/spec/hjc/hadoop_streaming_spec.rb
CHANGED

@@ -22,14 +22,6 @@ module Hjc
       job.args.join(" ").should == "-input input -output outdir -mapper mapper -reducer reducer"
     end

-    it 'create args for hadoop streaming with options' do
-      job = HadoopStreaming.new
-      job.options["dfs"] = "local"
-      job.options["jobconf"] = "mapred.map.tasks=1"
-
-      job.args.join(" ").should == %!-dfs local -jobconf mapred.map.tasks=1!
-    end
-
     it 'create args for hadoop streaming with files' do
       job = HadoopStreaming.new
       job.input = 'input'
@@ -59,6 +51,7 @@ module Hjc
     end

     it 'can run Hadoop streaming job with string' do
+      pending
       job = HadoopStreaming.new
       job.input = TEST_DATA
       job.output_path = @output_dir
@@ -72,6 +65,15 @@ module Hjc

       clean_output
     end
+
+    it 'sets conf params' do
+      job = HadoopStreaming.new
+      job.input_path = "input"
+      job.output_path = "outdir"
+      job.jobconf['hoge'] = "fuga"
+
+      job.args.join(" ").should == %Q!-input input -output outdir -jobconf hoge=fuga!
+    end

     def assert_result
       File.open(File.join(@output_dir, 'part-00000')) do |f|
data/spec/hjc/jar_job_spec.rb
ADDED

@@ -0,0 +1,25 @@
+require 'hjc'
+
+module Hjc
+  describe JarJob do
+
+    before :all do
+      @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+    end
+
+    it 'creates hadoop jar job args' do
+      job = JarJob.new
+      job.jar_args = %w!pi 1 10!
+      job.args.join(" ").should == 'pi 1 10'
+    end
+
+    it 'can run hadoop jar job' do
+      job = JarJob.new
+      job.jar_file = @example_jar
+      job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+      job.jar_args = %w!pi 1 10!
+      p job.args
+      job.run
+    end
+  end
+end
data/spec/hjc/job_monitor_spec.rb
ADDED

@@ -0,0 +1,40 @@
+require 'hjc'
+
+module Hjc
+  describe JobMonitor do
+
+    before :all do
+      @example_jar = Dir.glob(File.join(Util.hadoop_home, 'hadoop-*-examples.jar')).first
+      s = Hjc::FsShell.new
+      s.rmr('PiEstimator_TMP_3_141592654')
+    end
+
+    it 'can monitor hadoop job' do
+      run_job_async
+      sleep 5
+      monitor = JobMonitor.new
+      jobs = monitor.running_jobs
+      jobs.size.should > 0
+
+      job = jobs.first
+      job_id_str = job.job_id.to_s
+      job_id_str.should match(/^job_/)
+
+      monitor.job_status(job_id_str).class.should == JobStatus
+    end
+
+    def run_job_async
+      begin
+        Thread.new do
+          job = JarJob.new
+          job.jar_file = @example_jar
+          job.main_class = 'org.apache.hadoop.examples.ExampleDriver'
+          job.jar_args = %w!pi 1 10!
+          job.run
+        end
+      rescue => e
+        #p e
+      end
+    end
+  end
+end
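A reviewer's note on run_job_async: the begin/rescue wraps Thread.new itself, so an exception raised inside the spawned block is not caught there; in Ruby it only surfaces when the thread is joined (or with Thread.abort_on_exception). The spec works around this by sleeping and then asserting through JobMonitor. If failures should propagate, joining with a timeout is one option:

    job = Hjc::JarJob.new       # configured as in the spec above
    t = Thread.new { job.run }
    t.join(60)                  # re-raises here anything job.run raised in the thread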
metadata
CHANGED
@@ -5,9 +5,8 @@ version: !ruby/object:Gem::Version
 segments:
 - 0
 - 0
-- 1
-- 2010122601
-version: 0.0.1.2010122601
+- 3
+version: 0.0.3
 platform: ruby
 authors:
 - Koichi Fujikawa
@@ -15,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []

-date:
+date: 2011-04-12 00:00:00 +09:00
 default_executable:
 dependencies: []

@@ -34,6 +33,8 @@ files:
 - lib/hjc.rb
 - lib/hjc/fs_shell.rb
 - lib/hjc/hadoop_streaming.rb
+- lib/hjc/jar_job.rb
+- lib/hjc/job_monitor.rb
 - lib/hjc/util.rb
 has_rdoc: true
 homepage: http://github.com/hapyrus/hadoop-jruby-connector
@@ -68,4 +69,6 @@ summary: Hadoop connector by JRuby
 test_files:
 - spec/hjc/fs_shell_spec.rb
 - spec/hjc/hadoop_streaming_spec.rb
+- spec/hjc/jar_job_spec.rb
+- spec/hjc/job_monitor_spec.rb
 - spec/hjc/util_spec.rb