elephant-driver 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +44 -0
- data/Rakefile +75 -0
- data/VERSION +1 -0
- data/lib/elephant-driver.rb +19 -0
- data/lib/elephant-driver/client.rb +69 -0
- data/lib/elephant-driver/job.rb +127 -0
- data/lib/elephant-driver/task.rb +46 -0
- data/lib/elephant-driver/thrift/common.thrift +129 -0
- data/lib/elephant-driver/thrift/common_constants.rb +12 -0
- data/lib/elephant-driver/thrift/common_types.rb +209 -0
- data/lib/elephant-driver/thrift/hadoop_service_base.rb +314 -0
- data/lib/elephant-driver/thrift/jobtracker.rb +1466 -0
- data/lib/elephant-driver/thrift/jobtracker.thrift +478 -0
- data/lib/elephant-driver/thrift/jobtracker_constants.rb +14 -0
- data/lib/elephant-driver/thrift/jobtracker_types.rb +735 -0
- data/lib/elephant-driver/tracker.rb +10 -0
- data/lib/elephant-driver/version.rb +5 -0
- metadata +85 -0
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# About
|
2
|
+
|
3
|
+
'elephant-driver' is a Ruby library to communicate with Hadoop daemons.
|
4
|
+
|
5
|
+
# Notice
|
6
|
+
|
7
|
+
Currently, only communicating with JobTracker is supported. Other daemons (TaskTracker, NameNode, DataNode) are not supported yet.
|
8
|
+
|
9
|
+
# Requirements
|
10
|
+
|
11
|
+
* thrift
|
12
|
+
* nokogiri
|
13
|
+
|
14
|
+
# Setup
|
15
|
+
|
16
|
+
This library assumes that you're using CDH3 (Cloudera Distribution for Hadoop, version 3).
|
17
|
+
|
18
|
+
You first need to install the 'hue-plugins' package on the JobTracker node. Then, the following settings need to be included in your mapred-site.xml.
|
19
|
+
|
20
|
+
```xml
|
21
|
+
<!-- Enable Hue plugins -->
|
22
|
+
<property>
|
23
|
+
<name>mapred.jobtracker.plugins</name>
|
24
|
+
<value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
|
25
|
+
<description>Comma-separated list of jobtracker plug-ins to be activated.</description>
|
26
|
+
</property>
|
27
|
+
<property>
|
28
|
+
<name>jobtracker.thrift.address</name>
|
29
|
+
<value>0.0.0.0:9290</value>
|
30
|
+
</property>
|
31
|
+
```
|
32
|
+
|
33
|
+
# Usage
|
34
|
+
|
35
|
+
See spec/ directory for the example usage.
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
@cln = ElephantDriver::Client.new($HOST, $PORT)
|
39
|
+
@cln.jobs.each { |j|
|
40
|
+
j.tasks.each { |t|
|
41
|
+
t.counters
|
42
|
+
}
|
43
|
+
}
|
44
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# Rakefile for the elephant-driver gem.
#
# Provides:
#   * gem packaging/release tasks via Jeweler
#   * RSpec tasks ('spec', 'rcov')
#   * 'thrift_gen' to re-download the Hue thrift IDL files and regenerate
#     the Ruby bindings under lib/elephant-driver/thrift/
#   * a file task regenerating lib/elephant-driver/version.rb from VERSION

require 'rake'
require 'rake/testtask'
require 'rake/clean'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "elephant-driver"
    # FIX: corrected typo in the published gem summary ("libray" -> "library").
    gemspec.summary = "Ruby library for managing Hadoop clusters"
    gemspec.author = "Kazuki Ohta"
    gemspec.email = "kazuki.ohta@gmail.com"
    #gemspec.homepage = "http://.../"
    gemspec.has_rdoc = false
    gemspec.require_paths = ["lib"]
    gemspec.add_dependency "thrift", "~> 0.7.0"
    gemspec.add_dependency "nokogiri", ">= 1.5.0"
    gemspec.test_files = Dir["test/**/*.rb", "test/**/*.sh"]
    gemspec.files = Dir["bin/**/*", "lib/**/*", "test/**/*.rb"]
    gemspec.executables = []
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end

############################

require 'spec/rake/spectask'

desc "Run all specs"
Spec::Rake::SpecTask.new('spec') do |t|
  t.spec_opts = ['--colour --format progress --loadby mtime --reverse']
  t.spec_files = FileList['spec/*_spec.rb']
end

desc "Run all examples with RCov"
Spec::Rake::SpecTask.new('rcov') do |t|
  t.spec_files = FileList['spec/*_spec.rb']
  t.rcov = true
  t.rcov_opts = ['--exclude', 'examples']
end

# task :default => :spec

############################

# Re-fetch the thrift IDL files from the Cloudera Hue repository and
# regenerate the Ruby bindings into lib/elephant-driver/thrift/.
task "thrift_gen" do
  system "rm -f common.thrift jobtracker.thrift"
  system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/common.thrift"
  system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/jobtracker.thrift"
  system "mv common.thrift lib/elephant-driver/thrift/"
  system "mv jobtracker.thrift lib/elephant-driver/thrift/"
  system "mkdir -p tmp"
  system "thrift --gen rb -o tmp lib/elephant-driver/thrift/common.thrift"
  system "thrift --gen rb -o tmp lib/elephant-driver/thrift/jobtracker.thrift"
  system "mv tmp/gen-rb/* lib/elephant-driver/thrift"
  system "rm -fR tmp"
end

VERSION_FILE = "lib/elephant-driver/version.rb"

# Regenerate version.rb whenever the VERSION file changes.
file VERSION_FILE => ["VERSION"] do |t|
  version = File.read("VERSION").strip
  File.open(VERSION_FILE, "w") {|f|
    f.write <<EOF
module ElephantDriver

VERSION = '#{version}'

end
EOF
  }
end

task :default => [VERSION_FILE, :build]
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Top-level loader for the elephant-driver library: pulls in the
# thrift-generated bindings first, then the hand-written wrapper classes.

require 'thrift'

base_dir   = File.join(File.dirname(__FILE__), 'elephant-driver')
thrift_dir = File.join(base_dir, 'thrift')

# The thrift-generated sources require each other by bare file name,
# so their directory must be resolvable on the load path.
$:.unshift thrift_dir

%w[
  common_types
  common_constants
  hadoop_service_base
  jobtracker_types
  jobtracker_constants
  jobtracker
].each do |name|
  require File.join(thrift_dir, name)
end

# Hand-written library files.
%w[client task job tracker].each do |name|
  require File.join(base_dir, name)
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module ElephantDriver

  # Client for the Thrift interface exposed by the Hue JobTracker plugin
  # (ThriftJobTrackerPlugin) running inside the JobTracker daemon.
  #
  # Every RPC is sent together with a RequestContext whose
  # 'effective_user' option tells the server which user to act as.
  class Client
    # host    - JobTracker hostname
    # port    - thrift plugin port (jobtracker.thrift.address, default 9290)
    # user    - effective user name sent with every request
    # timeout - socket timeout in seconds
    def initialize(host, port=9290, user='mapred', timeout=30)
      socket = Thrift::Socket.new host, port
      socket.timeout = timeout * 1000

      @transport = Thrift::BufferedTransport.new socket
      @transport.open

      # 2011/08/23 Kazuki Ohta <kazuki.ohta@gmail.com>
      # explicitly specify TCP_NODELAY for low-latency communication.
      io = socket.to_io
      io.setsockopt Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1

      proto = Thrift::BinaryProtocol.new @transport
      @client = Hadoop::API::Jobtracker::Jobtracker::Client.new proto
      @ctx = Hadoop::API::RequestContext.new(:confOptions => { 'effective_user' => user })
    end

    # Job wrappers filtered by status
    # (:running, :completed, :failed, :killed; anything else => all jobs).
    def jobs(status=:running)
      rpc =
        case status
        when :running   then :getRunningJobs
        when :completed then :getCompletedJobs
        when :failed    then :getFailedJobs
        when :killed    then :getKilledJobs
        else                 :getAllJobs
        end
      call(rpc).jobs.map do |thrift_job|
        Job.new(self, thrift_job)
      end
    end

    # Look up a single job by its id.
    def get_job(job_id)
      Job.new self, call(:getJob, job_id)
    end

    # Tracker wrappers filtered by status
    # (:active, :blacklisted; anything else => all trackers).
    def trackers(status=:active)
      rpc =
        case status
        when :active      then :getActiveTrackers
        when :blacklisted then :getBlacklistedTrackers
        else                   :getAllTrackers
        end
      call(rpc).trackers.map do |thrift_tracker|
        Tracker.new(self, thrift_tracker)
      end
    end

    # Look up a single task tracker by name.
    def get_tracker(name)
      Tracker.new(self, (call :getTracker, name))
    end

    # TODO: stub, not implemented yet — task listing is currently done
    # per-job via Job#tasks. Returns nil.
    def tasks
    end

    # Raw cluster status structure from the getClusterStatus RPC.
    def status
      call :getClusterStatus
    end

    private

    # Invoke a thrift RPC, always passing the request context first.
    def call(method, *args)
      @client.send method, @ctx, *args
    end
  end

end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'nokogiri'

module ElephantDriver

  # Read-mostly wrapper around a thrift job structure returned by the
  # JobTracker, plus per-job RPCs (counters, task list, priority, kill).
  class Job
    # Maps the 1-based runState enum from the thrift interface onto symbols.
    STATES = [ :running, :succeeded, :failed, :prep, :killed ]

    # cln        - the ElephantDriver::Client that fetched this job
    # thrift_job - the raw thrift job structure
    def initialize(cln, thrift_job)
      @cln = cln
      @thrift_job = thrift_job
    end

    def job_id
      @thrift_job.jobID
    end

    # User who submitted the job.
    def user
      @thrift_job.status.user
    end

    # A job is considered completed once it has left the :running state.
    def completed?
      state != :running
    end

    def start_time
      @thrift_job.startTime
    end

    def launch_time
      @thrift_job.launchTime
    end

    def finish_time
      @thrift_job.finishTime
    end

    # Symbolic job state; runState is a 1-based thrift enum, hence the -1.
    def state
      STATES[@thrift_job.status.runState - 1]
    end

    def map_progress
      @thrift_job.status.mapProgress
    end

    def reduce_progress
      @thrift_job.status.reduceProgress
    end

    def cleanup_progress
      @thrift_job.status.cleanupProgress
    end

    def setup_progress
      @thrift_job.status.setupProgress
    end

    # Overall progress as the simple mean of map and reduce progress
    # (setup/cleanup phases are not included).
    def progress
      (@thrift_job.status.mapProgress + @thrift_job.status.reduceProgress) / 2.0
    end

    # Fetches and parses the job's configuration XML into a Hash of
    # property name => value. Memoized per Job instance.
    #
    # FIX: previously this issued the getJobConfXML RPC and then discarded
    # the response, always returning {} (the parsing was left commented out,
    # and its memoization would not have prevented the RPC from re-running).
    def config_params
      @parsed_config ||= begin
        xml = call :getJobConfXML
        Nokogiri::XML(xml).xpath("//property").inject({}) { |props, xprop|
          props[xprop.xpath("./name").text] = xprop.xpath("./value").text
          props
        }
      end
    end

    # Job-level counter values grouped by counter group name:
    #   { group_name => { counter_name => value } }
    def counters
      counters = {}
      ret = call :getJobCounters
      ret.groups.each { |g|
        h = {}
        g.counters.each { |name, c| h[name] = c.value }
        counters[g.name] = h
      }
      counters
    end

    # Task wrappers for every task of this job, of all types and states.
    #
    # limit/offset are forwarded to the getTaskList RPC; the defaults keep
    # the previous behavior (first 10000 tasks).
    def tasks(limit=10000, offset=0)
      types = [
        Hadoop::API::Jobtracker::ThriftTaskType::MAP,
        Hadoop::API::Jobtracker::ThriftTaskType::REDUCE,
        Hadoop::API::Jobtracker::ThriftTaskType::JOB_SETUP,
        Hadoop::API::Jobtracker::ThriftTaskType::JOB_CLEANUP,
        Hadoop::API::Jobtracker::ThriftTaskType::TASK_CLEANUP,
      ]
      states = [
        Hadoop::API::Jobtracker::ThriftTaskState::RUNNING,
        Hadoop::API::Jobtracker::ThriftTaskState::SUCCEEDED,
        Hadoop::API::Jobtracker::ThriftTaskState::FAILED,
        Hadoop::API::Jobtracker::ThriftTaskState::UNASSIGNED,
        Hadoop::API::Jobtracker::ThriftTaskState::KILLED,
        Hadoop::API::Jobtracker::ThriftTaskState::COMMIT_PENDING,
        Hadoop::API::Jobtracker::ThriftTaskState::FAILED_UNCLEAN,
        Hadoop::API::Jobtracker::ThriftTaskState::KILLED_UNCLEAN,
      ]

      tasks = (call :getTaskList, types, states, '', limit, offset).tasks
      return tasks.collect{ |t| Task.new(self, t) }
    end

    # Change the job's scheduling priority. Unrecognized symbols fall back
    # to NORMAL, matching the original behavior.
    def set_priority(priority)
      prio =
        case priority
        when :very_high then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_HIGH
        when :high then Hadoop::API::Jobtracker::ThriftJobPriority::HIGH
        when :normal then Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
        when :low then Hadoop::API::Jobtracker::ThriftJobPriority::LOW
        when :very_low then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_LOW
        else Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
        end
      call :setJobPriority, prio
    end

    # Ask the JobTracker to kill this job.
    def kill!
      call :killJob
    end

    private

    # Delegate to the client's private RPC helper, always inserting this
    # job's id as the first RPC argument after the request context.
    def call(method, *args)
      @cln.send :call, method, @thrift_job.jobID, *args
    end
  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'nokogiri'   # NOTE(review): appears unused in this file — confirm before removing

module ElephantDriver

  # Read-only wrapper around a thrift task structure returned by the
  # JobTracker (see Job#tasks).
  class Task
    # cln         - the object that produced this task (a Job)
    # thrift_task - the raw thrift task structure
    def initialize(cln, thrift_task)
      @cln = cln
      @thrift_task = thrift_task
    end

    # Simple readers delegating straight to the thrift structure.
    {
      :start_time       => :startTime,
      :exec_start_time  => :execStartTime,
      :exec_finish_time => :execFinishTime,
      :progress         => :progress,
      :failed?          => :failed,
      :completed?       => :complete,
    }.each do |reader, field|
      define_method(reader) { @thrift_task.send(field) }
    end

    # Task-level counter values grouped by counter group name:
    #   { group_name => { counter_name => value } }
    def counters
      @thrift_task.counters.groups.each_with_object({}) do |group, by_group|
        by_group[group.name] = group.counters.each_with_object({}) do |(name, counter), values|
          values[name] = counter.value
        end
      end
    end
  end

end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
/*
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Common declarations for Hadoop Thrift interfaces
 *
 * NOTE(review): this file is vendored from the Cloudera Hue repository
 * (it is re-downloaded by the Rakefile's 'thrift_gen' task). Regenerate
 * rather than hand-editing, and never renumber field ids: they define
 * the wire format.
 */

/*
 * Namespaces for generated code. The idea is to keep code generated by
 * Thrift under a 'hadoop.api' namespace, so that a higher-level set of
 * functions and classes may be defined under 'hadoop'.
 */

namespace cpp hadoop.api
namespace csharp Hadoop.API
namespace java org.apache.hadoop.thriftfs.api
namespace perl Hadoop.API
namespace php hadoop_api
namespace py hadoop.api.common
namespace rb Hadoop.API

/** Generic I/O error */
exception IOException {
  /** Error message. */
  1: string msg,

  /** Textual representation of the call stack. */
  2: string stack

  /** The Java class of the Exception (may be a subclass) */
  3: string clazz
}

/**
 * Information about the compilation version of this server
 *
 * NOTE(review): field id 3 is skipped upstream; keep the gap for wire
 * compatibility.
 */
struct VersionInfo {
  1: string version
  2: string revision
  4: string compileDate
  5: string compilingUser
  6: string url
  7: string buildVersion
}


/** A single stack frame in a stack dump */
struct StackTraceElement {
  1: string className
  2: string fileName
  3: i32 lineNumber
  4: string methodName
  5: bool isNativeMethod
  6: string stringRepresentation
}

/** Info about a thread with its corresponding stack trace */
struct ThreadStackTrace {
  1: string threadName
  2: string threadStringRepresentation
  3: bool isDaemon

  4: list<StackTraceElement> stackTrace;
}

/**
 * Memory available via java.lang.Runtime
 */
struct RuntimeInfo {
  1:i64 totalMemory
  2:i64 freeMemory
  3:i64 maxMemory
}

/**
 * Context options for every request.
 */
struct RequestContext {
  /**
   * This map turns into a Configuration object in the server and
   * is currently used to construct a UserGroupInformation to
   * authenticate this request.
   */
  1:map<string, string> confOptions
}

/* NOTE(review): field ids start at 2 upstream; keep the gap. */
struct MetricsRecord {
  2: map<string, string> tags
  3: map<string, i64> metrics
}

struct MetricsContext {
  1: string name
  2: bool isMonitoring
  3: i32 period

  4: map<string, list<MetricsRecord>> records
}

struct ThriftDelegationToken {
  1: binary delegationTokenBytes
}

/* Base service inherited by the per-daemon services (e.g. the jobtracker
 * service); the request context is deliberately field id 10 on every call. */
service HadoopServiceBase {
  /** Return the version information for this server */
  VersionInfo getVersionInfo(10:RequestContext ctx);

  RuntimeInfo getRuntimeInfo(10:RequestContext ctx);

  list<ThreadStackTrace> getThreadDump(10:RequestContext ctx);

  list<MetricsContext> getAllMetrics(10:RequestContext ctx)
      throws (1:IOException err);

  MetricsContext getMetricsContext(10:RequestContext ctx, 1:string contextName)
      throws (1:IOException err);
}
|
129
|
+
|