elephant-driver 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +44 -0
- data/Rakefile +75 -0
- data/VERSION +1 -0
- data/lib/elephant-driver.rb +19 -0
- data/lib/elephant-driver/client.rb +69 -0
- data/lib/elephant-driver/job.rb +127 -0
- data/lib/elephant-driver/task.rb +46 -0
- data/lib/elephant-driver/thrift/common.thrift +129 -0
- data/lib/elephant-driver/thrift/common_constants.rb +12 -0
- data/lib/elephant-driver/thrift/common_types.rb +209 -0
- data/lib/elephant-driver/thrift/hadoop_service_base.rb +314 -0
- data/lib/elephant-driver/thrift/jobtracker.rb +1466 -0
- data/lib/elephant-driver/thrift/jobtracker.thrift +478 -0
- data/lib/elephant-driver/thrift/jobtracker_constants.rb +14 -0
- data/lib/elephant-driver/thrift/jobtracker_types.rb +735 -0
- data/lib/elephant-driver/tracker.rb +10 -0
- data/lib/elephant-driver/version.rb +5 -0
- metadata +85 -0
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# About
|
2
|
+
|
3
|
+
'elephant-driver' is a Ruby library to communicate with Hadoop daemons.
|
4
|
+
|
5
|
+
# Notice
|
6
|
+
|
7
|
+
Currently, only communicating with JobTracker is supported. Other daemons (TaskTracker, NameNode, DataNode) are not supported yet.
|
8
|
+
|
9
|
+
# Requirements
|
10
|
+
|
11
|
+
* thrift
|
12
|
+
* nokogiri
|
13
|
+
|
14
|
+
# Setup
|
15
|
+
|
16
|
+
This library assumes that you're using CDH3 (Cloudera Distribution for Hadoop, version 3).
|
17
|
+
|
18
|
+
You first need to install the 'hue-plugins' package on the JobTracker node. Then, the following settings need to be included in your mapred-site.xml.
|
19
|
+
|
20
|
+
```xml
|
21
|
+
<!-- Enable Hue plugins -->
|
22
|
+
<property>
|
23
|
+
<name>mapred.jobtracker.plugins</name>
|
24
|
+
<value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
|
25
|
+
<description>Comma-separated list of jobtracker plug-ins to be activated.</description>
|
26
|
+
</property>
|
27
|
+
<property>
|
28
|
+
<name>jobtracker.thrift.address</name>
|
29
|
+
<value>0.0.0.0:9290</value>
|
30
|
+
</property>
|
31
|
+
```
|
32
|
+
|
33
|
+
# Usage
|
34
|
+
|
35
|
+
See spec/ directory for the example usage.
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
@cln = ElephantDriver::Client.new($HOST, $PORT)
|
39
|
+
@cln.jobs.each { |j|
|
40
|
+
j.tasks.each { |t|
|
41
|
+
t.counters
|
42
|
+
}
|
43
|
+
}
|
44
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'rake'
require 'rake/testtask'
require 'rake/clean'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "elephant-driver"
    # Fixed typo in the published summary: "libray" -> "library".
    gemspec.summary = "Ruby library for managing Hadoop clusters"
    gemspec.author = "Kazuki Ohta"
    gemspec.email = "kazuki.ohta@gmail.com"
    #gemspec.homepage = "http://.../"
    gemspec.has_rdoc = false
    gemspec.require_paths = ["lib"]
    gemspec.add_dependency "thrift", "~> 0.7.0"
    gemspec.add_dependency "nokogiri", ">= 1.5.0"
    gemspec.test_files = Dir["test/**/*.rb", "test/**/*.sh"]
    gemspec.files = Dir["bin/**/*", "lib/**/*", "test/**/*.rb"]
    gemspec.executables = []
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end

############################

# 'spec/rake/spectask' comes from the legacy RSpec 1.x gem. Guard the require
# (like the jeweler require above) so that environments without RSpec 1.x can
# still run the packaging tasks instead of crashing on load.
begin
  require 'spec/rake/spectask'

  desc "Run all specs"
  Spec::Rake::SpecTask.new('spec') do |t|
    t.spec_opts = ['--colour --format progress --loadby mtime --reverse']
    t.spec_files = FileList['spec/*_spec.rb']
  end

  desc "Run all examples with RCov"
  Spec::Rake::SpecTask.new('rcov') do |t|
    t.spec_files = FileList['spec/*_spec.rb']
    t.rcov = true
    t.rcov_opts = ['--exclude', 'examples']
  end
rescue LoadError
  puts "RSpec 1.x not available. Install it with: gem install rspec -v '< 2'"
end

# task :default => :spec

############################

desc "Re-fetch the thrift IDL files from the upstream Hue repository and regenerate the Ruby bindings"
task "thrift_gen" do
  system "rm -f common.thrift jobtracker.thrift"
  system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/common.thrift"
  system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/jobtracker.thrift"
  system "mv common.thrift lib/elephant-driver/thrift/"
  system "mv jobtracker.thrift lib/elephant-driver/thrift/"
  system "mkdir -p tmp"
  system "thrift --gen rb -o tmp lib/elephant-driver/thrift/common.thrift"
  system "thrift --gen rb -o tmp lib/elephant-driver/thrift/jobtracker.thrift"
  system "mv tmp/gen-rb/* lib/elephant-driver/thrift"
  system "rm -fR tmp"
end

VERSION_FILE = "lib/elephant-driver/version.rb"

# Regenerate version.rb whenever the top-level VERSION file changes.
file VERSION_FILE => ["VERSION"] do |t|
  version = File.read("VERSION").strip
  File.open(VERSION_FILE, "w") {|f|
    f.write <<EOF
module ElephantDriver

VERSION = '#{version}'

end
EOF
  }
end

task :default => [VERSION_FILE, :build]
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'thrift'

# Load order matters: the thrift-generated sources must be on the load path
# and required before the hand-written library files that use them.
thrift_dir = File.join(File.dirname(__FILE__), 'elephant-driver', 'thrift')
$:.unshift thrift_dir

# thrift-generated files
%w[
  common_types
  common_constants
  hadoop_service_base
  jobtracker_types
  jobtracker_constants
  jobtracker
].each do |name|
  require File.join(thrift_dir, name)
end

# library files
%w[client task job tracker].each do |name|
  require File.join(File.dirname(__FILE__), 'elephant-driver', name)
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module ElephantDriver

  # Thrift client for the JobTracker thrift plugin shipped with Hue
  # (org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin). Every RPC carries a
  # RequestContext naming the effective Hadoop user.
  class Client
    # host    - JobTracker hostname.
    # port    - thrift plugin port (jobtracker.thrift.address, default 9290).
    # user    - effective Hadoop user sent with every request.
    # timeout - socket timeout in seconds.
    def initialize(host, port=9290, user='mapred', timeout=30)
      sock = Thrift::Socket.new host, port
      # Thrift::Socket#timeout is expressed in seconds (it is handed straight
      # to IO.select). The original code multiplied by 1000, turning the
      # default 30-second timeout into ~8 hours.
      sock.timeout = timeout

      @transport = Thrift::BufferedTransport.new sock
      @transport.open

      # 2011/08/23 Kazuki Ohta <kazuki.ohta@gmail.com>
      # explicitly specify TCP_NODELAY for low-latency communication.
      raw_sock = sock.to_io
      raw_sock.setsockopt Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1

      protocol = Thrift::BinaryProtocol.new @transport
      @client = Hadoop::API::Jobtracker::Jobtracker::Client.new protocol
      options = { 'effective_user' => user }
      @ctx = Hadoop::API::RequestContext.new(:confOptions => options)
    end

    # Jobs
    # Returns Job wrappers for jobs in the given status
    # (:running, :completed, :failed, :killed; anything else means all jobs).
    def jobs(status=:running)
      ret =
        case status
        when :running   then call :getRunningJobs
        when :completed then call :getCompletedJobs
        when :failed    then call :getFailedJobs
        when :killed    then call :getKilledJobs
        else call :getAllJobs
        end
      ret.jobs.collect{ |j| Job.new(self, j) }
    end

    # Looks up a single job by its JobTracker job id.
    def get_job(job_id)
      Job.new self, call(:getJob, job_id)
    end

    # Trackers
    # Returns Tracker wrappers (:active, :blacklisted; anything else => all).
    def trackers(status=:active)
      ret =
        case status
        when :active      then call :getActiveTrackers
        when :blacklisted then call :getBlacklistedTrackers
        else call :getAllTrackers
        end
      ret.trackers.collect{ |t| Tracker.new(self, t) }
    end

    # Looks up a single TaskTracker by name.
    def get_tracker(name)
      Tracker.new(self, (call :getTracker, name))
    end

    # Tasks
    # NOTE(review): intentionally empty in the released version; kept for
    # interface compatibility. Use Job#tasks instead.
    def tasks
    end

    # Status
    # Raw thrift cluster status struct.
    def status
      call :getClusterStatus
    end

    private
    # Invokes a thrift RPC, prepending the shared RequestContext.
    def call(method, *args)
      @client.send method, @ctx, *args
    end
  end

end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module ElephantDriver

  # Read-mostly wrapper around a thrift ThriftJobInProgress struct returned
  # by Client#jobs / Client#get_job. RPC-backed methods route through the
  # owning Client with this job's id prepended.
  class Job
    # Maps the 1-based thrift runState enum onto symbols (see #state).
    STATES = [ :running, :succeeded, :failed, :prep, :killed ]

    def initialize(cln, thrift_job)
      @cln = cln
      @thrift_job = thrift_job
    end

    def job_id
      @thrift_job.jobID
    end

    def user
      @thrift_job.status.user
    end

    # True for any terminal-or-pending state other than :running.
    def completed?
      state != :running
    end

    def start_time
      @thrift_job.startTime
    end

    def launch_time
      @thrift_job.launchTime
    end

    def finish_time
      @thrift_job.finishTime
    end

    # Symbolic run state; runState is a 1-based thrift enum, hence the -1.
    def state
      STATES[@thrift_job.status.runState - 1]
    end

    def map_progress
      @thrift_job.status.mapProgress
    end

    def reduce_progress
      @thrift_job.status.reduceProgress
    end

    def cleanup_progress
      @thrift_job.status.cleanupProgress
    end

    def setup_progress
      @thrift_job.status.setupProgress
    end

    # Overall progress as the mean of map and reduce progress (0.0 .. 1.0).
    def progress
      (@thrift_job.status.mapProgress + @thrift_job.status.reduceProgress) / 2.0
    end

    # Fetches the job's configuration XML and returns it as a
    # { property-name => value } hash, memoized per Job instance.
    # The released version fetched the XML, left the parsing commented out,
    # and always returned {}; the intended parsing is now enabled.
    def config_params
      @parsed_config ||= begin
        xml = call :getJobConfXML
        Nokogiri::XML(xml).xpath("//property").inject({}) { |props, xprop|
          props[xprop.xpath("./name").text] = xprop.xpath("./value").text
          props
        }
      end
    end

    # Job-level counters as { group-name => { counter-name => value } }.
    def counters
      counters = {}
      ret = call :getJobCounters
      ret.groups.each { |g|
        h = {}
        g.counters.each { |name, c| h[name] = c.value }
        counters[g.name] = h
      }
      counters
    end

    # Fetches up to 10000 tasks of every type/state for this job and wraps
    # them in Task objects.
    def tasks
      types = [
        Hadoop::API::Jobtracker::ThriftTaskType::MAP,
        Hadoop::API::Jobtracker::ThriftTaskType::REDUCE,
        Hadoop::API::Jobtracker::ThriftTaskType::JOB_SETUP,
        Hadoop::API::Jobtracker::ThriftTaskType::JOB_CLEANUP,
        Hadoop::API::Jobtracker::ThriftTaskType::TASK_CLEANUP,
      ]
      states = [
        Hadoop::API::Jobtracker::ThriftTaskState::RUNNING,
        Hadoop::API::Jobtracker::ThriftTaskState::SUCCEEDED,
        Hadoop::API::Jobtracker::ThriftTaskState::FAILED,
        Hadoop::API::Jobtracker::ThriftTaskState::UNASSIGNED,
        Hadoop::API::Jobtracker::ThriftTaskState::KILLED,
        Hadoop::API::Jobtracker::ThriftTaskState::COMMIT_PENDING,
        Hadoop::API::Jobtracker::ThriftTaskState::FAILED_UNCLEAN,
        Hadoop::API::Jobtracker::ThriftTaskState::KILLED_UNCLEAN,
      ]

      tasks = (call :getTaskList, types, states, '', 10000, 0).tasks
      return tasks.collect{ |t| Task.new(self, t) }
    end

    # Changes the job's scheduling priority
    # (:very_high, :high, :normal, :low, :very_low; default :normal).
    def set_priority(priority)
      prio =
        case priority
        when :very_high then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_HIGH
        when :high      then Hadoop::API::Jobtracker::ThriftJobPriority::HIGH
        when :normal    then Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
        when :low       then Hadoop::API::Jobtracker::ThriftJobPriority::LOW
        when :very_low  then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_LOW
        else Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
        end
      call :setJobPriority, prio
    end

    # Asks the JobTracker to kill this job.
    def kill!
      call :killJob
    end

    private
    # Routes an RPC through the owning Client, inserting this job's id as
    # the first argument after the request context.
    def call(method, *args)
      @cln.send :call, method, @thrift_job.jobID, *args
    end
  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module ElephantDriver

  # Read-only wrapper around a thrift task-in-progress struct, as produced
  # by Job#tasks.
  class Task
    def initialize(cln, thrift_task)
      @cln = cln
      @thrift_task = thrift_task
    end

    # Plain timestamp accessors are simple delegations onto the underlying
    # thrift struct, so generate them in one place.
    {
      :start_time       => :startTime,
      :exec_start_time  => :execStartTime,
      :exec_finish_time => :execFinishTime,
      :progress         => :progress,
    }.each do |reader, thrift_field|
      define_method(reader) { @thrift_task.send(thrift_field) }
    end

    # Whether the task has failed.
    def failed?
      @thrift_task.failed
    end

    # Whether the task has completed.
    def completed?
      @thrift_task.complete
    end

    # Task counters as { group-name => { counter-name => value } }.
    def counters
      @thrift_task.counters.groups.each_with_object({}) do |group, by_group|
        by_group[group.name] =
          group.counters.each_with_object({}) do |(counter_name, counter), values|
            values[counter_name] = counter.value
          end
      end
    end
  end

end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
/*
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Common declarations for Hadoop Thrift interfaces
 */

// NOTE(review): this file is vendored verbatim from Cloudera Hue (it is
// re-fetched by the Rakefile's "thrift_gen" task); regenerate rather than
// editing by hand.

/*
 * Namespaces for generated code. The idea is to keep code generated by
 * Thrift under a 'hadoop.api' namespace, so that a higher-level set of
 * functions and classes may be defined under 'hadoop'.
 */

namespace cpp hadoop.api
namespace csharp Hadoop.API
namespace java org.apache.hadoop.thriftfs.api
namespace perl Hadoop.API
namespace php hadoop_api
namespace py hadoop.api.common
namespace rb Hadoop.API

/** Generic I/O error */
exception IOException {
  /** Error message. */
  1: string msg,

  /** Textual representation of the call stack. */
  2: string stack

  /** The Java class of the Exception (may be a subclass) */
  3: string clazz
}

/**
 * Information about the compilation version of this server
 */
struct VersionInfo {
  1: string version
  2: string revision
  // NOTE(review): field id 3 is absent upstream — presumably retired; do
  // not reuse it for a new field.
  4: string compileDate
  5: string compilingUser
  6: string url
  7: string buildVersion
}


/** A single stack frame in a stack dump */
struct StackTraceElement {
  1: string className
  2: string fileName
  3: i32 lineNumber
  4: string methodName
  5: bool isNativeMethod
  6: string stringRepresentation
}

/** Info about a thread with its corresponding stack trace */
struct ThreadStackTrace {
  1: string threadName
  2: string threadStringRepresentation
  3: bool isDaemon

  4: list<StackTraceElement> stackTrace;
}

/**
 * Memory available via java.lang.Runtime
 */
struct RuntimeInfo {
  1:i64 totalMemory
  2:i64 freeMemory
  3:i64 maxMemory
}

/**
 * Context options for every request.
 */
struct RequestContext {
  /**
   * This map turns into a Configuration object in the server and
   * is currently used to construct a UserGroupInformation to
   * authenticate this request.
   */
  1:map<string, string> confOptions
}

struct MetricsRecord {
  // NOTE(review): field id 1 is absent upstream — presumably retired; do
  // not reuse it for a new field.
  2: map<string, string> tags
  3: map<string, i64> metrics
}

struct MetricsContext {
  1: string name
  2: bool isMonitoring
  3: i32 period

  4: map<string, list<MetricsRecord>> records
}

struct ThriftDelegationToken {
  1: binary delegationTokenBytes
}

service HadoopServiceBase {
  /** Return the version information for this server */
  // NOTE(review): the context is deliberately passed with the high field
  // id 10 throughout, leaving low ids free for method payload arguments.
  VersionInfo getVersionInfo(10:RequestContext ctx);
  RuntimeInfo getRuntimeInfo(10:RequestContext ctx);
  list<ThreadStackTrace> getThreadDump(10:RequestContext ctx);
  list<MetricsContext> getAllMetrics(10:RequestContext ctx)
    throws (1:IOException err);
  MetricsContext getMetricsContext(10:RequestContext ctx, 1:string contextName)
    throws (1:IOException err);
}
|
129
|
+
|