elephant-driver 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # About
2
+
3
+ 'elephant-driver' is a Ruby library to communicate with Hadoop daemons.
4
+
5
+ # Notice
6
+
7
+ Currently, only communicating with JobTracker is supported. Other daemons (TaskTracker, NameNode, DataNode) are not supported yet.
8
+
9
+ # Requirements
10
+
11
+ * thrift
12
+ * nokogiri
13
+
14
+ # Setup
15
+
16
+ This library assumes that you're using CDH3 (Cloudera Distribution for Hadoop, version 3).
17
+
18
+ You first need to install the 'hue-plugins' package at the JobTracker node. Then, the following settings need to be included in your mapred-site.xml.
19
+
20
+ ```xml
21
+ <!-- Enable Hue plugins -->
22
+ <property>
23
+ <name>mapred.jobtracker.plugins</name>
24
+ <value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
25
+ <description>Comma-separated list of jobtracker plug-ins to be activated.</description>
26
+ </property>
27
+ <property>
28
+ <name>jobtracker.thrift.address</name>
29
+ <value>0.0.0.0:9290</value>
30
+ </property>
31
+ ```
32
+
33
+ # Usage
34
+
35
+ See the spec/ directory for example usage.
36
+
37
+ ```ruby
38
+ @cln = ElephantDriver::Client.new($HOST, $PORT)
39
+ @cln.jobs.each { |j|
40
+ j.tasks.each { |t|
41
+ t.counters
42
+ }
43
+ }
44
+ ```
data/Rakefile ADDED
@@ -0,0 +1,75 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/clean'
4
+
5
+ begin
6
+ require 'jeweler'
7
+ Jeweler::Tasks.new do |gemspec|
8
+ gemspec.name = "elephant-driver"
9
+ gemspec.summary = "Ruby library for managing Hadoop clusters"
10
+ gemspec.author = "Kazuki Ohta"
11
+ gemspec.email = "kazuki.ohta@gmail.com"
12
+ #gemspec.homepage = "http://.../"
13
+ gemspec.has_rdoc = false
14
+ gemspec.require_paths = ["lib"]
15
+ gemspec.add_dependency "thrift", "~> 0.7.0"
16
+ gemspec.add_dependency "nokogiri", ">= 1.5.0"
17
+ gemspec.test_files = Dir["test/**/*.rb", "test/**/*.sh"]
18
+ gemspec.files = Dir["bin/**/*", "lib/**/*", "test/**/*.rb"]
19
+ gemspec.executables = []
20
+ end
21
+ Jeweler::GemcutterTasks.new
22
+ rescue LoadError
23
+ puts "Jeweler not available. Install it with: gem install jeweler"
24
+ end
25
+
26
+ ############################
27
+
28
+ require 'spec/rake/spectask'
29
+
30
+ desc "Run all specs"
31
+ Spec::Rake::SpecTask.new('spec') do |t|
32
+ t.spec_opts = ['--colour --format progress --loadby mtime --reverse']
33
+ t.spec_files = FileList['spec/*_spec.rb']
34
+ end
35
+
36
+ desc "Run all examples with RCov"
37
+ Spec::Rake::SpecTask.new('rcov') do |t|
38
+ t.spec_files = FileList['spec/*_spec.rb']
39
+ t.rcov = true
40
+ t.rcov_opts = ['--exclude', 'examples']
41
+ end
42
+
43
+ # task :default => :spec
44
+
45
+ ############################
46
+
47
+ task "thrift_gen" do
48
+ system "rm -f common.thrift jobtracker.thrift"
49
+ system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/common.thrift"
50
+ system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/jobtracker.thrift"
51
+ system "mv common.thrift lib/elephant-driver/thrift/"
52
+ system "mv jobtracker.thrift lib/elephant-driver/thrift/"
53
+ system "mkdir -p tmp"
54
+ system "thrift --gen rb -o tmp lib/elephant-driver/thrift/common.thrift"
55
+ system "thrift --gen rb -o tmp lib/elephant-driver/thrift/jobtracker.thrift"
56
+ system "mv tmp/gen-rb/* lib/elephant-driver/thrift"
57
+ system "rm -fR tmp"
58
+ end
59
+
60
+ VERSION_FILE = "lib/elephant-driver/version.rb"
61
+
62
+ file VERSION_FILE => ["VERSION"] do |t|
63
+ version = File.read("VERSION").strip
64
+ File.open(VERSION_FILE, "w") {|f|
65
+ f.write <<EOF
66
+ module ElephantDriver
67
+
68
+ VERSION = '#{version}'
69
+
70
+ end
71
+ EOF
72
+ }
73
+ end
74
+
75
+ task :default => [VERSION_FILE, :build]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,19 @@
1
+ require 'thrift'
2
+
3
+ # thrift-generated files
4
+ $:.unshift File.join(File.dirname(__FILE__), 'elephant-driver', 'thrift')
5
+ [
6
+ 'common_types',
7
+ 'common_constants',
8
+ 'hadoop_service_base',
9
+ 'jobtracker_types',
10
+ 'jobtracker_constants',
11
+ 'jobtracker',
12
+ ].each { |fn|
13
+ require File.join(File.dirname(__FILE__), 'elephant-driver', 'thrift', fn)
14
+ }
15
+
16
+ # library files
17
+ [ 'client', 'task', 'job', 'tracker' ].each { |fn|
18
+ require File.join(File.dirname(__FILE__), 'elephant-driver', fn)
19
+ }
@@ -0,0 +1,69 @@
1
+ module ElephantDriver
2
+
3
+ class Client
4
+ def initialize(host, port=9290, user='mapred', timeout=30)
5
+ sock = Thrift::Socket.new host, port
6
+ sock.timeout = timeout * 1000
7
+
8
+ @transport = Thrift::BufferedTransport.new sock
9
+ @transport.open
10
+
11
+ # 2011/08/23 Kazuki Ohta <kazuki.ohta@gmail.com>
12
+ # explicitly specify TCP_NODELAY for low-latency communication.
13
+ raw_sock = sock.to_io
14
+ raw_sock.setsockopt Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1
15
+
16
+ protocol = Thrift::BinaryProtocol.new @transport
17
+ @client = Hadoop::API::Jobtracker::Jobtracker::Client.new protocol
18
+ options = { 'effective_user' => user }
19
+ @ctx = Hadoop::API::RequestContext.new(:confOptions => options)
20
+ end
21
+
22
+ # Jobs
23
+ def jobs(status=:running)
24
+ ret =
25
+ case status
26
+ when :running then call :getRunningJobs
27
+ when :completed then call :getCompletedJobs
28
+ when :failed then call :getFailedJobs
29
+ when :killed then call :getKilledJobs
30
+ else call :getAllJobs
31
+ end
32
+ ret.jobs.collect{ |j| Job.new(self, j) }
33
+ end
34
+
35
+ def get_job(job_id)
36
+ Job.new self, call(:getJob, job_id)
37
+ end
38
+
39
+ # Trackers
40
+ def trackers(status=:active)
41
+ ret =
42
+ case status
43
+ when :active then call :getActiveTrackers
44
+ when :blacklisted then call :getBlacklistedTrackers
45
+ else call :getAllTrackers
46
+ end
47
+ ret.trackers.collect{ |t| Tracker.new(self, t) }
48
+ end
49
+
50
+ def get_tracker(name)
51
+ Tracker.new(self, (call :getTracker, name))
52
+ end
53
+
54
+ # Tasks
55
+ def tasks
56
+ end
57
+
58
+ # Status
59
+ def status
60
+ call :getClusterStatus
61
+ end
62
+
63
+ private
64
+ def call(method, *args)
65
+ @client.send method, @ctx, *args
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,127 @@
1
+ require 'nokogiri'
2
+
3
+ module ElephantDriver
4
+
5
+ class Job
6
+ STATES = [ :running, :succeeded, :failed, :prep, :killed ]
7
+
8
+ def initialize(cln, thrift_job)
9
+ @cln = cln
10
+ @thrift_job = thrift_job
11
+ end
12
+
13
+ def job_id
14
+ @thrift_job.jobID
15
+ end
16
+
17
+ def user
18
+ @thrift_job.status.user
19
+ end
20
+
21
+ def completed?
22
+ state != :running
23
+ end
24
+
25
+ def start_time
26
+ @thrift_job.startTime
27
+ end
28
+
29
+ def launch_time
30
+ @thrift_job.launchTime
31
+ end
32
+
33
+ def finish_time
34
+ @thrift_job.finishTime
35
+ end
36
+
37
+ def state
38
+ STATES[@thrift_job.status.runState - 1]
39
+ end
40
+
41
+ def map_progress
42
+ @thrift_job.status.mapProgress
43
+ end
44
+
45
+ def reduce_progress
46
+ @thrift_job.status.reduceProgress
47
+ end
48
+
49
+ def cleanup_progress
50
+ @thrift_job.status.cleanupProgress
51
+ end
52
+
53
+ def setup_progress
54
+ @thrift_job.status.setupProgress
55
+ end
56
+
57
+ def progress
58
+ (@thrift_job.status.mapProgress + @thrift_job.status.reduceProgress) / 2.0
59
+ end
60
+
61
+ def config_params
62
+ xml = call :getJobConfXML
63
+ #@parsed_config ||= Nokogiri::XML(xml).xpath("//property").inject({}) { |props, xprop|
64
+ # props[xprop.xpath("./name").text] = xprop.xpath("./value").text
65
+ # props
66
+ #}
67
+ {}
68
+ end
69
+
70
+ def counters
71
+ counters = {}
72
+ ret = call :getJobCounters
73
+ ret.groups.each { |g|
74
+ h = {}
75
+ g.counters.each { |name, c| h[name] = c.value }
76
+ counters[g.name] = h
77
+ }
78
+ counters
79
+ end
80
+
81
+ def tasks
82
+ types = [
83
+ Hadoop::API::Jobtracker::ThriftTaskType::MAP,
84
+ Hadoop::API::Jobtracker::ThriftTaskType::REDUCE,
85
+ Hadoop::API::Jobtracker::ThriftTaskType::JOB_SETUP,
86
+ Hadoop::API::Jobtracker::ThriftTaskType::JOB_CLEANUP,
87
+ Hadoop::API::Jobtracker::ThriftTaskType::TASK_CLEANUP,
88
+ ]
89
+ states = [
90
+ Hadoop::API::Jobtracker::ThriftTaskState::RUNNING,
91
+ Hadoop::API::Jobtracker::ThriftTaskState::SUCCEEDED,
92
+ Hadoop::API::Jobtracker::ThriftTaskState::FAILED,
93
+ Hadoop::API::Jobtracker::ThriftTaskState::UNASSIGNED,
94
+ Hadoop::API::Jobtracker::ThriftTaskState::KILLED,
95
+ Hadoop::API::Jobtracker::ThriftTaskState::COMMIT_PENDING,
96
+ Hadoop::API::Jobtracker::ThriftTaskState::FAILED_UNCLEAN,
97
+ Hadoop::API::Jobtracker::ThriftTaskState::KILLED_UNCLEAN,
98
+ ]
99
+
100
+ tasks = (call :getTaskList, types, states, '', 10000, 0).tasks
101
+ return tasks.collect{ |t| Task.new(self, t) }
102
+ end
103
+
104
+ def set_priority(priority)
105
+ prio =
106
+ case priority
107
+ when :very_high then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_HIGH
108
+ when :high then Hadoop::API::Jobtracker::ThriftJobPriority::HIGH
109
+ when :normal then Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
110
+ when :low then Hadoop::API::Jobtracker::ThriftJobPriority::LOW
111
+ when :very_low then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_LOW
112
+ else Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
113
+ end
114
+ call :setJobPriority, prio
115
+ end
116
+
117
+ def kill!
118
+ call :killJob
119
+ end
120
+
121
+ private
122
+ def call(method, *args)
123
+ @cln.send :call, method, @thrift_job.jobID, *args
124
+ end
125
+ end
126
+
127
+ end
@@ -0,0 +1,46 @@
1
+ require 'nokogiri'
2
+
3
+ module ElephantDriver
4
+
5
+ class Task
6
+ def initialize(cln, thrift_task)
7
+ @cln = cln
8
+ @thrift_task = thrift_task
9
+ end
10
+
11
+ def start_time
12
+ @thrift_task.startTime
13
+ end
14
+
15
+ def exec_start_time
16
+ @thrift_task.execStartTime
17
+ end
18
+
19
+ def exec_finish_time
20
+ @thrift_task.execFinishTime
21
+ end
22
+
23
+ def progress
24
+ @thrift_task.progress
25
+ end
26
+
27
+ def failed?
28
+ @thrift_task.failed
29
+ end
30
+
31
+ def completed?
32
+ @thrift_task.complete
33
+ end
34
+
35
+ def counters
36
+ counters = {}
37
+ @thrift_task.counters.groups.each { |g|
38
+ h = {}
39
+ g.counters.each { |name, c| h[name] = c.value }
40
+ counters[g.name] = h
41
+ }
42
+ counters
43
+ end
44
+ end
45
+
46
+ end
@@ -0,0 +1,129 @@
1
+ /*
2
+ * Licensed to Cloudera, Inc. under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. Cloudera, Inc. licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ /*
20
+ * Common declarations for Hadoop Thrift interfaces
21
+ */
22
+
23
+ /*
24
+ * Namespaces for generated code. The idea is to keep code generated by
25
+ * Thrift under a 'hadoop.api' namespace, so that a higher-level set of
26
+ * functions and classes may be defined under 'hadoop'.
27
+ */
28
+
29
+ namespace cpp hadoop.api
30
+ namespace csharp Hadoop.API
31
+ namespace java org.apache.hadoop.thriftfs.api
32
+ namespace perl Hadoop.API
33
+ namespace php hadoop_api
34
+ namespace py hadoop.api.common
35
+ namespace rb Hadoop.API
36
+
37
+ /** Generic I/O error */
38
+ exception IOException {
39
+ /** Error message. */
40
+ 1: string msg,
41
+
42
+ /** Textual representation of the call stack. */
43
+ 2: string stack
44
+
45
+ /** The Java class of the Exception (may be a subclass) */
46
+ 3: string clazz
47
+ }
48
+
49
+ /**
50
+ * Information about the compilation version of this server
51
+ */
52
+ struct VersionInfo {
53
+ 1: string version
54
+ 2: string revision
55
+ 4: string compileDate
56
+ 5: string compilingUser
57
+ 6: string url
58
+ 7: string buildVersion
59
+ }
60
+
61
+
62
+ /** A single stack frame in a stack dump */
63
+ struct StackTraceElement {
64
+ 1: string className
65
+ 2: string fileName
66
+ 3: i32 lineNumber
67
+ 4: string methodName
68
+ 5: bool isNativeMethod
69
+ 6: string stringRepresentation
70
+ }
71
+
72
+ /** Info about a thread with its corresponding stack trace */
73
+ struct ThreadStackTrace {
74
+ 1: string threadName
75
+ 2: string threadStringRepresentation
76
+ 3: bool isDaemon
77
+
78
+ 4: list<StackTraceElement> stackTrace;
79
+ }
80
+
81
+ /**
82
+ * Memory available via java.lang.Runtime
83
+ */
84
+ struct RuntimeInfo {
85
+ 1:i64 totalMemory
86
+ 2:i64 freeMemory
87
+ 3:i64 maxMemory
88
+ }
89
+
90
+ /**
91
+ * Context options for every request.
92
+ */
93
+ struct RequestContext {
94
+ /**
95
+ * This map turns into a Configuration object in the server and
96
+ * is currently used to construct a UserGroupInformation to
97
+ * authenticate this request.
98
+ */
99
+ 1:map<string, string> confOptions
100
+ }
101
+
102
+ struct MetricsRecord {
103
+ 2: map<string, string> tags
104
+ 3: map<string, i64> metrics
105
+ }
106
+
107
+ struct MetricsContext {
108
+ 1: string name
109
+ 2: bool isMonitoring
110
+ 3: i32 period
111
+
112
+ 4: map<string, list<MetricsRecord>> records
113
+ }
114
+
115
+ struct ThriftDelegationToken {
116
+ 1: binary delegationTokenBytes
117
+ }
118
+
119
+ service HadoopServiceBase {
120
+ /** Return the version information for this server */
121
+ VersionInfo getVersionInfo(10:RequestContext ctx);
122
+ RuntimeInfo getRuntimeInfo(10:RequestContext ctx);
123
+ list<ThreadStackTrace> getThreadDump(10:RequestContext ctx);
124
+ list<MetricsContext> getAllMetrics(10:RequestContext ctx)
125
+ throws (1:IOException err);
126
+ MetricsContext getMetricsContext(10:RequestContext ctx, 1:string contextName)
127
+ throws (1:IOException err);
128
+ }
129
+