elephant-driver 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # About
2
+
3
+ 'elephant-driver' is a Ruby library to communicate with Hadoop daemons.
4
+
5
+ # Notice
6
+
7
+ Currently, only communicating with JobTracker is supported. Other daemons (TaskTracker, NameNode, DataNode) are not supported yet.
8
+
9
+ # Requirements
10
+
11
 + * thrift
12
+ * nokogiri
13
+
14
+ # Setup
15
+
16
+ This library assumes that you're using CDH3 (Cloudera Distribution for Hadoop, version 3).
17
+
18
 + You first need to install 'hue-plugins' package at the JobTracker node. Then, the following settings need to be included in your mapred-site.xml.
19
+
20
+ ```xml
21
+ <!-- Enable Hue plugins -->
22
+ <property>
23
+ <name>mapred.jobtracker.plugins</name>
24
+ <value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
25
+ <description>Comma-separated list of jobtracker plug-ins to be activated.</description>
26
+ </property>
27
+ <property>
28
+ <name>jobtracker.thrift.address</name>
29
+ <value>0.0.0.0:9290</value>
30
+ </property>
31
+ ```
32
+
33
+ # Usage
34
+
35
+ See spec/ directory for the example usage.
36
+
37
+ ```ruby
38
+ @cln = ElephantDriver::Client.new($HOST, $PORT)
39
+ @cln.jobs.each { |j|
40
+ j.tasks.each { |t|
41
+ t.counters
42
+ }
43
+ }
44
+ ```
data/Rakefile ADDED
@@ -0,0 +1,75 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/clean'
4
+
5
+ begin
6
+ require 'jeweler'
7
+ Jeweler::Tasks.new do |gemspec|
8
+ gemspec.name = "elephant-driver"
9
 + gemspec.summary = "Ruby library for managing Hadoop clusters"
10
+ gemspec.author = "Kazuki Ohta"
11
+ gemspec.email = "kazuki.ohta@gmail.com"
12
+ #gemspec.homepage = "http://.../"
13
+ gemspec.has_rdoc = false
14
+ gemspec.require_paths = ["lib"]
15
+ gemspec.add_dependency "thrift", "~> 0.7.0"
16
+ gemspec.add_dependency "nokogiri", ">= 1.5.0"
17
+ gemspec.test_files = Dir["test/**/*.rb", "test/**/*.sh"]
18
+ gemspec.files = Dir["bin/**/*", "lib/**/*", "test/**/*.rb"]
19
+ gemspec.executables = []
20
+ end
21
+ Jeweler::GemcutterTasks.new
22
+ rescue LoadError
23
+ puts "Jeweler not available. Install it with: gem install jeweler"
24
+ end
25
+
26
+ ############################
27
+
28
+ require 'spec/rake/spectask'
29
+
30
+ desc "Run all specs"
31
+ Spec::Rake::SpecTask.new('spec') do |t|
32
+ t.spec_opts = ['--colour --format progress --loadby mtime --reverse']
33
+ t.spec_files = FileList['spec/*_spec.rb']
34
+ end
35
+
36
+ desc "Run all examples with RCov"
37
+ Spec::Rake::SpecTask.new('rcov') do |t|
38
+ t.spec_files = FileList['spec/*_spec.rb']
39
+ t.rcov = true
40
+ t.rcov_opts = ['--exclude', 'examples']
41
+ end
42
+
43
+ # task :default => :spec
44
+
45
+ ############################
46
+
47
+ task "thrift_gen" do
48
+ system "rm -f common.thrift jobtracker.thrift"
49
+ system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/common.thrift"
50
+ system "wget https://raw.github.com/cloudera/hue/master/desktop/libs/hadoop/java/if/jobtracker.thrift"
51
+ system "mv common.thrift lib/elephant-driver/thrift/"
52
+ system "mv jobtracker.thrift lib/elephant-driver/thrift/"
53
+ system "mkdir -p tmp"
54
+ system "thrift --gen rb -o tmp lib/elephant-driver/thrift/common.thrift"
55
+ system "thrift --gen rb -o tmp lib/elephant-driver/thrift/jobtracker.thrift"
56
+ system "mv tmp/gen-rb/* lib/elephant-driver/thrift"
57
+ system "rm -fR tmp"
58
+ end
59
+
60
+ VERSION_FILE = "lib/elephant-driver/version.rb"
61
+
62
+ file VERSION_FILE => ["VERSION"] do |t|
63
+ version = File.read("VERSION").strip
64
+ File.open(VERSION_FILE, "w") {|f|
65
+ f.write <<EOF
66
+ module ElephantDriver
67
+
68
+ VERSION = '#{version}'
69
+
70
+ end
71
+ EOF
72
+ }
73
+ end
74
+
75
+ task :default => [VERSION_FILE, :build]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,19 @@
1
+ require 'thrift'
2
+
3
+ # thrift-generated files
4
+ $:.unshift File.join(File.dirname(__FILE__), 'elephant-driver', 'thrift')
5
+ [
6
+ 'common_types',
7
+ 'common_constants',
8
+ 'hadoop_service_base',
9
+ 'jobtracker_types',
10
+ 'jobtracker_constants',
11
+ 'jobtracker',
12
+ ].each { |fn|
13
+ require File.join(File.dirname(__FILE__), 'elephant-driver', 'thrift', fn)
14
+ }
15
+
16
+ # library files
17
+ [ 'client', 'task', 'job', 'tracker' ].each { |fn|
18
+ require File.join(File.dirname(__FILE__), 'elephant-driver', fn)
19
+ }
@@ -0,0 +1,69 @@
1
+ module ElephantDriver
2
+
3
+ class Client
4
+ def initialize(host, port=9290, user='mapred', timeout=30)
5
+ sock = Thrift::Socket.new host, port
6
+ sock.timeout = timeout * 1000
7
+
8
+ @transport = Thrift::BufferedTransport.new sock
9
+ @transport.open
10
+
11
+ # 2011/08/23 Kazuki Ohta <kazuki.ohta@gmail.com>
12
+ # explicitly specify TCP_NODELAY for low-latency communication.
13
+ raw_sock = sock.to_io
14
+ raw_sock.setsockopt Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1
15
+
16
+ protocol = Thrift::BinaryProtocol.new @transport
17
+ @client = Hadoop::API::Jobtracker::Jobtracker::Client.new protocol
18
+ options = { 'effective_user' => user }
19
+ @ctx = Hadoop::API::RequestContext.new(:confOptions => options)
20
+ end
21
+
22
+ # Jobs
23
+ def jobs(status=:running)
24
+ ret =
25
+ case status
26
+ when :running then call :getRunningJobs
27
+ when :completed then call :getCompletedJobs
28
+ when :failed then call :getFailedJobs
29
+ when :killed then call :getKilledJobs
30
+ else call :getAllJobs
31
+ end
32
+ ret.jobs.collect{ |j| Job.new(self, j) }
33
+ end
34
+
35
+ def get_job(job_id)
36
+ Job.new self, call(:getJob, job_id)
37
+ end
38
+
39
+ # Trackers
40
+ def trackers(status=:active)
41
+ ret =
42
+ case status
43
+ when :active then call :getActiveTrackers
44
+ when :blacklisted then call :getBlacklistedTrackers
45
+ else call :getAllTrackers
46
+ end
47
+ ret.trackers.collect{ |t| Tracker.new(self, t) }
48
+ end
49
+
50
+ def get_tracker(name)
51
+ Tracker.new(self, (call :getTracker, name))
52
+ end
53
+
54
+ # Tasks
55
+ def tasks
56
+ end
57
+
58
+ # Status
59
+ def status
60
+ call :getClusterStatus
61
+ end
62
+
63
+ private
64
+ def call(method, *args)
65
+ @client.send method, @ctx, *args
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,127 @@
1
+ require 'nokogiri'
2
+
3
+ module ElephantDriver
4
+
5
+ class Job
6
+ STATES = [ :running, :succeeded, :failed, :prep, :killed ]
7
+
8
+ def initialize(cln, thrift_job)
9
+ @cln = cln
10
+ @thrift_job = thrift_job
11
+ end
12
+
13
+ def job_id
14
+ @thrift_job.jobID
15
+ end
16
+
17
+ def user
18
+ @thrift_job.status.user
19
+ end
20
+
21
+ def completed?
22
+ state != :running
23
+ end
24
+
25
+ def start_time
26
+ @thrift_job.startTime
27
+ end
28
+
29
+ def launch_time
30
+ @thrift_job.launchTime
31
+ end
32
+
33
+ def finish_time
34
+ @thrift_job.finishTime
35
+ end
36
+
37
+ def state
38
+ STATES[@thrift_job.status.runState - 1]
39
+ end
40
+
41
+ def map_progress
42
+ @thrift_job.status.mapProgress
43
+ end
44
+
45
+ def reduce_progress
46
+ @thrift_job.status.reduceProgress
47
+ end
48
+
49
+ def cleanup_progress
50
+ @thrift_job.status.cleanupProgress
51
+ end
52
+
53
+ def setup_progress
54
+ @thrift_job.status.setupProgress
55
+ end
56
+
57
+ def progress
58
+ (@thrift_job.status.mapProgress + @thrift_job.status.reduceProgress) / 2.0
59
+ end
60
+
61
+ def config_params
62
+ xml = call :getJobConfXML
63
+ #@parsed_config ||= Nokogiri::XML(xml).xpath("//property").inject({}) { |props, xprop|
64
+ # props[xprop.xpath("./name").text] = xprop.xpath("./value").text
65
+ # props
66
+ #}
67
+ {}
68
+ end
69
+
70
+ def counters
71
+ counters = {}
72
+ ret = call :getJobCounters
73
+ ret.groups.each { |g|
74
+ h = {}
75
+ g.counters.each { |name, c| h[name] = c.value }
76
+ counters[g.name] = h
77
+ }
78
+ counters
79
+ end
80
+
81
+ def tasks
82
+ types = [
83
+ Hadoop::API::Jobtracker::ThriftTaskType::MAP,
84
+ Hadoop::API::Jobtracker::ThriftTaskType::REDUCE,
85
+ Hadoop::API::Jobtracker::ThriftTaskType::JOB_SETUP,
86
+ Hadoop::API::Jobtracker::ThriftTaskType::JOB_CLEANUP,
87
+ Hadoop::API::Jobtracker::ThriftTaskType::TASK_CLEANUP,
88
+ ]
89
+ states = [
90
+ Hadoop::API::Jobtracker::ThriftTaskState::RUNNING,
91
+ Hadoop::API::Jobtracker::ThriftTaskState::SUCCEEDED,
92
+ Hadoop::API::Jobtracker::ThriftTaskState::FAILED,
93
+ Hadoop::API::Jobtracker::ThriftTaskState::UNASSIGNED,
94
+ Hadoop::API::Jobtracker::ThriftTaskState::KILLED,
95
+ Hadoop::API::Jobtracker::ThriftTaskState::COMMIT_PENDING,
96
+ Hadoop::API::Jobtracker::ThriftTaskState::FAILED_UNCLEAN,
97
+ Hadoop::API::Jobtracker::ThriftTaskState::KILLED_UNCLEAN,
98
+ ]
99
+
100
+ tasks = (call :getTaskList, types, states, '', 10000, 0).tasks
101
+ return tasks.collect{ |t| Task.new(self, t) }
102
+ end
103
+
104
+ def set_priority(priority)
105
+ prio =
106
+ case priority
107
+ when :very_high then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_HIGH
108
+ when :high then Hadoop::API::Jobtracker::ThriftJobPriority::HIGH
109
+ when :normal then Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
110
+ when :low then Hadoop::API::Jobtracker::ThriftJobPriority::LOW
111
+ when :very_low then Hadoop::API::Jobtracker::ThriftJobPriority::VERY_LOW
112
+ else Hadoop::API::Jobtracker::ThriftJobPriority::NORMAL
113
+ end
114
+ call :setJobPriority, prio
115
+ end
116
+
117
+ def kill!
118
+ call :killJob
119
+ end
120
+
121
+ private
122
+ def call(method, *args)
123
+ @cln.send :call, method, @thrift_job.jobID, *args
124
+ end
125
+ end
126
+
127
+ end
@@ -0,0 +1,46 @@
1
+ require 'nokogiri'
2
+
3
+ module ElephantDriver
4
+
5
+ class Task
6
+ def initialize(cln, thrift_task)
7
+ @cln = cln
8
+ @thrift_task = thrift_task
9
+ end
10
+
11
+ def start_time
12
+ @thrift_task.startTime
13
+ end
14
+
15
+ def exec_start_time
16
+ @thrift_task.execStartTime
17
+ end
18
+
19
+ def exec_finish_time
20
+ @thrift_task.execFinishTime
21
+ end
22
+
23
+ def progress
24
+ @thrift_task.progress
25
+ end
26
+
27
+ def failed?
28
+ @thrift_task.failed
29
+ end
30
+
31
+ def completed?
32
+ @thrift_task.complete
33
+ end
34
+
35
+ def counters
36
+ counters = {}
37
+ @thrift_task.counters.groups.each { |g|
38
+ h = {}
39
+ g.counters.each { |name, c| h[name] = c.value }
40
+ counters[g.name] = h
41
+ }
42
+ counters
43
+ end
44
+ end
45
+
46
+ end
@@ -0,0 +1,129 @@
1
+ /*
2
+ * Licensed to Cloudera, Inc. under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. Cloudera, Inc. licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ /*
20
+ * Common declarations for Hadoop Thrift interfaces
21
+ */
22
+
23
+ /*
24
+ * Namespaces for generated code. The idea is to keep code generated by
25
+ * Thrift under a 'hadoop.api' namespace, so that a higher-level set of
26
+ * functions and classes may be defined under 'hadoop'.
27
+ */
28
+
29
+ namespace cpp hadoop.api
30
+ namespace csharp Hadoop.API
31
+ namespace java org.apache.hadoop.thriftfs.api
32
+ namespace perl Hadoop.API
33
+ namespace php hadoop_api
34
+ namespace py hadoop.api.common
35
+ namespace rb Hadoop.API
36
+
37
+ /** Generic I/O error */
38
+ exception IOException {
39
+ /** Error message. */
40
+ 1: string msg,
41
+
42
+ /** Textual representation of the call stack. */
43
+ 2: string stack
44
+
45
+ /** The Java class of the Exception (may be a subclass) */
46
+ 3: string clazz
47
+ }
48
+
49
+ /**
50
+ * Information about the compilation version of this server
51
+ */
52
+ struct VersionInfo {
53
+ 1: string version
54
+ 2: string revision
55
+ 4: string compileDate
56
+ 5: string compilingUser
57
+ 6: string url
58
+ 7: string buildVersion
59
+ }
60
+
61
+
62
+ /** A single stack frame in a stack dump */
63
+ struct StackTraceElement {
64
+ 1: string className
65
+ 2: string fileName
66
+ 3: i32 lineNumber
67
+ 4: string methodName
68
+ 5: bool isNativeMethod
69
+ 6: string stringRepresentation
70
+ }
71
+
72
+ /** Info about a thread with its corresponding stack trace */
73
+ struct ThreadStackTrace {
74
+ 1: string threadName
75
+ 2: string threadStringRepresentation
76
+ 3: bool isDaemon
77
+
78
+ 4: list<StackTraceElement> stackTrace;
79
+ }
80
+
81
+ /**
82
+ * Memory available via java.lang.Runtime
83
+ */
84
+ struct RuntimeInfo {
85
+ 1:i64 totalMemory
86
+ 2:i64 freeMemory
87
+ 3:i64 maxMemory
88
+ }
89
+
90
+ /**
91
+ * Context options for every request.
92
+ */
93
+ struct RequestContext {
94
+ /**
95
+ * This map turns into a Configuration object in the server and
96
+ * is currently used to construct a UserGroupInformation to
97
+ * authenticate this request.
98
+ */
99
+ 1:map<string, string> confOptions
100
+ }
101
+
102
+ struct MetricsRecord {
103
+ 2: map<string, string> tags
104
+ 3: map<string, i64> metrics
105
+ }
106
+
107
+ struct MetricsContext {
108
+ 1: string name
109
+ 2: bool isMonitoring
110
+ 3: i32 period
111
+
112
+ 4: map<string, list<MetricsRecord>> records
113
+ }
114
+
115
+ struct ThriftDelegationToken {
116
+ 1: binary delegationTokenBytes
117
+ }
118
+
119
+ service HadoopServiceBase {
120
+ /** Return the version information for this server */
121
+ VersionInfo getVersionInfo(10:RequestContext ctx);
122
+ RuntimeInfo getRuntimeInfo(10:RequestContext ctx);
123
+ list<ThreadStackTrace> getThreadDump(10:RequestContext ctx);
124
+ list<MetricsContext> getAllMetrics(10:RequestContext ctx)
125
+ throws (1:IOException err);
126
+ MetricsContext getMetricsContext(10:RequestContext ctx, 1:string contextName)
127
+ throws (1:IOException err);
128
+ }
129
+