hadoop-metrics 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (C) 2013 Masahiro Nakagawa
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,60 @@
1
+ # hadoop-metrics
2
+
3
+ hadoop-metrics is a Ruby wrapper for the Hadoop Metrics API.
4
+
5
+ ## Installation
6
+
7
+ ### gem
8
+
9
+ ```
10
+ gem install hadoop-metrics
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ### Setup
16
+
17
+ Every method except `jmx` reads its data from the daemon's `/metrics` API.
18
+ To use those methods, enable dumping of each metrics context you need in `hadoop-metrics.properties`.
19
+
20
+ ```
21
+ # example
22
+ mapred.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
23
+ mapred.period=30
24
+ ```
25
+
26
+ ### JobTracker
27
+
28
+ ```
29
+ require 'hadoop_metrics/job_tracker'
30
+
31
+ jt = HadoopMetrics::JobTracker.new('localhost', 50030)
32
+ puts JSON.pretty_generate(jt.fairscheduler_jobs)
33
+ ```
34
+
35
+ ### TaskTracker
36
+
37
+ ```
38
+ require 'hadoop_metrics/task_tracker'
39
+
40
+ tt = HadoopMetrics::TaskTracker.new('localhost', 50060)
41
+ puts JSON.pretty_generate(tt.shuffle_output)
42
+ ```
43
+
44
+ ### NOTE
45
+
46
+ NameNode and DataNode are not supported yet.
47
+
48
+ ## Copyright
49
+
50
+ <table>
51
+ <tr>
52
+ <td>Author</td><td>Masahiro Nakagawa &lt;repeatedly@gmail.com&gt;</td>
53
+ </tr>
54
+ <tr>
55
+ <td>Copyright</td><td>Copyright (c) 2013- Masahiro Nakagawa</td>
56
+ </tr>
57
+ <tr>
58
+ <td>License</td><td>MIT License</td>
59
+ </tr>
60
+ </table>
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,19 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+ require "hadoop_metrics/version"
3
+
4
# Gem specification for hadoop-metrics.
Gem::Specification.new do |s|
  s.name        = "hadoop-metrics"
  s.version     = HadoopMetrics::VERSION
  s.authors     = ["Masahiro Nakagawa"]
  s.email       = ["repeatedly@gmail.com"]
  s.homepage    = "https://github.com/repeatedly/hadoop-metrics"
  s.summary     = %q{Wrapper for Hadoop Metrics API}
  s.description = s.summary
  # The project ships an MIT LICENSE.txt; declare it so the packaged
  # metadata no longer reports an empty licence list.
  s.license     = "MIT"

  # The packaged file lists are taken from what git tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rake", ">= 0.9.2"
end
@@ -0,0 +1,13 @@
1
+ require 'hadoop_metrics/job_tracker'
2
+ require 'hadoop_metrics/task_tracker'
3
+
4
# Example: print the running-task count per fair-scheduler pool of a
# JobTracker on localhost:50030. Metric keys keep their original camelCase
# because :snake_case is disabled.
job_tracker = HadoopMetrics::JobTracker.new('localhost', 50030, :snake_case => false)
running_tasks = job_tracker.fairscheduler_running_tasks
puts JSON.pretty_generate(running_tasks)

# Other JobTracker queries; uncomment to try them.
#puts JSON.pretty_generate(job_tracker.fairscheduler_jobs)
#puts JSON.pretty_generate(job_tracker.mapred_jobtracker('hostName'))
#puts JSON.pretty_generate(job_tracker.fairscheduler_pools)
#puts JSON.pretty_generate(job_tracker.jmx)

# TaskTracker queries; uncomment to try them.
#task_tracker = HadoopMetrics::TaskTracker.new('localhost', 50060, :snake_case => false)
#puts JSON.pretty_generate(task_tracker.shuffle_output)
#puts JSON.pretty_generate(task_tracker.mapred)
@@ -0,0 +1,71 @@
1
+ require 'net/http'
2
+ require 'json'
3
+
4
module HadoopMetrics
  # Mixin with the query logic shared by the daemon wrappers.
  #
  # The including class must set these instance variables:
  #   @jmx_endpoint      - URI passed to HadoopMetrics.get_response by #jmx
  #   @metrics_endpoint  - URI passed to HadoopMetrics.get_response by #metrics
  #   @json_value_fields - names of JMX attributes whose values are JSON strings
  #   @snake_case        - when truthy, top-level keys are converted to snake_case
  module API
    # Fetches the first bean of the JMX response and decodes the attributes
    # listed in @json_value_fields from JSON strings into Ruby objects.
    #
    # Attributes that are absent (or not strings) are left untouched instead
    # of raising. Returns nil when the response contains no bean.
    def jmx
      bean = Array(HadoopMetrics.get_response(@jmx_endpoint)['beans']).first
      return nil if bean.nil?

      @json_value_fields.each { |field|
        raw = bean[field]
        bean[field] = JSON.parse(raw) if raw.is_a?(String)
      }

      @snake_case ? HadoopMetrics.snake_cased(bean) : bean
    end

    # Returns the parsed JSON document served by @metrics_endpoint.
    def metrics
      HadoopMetrics.get_response(@metrics_endpoint)
    end

    private

    # Looks up metrics[category][target], merges every record with
    # HadoopMetrics.merge_data and groups the merged hashes by the value
    # stored under +column+.
    #
    # Returns a Hash of column value => Array of merged records, or nil when
    # the category or the target is missing from the metrics document.
    def group_by(category, target, column)
      categories = metrics[category]
      return nil if categories.nil?

      records = categories[target]
      return nil if records.nil?

      merged = records.map { |record|
        HadoopMetrics.merge_data(record, @snake_case)
      }
      # Explicit receiver: this is Enumerable#group_by, not this method.
      merged.group_by { |row| row[column] }
    end

    # Dynamic accessors of the form <category>_<target>(column), e.g.
    # mapred_jobtracker('hostName'), which are forwarded to #group_by.
    #
    # Anything that does not have that shape (no underscore in the name, or
    # not exactly one argument) is passed to super so it fails with the
    # regular NoMethodError. This matters for implicit-conversion probes such
    # as to_ary, which Ruby sends with no arguments.
    #
    # respond_to_missing? is intentionally not defined: which metrics exist is
    # only known after querying the daemon.
    def method_missing(method, *args)
      category, target = method.to_s.split('_', 2)
      return super if target.nil? || args.size != 1

      group_by(category, target, args.first)
    end
  end

  # Performs an HTTP GET on +endpoint+ and returns the parsed JSON body.
  #
  # Raises RuntimeError when the response status is not 200.
  def self.get_response(endpoint)
    response = Net::HTTP.get_response(endpoint)
    if response.code.to_i == 200
      JSON.parse(response.body)
    else
      raise "Failed to get a response: code = #{response.code}, body = #{response.body}"
    end
  end

  # Merges the last hash of a record into a copy of its first hash
  # (presumably [tags, values] pairs - confirm against the /metrics output).
  # The record itself is not modified.
  #
  # Returns the merged Hash, with snake_cased keys when +snake_case+ is truthy.
  def self.merge_data(data, snake_case)
    merged = data.first.merge(data.last)
    snake_case ? snake_cased(merged) : merged
  end

  # Converts a camelCase or CamelCase name to snake_case:
  # the first character is downcased and every remaining A-Z is replaced by
  # "_" followed by its lowercase form ("sessionId" => "session_id").
  #
  # Returns a new String; +name+ is never modified, so frozen and empty
  # strings are accepted.
  def self.to_snake_case(name)
    name.sub(/\A./m) { |head| head.downcase }.
         gsub(/[A-Z]/) { |upper| "_#{upper.downcase}" }
  end

  # Returns a new Hash with every top-level key of +json+ converted by
  # to_snake_case. Values (including nested hashes) are kept as they are.
  def self.snake_cased(json)
    converted = {}
    json.each_pair { |key, value|
      converted[to_snake_case(key)] = value
    }
    converted
  end
end
@@ -0,0 +1,38 @@
1
+ require 'hadoop_metrics/api'
2
+
3
module HadoopMetrics
  # Wrapper around the JMX and /metrics HTTP endpoints of a JobTracker.
  class JobTracker
    include API

    # host, port - address the JobTracker endpoints are served from.
    # opts       - :snake_case selects whether result keys are converted to
    #              snake_case; it defaults to true when the key is absent.
    def initialize(host, port, opts = {})
      @snake_case = opts.fetch(:snake_case, true)
      @json_value_fields = [
        'SummaryJson',
        'AliveNodesInfoJson',
        'BlacklistedNodesInfoJson',
        'QueueInfoJson'
      ]
      @metrics_endpoint = URI("http://#{host}:#{port}/metrics?format=json")
      @jmx_endpoint = URI("http://#{host}:#{port}/jmx?qry=hadoop:service=JobTracker,name=JobTrackerInfo")
    end

    # Fair-scheduler pool records grouped by +column+.
    def fairscheduler_pools(column = 'name')
      group_by('fairscheduler', 'pools', column)
    end

    # Fair-scheduler job records grouped by +column+.
    def fairscheduler_jobs(column = 'name')
      group_by('fairscheduler', 'jobs', column)
    end

    # Sums 'runningTasks' per 'name' over the fair-scheduler records stored
    # under +target+.
    #
    # Returns a Hash of name => total, or nil when the metrics document has no
    # 'fairscheduler' section or no entry for +target+.
    def fairscheduler_running_tasks(target = 'pools')
      scheduler = metrics['fairscheduler']
      return nil if scheduler.nil?

      entries = scheduler[target]
      return nil if entries.nil?

      entries.inject({}) do |totals, entry|
        # The name is read from the record's first hash, the counter from its last.
        key = entry.first['name']
        totals[key] = (totals[key] || 0) + entry.last['runningTasks']
        totals
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'hadoop_metrics/api'
2
+
3
module HadoopMetrics
  # Wrapper around the JMX and /metrics HTTP endpoints of a TaskTracker.
  class TaskTracker
    include API

    # host, port - address the TaskTracker endpoints are served from.
    # opts       - :snake_case selects whether result keys are converted to
    #              snake_case; it defaults to true when the key is absent.
    def initialize(host, port, opts = {})
      @jmx_endpoint = URI("http://#{host}:#{port}/jmx?qry=hadoop:service=TaskTracker,name=TaskTrackerInfo")
      @metrics_endpoint = URI("http://#{host}:#{port}/metrics?format=json")
      @json_value_fields = %w(TasksInfoJson)
      # fetch (not `opts[:snake_case] || true`) so an explicit false is
      # honoured; `false || true` would always re-enable the conversion.
      @snake_case = opts.fetch(:snake_case, true)
    end

    # 'shuffleOutput' records of the 'mapred' context grouped by +column+.
    # +column+ is given in camelCase; it is converted to snake_case when
    # snake_case output is enabled.
    def shuffle_output(column = 'sessionId')
      group_by('mapred', 'shuffleOutput', column_key(column))
    end

    # 'tasktracker' records of the 'mapred' context grouped by +column+.
    # +column+ is given in camelCase; it is converted to snake_case when
    # snake_case output is enabled.
    def mapred(column = 'sessionId')
      group_by('mapred', 'tasktracker', column_key(column))
    end

    private

    # Maps the requested column name onto the key used in the merged records.
    # A copy is converted so the caller's string is never modified.
    def column_key(column)
      @snake_case ? HadoopMetrics.to_snake_case(column.dup) : column
    end
  end
end
@@ -0,0 +1,3 @@
1
module HadoopMetrics
  # Release number of the hadoop-metrics gem.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hadoop-metrics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Masahiro Nakagawa
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-25 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.2
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.9.2
30
+ description: Wrapper for Hadoop Metrics API
31
+ email:
32
+ - repeatedly@gmail.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - Gemfile
38
+ - LICENSE.txt
39
+ - README.md
40
+ - Rakefile
41
+ - hadoop-metrics.gemspec
42
+ - lib/hadoop_metrics.rb
43
+ - lib/hadoop_metrics/api.rb
44
+ - lib/hadoop_metrics/job_tracker.rb
45
+ - lib/hadoop_metrics/task_tracker.rb
46
+ - lib/hadoop_metrics/version.rb
47
+ homepage: https://github.com/repeatedly/hadoop-metrics
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ segments:
60
+ - 0
61
+ hash: 2512778794185577650
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ segments:
69
+ - 0
70
+ hash: 2512778794185577650
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 1.8.23
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Wrapper for Hadoop Metrics API
77
+ test_files: []