vayacondios-server 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +61 -0
- data/.travis.yml +11 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +4 -0
- data/Guardfile +41 -0
- data/LICENSE.md +99 -0
- data/Procfile +2 -0
- data/README.md +183 -0
- data/Rakefile +6 -0
- data/app/http_shim.rb +67 -0
- data/bin/vcd.sh +27 -0
- data/config/http_shim.rb +43 -0
- data/config/vayacondios.example.yaml +4 -0
- data/config/vayacondios.yaml +4 -0
- data/lib/tasks/publish.rake +23 -0
- data/lib/tasks/spec.rake +9 -0
- data/lib/tasks/yard.rake +2 -0
- data/lib/vayacondios/client/configliere.rb +38 -0
- data/lib/vayacondios/client/http_client.rb +49 -0
- data/lib/vayacondios/client/notifier.rb +84 -0
- data/lib/vayacondios/server/handlers/config_handler.rb +35 -0
- data/lib/vayacondios/server/handlers/event_handler.rb +30 -0
- data/lib/vayacondios/server/model/config_document.rb +94 -0
- data/lib/vayacondios/server/model/document.rb +25 -0
- data/lib/vayacondios/server/model/event_document.rb +94 -0
- data/lib/vayacondios/version.rb +3 -0
- data/lib/vayacondios-client.rb +20 -0
- data/lib/vayacondios-server.rb +18 -0
- data/scripts/hadoop_monitor/configurable.rb +74 -0
- data/scripts/hadoop_monitor/hadoop_client.rb +249 -0
- data/scripts/hadoop_monitor/hadoop_monitor.rb +91 -0
- data/scripts/hadoop_monitor/hadoopable.rb +65 -0
- data/scripts/hadoop_monitor/machine_monitor.rb +115 -0
- data/scripts/s3_cataloger/buckets +33 -0
- data/scripts/s3_cataloger/foreach_bucket +88 -0
- data/scripts/s3_cataloger/parse_ls.py +391 -0
- data/spec/client/notifier_spec.rb +120 -0
- data/spec/server/config_spec.rb +55 -0
- data/spec/server/event_spec.rb +44 -0
- data/spec/server/server_spec.rb +20 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/support/mongo_cleaner.rb +26 -0
- data/vayacondios-client.gemspec +26 -0
- data/vayacondios-server.gemspec +30 -0
- metadata +216 -0
data/lib/vayacondios/server/model/document.rb
@@ -0,0 +1,25 @@
+class Vayacondios::Document < Hash
+  attr_accessor :organization, :topic
+
+  def initialize(options = {})
+    options = options.symbolize_keys
+    @organization = options[:organization]
+    @topic = options[:topic]
+  end
+
+  def self.create
+    raise NotImplementedError.new("#{self.name} must be overridden by a subclass.")
+  end
+
+  def self.update
+    raise NotImplementedError.new("#{self.name} must be overridden by a subclass.")
+  end
+
+  def self.find
+    raise NotImplementedError.new("#{self.name} must be overridden by a subclass.")
+  end
+
+  def destroy
+    raise NotImplementedError.new("#{self.class.name} must be overridden by a subclass.")
+  end
+end
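
The Document base class above is a pure interface: create, update, and find on the class, and destroy on instances, all raise NotImplementedError until a subclass supplies them. A minimal illustrative subclass (the InMemoryDocument name and its hash-backed store are hypothetical, not part of the gem; it assumes the gem's gorillib requires are already loaded):

    class InMemoryDocument < Vayacondios::Document
      STORE = {} # hypothetical stand-in for a real backing store such as MongoDB

      def self.create(options = {}, body = {})
        doc = new(options)
        STORE[[doc.organization, doc.topic]] = body # keyed like the gem's collections
        doc
      end

      def self.find(options = {})
        doc = new(options)
        STORE.key?([doc.organization, doc.topic]) ? doc : nil
      end
    end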
data/lib/vayacondios/server/model/event_document.rb
@@ -0,0 +1,94 @@
+require 'vayacondios/server/model/document'
+
+# The event model
+#
+# Event documents are key-value pairs, represented in JSON. A document
+# consists of a primary key called the topic (_id in mongodb). It belongs to a
+# collection named "#{organization_name}.#{topic}.events"
+#
+# Note: mongodb is passed in because Thread lookups will not
+# work while Goliath is in a streaming context.
+
+class Vayacondios::EventDocument < Vayacondios::Document
+  attr_reader :organization, :topic, :body
+
+  def initialize(mongodb, options = {})
+    super options
+    @mongo = mongodb
+    options = sanitize_options(options)
+
+    @body = nil
+    @id = format_id(options[:id])
+    @mongo = mongodb
+
+    collection_name = [organization, topic, 'events'].join('.')
+    @collection = @mongo.collection(collection_name)
+  end
+
+  def self.create(mongodb, document, options={})
+    self.new(mongodb, options).update(document)
+  end
+
+  def self.find(mongodb, options={})
+    self.new(mongodb, options).find
+  end
+
+  def find
+    result = @collection.find_one({_id: @id})
+    if result.present?
+      result.delete("_id")
+      result['_timestamp'] = result.delete("t")
+      result.merge! result.delete("d") if result["d"].present?
+      @body = result
+      self
+    else
+      nil
+    end
+  end
+
+  def update(document)
+    document = to_mongo(document)
+
+    @body = document[:d]
+    if @id
+      @collection.update({:_id => @id}, document, {upsert: true})
+    else
+      @collection.insert(document)
+    end
+
+    self
+  end
+
+  def destroy(document)
+    super()
+  end
+
+  protected
+
+  def sanitize_options(options)
+    options = options.symbolize_keys
+
+    topic = options[:topic].gsub(/\W+/, '_')
+    id = format_id options[:id]
+
+    options.merge!(topic: topic, id: id)
+  end
+
+  def format_id(id)
+    if (id.is_a?(Hash) && id["$oid"].present?)
+      id = BSON::ObjectId(id["$oid"])
+    else
+      id = id.to_s.gsub(/\W/,'')
+      id = BSON::ObjectId(id) if id.match(/^[a-f0-9]{24}$/)
+    end
+    id
+  end
+
+  def to_mongo(document)
+    {}.tap do |result|
+      result[:d] = document.dup
+      result[:_id] = @id if @id
+      result[:t] = document.delete(:_timestamp) || Time.now
+    end
+  end
+end
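
A usage sketch for the event model (the connection setup and the 'my_org'/'weather'/'abc123' names are illustrative; in the gem these calls are driven by the HTTP handlers rather than written by hand):

    require 'vayacondios-server'

    EM.synchrony do
      mongo = EM::Mongo::Connection.new('localhost').db('vayacondios')

      # Writes into the "my_org.weather.events" collection
      Vayacondios::EventDocument.create(mongo,
        { temperature: 97 },
        organization: 'my_org', topic: 'weather', id: 'abc123')

      found = Vayacondios::EventDocument.find(mongo,
        organization: 'my_org', topic: 'weather', id: 'abc123')
      puts found.body # the stored body merged with its _timestamp
      EM.stop
    end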
data/lib/vayacondios-client.rb
@@ -0,0 +1,20 @@
+require 'configliere'
+require 'multi_json'
+require 'net/http'
+
+require 'gorillib/builder'
+require 'gorillib/configurable'
+require 'gorillib/enumerable/sum'
+require 'gorillib/exception/raisers'
+require 'gorillib/hash/deep_compact'
+require 'gorillib/hash/deep_merge'
+require 'gorillib/hash/keys'
+require 'gorillib/logger/log'
+require 'gorillib/metaprogramming/class_attribute'
+require 'gorillib/object/blank'
+require 'gorillib/string/constantize'
+require 'gorillib/string/inflections'
+
+require 'vayacondios/client/http_client'
+require 'vayacondios/client/notifier'
+require 'vayacondios/client/configliere'
data/lib/vayacondios-server.rb
@@ -0,0 +1,18 @@
+require 'goliath'
+require 'em-mongo'
+require 'em-synchrony/em-http'
+require 'em-synchrony/em-mongo'
+
+require 'gorillib/object/blank'
+require 'gorillib/enumerable/sum'
+require 'gorillib/hash/deep_merge'
+require 'gorillib/hash/keys'
+require 'gorillib/string/constantize'
+require 'gorillib/string/inflections'
+require 'multi_json'
+
+require 'vayacondios/server/model/config_document'
+require 'vayacondios/server/model/event_document'
+
+require 'vayacondios/server/handlers/config_handler'
+require 'vayacondios/server/handlers/event_handler'
data/scripts/hadoop_monitor/configurable.rb
@@ -0,0 +1,74 @@
+require 'configliere'
+require 'logger'
+
+module Vayacondios
+
+  module Configurable
+
+    #
+    # Declare a name CONST_NAME = :const_name
+    #
+    def self.declare_name symbol
+      const_set symbol.to_s.upcase.to_sym, symbol
+    end
+
+    declare_name :cluster_busy
+    declare_name :cluster_quiet
+    declare_name :event
+    declare_name :time
+
+    attr_reader :logger
+
+    def settings
+      init_settings
+      return @settings
+    end
+
+    def init_settings
+      return if defined? @settings
+
+      @settings = Configliere::Param.new
+      @settings.use :env_var, :config_file, :commandline
+
+      @settings.define(:sleep_seconds,
+                       default: 5,
+                       description: "Time to sleep in main loops")
+      @settings.define(:log_level,
+                       default: "info",
+                       description: "Log level. See standard Logger class")
+      @settings.define(:mongo_jobs_db,
+                       default: 'job_info',
+                       description: "Mongo database to dump hadoop job information into")
+      @settings.define(:mongo_job_logs_collection,
+                       default: 'job_logs',
+                       description: "Mongo collection to dump job logs into.")
+      @settings.define(:mongo_job_events_collection,
+                       default: 'job_events',
+                       description: "Mongo collection containing job events.")
+      @settings.define(:mongo_machine_stats_collection,
+                       default: 'machine_stats',
+                       description: "Mongo collection containing machine stats.")
+      @settings.define(:mongo_ip,
+                       default: nil,
+                       description: "IP address of Hadoop monitor node")
+      @settings.define(:job_logs_size,
+                       default: 10 * (1 << 20),
+                       description: ("Size (in bytes) of Mongo jobs log collection"))
+      @settings.define(:job_events_size,
+                       default: 10 * (1 << 20),
+                       description: ("Size (in bytes) of Mongo job events collection"))
+      @settings.define(:machine_stats_size,
+                       default: 100 * (1 << 20),
+                       description: ("Size (in bytes) of machine stats collection"))
+
+      @settings.resolve!
+
+      @logger = Logger.new(STDERR)
+      @logger.level = Logger.const_get(@settings.log_level.upcase.to_sym)
+
+      @logger.info "Settings: #{@settings}"
+
+      @settings
+    end
+  end
+end
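
Configurable is meant to be mixed into the monitor scripts' classes; any includer gets a lazily-initialized Configliere settings object plus a logger. A small sketch of the pattern (the QuietPoller class name is hypothetical):

    class QuietPoller
      include Vayacondios::Configurable # hypothetical includer
    end

    poller = QuietPoller.new
    poller.settings.sleep_seconds # => 5, unless overridden via env var, config file, or command line
    poller.logger.info 'polling'  # logger is built on the first settings access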
data/scripts/hadoop_monitor/hadoop_client.rb
@@ -0,0 +1,249 @@
+require_relative 'configurable'
+require_relative 'hadoopable'
+require 'json'
+require 'optparse'
+require 'ostruct'
+require 'logger'
+require 'pp'
+require 'gorillib/string/inflections'
+require 'swineherd-fs'
+
+module Vayacondios
+
+  class HadoopClient
+
+    include Configurable
+    include Hadoopable
+
+    RUNNING = JobStatus::RUNNING
+
+    def initialize
+      init_settings
+      logger.info "Connecting to job tracker."
+      @job_client = JobClient.new JobConf.new(get_hadoop_conf)
+    end
+
+    #
+    # (Equality doesn't work for jobs, so Array#- will not work as
+    # intended on arrays of jobs.)
+    #
+    def subtract jobs_array1, jobs_array2
+      jobs_array1.reject{|j| jobs_array2.map(&:job_id).map(&:to_s).index j.job_id.to_s}
+    end
+
+    #
+    # Returns the jobs with the specified state. States are specified
+    # by constants in this class.
+    #
+    def jobs_with_state state
+      jobs_by_state[state] || []
+    end
+
+    #
+    # Returns the properties of the specified job as a hash.
+    #
+    def job_properties job
+      host_port = job.get_tracking_url[/^(http:\/\/)?[^\/]*/]
+      job_id = job.get_id.to_s
+      conf_uri = "#{host_port}/logs/#{job_id}_conf.xml"
+      properties = parse_properties(open conf_uri)
+      recordize_properties(properties, job_id.to_s)
+    end
+
+    #
+    # Returns the stats for the current job as a hash.
+    #
+    def job_stats job, finish_time
+      parse_job job.get_id.to_s, finish_time
+    end
+
+    private
+
+    #
+    # Returns a hash JobStatus::<SOME_STATE> => <array of jobs>
+    #
+    def jobs_by_state
+      job_statuses_by_state = @job_client.get_all_jobs.group_by(&:get_run_state)
+      Hash[job_statuses_by_state.map{|state, job_statuses| [state, jobs_from_statuses(job_statuses)]}]
+    end
+
+    #
+    # Some hadoop stuff returns JobStatus objects. This converts them
+    # to RunningJob objects.
+    #
+    def jobs_from_statuses job_statuses
+      job_statuses.map{|job_status| @job_client.get_job job_status.get_job_id}
+    end
+
+    #
+    # Takes an org.apache.hadoop.mapred.RunningJob and returns a hash
+    # object that represents it.
+    #
+    def parse_job job_id, finish_time
+      job = @job_client.get_job job_id
+      job_status = @job_client.get_all_jobs.select{|j| j.get_job_id.to_s == job_id.to_s}.first
+      finished_status = [:FAILED, :KILLED, :COMPLETE]
+      failed_status = [:FAILED]
+
+      job_data = {
+
+        _id: job_id.to_s,
+
+        # not sure what is what. I'm guessing
+        # JobStatus.getStartTime corresponds to the
+        # launch time in the logs, but I'm going to
+        # go ahead and use it twice here.
+
+        launch_time: Time.at(job_status.get_start_time / 1000),
+        submit_time: Time.at(job_status.get_start_time / 1000),
+
+        finish_time: finish_time,
+
+        job_status: case job_status.get_run_state
+                    when JobStatus::FAILED then :FAILED
+                    when JobStatus::KILLED then :KILLED
+                    when JobStatus::PREP then :PREP
+                    when JobStatus::RUNNING then :RUNNING
+                    when JobStatus::SUCCEEDED then :SUCCEEDED
+                    end,
+
+        finished_maps: num_tasks(job_id, :map, finished_status),
+        finished_reduces: num_tasks(job_id, :reduce, finished_status),
+        failed_maps: num_tasks(job_id, :map, failed_status),
+        failed_reduces: num_tasks(job_id, :reduce, failed_status),
+
+        counters: parse_counters(job.get_counters),
+        type: :job,
+
+      }
+
+      job_progress = {
+
+        parent_id: job.job_id,
+        type: :job_progress,
+        # report time in milliseconds for consistency
+        time: Time.now,
+        cleanup_progress: job.cleanup_progress,
+        map_progress: job.map_progress,
+        reduce_progress: job.reduce_progress,
+        setup_progress: job.setup_progress,
+
+      }
+
+      map_task_data = @job_client.get_map_task_reports job_id
+      reduce_task_data = @job_client.get_reduce_task_reports job_id
+
+      m_reports, m_progress_reports, r_reports, r_progress_reports =
+        [
+          map_task_data   .map{|task| parse_task          task, "MAP",    job_id },
+          map_task_data   .map{|task| parse_task_progress task, "MAP"            },
+          reduce_task_data.map{|task| parse_task          task, "REDUCE", job_id },
+          reduce_task_data.map{|task| parse_task_progress task, "REDUCE"         },
+        ]
+
+      [job_data, job_progress] + m_reports + r_reports + m_progress_reports + r_progress_reports
+    end
+
+    def recordize_properties properties, job_id
+      {
+        parent_id: job_id,
+        type: :conf,
+        properties: properties,
+        _id: [job_id, "_properties"].join
+      }
+    end
+
+    #
+    # Return a hash containing a name => value hash representing the
+    # config for a hadoop job.
+    #
+    def parse_properties conf
+      properties = {}
+      conf.read.scan /[^\n]*\n/ do |line,|
+        m = /<name>([^<]+)<\/name><value>([^<]+)<\/value>/.match line
+        if m and m[1] !~ /fs\.s3n?\.awsSecretAccessKey/ then
+          properties[parse_key m[1]] = parse_atom m[2]
+        end
+      end
+      properties
+    end
+
+    #
+    # Takes an org.apache.hadoop.mapred.TaskReport and returns a Hash
+    # object that represents it.
+    #
+    def parse_task task_report, task_type, parent_job_id
+      {
+        _id: task_report.get_task_id.to_s,
+        parent_id: parent_job_id,
+        task_type: task_type,
+        task_status: task_report.get_current_status.to_s,
+        start_time: Time.at(task_report.get_start_time / 1000),
+        finish_time: Time.at(task_report.get_finish_time / 1000),
+        counters: parse_counters(task_report.get_counters),
+        type: :task,
+        diagnostics: task_report.get_diagnostics.map(&:to_s),
+        running_attempts: task_report.get_running_task_attempts.map(&:to_s),
+      }
+    end
+
+    def parse_task_progress task_report, task_type
+      {
+        parent_id: task_report.get_task_id.to_s,
+        time: Time.now,
+        type: :task_progress,
+        progress: task_report.get_progress,
+      }
+    end
+
+    #
+    # Takes a class of type org.apache.hadoop.mapred.Counters and
+    # returns a Hash object that represents this counter.
+    #
+    def parse_counters counters
+      Hash[counters.map do |group|
+        [parse_key(group.get_name), Hash[group.map do |counter|
+          [parse_key(counter.get_name), counter.get_counter]
+        end]]
+      end]
+    end
+
+    #
+    # Parse a key in a log entry. Log entries consist of a type, which I
+    # consider a key, and a list of key=value pairs.
+    #
+    def parse_key key
+      return (parse_atom key).underscore.gsub ".", "_"
+    end
+
+    #
+    # Parse a value in a Hadoop log.
+    #
+    def parse_atom a
+      if /[0-9][ \r\t\n]*\/[ \r\t\n]*[0-9]+/.match a
+        # "0/6" -> [0,6]
+        return a.split("/").collect{|s| s.to_i}
+      elsif /^[0-9,]*$/.match a
+        # "224" -> 224
+        return a.gsub(',', '').to_i
+      else
+        # \. -> .
+        return a.gsub(/([^\\])\\(.)/, '\1\2')
+      end
+    end
+
+    #
+    # Returns the number of tasks of the specified TIPStatus from the
+    # specified job_client of the specified type (map or reduce)
+    #
+    def num_tasks job_id, map_or_reduce, statuses
+      method_name = "get_#{map_or_reduce}_task_reports".to_sym
+      @job_client.send(method_name, job_id).select do |report|
+        tip_statuses = statuses.map do |status|
+          TIPStatus.const_get status
+        end
+        tip_statuses.index report.get_current_status
+      end.size
+    end
+  end
+end
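
This class is presumably driven by hadoop_monitor.rb in a polling loop; an illustrative reduced sketch of that usage (the loop body is a guess at the pattern, not the monitor's actual code, and it must run under JRuby with the Hadoop classes on the classpath):

    client = Vayacondios::HadoopClient.new
    running = client.jobs_with_state(Vayacondios::HadoopClient::RUNNING)

    sleep client.settings.sleep_seconds
    still_running = client.jobs_with_state(Vayacondios::HadoopClient::RUNNING)

    # subtract compares job ids, since job objects lack value equality
    finished = client.subtract(running, still_running)
    finished.each do |job|
      records = client.job_stats(job, Time.now) # job, progress, and task hashes
    end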