vayacondios-server 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. data/.gitignore +61 -0
  2. data/.travis.yml +11 -0
  3. data/.yardopts +10 -0
  4. data/CHANGELOG.md +0 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +41 -0
  7. data/LICENSE.md +99 -0
  8. data/Procfile +2 -0
  9. data/README.md +183 -0
  10. data/Rakefile +6 -0
  11. data/app/http_shim.rb +67 -0
  12. data/bin/vcd.sh +27 -0
  13. data/config/http_shim.rb +43 -0
  14. data/config/vayacondios.example.yaml +4 -0
  15. data/config/vayacondios.yaml +4 -0
  16. data/lib/tasks/publish.rake +23 -0
  17. data/lib/tasks/spec.rake +9 -0
  18. data/lib/tasks/yard.rake +2 -0
  19. data/lib/vayacondios/client/configliere.rb +38 -0
  20. data/lib/vayacondios/client/http_client.rb +49 -0
  21. data/lib/vayacondios/client/notifier.rb +84 -0
  22. data/lib/vayacondios/server/handlers/config_handler.rb +35 -0
  23. data/lib/vayacondios/server/handlers/event_handler.rb +30 -0
  24. data/lib/vayacondios/server/model/config_document.rb +94 -0
  25. data/lib/vayacondios/server/model/document.rb +25 -0
  26. data/lib/vayacondios/server/model/event_document.rb +94 -0
  27. data/lib/vayacondios/version.rb +3 -0
  28. data/lib/vayacondios-client.rb +20 -0
  29. data/lib/vayacondios-server.rb +18 -0
  30. data/scripts/hadoop_monitor/configurable.rb +74 -0
  31. data/scripts/hadoop_monitor/hadoop_client.rb +249 -0
  32. data/scripts/hadoop_monitor/hadoop_monitor.rb +91 -0
  33. data/scripts/hadoop_monitor/hadoopable.rb +65 -0
  34. data/scripts/hadoop_monitor/machine_monitor.rb +115 -0
  35. data/scripts/s3_cataloger/buckets +33 -0
  36. data/scripts/s3_cataloger/foreach_bucket +88 -0
  37. data/scripts/s3_cataloger/parse_ls.py +391 -0
  38. data/spec/client/notifier_spec.rb +120 -0
  39. data/spec/server/config_spec.rb +55 -0
  40. data/spec/server/event_spec.rb +44 -0
  41. data/spec/server/server_spec.rb +20 -0
  42. data/spec/spec_helper.rb +10 -0
  43. data/spec/support/mongo_cleaner.rb +26 -0
  44. data/vayacondios-client.gemspec +26 -0
  45. data/vayacondios-server.gemspec +30 -0
  46. metadata +216 -0
# Base class for Vayacondios documents.
#
# A document is a Hash tagged with an organization and a topic. The CRUD
# entry points (create/update/find/destroy) are abstract and must be
# implemented by concrete subclasses (e.g. EventDocument, ConfigDocument).
class Vayacondios::Document < Hash
  attr_accessor :organization, :topic

  # @param options [Hash] :organization and :topic identify the document
  def initialize(options = {})
    options = options.symbolize_keys
    @organization = options[:organization]
    @topic = options[:topic]
  end

  def self.create
    raise NotImplementedError, "#{name} must be overridden by a subclass."
  end

  def self.update
    raise NotImplementedError, "#{name} must be overridden by a subclass."
  end

  def self.find
    raise NotImplementedError, "#{name} must be overridden by a subclass."
  end

  def destroy
    # Instances of Hash have no #name method, so the original `self.name`
    # raised NoMethodError here instead of the intended NotImplementedError.
    raise NotImplementedError, "#{self.class.name} must be overridden by a subclass."
  end
end
require 'vayacondios/server/model/document'

# The event model
#
# Event documents are key-value pairs, represented in JSON. A document
# consists of a primary key called the topic (_id in mongodb). It belongs to a
# collection named "#{organization_name}.#{topic}.events"
#
# Note: the mongodb connection is passed in because Thread lookups will not
# work while Goliath is in a streaming context.

class Vayacondios::EventDocument < Vayacondios::Document
  attr_reader :organization, :topic, :body

  # @param mongodb  a mongo connection (must respond to #collection)
  # @param options [Hash] :organization, :topic, and an optional :id
  def initialize(mongodb, options = {})
    super options
    @mongo = mongodb
    options = sanitize_options(options)

    @body = nil
    @id = options[:id] # already formatted by sanitize_options

    # NOTE(review): the collection name uses the raw topic set by super, not
    # the sanitized topic computed in sanitize_options — confirm intended.
    collection_name = [organization, topic, 'events'].join('.')
    @collection = @mongo.collection(collection_name)
  end

  # Writes +document+ as a new event; upserts when options carry an :id.
  def self.create(mongodb, document, options={})
    self.new(mongodb, options).update(document)
  end

  # Looks up an event by the :id given in +options+.
  def self.find(mongodb, options={})
    self.new(mongodb, options).find
  end

  # Fetches the stored record and reshapes it into the public form:
  # drops "_id", renames "t" to '_timestamp', and merges the nested
  # payload "d" into the top level.
  #
  # @return [EventDocument, nil] self with @body populated, or nil when
  #   no record exists
  def find
    result = @collection.find_one({_id: @id})
    if result.present?
      result.delete("_id")
      result['_timestamp'] = result.delete("t")
      result.merge! result.delete("d") if result["d"].present?
      @body = result
      self
    else
      nil
    end
  end

  # Persists +document+: upserts by id when one was given, otherwise
  # inserts a new record.
  #
  # @return [EventDocument] self with @body set to the stored payload
  def update(document)
    document = to_mongo(document)

    @body = document[:d]
    if @id
      @collection.update({:_id => @id}, document, {upsert: true})
    else
      @collection.insert(document)
    end

    self
  end

  def destroy(document)
    # Bare +super+ would forward +document+ to Document#destroy, which takes
    # no arguments and would raise ArgumentError; call without arguments so
    # the intended NotImplementedError is raised instead.
    super()
  end

  protected

  # Symbolizes keys, normalizes the topic to word characters, and formats
  # the id for mongo.
  def sanitize_options(options)
    options = options.symbolize_keys

    topic = options[:topic].gsub(/\W+/, '_')
    id = format_id options[:id]

    options.merge!(topic: topic, id: id)
  end

  # Coerces +id+ into a BSON::ObjectId when it looks like one; otherwise
  # strips non-word characters and keeps it as a string.
  def format_id(id)
    if (id.is_a?(Hash) && id["$oid"].present?)
      id = BSON::ObjectId(id["$oid"])
    else
      id = id.to_s.gsub(/\W/,'')
      id = BSON::ObjectId(id) if id.match(/^[a-f0-9]{24}$/)
    end
    id
  end

  # Wraps +document+ into the stored shape: payload under :d, optional
  # :_id, and the timestamp under :t (defaulting to now).
  def to_mongo(document)
    # Dup BEFORE extracting the timestamp: the original dup'd into :d first
    # and then deleted :_timestamp from the caller's hash, which (a) mutated
    # the caller's argument and (b) left a stale _timestamp inside :d that
    # would override the reshaped value in #find.
    document = document.dup
    {}.tap do |result|
      result[:t] = document.delete(:_timestamp) || Time.now
      result[:d] = document
      result[:_id] = @id if @id
    end
  end
end
class Vayacondios
  # Library version, referenced by the client and server gemspecs.
  # Frozen so the shared constant string cannot be mutated in place.
  VERSION = '0.0.4'.freeze
end
# Entry point for the Vayacondios client: pulls in configuration,
# JSON handling, and the HTTP transport used by the notifiers.
require 'configliere'
require 'multi_json'
require 'net/http'

# gorillib extensions the client code relies on
require 'gorillib/builder'
require 'gorillib/configurable'
require 'gorillib/enumerable/sum'
require 'gorillib/exception/raisers'
require 'gorillib/hash/deep_compact'
require 'gorillib/hash/deep_merge'
require 'gorillib/hash/keys'
require 'gorillib/logger/log'
require 'gorillib/metaprogramming/class_attribute'
require 'gorillib/object/blank'
require 'gorillib/string/constantize'
require 'gorillib/string/inflections'

# client components
require 'vayacondios/client/http_client'
require 'vayacondios/client/notifier'
require 'vayacondios/client/configliere'
# Entry point for the Vayacondios server: Goliath web server plus the
# evented mongo driver used by the document models.
require 'goliath'
require 'em-mongo'
require 'em-synchrony/em-http'
require 'em-synchrony/em-mongo'

# gorillib extensions the server code relies on
require 'gorillib/object/blank'
require 'gorillib/enumerable/sum'
require 'gorillib/hash/deep_merge'
require 'gorillib/hash/keys'
require 'gorillib/string/constantize'
require 'gorillib/string/inflections'
require 'multi_json'

# document models
require 'vayacondios/server/model/config_document'
require 'vayacondios/server/model/event_document'

# request handlers
require 'vayacondios/server/handlers/config_handler'
require 'vayacondios/server/handlers/event_handler'
require 'configliere'
require 'logger'

module Vayacondios

  # Mixin providing shared Configliere settings and a logger for the
  # hadoop monitor scripts. Settings are built lazily, exactly once.
  module Configurable

    #
    # Declare a name CONST_NAME = :const_name
    #
    def self.declare_name symbol
      const_set symbol.to_s.upcase.to_sym, symbol
    end

    declare_name :cluster_busy
    declare_name :cluster_quiet
    declare_name :event
    declare_name :time

    attr_reader :logger

    # Accessor that guarantees the settings have been initialized.
    def settings
      init_settings
      @settings
    end

    # Builds @settings and @logger on first call; subsequent calls are
    # no-ops thanks to the defined? guard.
    def init_settings
      return if defined? @settings

      @settings = Configliere::Param.new
      @settings.use :env_var, :config_file, :commandline

      # [name, default, description] triples, registered in order.
      [
        [:sleep_seconds,                   5,               "Time to sleep in main loops"],
        [:log_level,                       "info",          "Log level. See standard Logger class"],
        [:mongo_jobs_db,                   'job_info',      "Mongo database to dump hadoop job information into"],
        [:mongo_job_logs_collection,       'job_logs',      "Mongo collection to dump job logs into."],
        [:mongo_job_events_collection,     'job_events',    "Mongo collection containing jobs events."],
        [:mongo_machine_stats_collection,  'machine_stats', "Mongo collection containing machine stats."],
        [:mongo_ip,                        nil,             "IP address of Hadoop monitor node"],
        [:job_logs_size,                   10 * (1 << 20),  "Size (in bytes) of Mongo jobs log collection"],
        [:job_events_size,                 10 * (1 << 20),  "Size (in bytes) of Mongo job events collection"],
        [:machine_stats_size,              100 * (1 << 20), "Size (in bytes) of machine stats collection"],
      ].each do |name, default, description|
        @settings.define(name, default: default, description: description)
      end

      @settings.resolve!

      @logger = Logger.new(STDERR)
      @logger.level = Logger.const_get(@settings.log_level.upcase.to_sym)

      @logger.info "Settings: #{@settings}"

      @settings
    end
  end
end
require_relative 'configurable'
require_relative 'hadoopable'
require 'json'
require 'optparse'
require 'ostruct'
require 'logger'
require 'pp'
require 'gorillib/string/inflections'
require 'swineherd-fs'

module Vayacondios

  # Wraps the Hadoop JobClient (JRuby / Java interop) to pull job, task,
  # and counter information out of a running job tracker and turn it into
  # plain Ruby hashes suitable for storage in mongo.
  class HadoopClient

    include Configurable
    include Hadoopable

    RUNNING = JobStatus::RUNNING

    def initialize
      init_settings
      logger.info "Connecting to job tracker."
      @job_client = JobClient.new JobConf.new(get_hadoop_conf)
    end

    #
    # (Equality doesn't work for jobs, so - will not work as intended
    # on arrays of jobs.)
    #
    # Returns the members of jobs_array1 whose job_id does not appear in
    # jobs_array2, comparing ids as strings.
    #
    def subtract jobs_array1, jobs_array2
      jobs_array1.reject{|j| jobs_array2.map(&:job_id).map(&:to_s).index j.job_id.to_s}
    end

    #
    # Returns the jobs with the specified state. States are specified
    # by constants in this class.
    #
    def jobs_with_state state
      jobs_by_state[state] || []
    end

    #
    # Returns the properties of the specified job as a hash.
    #
    # Fetches "#{job_id}_conf.xml" from the job tracker's log endpoint.
    # NOTE(review): Kernel#open on a URL requires open-uri to be loaded
    # and is unsafe on untrusted strings — confirm open-uri is loaded by
    # a required file and that tracking URLs are trusted.
    #
    def job_properties job
      host_port = job.get_tracking_url[/^(http:\/\/)?[^\/]*/]
      job_id = job.get_id.to_s
      conf_uri = "#{host_port}/logs/#{job_id}_conf.xml"
      properties = parse_properties(open conf_uri)
      recordize_properties(properties, job_id.to_s)
    end

    #
    # Returns the stats for the current job as a hash.
    #
    def job_stats job, finish_time
      parse_job job.get_id.to_s, finish_time
    end

    private

    #
    # Returns a hash JobStatus::<SOME_STATE> => <array of jobs>
    #
    def jobs_by_state
      job_statuses_by_state = @job_client.get_all_jobs.group_by(&:get_run_state)
      Hash[job_statuses_by_state.map{|state, job_statuses| [state, jobs_from_statuses(job_statuses)]}]
    end

    #
    # Some hadoop stuff returns JobStatus objects. This converts them
    # to RunningJob objects.
    #
    def jobs_from_statuses job_statuses
      job_statuses.map{|job_status| @job_client.get_job job_status.get_job_id}
    end

    #
    # Takes an org.apache.hadoop.mapred.RunningJob and returns a hash
    # object that represents it.
    #
    # Returns a flat array: the job record, a job-progress record, then a
    # task record and a task-progress record for every map and reduce task.
    #
    def parse_job job_id, finish_time
      job = @job_client.get_job job_id
      job_status = @job_client.get_all_jobs.select{|j| j.get_job_id.to_s == job_id.to_s}.first
      finished_status = [:FAILED, :KILLED, :COMPLETE]
      failed_status = [:FAILED]

      job_data = {

        _id: job_id.to_s,

        # not sure what is what. I'm guessing
        # JobStatus.getStartTime corresponds to the
        # launch time in the logs, but I'm going to
        # go ahead and use it twice here.

        launch_time: Time.at(job_status.get_start_time / 1000),
        submit_time: Time.at(job_status.get_start_time / 1000),

        finish_time: finish_time,

        job_status: case job_status.get_run_state
                    when JobStatus::FAILED then :FAILED
                    when JobStatus::KILLED then :KILLED
                    when JobStatus::PREP then :PREP
                    when JobStatus::RUNNING then :RUNNING
                    when JobStatus::SUCCEEDED then :SUCCEEDED
                    end,

        finished_maps: num_tasks(job_id, :map, finished_status),
        finished_reduces: num_tasks(job_id, :reduce, finished_status),
        failed_maps: num_tasks(job_id, :map, failed_status),
        failed_reduces: num_tasks(job_id, :reduce, failed_status),

        counters: parse_counters(job.get_counters),
        type: :job,

      }

      job_progress = {

        parent_id: job.job_id,
        type: :job_progress,
        # NOTE(review): this is a Time object (second resolution), not
        # milliseconds as the original comment claimed.
        time: Time.now,
        cleanup_progress: job.cleanup_progress,
        map_progress: job.map_progress,
        reduce_progress: job.reduce_progress,
        setup_progress: job.setup_progress,

      }

      map_task_data = @job_client.get_map_task_reports job_id
      reduce_task_data = @job_client.get_reduce_task_reports job_id

      m_reports, m_progress_reports, r_reports, r_progress_reports =
        [
          map_task_data   .map{|task| parse_task          task, "MAP",    job_id },
          map_task_data   .map{|task| parse_task_progress task, "MAP"            },
          reduce_task_data.map{|task| parse_task          task, "REDUCE", job_id },
          reduce_task_data.map{|task| parse_task_progress task, "REDUCE"         },
        ]

      [job_data, job_progress] + m_reports + r_reports + m_progress_reports + r_progress_reports
    end

    # Wraps a parsed properties hash into a storable record keyed off the
    # owning job's id.
    def recordize_properties properties, job_id
      {
        parent_id: job_id,
        type: :conf,
        properties: properties,
        _id: [job_id, "_properties"].join
      }
    end

    #
    # Return a hash containing a name => value hash representing the
    # config for a hadoop job.
    #
    # Secret-key properties (fs.s3.awsSecretAccessKey and the s3n
    # variant) are deliberately skipped.
    #
    def parse_properties conf
      properties = {}
      conf.read.scan /[^\n]*\n/ do |line,|
        m = /<name>([^<]+)<\/name><value>([^<]+)<\/value>/.match line
        if m and m[1] !~ /fs\.s3n?\.awsSecretAccessKey/ then
          properties[parse_key m[1]] = parse_atom m[2]
        end
      end
      properties
    end

    #
    # Takes an org.apache.hadoop.mapred.TaskReport and returns a Hash
    # object that represents it.
    #
    def parse_task task_report, task_type, parent_job_id
      {
        _id: task_report.get_task_id.to_s,
        parent_id: parent_job_id,
        task_type: task_type,
        task_status: task_report.get_current_status.to_s,
        start_time: Time.at(task_report.get_start_time / 1000),
        finish_time: Time.at(task_report.get_finish_time / 1000),
        counters: parse_counters(task_report.get_counters),
        type: :task,
        diagnostics: task_report.get_diagnostics.map(&:to_s),
        running_attempts: task_report.get_running_task_attempts.map(&:to_s),
      }
    end

    # Snapshot of a single task's progress at the current wall-clock time.
    def parse_task_progress task_report, task_type
      {
        parent_id: task_report.get_task_id.to_s,
        time: Time.now,
        type: :task_progress,
        progress: task_report.get_progress,
      }
    end

    #
    # Takes a class of type org.apache.hadoop.mapred.Counters and
    # returns a Hash object that represents this counter.
    #
    # Shape: {group_name => {counter_name => value}}, with names
    # normalized by parse_key.
    #
    def parse_counters counters
      Hash[counters.map do |group|
        [parse_key(group.get_name), Hash[group.map do |counter|
          [parse_key(counter.get_name), counter.get_counter]
        end]]
      end]
    end

    #
    # Parse a key in a log entry. Log entries consist of a type, which I
    # consider a key, and a list of key=value pairs.
    #
    # Normalizes to snake_case with dots replaced by underscores.
    #
    def parse_key key
      return (parse_atom key).underscore.gsub ".", "_"
    end

    #
    # Parse a value in a Hadoop log.
    #
    def parse_atom a
      if /[0-9][ \r\t\n]*\/[ \r\t\n]*[0-9]+/.match a
        # "0/6" -> [0,6]
        return a.split("/").collect{|s| s.to_i}
      elsif /^[0-9,]*$/.match a
        # "224" -> 224
        return a.gsub(',', '').to_i
      else
        # \. -> .
        return a.gsub(/([^\\])\\(.)/, '\1\2')
      end
    end

    #
    # Returns the number of tasks of the specified TIPStatus from the
    # specified job_client of the specified type (map or reduce)
    #
    def num_tasks job_id, map_or_reduce, statuses
      method_name = "get_#{map_or_reduce}_task_reports".to_sym
      @job_client.send(method_name, job_id).select do |report|
        tip_statuses = statuses.map do |status|
          TIPStatus.const_get status
        end
        tip_statuses.index report.get_current_status
      end.size
    end
  end
end