vayacondios-server 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/.gitignore +61 -0
  2. data/.travis.yml +11 -0
  3. data/.yardopts +10 -0
  4. data/CHANGELOG.md +0 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +41 -0
  7. data/LICENSE.md +99 -0
  8. data/Procfile +2 -0
  9. data/README.md +183 -0
  10. data/Rakefile +6 -0
  11. data/app/http_shim.rb +67 -0
  12. data/bin/vcd.sh +27 -0
  13. data/config/http_shim.rb +43 -0
  14. data/config/vayacondios.example.yaml +4 -0
  15. data/config/vayacondios.yaml +4 -0
  16. data/lib/tasks/publish.rake +23 -0
  17. data/lib/tasks/spec.rake +9 -0
  18. data/lib/tasks/yard.rake +2 -0
  19. data/lib/vayacondios/client/configliere.rb +38 -0
  20. data/lib/vayacondios/client/http_client.rb +49 -0
  21. data/lib/vayacondios/client/notifier.rb +84 -0
  22. data/lib/vayacondios/server/handlers/config_handler.rb +35 -0
  23. data/lib/vayacondios/server/handlers/event_handler.rb +30 -0
  24. data/lib/vayacondios/server/model/config_document.rb +94 -0
  25. data/lib/vayacondios/server/model/document.rb +25 -0
  26. data/lib/vayacondios/server/model/event_document.rb +94 -0
  27. data/lib/vayacondios/version.rb +3 -0
  28. data/lib/vayacondios-client.rb +20 -0
  29. data/lib/vayacondios-server.rb +18 -0
  30. data/scripts/hadoop_monitor/configurable.rb +74 -0
  31. data/scripts/hadoop_monitor/hadoop_client.rb +249 -0
  32. data/scripts/hadoop_monitor/hadoop_monitor.rb +91 -0
  33. data/scripts/hadoop_monitor/hadoopable.rb +65 -0
  34. data/scripts/hadoop_monitor/machine_monitor.rb +115 -0
  35. data/scripts/s3_cataloger/buckets +33 -0
  36. data/scripts/s3_cataloger/foreach_bucket +88 -0
  37. data/scripts/s3_cataloger/parse_ls.py +391 -0
  38. data/spec/client/notifier_spec.rb +120 -0
  39. data/spec/server/config_spec.rb +55 -0
  40. data/spec/server/event_spec.rb +44 -0
  41. data/spec/server/server_spec.rb +20 -0
  42. data/spec/spec_helper.rb +10 -0
  43. data/spec/support/mongo_cleaner.rb +26 -0
  44. data/vayacondios-client.gemspec +26 -0
  45. data/vayacondios-server.gemspec +30 -0
  46. metadata +216 -0
data/lib/vayacondios/server/model/document.rb
@@ -0,0 +1,25 @@
+ class Vayacondios::Document < Hash
+   attr_accessor :organization, :topic
+
+   def initialize(options = {})
+     options = options.symbolize_keys
+     @organization = options[:organization]
+     @topic = options[:topic]
+   end
+
+   def self.create
+     raise NotImplementedError.new("#{self.name} must be overridden by a subclass.")
+   end
+
+   def self.update
+     raise NotImplementedError.new("#{self.name} must be overridden by a subclass.")
+   end
+
+   def self.find
+     raise NotImplementedError.new("#{self.name} must be overridden by a subclass.")
+   end
+
+   def destroy
+     raise NotImplementedError.new("#{self.class.name} must be overridden by a subclass.")
+   end
+ end
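
The base class only fixes the constructor contract; `create`, `update`, `find`, and `destroy` must all be supplied by a subclass. A minimal sketch of a conforming subclass (the `CounterDocument` class and its in-memory store are hypothetical, for illustration only):

    require 'gorillib/hash/keys' # Hash#symbolize_keys, used by Document#initialize

    # Hypothetical subclass illustrating the contract Vayacondios::Document expects.
    class CounterDocument < Vayacondios::Document
      STORE = {} # stand-in for a real backing store such as MongoDB

      def self.create(options = {})
        new(options).tap { |doc| STORE[[doc.organization, doc.topic]] = 0 }
      end

      def self.find(options = {})
        doc = new(options)
        STORE.key?([doc.organization, doc.topic]) ? doc : nil
      end

      def destroy
        STORE.delete([organization, topic])
      end
    end

    CounterDocument.create(organization: 'example_org', topic: 'pageviews')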
data/lib/vayacondios/server/model/event_document.rb
@@ -0,0 +1,94 @@
+ require 'vayacondios/server/model/document'
+
+ # The event model
+ #
+ # Event documents are key-value pairs, represented in JSON. Each document
+ # has a primary key (_id in MongoDB) and belongs to a collection named
+ # "#{organization_name}.#{topic}.events"
+ #
+ # Note: the mongodb connection is passed in because Thread-local lookups
+ # will not work while Goliath is in a streaming context.
+
+ class Vayacondios::EventDocument < Vayacondios::Document
+   attr_reader :organization, :topic, :body
+
+   def initialize(mongodb, options = {})
+     super options
+     @mongo = mongodb
+     options = sanitize_options(options)
+
+     @body = nil
+     @id = format_id(options[:id])
+     @mongo = mongodb
+
+     collection_name = [organization, topic, 'events'].join('.')
+     @collection = @mongo.collection(collection_name)
+   end
+
+   def self.create(mongodb, document, options={})
+     self.new(mongodb, options).update(document)
+   end
+
+   def self.find(mongodb, options={})
+     self.new(mongodb, options).find
+   end
+
+   def find
+     result = @collection.find_one({_id: @id})
+     if result.present?
+       result.delete("_id")
+       result['_timestamp'] = result.delete("t")
+       result.merge! result.delete("d") if result["d"].present?
+       @body = result
+       self
+     else
+       nil
+     end
+   end
+
+   def update(document)
+     document = to_mongo(document)
+
+     @body = document[:d]
+     if @id
+       @collection.update({:_id => @id}, document, {upsert: true})
+     else
+       @collection.insert(document)
+     end
+
+     self
+   end
+
+   def destroy(document)
+     super()
+   end
+
+   protected
+
+   def sanitize_options(options)
+     options = options.symbolize_keys
+
+     topic = options[:topic].gsub(/\W+/, '_')
+     id = format_id options[:id]
+
+     options.merge!(topic: topic, id: id)
+   end
+
+   def format_id(id)
+     if id.is_a?(Hash) && id["$oid"].present?
+       id = BSON::ObjectId(id["$oid"])
+     else
+       id = id.to_s.gsub(/\W/, '')
+       id = BSON::ObjectId(id) if id.match(/^[a-f0-9]{24}$/)
+     end
+     id
+   end
+
+   def to_mongo(document)
+     {}.tap do |result|
+       result[:d] = document.dup
+       result[:_id] = @id if @id
+       result[:t] = document.delete(:_timestamp) || Time.now
+     end
+   end
+ end
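
`update` wraps the event body under a `d` key with a `t` timestamp before writing, and `find` unwraps that envelope again. A sketch of driving the model directly (illustrative only; inside the server the EM::Mongo connection comes from the Goliath environment, and the hostname and database name here are assumptions):

    require 'vayacondios-server' # loads em-mongo, em-synchrony, and the models

    EM.synchrony do
      mongo = EM::Mongo::Connection.new('localhost').db('vayacondios')

      # Writes the event wrapped as {"d" => {...}, "t" => <now>} into the
      # collection "example_org.builds.events".
      Vayacondios::EventDocument.create(
        mongo,
        { status: 'success', duration: 312 },
        organization: 'example_org', topic: 'builds'
      )

      EM.stop
    end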
data/lib/vayacondios/version.rb
@@ -0,0 +1,3 @@
+ class Vayacondios
+   VERSION = '0.0.4'
+ end
data/lib/vayacondios-client.rb
@@ -0,0 +1,20 @@
+ require 'configliere'
+ require 'multi_json'
+ require 'net/http'
+
+ require 'gorillib/builder'
+ require 'gorillib/configurable'
+ require 'gorillib/enumerable/sum'
+ require 'gorillib/exception/raisers'
+ require 'gorillib/hash/deep_compact'
+ require 'gorillib/hash/deep_merge'
+ require 'gorillib/hash/keys'
+ require 'gorillib/logger/log'
+ require 'gorillib/metaprogramming/class_attribute'
+ require 'gorillib/object/blank'
+ require 'gorillib/string/constantize'
+ require 'gorillib/string/inflections'
+
+ require 'vayacondios/client/http_client'
+ require 'vayacondios/client/notifier'
+ require 'vayacondios/client/configliere'
data/lib/vayacondios-server.rb
@@ -0,0 +1,18 @@
+ require 'goliath'
+ require 'em-mongo'
+ require 'em-synchrony/em-http'
+ require 'em-synchrony/em-mongo'
+
+ require 'gorillib/object/blank'
+ require 'gorillib/enumerable/sum'
+ require 'gorillib/hash/deep_merge'
+ require 'gorillib/hash/keys'
+ require 'gorillib/string/constantize'
+ require 'gorillib/string/inflections'
+ require 'multi_json'
+
+ require 'vayacondios/server/model/config_document'
+ require 'vayacondios/server/model/event_document'
+
+ require 'vayacondios/server/handlers/config_handler'
+ require 'vayacondios/server/handlers/event_handler'
data/scripts/hadoop_monitor/configurable.rb
@@ -0,0 +1,74 @@
+ require 'configliere'
+ require 'logger'
+
+ module Vayacondios
+
+   module Configurable
+
+     #
+     # Declare a constant CONST_NAME = :const_name
+     #
+     def self.declare_name symbol
+       const_set symbol.to_s.upcase.to_sym, symbol
+     end
+
+     declare_name :cluster_busy
+     declare_name :cluster_quiet
+     declare_name :event
+     declare_name :time
+
+     attr_reader :logger
+
+     def settings
+       init_settings
+       return @settings
+     end
+
+     def init_settings
+       return if defined? @settings
+
+       @settings = Configliere::Param.new
+       @settings.use :env_var, :config_file, :commandline
+
+       @settings.define(:sleep_seconds,
+                        default: 5,
+                        description: "Time to sleep in main loops")
+       @settings.define(:log_level,
+                        default: "info",
+                        description: "Log level. See standard Logger class")
+       @settings.define(:mongo_jobs_db,
+                        default: 'job_info',
+                        description: "Mongo database to dump hadoop job information into")
+       @settings.define(:mongo_job_logs_collection,
+                        default: 'job_logs',
+                        description: "Mongo collection to dump job logs into.")
+       @settings.define(:mongo_job_events_collection,
+                        default: 'job_events',
+                        description: "Mongo collection containing jobs events.")
+       @settings.define(:mongo_machine_stats_collection,
+                        default: 'machine_stats',
+                        description: "Mongo collection containing machine stats.")
+       @settings.define(:mongo_ip,
+                        default: nil,
+                        description: "IP address of Hadoop monitor node")
+       @settings.define(:job_logs_size,
+                        default: 10 * (1 << 20),
+                        description: "Size (in bytes) of Mongo jobs log collection")
+       @settings.define(:job_events_size,
+                        default: 10 * (1 << 20),
+                        description: "Size (in bytes) of Mongo job events collection")
+       @settings.define(:machine_stats_size,
+                        default: 100 * (1 << 20),
+                        description: "Size (in bytes) of machine stats collection")
+
+       @settings.resolve!
+
+       @logger = Logger.new(STDERR)
+       @logger.level = Logger.const_get(@settings.log_level.upcase.to_sym)
+
+       @logger.info "Settings: #{@settings}"
+
+       @settings
+     end
+   end
+ end
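
Because `init_settings` memoizes `@settings`, each monitor that mixes this in pays for one Configliere resolution pass per instance and gets a ready-made logger. A sketch of a consumer (the `MachineSnapshot` class is hypothetical):

    require_relative 'configurable'

    # Hypothetical consumer of the Configurable mixin.
    class MachineSnapshot
      include Vayacondios::Configurable

      def run
        init_settings # resolves env vars, config files, and the command line
        logger.info "Polling every #{settings.sleep_seconds}s into " \
                    "the '#{settings.mongo_jobs_db}' Mongo database"
      end
    end

    MachineSnapshot.new.run # e.g. ruby machine_snapshot.rb --sleep_seconds=10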
data/scripts/hadoop_monitor/hadoop_client.rb
@@ -0,0 +1,249 @@
+ require_relative 'configurable'
+ require_relative 'hadoopable'
+ require 'json'
+ require 'open-uri' # needed for open(conf_uri) in job_properties
+ require 'optparse'
+ require 'ostruct'
+ require 'logger'
+ require 'pp'
+ require 'gorillib/string/inflections'
+ require 'swineherd-fs'
+
+ module Vayacondios
+
+   class HadoopClient
+
+     include Configurable
+     include Hadoopable
+
+     RUNNING = JobStatus::RUNNING
+
+     def initialize
+       init_settings
+       logger.info "Connecting to job tracker."
+       @job_client = JobClient.new JobConf.new(get_hadoop_conf)
+     end
+
+     #
+     # (Equality doesn't work for jobs, so Array#- will not work as
+     # intended on arrays of jobs.)
+     #
+     def subtract jobs_array1, jobs_array2
+       jobs_array1.reject{|j| jobs_array2.map(&:job_id).map(&:to_s).index j.job_id.to_s}
+     end
+
+     #
+     # Returns the jobs with the specified state. States are specified
+     # by constants in this class.
+     #
+     def jobs_with_state state
+       jobs_by_state[state] || []
+     end
+
+     #
+     # Returns the properties of the specified job as a hash.
+     #
+     def job_properties job
+       host_port = job.get_tracking_url[/^(http:\/\/)?[^\/]*/]
+       job_id = job.get_id.to_s
+       conf_uri = "#{host_port}/logs/#{job_id}_conf.xml"
+       properties = parse_properties(open conf_uri)
+       recordize_properties(properties, job_id.to_s)
+     end
+
+     #
+     # Returns the stats for the current job as a hash.
+     #
+     def job_stats job, finish_time
+       parse_job job.get_id.to_s, finish_time
+     end
+
+     private
+
+     #
+     # Returns a hash JobStatus::<SOME_STATE> => <array of jobs>
+     #
+     def jobs_by_state
+       job_statuses_by_state = @job_client.get_all_jobs.group_by(&:get_run_state)
+       Hash[job_statuses_by_state.map{|state, job_statuses| [state, jobs_from_statuses(job_statuses)]}]
+     end
+
+     #
+     # Some hadoop stuff returns JobStatus objects. This converts them
+     # to RunningJob objects.
+     #
+     def jobs_from_statuses job_statuses
+       job_statuses.map{|job_status| @job_client.get_job job_status.get_job_id}
+     end
+
+     #
+     # Takes an org.apache.hadoop.mapred.RunningJob and returns a hash
+     # object that represents it.
+     #
+     def parse_job job_id, finish_time
+       job = @job_client.get_job job_id
+       job_status = @job_client.get_all_jobs.select{|j| j.get_job_id.to_s == job_id.to_s}.first
+       finished_status = [:FAILED, :KILLED, :COMPLETE]
+       failed_status = [:FAILED]
+
+       job_data = {
+
+         _id: job_id.to_s,
+
+         # not sure what is what. I'm guessing
+         # JobStatus.getStartTime corresponds to the
+         # launch time in the logs, but I'm going to
+         # go ahead and use it twice here.
+
+         launch_time: Time.at(job_status.get_start_time / 1000),
+         submit_time: Time.at(job_status.get_start_time / 1000),
+
+         finish_time: finish_time,
+
+         job_status: case job_status.get_run_state
+                     when JobStatus::FAILED then :FAILED
+                     when JobStatus::KILLED then :KILLED
+                     when JobStatus::PREP then :PREP
+                     when JobStatus::RUNNING then :RUNNING
+                     when JobStatus::SUCCEEDED then :SUCCEEDED
+                     end,
+
+         finished_maps: num_tasks(job_id, :map, finished_status),
+         finished_reduces: num_tasks(job_id, :reduce, finished_status),
+         failed_maps: num_tasks(job_id, :map, failed_status),
+         failed_reduces: num_tasks(job_id, :reduce, failed_status),
+
+         counters: parse_counters(job.get_counters),
+         type: :job,
+
+       }
+
+       job_progress = {
+
+         parent_id: job.job_id,
+         type: :job_progress,
+         # report time in milliseconds for consistency
+         time: Time.now,
+         cleanup_progress: job.cleanup_progress,
+         map_progress: job.map_progress,
+         reduce_progress: job.reduce_progress,
+         setup_progress: job.setup_progress,
+
+       }
+
+       map_task_data = @job_client.get_map_task_reports job_id
+       reduce_task_data = @job_client.get_reduce_task_reports job_id
+
+       m_reports, m_progress_reports, r_reports, r_progress_reports =
+         [
+           map_task_data   .map{|task| parse_task          task, "MAP",    job_id },
+           map_task_data   .map{|task| parse_task_progress task, "MAP"            },
+           reduce_task_data.map{|task| parse_task          task, "REDUCE", job_id },
+           reduce_task_data.map{|task| parse_task_progress task, "REDUCE"         },
+         ]
+
+       [job_data, job_progress] + m_reports + r_reports + m_progress_reports + r_progress_reports
+     end
+
+     def recordize_properties properties, job_id
+       {
+         parent_id: job_id,
+         type: :conf,
+         properties: properties,
+         _id: [job_id, "_properties"].join
+       }
+     end
+
+     #
+     # Return a hash containing a name => value hash representing the
+     # config for a hadoop job.
+     #
+     def parse_properties conf
+       properties = {}
+       conf.read.scan /[^\n]*\n/ do |line,|
+         m = /<name>([^<]+)<\/name><value>([^<]+)<\/value>/.match line
+         if m and m[1] !~ /fs\.s3n?\.awsSecretAccessKey/ then
+           properties[parse_key m[1]] = parse_atom m[2]
+         end
+       end
+       properties
+     end
+
+     #
+     # Takes an org.apache.hadoop.mapred.TaskReport and returns a Hash
+     # object that represents it.
+     #
+     def parse_task task_report, task_type, parent_job_id
+       {
+         _id: task_report.get_task_id.to_s,
+         parent_id: parent_job_id,
+         task_type: task_type,
+         task_status: task_report.get_current_status.to_s,
+         start_time: Time.at(task_report.get_start_time / 1000),
+         finish_time: Time.at(task_report.get_finish_time / 1000),
+         counters: parse_counters(task_report.get_counters),
+         type: :task,
+         diagnostics: task_report.get_diagnostics.map(&:to_s),
+         running_attempts: task_report.get_running_task_attempts.map(&:to_s),
+       }
+     end
+
+     def parse_task_progress task_report, task_type
+       {
+         parent_id: task_report.get_task_id.to_s,
+         time: Time.now,
+         type: :task_progress,
+         progress: task_report.get_progress,
+       }
+     end
+
+     #
+     # Takes a class of type org.apache.hadoop.mapred.Counters and
+     # returns a Hash object that represents this counter.
+     #
+     def parse_counters counters
+       Hash[counters.map do |group|
+         [parse_key(group.get_name), Hash[group.map do |counter|
+           [parse_key(counter.get_name), counter.get_counter]
+         end]]
+       end]
+     end
+
+     #
+     # Parse a key in a log entry. Log entries consist of a type, which I
+     # consider a key, and a list of key=value pairs.
+     #
+     def parse_key key
+       return (parse_atom key).underscore.gsub ".", "_"
+     end
+
+     #
+     # Parse a value in a Hadoop log.
+     #
+     def parse_atom a
+       if /[0-9][ \r\t\n]*\/[ \r\t\n]*[0-9]+/.match a
+         # "0/6" -> [0,6]
+         return a.split("/").collect{|s| s.to_i}
+       elsif /^[0-9,]*$/.match a
+         # "224" -> 224
+         return a.gsub(',', '').to_i
+       else
+         # \. -> .
+         return a.gsub(/([^\\])\\(.)/, '\1\2')
+       end
+     end
+
+     #
+     # Returns the number of tasks of the specified TIPStatus from the
+     # specified job_client of the specified type (map or reduce)
+     #
+     def num_tasks job_id, map_or_reduce, statuses
+       method_name = "get_#{map_or_reduce}_task_reports".to_sym
+       @job_client.send(method_name, job_id).select do |report|
+         tip_statuses = statuses.map do |status|
+           TIPStatus.const_get status
+         end
+         tip_statuses.index report.get_current_status
+       end.size
+     end
+   end
+ end
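
The hadoop_monitor drives this class by diffing successive polls of the job tracker, which is why `subtract` exists. A sketch of that polling pattern (must run under JRuby with the Hadoop jars on the classpath; the loop itself is illustrative, the production loop lives in hadoop_monitor.rb):

    require_relative 'hadoop_client'

    client = Vayacondios::HadoopClient.new
    known  = []

    loop do
      running = client.jobs_with_state(Vayacondios::HadoopClient::RUNNING)

      # Jobs that appeared since the last poll; Array#- is unusable on
      # RunningJob objects, hence HadoopClient#subtract.
      client.subtract(running, known).each do |job|
        pp client.job_properties(job)
      end

      known = running
      sleep client.settings.sleep_seconds
    end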