bosh-monitor 1.5.0.pre.1113

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/README +80 -0
  2. data/bin/bosh-monitor +30 -0
  3. data/bin/bosh-monitor-console +51 -0
  4. data/bin/listener +58 -0
  5. data/lib/bosh/monitor.rb +72 -0
  6. data/lib/bosh/monitor/agent.rb +51 -0
  7. data/lib/bosh/monitor/agent_manager.rb +295 -0
  8. data/lib/bosh/monitor/api_controller.rb +18 -0
  9. data/lib/bosh/monitor/config.rb +71 -0
  10. data/lib/bosh/monitor/core_ext.rb +8 -0
  11. data/lib/bosh/monitor/director.rb +76 -0
  12. data/lib/bosh/monitor/director_monitor.rb +33 -0
  13. data/lib/bosh/monitor/errors.rb +19 -0
  14. data/lib/bosh/monitor/event_processor.rb +109 -0
  15. data/lib/bosh/monitor/events/alert.rb +92 -0
  16. data/lib/bosh/monitor/events/base.rb +70 -0
  17. data/lib/bosh/monitor/events/heartbeat.rb +139 -0
  18. data/lib/bosh/monitor/metric.rb +16 -0
  19. data/lib/bosh/monitor/plugins/base.rb +27 -0
  20. data/lib/bosh/monitor/plugins/cloud_watch.rb +56 -0
  21. data/lib/bosh/monitor/plugins/datadog.rb +78 -0
  22. data/lib/bosh/monitor/plugins/dummy.rb +20 -0
  23. data/lib/bosh/monitor/plugins/email.rb +135 -0
  24. data/lib/bosh/monitor/plugins/http_request_helper.rb +25 -0
  25. data/lib/bosh/monitor/plugins/logger.rb +13 -0
  26. data/lib/bosh/monitor/plugins/nats.rb +43 -0
  27. data/lib/bosh/monitor/plugins/pagerduty.rb +48 -0
  28. data/lib/bosh/monitor/plugins/paging_datadog_client.rb +24 -0
  29. data/lib/bosh/monitor/plugins/resurrector.rb +82 -0
  30. data/lib/bosh/monitor/plugins/resurrector_helper.rb +84 -0
  31. data/lib/bosh/monitor/plugins/tsdb.rb +43 -0
  32. data/lib/bosh/monitor/plugins/varz.rb +17 -0
  33. data/lib/bosh/monitor/protocols/tsdb.rb +68 -0
  34. data/lib/bosh/monitor/runner.rb +162 -0
  35. data/lib/bosh/monitor/version.rb +5 -0
  36. data/lib/bosh/monitor/yaml_helper.rb +18 -0
  37. metadata +246 -0
@@ -0,0 +1,162 @@
module Bosh::Monitor
  # Entry point of the Health Monitor process: loads the configuration,
  # connects to the message bus, schedules the periodic director polling and
  # agent analysis, and starts the HTTP server, all inside one EventMachine
  # reactor.
  class Runner
    include YamlHelper

    # Builds a runner from +config_file+ and starts it.
    #
    # @param config_file [String] path to the YAML configuration file
    def self.run(config_file)
      new(config_file).run
    end

    # Loads the configuration into Bhm and caches the collaborators that the
    # rest of the class uses.
    #
    # @param config_file [String] path to the YAML configuration file
    # @raise [ConfigError] propagated from load_yaml_file
    def initialize(config_file)
      Bhm.config = load_yaml_file(config_file)

      @logger = Bhm.logger
      @director = Bhm.director
      @intervals = Bhm.intervals
      @mbus = Bhm.mbus
      @agent_manager = Bhm.agent_manager

      # Set by handle_em_error; consulted by the NATS error callback so that
      # errors reported while we are already going down are ignored.
      @shutting_down = false
    end

    # Starts the EventMachine reactor and wires everything up inside it.
    # Any error that reaches the reactor is routed to handle_em_error,
    # which stops the monitor.
    def run
      @logger.info("HealthMonitor starting...")
      EM.kqueue if EM.kqueue?
      EM.epoll if EM.epoll?

      EM.error_handler { |e| handle_em_error(e) }

      EM.run do
        connect_to_mbus
        @director_monitor = DirectorMonitor.new(Bhm)
        @director_monitor.subscribe
        @agent_manager.setup_events
        setup_timers
        start_http_server
        @logger.info "BOSH HealthMonitor #{Bhm::VERSION} is running..."
      end
    end

    # Stops the HTTP server (if it was started) and the reactor.
    #
    # @param soft [Boolean] when true the process is left running; otherwise
    #   the process exits with status 0 after the reactor is stopped
    def stop(soft=false)
      @logger.info("HealthMonitor shutting down...")
      @http_server.stop! if @http_server
      EM.stop
      exit(0) unless soft
    end

    # Schedules the recurring work on the next reactor tick: an immediate
    # director poll, periodic director polls and stats logging, and, once
    # the poll grace period has elapsed, periodic agent analysis.
    def setup_timers
      EM.next_tick do
        poll_director
        EM.add_periodic_timer(@intervals.poll_director) { poll_director }
        EM.add_periodic_timer(@intervals.log_stats) { log_stats }

        # Agent analysis only starts after the grace period.
        EM.add_timer(@intervals.poll_grace_period) do
          EM.add_periodic_timer(@intervals.analyze_agents) { analyze_agents }
        end
      end
    end

    # Logs how many deployments and agents are managed and how many agent
    # heartbeats have been received.
    def log_stats
      n_deployments = pluralize(@agent_manager.deployments_count, "deployment")
      n_agents = pluralize(@agent_manager.agents_count, "agent")
      @logger.info("Managing #{n_deployments}, #{n_agents}")
      @logger.info("Agent heartbeats received = %s" % [ @agent_manager.heartbeats_received ])
    end

    # Installs the NATS error callback and opens the NATS connection, storing
    # the client in Bhm.nats.
    def connect_to_mbus
      NATS.on_error do |e|
        unless @shutting_down
          if e.kind_of?(NATS::ConnectError)
            # Connection failures stop the monitor.
            handle_em_error(e)
          else
            log_exception(e)
          end
        end
      end

      nats_client_options = {
        :uri => @mbus.endpoint,
        :user => @mbus.user,
        :pass => @mbus.password,
        :autostart => false
      }

      Bhm.nats = NATS.connect(nats_client_options) do
        @logger.info("Connected to NATS at `#{@mbus.endpoint}'")
      end
    end

    # Starts a Thin server on all interfaces at Bhm.http_port, serving
    # Bhm::ApiController behind HTTP basic authentication.
    def start_http_server
      @logger.info "HTTP server is starting on port #{Bhm.http_port}..."
      @http_server = Thin::Server.new("0.0.0.0", Bhm.http_port, :signals => false) do
        Thin::Logging.silent = true
        use Rack::Auth::Basic do |user, password|
          [ user, password ] == [ Bhm.http_user, Bhm.http_password ]
        end
        map "/" do
          run Bhm::ApiController.new
        end
      end
      @http_server.start!
    end

    # Fetches deployments from the director inside a Fiber and publishes the
    # current deployment and agent counts as varz.
    def poll_director
      @logger.debug "Getting deployments from director..."
      Fiber.new { fetch_deployments }.resume
      # NOTE(review): the counts are published right after #resume returns;
      # if the director client yields the fiber while waiting for a response
      # these values reflect the previous poll -- confirm against Director.
      Bhm.set_varz("deployments_count", @agent_manager.deployments_count)
      Bhm.set_varz("agents_count", @agent_manager.agents_count)
    end

    # Delegates agent analysis to the agent manager.
    def analyze_agents
      # N.B. Yes, this will block the event loop,
      # possibly consider deferring
      @agent_manager.analyze_agents
    end

    private

    # This is a somewhat controversial approach: instead of swallowing some
    # exceptions and letting the event loop run further we force our server to
    # stop. The rationale behind that is to avoid the situation when a
    # swallowed exception actually breaks things:
    # 1. Periodic timer will get canceled unless we manually reschedule it
    #    in a rescue clause even if we swallow the exception.
    # 2. If we want to perform an operation on next tick AND schedule some
    #    operation to be run periodically AND there is an exception swallowed
    #    somewhere during the event processing, then on the next tick we don't
    #    really process events that follow the buggy one.
    # These things can be pretty painful for HM as we might think it runs fine
    # when it actually just swallows some exception and effectively does nothing.
    # We might revisit that later
    def handle_em_error(e)
      @shutting_down = true
      log_exception(e, :fatal)
      stop
    end

    # Logs the exception message and, when available, its backtrace.
    #
    # @param e [Exception, Object] the error to log
    # @param level [Symbol] :fatal is honoured; anything else is logged as :error
    def log_exception(e, level = :error)
      level = :error unless level == :fatal
      @logger.send(level, e.to_s)
      if e.respond_to?(:backtrace) && e.backtrace.respond_to?(:join)
        @logger.send(level, e.backtrace.join("\n"))
      end
    end

    # Synchronises the agent manager with the deployments reported by the
    # director, then with the VMs of each deployment. Director errors are
    # logged and abort the current poll without stopping the monitor.
    def fetch_deployments
      deployments = @director.get_deployments

      @agent_manager.sync_deployments(deployments)

      deployments.each do |deployment|
        deployment_name = deployment["name"]

        @logger.info "Found deployment `#{deployment_name}'"

        # Log before the request so the message is present even when the
        # director call fails.
        @logger.debug "Fetching VMs information for `#{deployment_name}'..."
        vms = @director.get_deployment_vms(deployment_name)

        @agent_manager.sync_agents(deployment_name, vms)
      end

    rescue Bhm::DirectorError => e
      log_exception(e)
    end

  end
end
@@ -0,0 +1,5 @@
module Bosh
  module Monitor
    # Gem version string. Frozen so the shared constant cannot be mutated
    # in place by accident.
    VERSION = '1.5.0.pre.1113'.freeze
  end
end
@@ -0,0 +1,18 @@
module Bosh
  module Monitor
    # Mixin for loading YAML files with basic validation of the result.
    module YamlHelper

      # Loads and parses the YAML file at +path+.
      #
      # Only SystemCallError is translated into ConfigError; any other error
      # raised by Psych.load_file propagates unchanged.
      #
      # @param path [String] path of the YAML file to load
      # @param expected_type [Class, nil] class the parsed document must be an
      #   instance of; pass nil or false to skip the check
      # @return [Object] the parsed document
      # @raise [ConfigError] if the file does not exist, the parsed document is
      #   not an +expected_type+, or a SystemCallError occurs while loading
      def load_yaml_file(path, expected_type = Hash)
        # File.exist? replaces the deprecated File.exists? alias, which is no
        # longer available on Ruby 3.2 and later.
        raise(ConfigError, "Cannot find file `#{path}'") unless File.exist?(path)
        yaml = Psych.load_file(path)

        if expected_type && !yaml.is_a?(expected_type)
          raise ConfigError, "Incorrect file format in `#{path}', #{expected_type} expected"
        end

        yaml
      rescue SystemCallError => e
        raise ConfigError, "Cannot load YAML file at `#{path}': #{e}"
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,246 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bosh-monitor
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0.pre.1113
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - VMware
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: eventmachine
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.12.10
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.12.10
30
+ - !ruby/object:Gem::Dependency
31
+ name: logging
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.5.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.5.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: em-http-request
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.3.0
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.0
62
+ - !ruby/object:Gem::Dependency
63
+ name: nats
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 0.4.28
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.4.28
78
+ - !ruby/object:Gem::Dependency
79
+ name: yajl-ruby
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 1.1.0
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 1.1.0
94
+ - !ruby/object:Gem::Dependency
95
+ name: thin
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 1.5.0
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 1.5.0
110
+ - !ruby/object:Gem::Dependency
111
+ name: sinatra
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 1.4.2
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 1.4.2
126
+ - !ruby/object:Gem::Dependency
127
+ name: aws-sdk
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - '='
132
+ - !ruby/object:Gem::Version
133
+ version: 1.8.5
134
+ type: :runtime
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - '='
140
+ - !ruby/object:Gem::Version
141
+ version: 1.8.5
142
+ - !ruby/object:Gem::Dependency
143
+ name: dogapi
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 1.6.0
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 1.6.0
158
+ - !ruby/object:Gem::Dependency
159
+ name: uuidtools
160
+ requirement: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ~>
164
+ - !ruby/object:Gem::Version
165
+ version: '2.1'
166
+ type: :runtime
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ~>
172
+ - !ruby/object:Gem::Version
173
+ version: '2.1'
174
+ description: ! 'BOSH Health Monitor
175
+
176
+ cfd471'
177
+ email: support@cloudfoundry.com
178
+ executables:
179
+ - bosh-monitor-console
180
+ - bosh-monitor
181
+ - listener
182
+ extensions: []
183
+ extra_rdoc_files: []
184
+ files:
185
+ - lib/bosh/monitor.rb
186
+ - lib/bosh/monitor/agent.rb
187
+ - lib/bosh/monitor/agent_manager.rb
188
+ - lib/bosh/monitor/api_controller.rb
189
+ - lib/bosh/monitor/config.rb
190
+ - lib/bosh/monitor/core_ext.rb
191
+ - lib/bosh/monitor/director.rb
192
+ - lib/bosh/monitor/director_monitor.rb
193
+ - lib/bosh/monitor/errors.rb
194
+ - lib/bosh/monitor/event_processor.rb
195
+ - lib/bosh/monitor/events/alert.rb
196
+ - lib/bosh/monitor/events/base.rb
197
+ - lib/bosh/monitor/events/heartbeat.rb
198
+ - lib/bosh/monitor/metric.rb
199
+ - lib/bosh/monitor/plugins/base.rb
200
+ - lib/bosh/monitor/plugins/cloud_watch.rb
201
+ - lib/bosh/monitor/plugins/datadog.rb
202
+ - lib/bosh/monitor/plugins/dummy.rb
203
+ - lib/bosh/monitor/plugins/email.rb
204
+ - lib/bosh/monitor/plugins/http_request_helper.rb
205
+ - lib/bosh/monitor/plugins/logger.rb
206
+ - lib/bosh/monitor/plugins/nats.rb
207
+ - lib/bosh/monitor/plugins/pagerduty.rb
208
+ - lib/bosh/monitor/plugins/paging_datadog_client.rb
209
+ - lib/bosh/monitor/plugins/resurrector.rb
210
+ - lib/bosh/monitor/plugins/resurrector_helper.rb
211
+ - lib/bosh/monitor/plugins/tsdb.rb
212
+ - lib/bosh/monitor/plugins/varz.rb
213
+ - lib/bosh/monitor/protocols/tsdb.rb
214
+ - lib/bosh/monitor/runner.rb
215
+ - lib/bosh/monitor/version.rb
216
+ - lib/bosh/monitor/yaml_helper.rb
217
+ - README
218
+ - bin/bosh-monitor-console
219
+ - bin/bosh-monitor
220
+ - bin/listener
221
+ homepage: https://github.com/cloudfoundry/bosh
222
+ licenses:
223
+ - Apache 2.0
224
+ post_install_message:
225
+ rdoc_options: []
226
+ require_paths:
227
+ - lib
228
+ required_ruby_version: !ruby/object:Gem::Requirement
229
+ none: false
230
+ requirements:
231
+ - - ! '>='
232
+ - !ruby/object:Gem::Version
233
+ version: 1.9.3
234
+ required_rubygems_version: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>'
238
+ - !ruby/object:Gem::Version
239
+ version: 1.3.1
240
+ requirements: []
241
+ rubyforge_project:
242
+ rubygems_version: 1.8.23
243
+ signing_key:
244
+ specification_version: 3
245
+ summary: BOSH Health Monitor
246
+ test_files: []