bosh-monitor 1.5.0.pre.1113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/README +80 -0
  2. data/bin/bosh-monitor +30 -0
  3. data/bin/bosh-monitor-console +51 -0
  4. data/bin/listener +58 -0
  5. data/lib/bosh/monitor.rb +72 -0
  6. data/lib/bosh/monitor/agent.rb +51 -0
  7. data/lib/bosh/monitor/agent_manager.rb +295 -0
  8. data/lib/bosh/monitor/api_controller.rb +18 -0
  9. data/lib/bosh/monitor/config.rb +71 -0
  10. data/lib/bosh/monitor/core_ext.rb +8 -0
  11. data/lib/bosh/monitor/director.rb +76 -0
  12. data/lib/bosh/monitor/director_monitor.rb +33 -0
  13. data/lib/bosh/monitor/errors.rb +19 -0
  14. data/lib/bosh/monitor/event_processor.rb +109 -0
  15. data/lib/bosh/monitor/events/alert.rb +92 -0
  16. data/lib/bosh/monitor/events/base.rb +70 -0
  17. data/lib/bosh/monitor/events/heartbeat.rb +139 -0
  18. data/lib/bosh/monitor/metric.rb +16 -0
  19. data/lib/bosh/monitor/plugins/base.rb +27 -0
  20. data/lib/bosh/monitor/plugins/cloud_watch.rb +56 -0
  21. data/lib/bosh/monitor/plugins/datadog.rb +78 -0
  22. data/lib/bosh/monitor/plugins/dummy.rb +20 -0
  23. data/lib/bosh/monitor/plugins/email.rb +135 -0
  24. data/lib/bosh/monitor/plugins/http_request_helper.rb +25 -0
  25. data/lib/bosh/monitor/plugins/logger.rb +13 -0
  26. data/lib/bosh/monitor/plugins/nats.rb +43 -0
  27. data/lib/bosh/monitor/plugins/pagerduty.rb +48 -0
  28. data/lib/bosh/monitor/plugins/paging_datadog_client.rb +24 -0
  29. data/lib/bosh/monitor/plugins/resurrector.rb +82 -0
  30. data/lib/bosh/monitor/plugins/resurrector_helper.rb +84 -0
  31. data/lib/bosh/monitor/plugins/tsdb.rb +43 -0
  32. data/lib/bosh/monitor/plugins/varz.rb +17 -0
  33. data/lib/bosh/monitor/protocols/tsdb.rb +68 -0
  34. data/lib/bosh/monitor/runner.rb +162 -0
  35. data/lib/bosh/monitor/version.rb +5 -0
  36. data/lib/bosh/monitor/yaml_helper.rb +18 -0
  37. metadata +246 -0
@@ -0,0 +1,162 @@
1
module Bosh
  module Monitor
    # Entry point of the Health Monitor process. It loads the configuration,
    # then, inside the EventMachine reactor, connects to the message bus,
    # subscribes the director monitor, wires up agent events, schedules the
    # periodic timers and starts the HTTP server.
    class Runner
      include YamlHelper

      # Builds a runner from +config_file+ and starts it.
      #
      # @param config_file [String] path to the YAML configuration file
      def self.run(config_file)
        new(config_file).run
      end

      # Loads the configuration into Bhm and caches the collaborators that
      # Bhm exposes afterwards.
      #
      # @param config_file [String] path to the YAML configuration file
      # @raise [ConfigError] propagated from load_yaml_file
      def initialize(config_file)
        Bhm.config = load_yaml_file(config_file)

        @logger        = Bhm.logger
        @director      = Bhm.director
        @intervals     = Bhm.intervals
        @mbus          = Bhm.mbus
        @agent_manager = Bhm.agent_manager
      end

      # Starts the EventMachine reactor and everything that runs inside it.
      # Does not return until the reactor is stopped.
      def run
        @logger.info("HealthMonitor starting...")

        # Switch to the platform's scalable I/O backend when EM reports one.
        EM.kqueue if EM.kqueue?
        EM.epoll if EM.epoll?

        # Any error reaching the reactor is treated as fatal (see handle_em_error).
        EM.error_handler { |e| handle_em_error(e) }

        EM.run do
          connect_to_mbus
          @director_monitor = DirectorMonitor.new(Bhm)
          @director_monitor.subscribe
          @agent_manager.setup_events
          setup_timers
          start_http_server
          @logger.info "BOSH HealthMonitor #{Bhm::VERSION} is running..."
        end
      end

      # Stops the HTTP server (if it was started) and the reactor.
      #
      # @param soft [Boolean] when false (the default) the process also
      #   exits with status 0; when true only the reactor is stopped
      def stop(soft = false)
        @logger.info("HealthMonitor shutting down...")
        @http_server.stop! if @http_server
        EM.stop
        exit(0) unless soft
      end

      # Schedules the periodic work on the next reactor tick: an immediate
      # director poll, recurring director polls and stats logging, and, once
      # the poll grace period has elapsed, recurring agent analysis.
      def setup_timers
        EM.next_tick do
          poll_director
          EM.add_periodic_timer(@intervals.poll_director) { poll_director }
          EM.add_periodic_timer(@intervals.log_stats) { log_stats }

          # Agent analysis only starts after the grace period.
          EM.add_timer(@intervals.poll_grace_period) do
            EM.add_periodic_timer(@intervals.analyze_agents) { analyze_agents }
          end
        end
      end

      # Logs how many deployments and agents are managed and how many agent
      # heartbeats have been received.
      def log_stats
        n_deployments = pluralize(@agent_manager.deployments_count, "deployment")
        n_agents = pluralize(@agent_manager.agents_count, "agent")
        @logger.info("Managing #{n_deployments}, #{n_agents}")
        @logger.info("Agent heartbeats received = %s" % [ @agent_manager.heartbeats_received ])
      end

      # Installs the NATS error handler and opens the message bus connection,
      # storing the client in Bhm.nats.
      def connect_to_mbus
        NATS.on_error do |e|
          # Errors raised while we are already shutting down are ignored.
          unless @shutting_down
            if e.kind_of?(NATS::ConnectError)
              # Failing to connect is fatal: log and stop the monitor.
              handle_em_error(e)
            else
              log_exception(e)
            end
          end
        end

        nats_client_options = {
          :uri => @mbus.endpoint,
          :user => @mbus.user,
          :pass => @mbus.password,
          :autostart => false
        }

        Bhm.nats = NATS.connect(nats_client_options) do
          @logger.info("Connected to NATS at `#{@mbus.endpoint}'")
        end
      end

      # Starts the Thin HTTP server on all interfaces at Bhm.http_port,
      # serving ApiController behind HTTP basic authentication.
      def start_http_server
        @logger.info "HTTP server is starting on port #{Bhm.http_port}..."
        @http_server = Thin::Server.new("0.0.0.0", Bhm.http_port, :signals => false) do
          Thin::Logging.silent = true
          # NOTE(review): credentials are compared with ==, which is not a
          # constant-time comparison - consider a secure compare if the
          # bundled Rack version provides one.
          use Rack::Auth::Basic do |user, password|
            [ user, password ] == [ Bhm.http_user, Bhm.http_password ]
          end
          map "/" do
            run Bhm::ApiController.new
          end
        end
        @http_server.start!
      end

      # Syncs deployments and agents from the director inside a Fiber and
      # then publishes the resulting counts as varz.
      def poll_director
        @logger.debug "Getting deployments from director..."
        Fiber.new do
          fetch_deployments
          # Publish the counts only once the sync has run: if the director
          # client yields the fiber while waiting on I/O (presumably why a
          # Fiber is used here - confirm against Director), setting them
          # right after #resume would report the previous poll's numbers.
          Bhm.set_varz("deployments_count", @agent_manager.deployments_count)
          Bhm.set_varz("agents_count", @agent_manager.agents_count)
        end.resume
      end

      # Delegates agent analysis to the agent manager.
      def analyze_agents
        # N.B. Yes, this will block event loop,
        # possibly consider deferring
        @agent_manager.analyze_agents
      end

      private

      # This is somewhat controversial approach: instead of swallowing some exceptions
      # and letting event loop run further we force our server to stop. The rationale
      # behind that is to avoid the situation when swallowed exception actually breaks
      # things:
      # 1. Periodic timer will get canceled unless we manually reschedule it
      #    in a rescue clause even if we swallow the exception.
      # 2. If we want to perform an operation on next tick AND schedule some operation
      #    to be run periodically AND there is an exception swallowed somewhere during the
      #    event processing, then on the next tick we don't really process events that follow the buggy one.
      # These things can be pretty painful for HM as we might think it runs fine
      # when it actually just swallows some exception and effectively does nothing.
      # We might revisit that later
      def handle_em_error(e)
        # Flag the shutdown first so the NATS error handler stays quiet.
        @shutting_down = true
        log_exception(e, :fatal)
        stop
      end

      # Logs the exception message and, when available, its backtrace.
      #
      # @param e [Exception, Object] the error to log
      # @param level [Symbol] :fatal or :error; anything else is logged as :error
      def log_exception(e, level = :error)
        level = :error unless level == :fatal
        @logger.send(level, e.to_s)
        if e.respond_to?(:backtrace) && e.backtrace.respond_to?(:join)
          @logger.send(level, e.backtrace.join("\n"))
        end
      end

      # Fetches the deployment list from the director, syncs it into the
      # agent manager, then fetches and syncs the VMs of each deployment.
      # A Bhm::DirectorError aborts the current pass and is logged.
      def fetch_deployments
        deployments = @director.get_deployments

        @agent_manager.sync_deployments(deployments)

        deployments.each do |deployment|
          deployment_name = deployment["name"]

          @logger.info "Found deployment `#{deployment_name}'"

          # Log before the request so the message matches what is happening.
          @logger.debug "Fetching VMs information for `#{deployment_name}'..."
          vms = @director.get_deployment_vms(deployment_name)

          @agent_manager.sync_agents(deployment_name, vms)
        end

      rescue Bhm::DirectorError => e
        log_exception(e)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Bosh
  module Monitor
    # Version of the bosh-monitor gem. Frozen so the shared constant cannot
    # be mutated in place by accident.
    VERSION = '1.5.0.pre.1113'.freeze
  end
end
@@ -0,0 +1,18 @@
1
module Bosh
  module Monitor
    # Mixin that loads a YAML file and validates the type of the parsed
    # document.
    module YamlHelper

      # Parses the YAML file at +path+ and returns the resulting object.
      #
      # @param path [String] location of the YAML file
      # @param expected_type [Class, nil] class the parsed document must be
      #   an instance of; pass nil (or false) to skip the type check
      # @return [Object] the parsed YAML document
      # @raise [ConfigError] if the file does not exist, if reading it fails
      #   with a SystemCallError, or if the parsed document is not an
      #   instance of +expected_type+
      def load_yaml_file(path, expected_type = Hash)
        # File.exist? rather than File.exists?: the latter is deprecated and
        # no longer available on Ruby 3.2+.
        raise(ConfigError, "Cannot find file `#{path}'") unless File.exist?(path)
        yaml = Psych.load_file(path)

        if expected_type && !yaml.is_a?(expected_type)
          raise ConfigError, "Incorrect file format in `#{path}', #{expected_type} expected"
        end

        yaml
      rescue SystemCallError => e
        # Covers OS-level failures such as permission errors or the file
        # vanishing between the existence check and the read.
        raise ConfigError, "Cannot load YAML file at `#{path}': #{e}"
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,246 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bosh-monitor
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0.pre.1113
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - VMware
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: eventmachine
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.12.10
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.12.10
30
+ - !ruby/object:Gem::Dependency
31
+ name: logging
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.5.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.5.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: em-http-request
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.3.0
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.0
62
+ - !ruby/object:Gem::Dependency
63
+ name: nats
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 0.4.28
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.4.28
78
+ - !ruby/object:Gem::Dependency
79
+ name: yajl-ruby
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 1.1.0
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 1.1.0
94
+ - !ruby/object:Gem::Dependency
95
+ name: thin
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 1.5.0
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 1.5.0
110
+ - !ruby/object:Gem::Dependency
111
+ name: sinatra
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 1.4.2
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 1.4.2
126
+ - !ruby/object:Gem::Dependency
127
+ name: aws-sdk
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - '='
132
+ - !ruby/object:Gem::Version
133
+ version: 1.8.5
134
+ type: :runtime
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - '='
140
+ - !ruby/object:Gem::Version
141
+ version: 1.8.5
142
+ - !ruby/object:Gem::Dependency
143
+ name: dogapi
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 1.6.0
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 1.6.0
158
+ - !ruby/object:Gem::Dependency
159
+ name: uuidtools
160
+ requirement: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ~>
164
+ - !ruby/object:Gem::Version
165
+ version: '2.1'
166
+ type: :runtime
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ~>
172
+ - !ruby/object:Gem::Version
173
+ version: '2.1'
174
+ description: ! 'BOSH Health Monitor
175
+
176
+ cfd471'
177
+ email: support@cloudfoundry.com
178
+ executables:
179
+ - bosh-monitor-console
180
+ - bosh-monitor
181
+ - listener
182
+ extensions: []
183
+ extra_rdoc_files: []
184
+ files:
185
+ - lib/bosh/monitor.rb
186
+ - lib/bosh/monitor/agent.rb
187
+ - lib/bosh/monitor/agent_manager.rb
188
+ - lib/bosh/monitor/api_controller.rb
189
+ - lib/bosh/monitor/config.rb
190
+ - lib/bosh/monitor/core_ext.rb
191
+ - lib/bosh/monitor/director.rb
192
+ - lib/bosh/monitor/director_monitor.rb
193
+ - lib/bosh/monitor/errors.rb
194
+ - lib/bosh/monitor/event_processor.rb
195
+ - lib/bosh/monitor/events/alert.rb
196
+ - lib/bosh/monitor/events/base.rb
197
+ - lib/bosh/monitor/events/heartbeat.rb
198
+ - lib/bosh/monitor/metric.rb
199
+ - lib/bosh/monitor/plugins/base.rb
200
+ - lib/bosh/monitor/plugins/cloud_watch.rb
201
+ - lib/bosh/monitor/plugins/datadog.rb
202
+ - lib/bosh/monitor/plugins/dummy.rb
203
+ - lib/bosh/monitor/plugins/email.rb
204
+ - lib/bosh/monitor/plugins/http_request_helper.rb
205
+ - lib/bosh/monitor/plugins/logger.rb
206
+ - lib/bosh/monitor/plugins/nats.rb
207
+ - lib/bosh/monitor/plugins/pagerduty.rb
208
+ - lib/bosh/monitor/plugins/paging_datadog_client.rb
209
+ - lib/bosh/monitor/plugins/resurrector.rb
210
+ - lib/bosh/monitor/plugins/resurrector_helper.rb
211
+ - lib/bosh/monitor/plugins/tsdb.rb
212
+ - lib/bosh/monitor/plugins/varz.rb
213
+ - lib/bosh/monitor/protocols/tsdb.rb
214
+ - lib/bosh/monitor/runner.rb
215
+ - lib/bosh/monitor/version.rb
216
+ - lib/bosh/monitor/yaml_helper.rb
217
+ - README
218
+ - bin/bosh-monitor-console
219
+ - bin/bosh-monitor
220
+ - bin/listener
221
+ homepage: https://github.com/cloudfoundry/bosh
222
+ licenses:
223
+ - Apache 2.0
224
+ post_install_message:
225
+ rdoc_options: []
226
+ require_paths:
227
+ - lib
228
+ required_ruby_version: !ruby/object:Gem::Requirement
229
+ none: false
230
+ requirements:
231
+ - - ! '>='
232
+ - !ruby/object:Gem::Version
233
+ version: 1.9.3
234
+ required_rubygems_version: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>'
238
+ - !ruby/object:Gem::Version
239
+ version: 1.3.1
240
+ requirements: []
241
+ rubyforge_project:
242
+ rubygems_version: 1.8.23
243
+ signing_key:
244
+ specification_version: 3
245
+ summary: BOSH Health Monitor
246
+ test_files: []