bosh-monitor 1.5.0.pre.1113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +80 -0
- data/bin/bosh-monitor +30 -0
- data/bin/bosh-monitor-console +51 -0
- data/bin/listener +58 -0
- data/lib/bosh/monitor.rb +72 -0
- data/lib/bosh/monitor/agent.rb +51 -0
- data/lib/bosh/monitor/agent_manager.rb +295 -0
- data/lib/bosh/monitor/api_controller.rb +18 -0
- data/lib/bosh/monitor/config.rb +71 -0
- data/lib/bosh/monitor/core_ext.rb +8 -0
- data/lib/bosh/monitor/director.rb +76 -0
- data/lib/bosh/monitor/director_monitor.rb +33 -0
- data/lib/bosh/monitor/errors.rb +19 -0
- data/lib/bosh/monitor/event_processor.rb +109 -0
- data/lib/bosh/monitor/events/alert.rb +92 -0
- data/lib/bosh/monitor/events/base.rb +70 -0
- data/lib/bosh/monitor/events/heartbeat.rb +139 -0
- data/lib/bosh/monitor/metric.rb +16 -0
- data/lib/bosh/monitor/plugins/base.rb +27 -0
- data/lib/bosh/monitor/plugins/cloud_watch.rb +56 -0
- data/lib/bosh/monitor/plugins/datadog.rb +78 -0
- data/lib/bosh/monitor/plugins/dummy.rb +20 -0
- data/lib/bosh/monitor/plugins/email.rb +135 -0
- data/lib/bosh/monitor/plugins/http_request_helper.rb +25 -0
- data/lib/bosh/monitor/plugins/logger.rb +13 -0
- data/lib/bosh/monitor/plugins/nats.rb +43 -0
- data/lib/bosh/monitor/plugins/pagerduty.rb +48 -0
- data/lib/bosh/monitor/plugins/paging_datadog_client.rb +24 -0
- data/lib/bosh/monitor/plugins/resurrector.rb +82 -0
- data/lib/bosh/monitor/plugins/resurrector_helper.rb +84 -0
- data/lib/bosh/monitor/plugins/tsdb.rb +43 -0
- data/lib/bosh/monitor/plugins/varz.rb +17 -0
- data/lib/bosh/monitor/protocols/tsdb.rb +68 -0
- data/lib/bosh/monitor/runner.rb +162 -0
- data/lib/bosh/monitor/version.rb +5 -0
- data/lib/bosh/monitor/yaml_helper.rb +18 -0
- metadata +246 -0
@@ -0,0 +1,162 @@
|
|
1
|
+
module Bosh::Monitor
|
2
|
+
class Runner
|
3
|
+
include YamlHelper
|
4
|
+
|
5
|
+
# Convenience entry point: builds a runner from the given configuration
# file and starts it right away.
#
# config_file - path passed straight through to #initialize.
#
# Returns whatever the instance-level #run returns.
def self.run(config_file)
  runner = new(config_file)
  runner.run
end
|
8
|
+
|
9
|
+
# Loads the YAML configuration, hands it to Bhm, and then caches the
# collaborators that Bhm exposes so the rest of the runner can use them
# through instance variables.
#
# config_file - path to the YAML configuration file (read via
#               load_yaml_file, which is mixed in from YamlHelper).
def initialize(config_file)
  parsed_config = load_yaml_file(config_file)
  Bhm.config = parsed_config

  # These are read only after Bhm.config= has been assigned.
  @logger        = Bhm.logger
  @director      = Bhm.director
  @intervals     = Bhm.intervals
  @mbus          = Bhm.mbus
  @agent_manager = Bhm.agent_manager
end
|
18
|
+
|
19
|
+
# Starts the health monitor: configures EventMachine, installs the global
# error handler and then enters the reactor loop, inside which the message
# bus connection, director monitor, agent events, timers and HTTP server
# are brought up in that order.
def run
  @logger.info "HealthMonitor starting..."

  # Switch on kqueue / epoll only when EventMachine reports support for it.
  EM.kqueue if EM.kqueue?
  EM.epoll if EM.epoll?

  # Any error surfacing in the reactor is routed to handle_em_error.
  EM.error_handler do |error|
    handle_em_error(error)
  end

  EM.run do
    connect_to_mbus

    @director_monitor = DirectorMonitor.new(Bhm)
    @director_monitor.subscribe

    @agent_manager.setup_events
    setup_timers
    start_http_server

    @logger.info("BOSH HealthMonitor #{Bhm::VERSION} is running...")
  end
end
|
36
|
+
|
37
|
+
# Shuts the monitor down: stops the HTTP server (if one was started),
# stops the EventMachine reactor and, unless a soft stop was requested,
# terminates the process with exit status 0.
#
# soft - when truthy the process is left running after the reactor stops.
def stop(soft=false)
  @logger.info "HealthMonitor shutting down..."

  if @http_server
    @http_server.stop!
  end

  EM.stop
  return if soft

  exit(0)
end
|
43
|
+
|
44
|
+
# Schedules the recurring work of the monitor. Everything is registered on
# the next reactor tick: the director is polled once immediately, then
# polling and stats logging are repeated on their configured intervals.
# Agent analysis only starts repeating after the poll grace period has
# elapsed.
def setup_timers
  EM.next_tick do
    poll_director

    EM.add_periodic_timer(@intervals.poll_director) do
      poll_director
    end

    EM.add_periodic_timer(@intervals.log_stats) do
      log_stats
    end

    # One-shot delay before the periodic agent analysis is registered.
    EM.add_timer(@intervals.poll_grace_period) do
      EM.add_periodic_timer(@intervals.analyze_agents) do
        analyze_agents
      end
    end
  end
end
|
55
|
+
|
56
|
+
# Writes two info lines: how many deployments and agents are currently
# managed, and how many agent heartbeats have been received.
def log_stats
  deployments_summary = pluralize(@agent_manager.deployments_count, "deployment")
  agents_summary = pluralize(@agent_manager.agents_count, "agent")

  @logger.info "Managing #{deployments_summary}, #{agents_summary}"
  @logger.info "Agent heartbeats received = #{@agent_manager.heartbeats_received}"
end
|
62
|
+
|
63
|
+
# Registers the NATS error callback and opens the message bus connection,
# storing the client in Bhm.nats.
#
# While the monitor is shutting down NATS errors are ignored. Otherwise a
# connect error is treated as fatal (handle_em_error) and any other error
# is only logged.
def connect_to_mbus
  NATS.on_error do |error|
    next if @shutting_down

    if error.kind_of?(NATS::ConnectError)
      handle_em_error(error)
    else
      log_exception(error)
    end
  end

  connection_options = {
    :uri       => @mbus.endpoint,
    :user      => @mbus.user,
    :pass      => @mbus.password,
    :autostart => false
  }

  Bhm.nats = NATS.connect(connection_options) do
    @logger.info "Connected to NATS at `#{@mbus.endpoint}'"
  end
end
|
85
|
+
|
86
|
+
# Builds and starts the Thin HTTP server that serves Bhm::ApiController at
# "/" behind HTTP basic auth. The server listens on all interfaces
# ("0.0.0.0") on Bhm.http_port and is kept in @http_server so #stop can
# shut it down.
def start_http_server
  @logger.info("HTTP server is starting on port #{Bhm.http_port}...")

  @http_server = Thin::Server.new("0.0.0.0", Bhm.http_port, :signals => false) do
    Thin::Logging.silent = true

    # NOTE(review): plain == is not a constant-time comparison; consider a
    # timing-safe check if this endpoint is reachable by untrusted clients.
    use Rack::Auth::Basic do |user, password|
      expected_credentials = [ Bhm.http_user, Bhm.http_password ]
      [ user, password ] == expected_credentials
    end

    map "/" do
      run Bhm::ApiController.new
    end
  end

  @http_server.start!
end
|
99
|
+
|
100
|
+
# Kicks off a deployment fetch inside a new Fiber and then publishes the
# current deployment and agent counts as varz values.
#
# NOTE(review): if fetch_deployments yields its fiber while waiting on the
# director, the counts below are read before the sync has finished and so
# reflect the previous poll - confirm against the Director client.
def poll_director
  @logger.debug("Getting deployments from director...")

  fetcher = Fiber.new { fetch_deployments }
  fetcher.resume

  deployments_count = @agent_manager.deployments_count
  Bhm.set_varz("deployments_count", deployments_count)

  agents_count = @agent_manager.agents_count
  Bhm.set_varz("agents_count", agents_count)
end
|
106
|
+
|
107
|
+
# Delegates agent analysis to the agent manager and returns its result.
def analyze_agents
  # N.B. Yes, this will block the event loop;
  # possibly consider deferring.
  @agent_manager.analyze_agents()
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
# This is a somewhat controversial approach: instead of swallowing some exceptions
|
116
|
+
# and letting event loop run further we force our server to stop. The rationale
|
117
|
+
# behind that is to avoid the situation when a swallowed exception actually breaks
|
118
|
+
# things:
|
119
|
+
# 1. Periodic timer will get canceled unless we manually reschedule it
|
120
|
+
# in a rescue clause even if we swallow the exception.
|
121
|
+
# 2. If we want to perform an operation on next tick AND schedule some operation
|
122
|
+
# to be run periodically AND there is an exception swallowed somewhere during the
|
123
|
+
# event processing, then on the next tick we don't really process events that follow the buggy one.
|
124
|
+
# These things can be pretty painful for HM as we might think it runs fine
|
125
|
+
# when it actually just swallows some exception and effectively does nothing.
|
126
|
+
# We might revisit that later
|
127
|
+
# Fatal-error path: marks the runner as shutting down, logs the error at
# fatal level and performs a hard stop (which exits the process).
#
# e - the error that triggered the shutdown.
def handle_em_error(e)
  # Set before anything else; the NATS error callback checks this flag
  # and stays silent once it is true.
  @shutting_down = true

  log_exception(e, :fatal)
  stop(false)
end
|
132
|
+
|
133
|
+
# Logs an error's message and, when one is available, its backtrace.
#
# e     - the object to log; its #to_s is written first. The backtrace is
#         written only if e responds to #backtrace and that value can be
#         joined.
# level - :fatal logs at fatal level; anything else falls back to :error.
def log_exception(e, level = :error)
  severity = (level == :fatal) ? :fatal : :error
  @logger.send(severity, e.to_s)

  return unless e.respond_to?(:backtrace)

  trace = e.backtrace
  @logger.send(severity, trace.join("\n")) if trace.respond_to?(:join)
end
|
140
|
+
|
141
|
+
# Pulls the deployment list from the director, syncs it into the agent
# manager, and then for every deployment fetches its VMs and syncs the
# agents for that deployment.
#
# A Bhm::DirectorError raised anywhere in the process is logged via
# log_exception and swallowed, so one failed poll does not stop the monitor.
def fetch_deployments
  deployments = @director.get_deployments

  @agent_manager.sync_deployments(deployments)

  deployments.each do |deployment|
    deployment_name = deployment["name"]

    @logger.info "Found deployment `#{deployment_name}'"

    # Announce the fetch before issuing it, so the log still shows which
    # deployment was being queried if the director call fails or stalls.
    @logger.debug "Fetching VMs information for `#{deployment_name}'..."
    vms = @director.get_deployment_vms(deployment_name)

    @agent_manager.sync_agents(deployment_name, vms)
  end

rescue Bhm::DirectorError => e
  log_exception(e)
end
|
160
|
+
|
161
|
+
end
|
162
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Bosh::Monitor
|
2
|
+
module YamlHelper
|
3
|
+
|
4
|
+
# Reads and parses a YAML file, optionally checking the type of the
# top-level value.
#
# path          - location of the YAML file.
# expected_type - class the parsed document must be an instance of
#                 (Hash by default); pass nil to skip the check.
#
# Returns the parsed YAML document.
# Raises ConfigError when the file does not exist, when the parsed value
# is not of the expected type, or when reading fails with a
# SystemCallError.
def load_yaml_file(path, expected_type = Hash)
  # File.exist? rather than File.exists?: the latter is deprecated and
  # no longer available on Ruby 3.2+.
  raise(ConfigError, "Cannot find file `#{path}'") unless File.exist?(path)
  yaml = Psych.load_file(path)

  if expected_type && !yaml.is_a?(expected_type)
    raise ConfigError, "Incorrect file format in `#{path}', #{expected_type} expected"
  end

  yaml
rescue SystemCallError => e
  raise ConfigError, "Cannot load YAML file at `#{path}': #{e}"
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
metadata
ADDED
@@ -0,0 +1,246 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bosh-monitor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.5.0.pre.1113
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- VMware
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-10-16 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: eventmachine
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.12.10
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.12.10
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: logging
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 1.5.0
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 1.5.0
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: em-http-request
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.3.0
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.3.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: nats
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.4.28
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.4.28
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: yajl-ruby
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 1.1.0
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 1.1.0
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: thin
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.5.0
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.5.0
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: sinatra
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ~>
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.4.2
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ~>
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.4.2
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: aws-sdk
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - '='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.8.5
|
134
|
+
type: :runtime
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - '='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.8.5
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: dogapi
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ~>
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.6.0
|
150
|
+
type: :runtime
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ~>
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.6.0
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: uuidtools
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
162
|
+
requirements:
|
163
|
+
- - ~>
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '2.1'
|
166
|
+
type: :runtime
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ~>
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '2.1'
|
174
|
+
description: ! 'BOSH Health Monitor
|
175
|
+
|
176
|
+
cfd471'
|
177
|
+
email: support@cloudfoundry.com
|
178
|
+
executables:
|
179
|
+
- bosh-monitor-console
|
180
|
+
- bosh-monitor
|
181
|
+
- listener
|
182
|
+
extensions: []
|
183
|
+
extra_rdoc_files: []
|
184
|
+
files:
|
185
|
+
- lib/bosh/monitor.rb
|
186
|
+
- lib/bosh/monitor/agent.rb
|
187
|
+
- lib/bosh/monitor/agent_manager.rb
|
188
|
+
- lib/bosh/monitor/api_controller.rb
|
189
|
+
- lib/bosh/monitor/config.rb
|
190
|
+
- lib/bosh/monitor/core_ext.rb
|
191
|
+
- lib/bosh/monitor/director.rb
|
192
|
+
- lib/bosh/monitor/director_monitor.rb
|
193
|
+
- lib/bosh/monitor/errors.rb
|
194
|
+
- lib/bosh/monitor/event_processor.rb
|
195
|
+
- lib/bosh/monitor/events/alert.rb
|
196
|
+
- lib/bosh/monitor/events/base.rb
|
197
|
+
- lib/bosh/monitor/events/heartbeat.rb
|
198
|
+
- lib/bosh/monitor/metric.rb
|
199
|
+
- lib/bosh/monitor/plugins/base.rb
|
200
|
+
- lib/bosh/monitor/plugins/cloud_watch.rb
|
201
|
+
- lib/bosh/monitor/plugins/datadog.rb
|
202
|
+
- lib/bosh/monitor/plugins/dummy.rb
|
203
|
+
- lib/bosh/monitor/plugins/email.rb
|
204
|
+
- lib/bosh/monitor/plugins/http_request_helper.rb
|
205
|
+
- lib/bosh/monitor/plugins/logger.rb
|
206
|
+
- lib/bosh/monitor/plugins/nats.rb
|
207
|
+
- lib/bosh/monitor/plugins/pagerduty.rb
|
208
|
+
- lib/bosh/monitor/plugins/paging_datadog_client.rb
|
209
|
+
- lib/bosh/monitor/plugins/resurrector.rb
|
210
|
+
- lib/bosh/monitor/plugins/resurrector_helper.rb
|
211
|
+
- lib/bosh/monitor/plugins/tsdb.rb
|
212
|
+
- lib/bosh/monitor/plugins/varz.rb
|
213
|
+
- lib/bosh/monitor/protocols/tsdb.rb
|
214
|
+
- lib/bosh/monitor/runner.rb
|
215
|
+
- lib/bosh/monitor/version.rb
|
216
|
+
- lib/bosh/monitor/yaml_helper.rb
|
217
|
+
- README
|
218
|
+
- bin/bosh-monitor-console
|
219
|
+
- bin/bosh-monitor
|
220
|
+
- bin/listener
|
221
|
+
homepage: https://github.com/cloudfoundry/bosh
|
222
|
+
licenses:
|
223
|
+
- Apache 2.0
|
224
|
+
post_install_message:
|
225
|
+
rdoc_options: []
|
226
|
+
require_paths:
|
227
|
+
- lib
|
228
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
229
|
+
none: false
|
230
|
+
requirements:
|
231
|
+
- - ! '>='
|
232
|
+
- !ruby/object:Gem::Version
|
233
|
+
version: 1.9.3
|
234
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
235
|
+
none: false
|
236
|
+
requirements:
|
237
|
+
- - ! '>'
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: 1.3.1
|
240
|
+
requirements: []
|
241
|
+
rubyforge_project:
|
242
|
+
rubygems_version: 1.8.23
|
243
|
+
signing_key:
|
244
|
+
specification_version: 3
|
245
|
+
summary: BOSH Health Monitor
|
246
|
+
test_files: []
|