nerve_pharmeasy 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.mailmap +2 -0
- data/.nerve.rc +2 -0
- data/.travis.yml +8 -0
- data/CONTRIBUTING.md +28 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +75 -0
- data/LICENSE.txt +22 -0
- data/README.md +116 -0
- data/Rakefile +7 -0
- data/Vagrantfile +121 -0
- data/bin/nerve +16 -0
- data/example/nerve.conf.json +54 -0
- data/example/nerve_services/etcd_service1.json +19 -0
- data/example/nerve_services/zookeeper_service1.json +18 -0
- data/lib/nerve/configuration_manager.rb +106 -0
- data/lib/nerve/log.rb +24 -0
- data/lib/nerve/reporter/base.rb +61 -0
- data/lib/nerve/reporter/etcd.rb +73 -0
- data/lib/nerve/reporter/zookeeper.rb +101 -0
- data/lib/nerve/reporter.rb +18 -0
- data/lib/nerve/ring_buffer.rb +30 -0
- data/lib/nerve/service_watcher/base.rb +65 -0
- data/lib/nerve/service_watcher/http.rb +70 -0
- data/lib/nerve/service_watcher/rabbitmq.rb +68 -0
- data/lib/nerve/service_watcher/tcp.rb +56 -0
- data/lib/nerve/service_watcher.rb +152 -0
- data/lib/nerve/utils.rb +17 -0
- data/lib/nerve/version.rb +3 -0
- data/lib/nerve.rb +249 -0
- data/nerve.conf.json +23 -0
- data/nerve.gemspec +33 -0
- data/spec/.gitkeep +0 -0
- data/spec/configuration_manager_spec.rb +31 -0
- data/spec/example_services_spec.rb +42 -0
- data/spec/factories/check.rb +16 -0
- data/spec/factories/service.rb +26 -0
- data/spec/lib/nerve/reporter_etcd_spec.rb +18 -0
- data/spec/lib/nerve/reporter_spec.rb +86 -0
- data/spec/lib/nerve/reporter_zookeeper_spec.rb +32 -0
- data/spec/lib/nerve/service_watcher_spec.rb +89 -0
- data/spec/lib/nerve_spec.rb +186 -0
- data/spec/spec_helper.rb +33 -0
- metadata +216 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
|
2
|
+
class Nerve::Reporter
  # Common superclass for reporters. The lifecycle hooks below are empty
  # here and are meant to be overridden by concrete reporters; the class
  # also provides the helpers that turn a service configuration hash into
  # the payload a reporter publishes.
  class Base
    include Nerve::Utils
    include Nerve::Logging

    # @param opts [Hash] service configuration; unused by the base class
    def initialize(opts)
    end

    # Lifecycle hook; does nothing in the base class.
    def start
    end

    # Lifecycle hook; does nothing in the base class.
    def stop
    end

    # Hook for announcing the service as available; no-op here.
    def report_up
    end

    # Hook for announcing the service as unavailable; no-op here.
    def report_down
    end

    # Hook for checking the reporter's backend; the base implementation has
    # an empty body and therefore returns nil.
    def ping?
    end

    # Builds the data hash describing a service instance.
    #
    # @param service [Hash] string-keyed service configuration. Must contain
    #   'instance_id', 'host' and 'port'; may contain 'weight',
    #   'haproxy_server_options' and 'labels'.
    # @return [Hash] 'host', 'port' and 'name' (taken from 'instance_id'),
    #   plus any of the optional keys that were supplied
    # @raise [ArgumentError] if a required key is missing or 'weight' is not
    #   a non-negative integer (given as an Integer or a string of digits)
    def get_service_data(service)
      %w{instance_id host port}.each do |required|
        raise ArgumentError, "missing required argument #{required} for new service watcher" unless service[required]
      end

      d = {
        'host' => service['host'],
        'port' => service['port'],
        'name' => service['instance_id']
      }

      # Weight is optional, but it should be well formed if supplied.
      # \A and \z anchor the whole string; ^ and $ only anchor a line, which
      # would let a value such as "5\nabc" slip through.
      if service.has_key?('weight')
        weight = service['weight'].to_s
        unless weight =~ /\A\d+\z/
          raise ArgumentError, "invalid 'weight' argument in service data: #{service.inspect}"
        end
        d['weight'] = weight.to_i
      end

      # These optional settings are copied through untouched.
      %w{haproxy_server_options labels}.each do |optional|
        d[optional] = service[optional] if service.has_key?(optional)
      end

      d
    end

    protected

    # Serializes the payload for storage.
    #
    # @param data [Object] a String (returned unchanged) or anything that
    #   responds to to_json
    # @return [String]
    def parse_data(data)
      return data if data.instance_of?(String)
      data.to_json
    end
  end
end
|
61
|
+
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'nerve/reporter/base'
|
2
|
+
require 'etcd'
|
3
|
+
|
4
|
+
class Nerve::Reporter
  # Reporter that registers a service instance as a TTL'd key in etcd.
  class Etcd < Base
    # @param service [Hash] string-keyed service configuration. Requires
    #   'etcd_host' plus the keys needed by get_service_data; optional keys
    #   are 'etcd_port', 'etcd_path' and 'check_interval'.
    # @raise [ArgumentError] if 'etcd_host' is missing
    def initialize(service)
      raise ArgumentError, "missing required argument etcd_host for new service watcher" unless service['etcd_host']
      @host = service['etcd_host']
      @port = service['etcd_port'] || 4003
      path = service['etcd_path'] || '/'
      # NOTE(review): with the default '/' path, split returns [] and @path
      # becomes the bare instance_id without a leading slash -- confirm the
      # etcd client accepts that.
      @path = path.split('/').push(service['instance_id']).join('/')
      @data = parse_data(get_service_data(service))
      @key = nil
      # The key lives for five check intervals, rounded up to an integer.
      @ttl = ((service['check_interval'] || 0.5) * 5).ceil
    end

    # Creates the etcd client for the configured host and port.
    def start()
      log.info "nerve: connecting to etcd at #{@host}:#{@port}"
      @etcd = ::Etcd.client(:host => @host, :port => @port)
      log.info "nerve: successfully created etcd connection to #{@host}:#{@port}"
    end

    # Removes our key (if any) and drops the client.
    def stop()
      report_down
      @etcd = nil
    end

    # Creates the key, or refreshes it if it already exists.
    def report_up()
      etcd_save
    end

    # Deletes the key so the service is no longer advertised.
    def report_down
      etcd_delete
    end

    def ping?
      # we get a ping every check_interval.
      if @key
        # we have made a key: save it to prevent the TTL from expiring.
        etcd_save
      else
        # we haven't created a key, so just frob the etcd API to assure that
        # it's alive.
        @etcd.leader
      end
    end

    private

    # Deletes our key and forgets it. Forgetting matters: ping? refreshes
    # any key it still knows about, so leaving @key set after a delete would
    # re-register a service that has just been reported down.
    def etcd_delete
      return unless @etcd && @key
      begin
        @etcd.delete(@key)
      rescue ::Etcd::NotFile
        # best effort: deliberately ignored
      rescue Errno::ECONNREFUSED
        # best effort: etcd is unreachable, so we only stop refreshing
      ensure
        @key = nil
      end
    end

    def etcd_create
      # we use create_in_order to create a unique key under our path,
      # permitting multiple registrations from the same instance_id.
      @key = @etcd.create_in_order(@path, :value => @data, :ttl => @ttl).key
      log.info "registered etcd key #{@key} with value #{@data}, TTL #{@ttl}"
    end

    # Creates the key on first use; afterwards rewrites it with the same
    # value and TTL.
    def etcd_save
      return etcd_create unless @key
      @etcd.set(@key, :value => @data, :ttl => @ttl)
    end
  end
end
|
73
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'nerve/reporter/base'
|
2
|
+
require 'thread'
|
3
|
+
require 'zk'
|
4
|
+
|
5
|
+
|
6
|
+
class Nerve::Reporter
  # Reporter that registers a service instance as an ephemeral sequential
  # node in ZooKeeper. Connections are pooled per connection string and
  # reference counted across all Zookeeper reporters in the process.
  class Zookeeper < Base
    @@zk_pool = {}
    @@zk_pool_count = {}
    @@zk_pool_lock = Mutex.new

    # @param service [Hash] string-keyed service configuration. Requires
    #   'zk_hosts' (an array of host strings) and 'zk_path', plus the keys
    #   needed by get_service_data.
    # @raise [ArgumentError] if 'zk_hosts' or 'zk_path' is missing
    def initialize(service)
      %w{zk_hosts zk_path}.each do |required|
        raise ArgumentError, "missing required argument #{required} for new service watcher" unless service[required]
      end
      # Since we pool we get one connection per zookeeper cluster
      @zk_connection_string = service['zk_hosts'].sort.join(',')
      @data = parse_data(get_service_data(service))

      @zk_path = service['zk_path']
      @key_prefix = @zk_path + "/#{service['instance_id']}_"
      @full_key = nil
    end

    # Obtains the pooled connection for our cluster, creating it if this is
    # the first reporter to ask for it.
    def start()
      log.info "nerve: waiting to connect to zookeeper to #{@zk_connection_string}"
      # Ensure that all Zookeeper reporters re-use a single zookeeper
      # connection to any given set of zk hosts.
      @@zk_pool_lock.synchronize {
        unless @@zk_pool.has_key?(@zk_connection_string)
          log.info "nerve: creating pooled connection to #{@zk_connection_string}"
          @@zk_pool[@zk_connection_string] = ZK.new(@zk_connection_string, :timeout => 5)
          @@zk_pool_count[@zk_connection_string] = 1
          log.info "nerve: successfully created zk connection to #{@zk_connection_string}"
        else
          @@zk_pool_count[@zk_connection_string] += 1
          log.info "nerve: re-using existing zookeeper connection to #{@zk_connection_string}"
        end
        @zk = @@zk_pool[@zk_connection_string]
        log.info "nerve: retrieved zk connection to #{@zk_connection_string}"
      }
    end

    # Removes our node and gives back our reference to the pooled
    # connection. Safe to call when start failed or never ran, and safe to
    # call more than once.
    def stop()
      log.info "nerve: removing zk node at #{@full_key}" if @full_key
      begin
        report_down
      ensure
        release_connection
      end
    end

    # Creates the node, or rewrites its data if it already exists.
    def report_up()
      zk_save
    end

    # Deletes the node so the service is no longer advertised.
    def report_down
      zk_delete
    end

    # @return [Boolean] whether the connection is up and our node (or the
    #   root, when no node has been created yet) exists
    def ping?
      return @zk.connected? && @zk.exists?(@full_key || '/')
    end

    private

    # Drops this reporter's reference to the pooled connection. Only a
    # reporter that actually obtained a connection in start may decrement
    # the count; otherwise we would either fail on a missing counter or
    # close a connection other reporters are still using.
    def release_connection
      @@zk_pool_lock.synchronize {
        next unless @zk

        begin
          @@zk_pool_count[@zk_connection_string] -= 1
          # Last thread to use the connection closes it
          if @@zk_pool_count[@zk_connection_string] == 0
            log.info "nerve: closing zk connection to #{@zk_connection_string}"
            begin
              @zk.close!
            ensure
              @@zk_pool.delete(@zk_connection_string)
            end
          end
        ensure
          # forget the connection so a repeated stop cannot release twice
          @zk = nil
        end
      }
    end

    def zk_delete
      if @full_key
        @zk.delete(@full_key, :ignore => :no_node)
        @full_key = nil
      end
    end

    # Ensures the parent path exists, then creates our node and remembers
    # the full key returned by the create call.
    def zk_create
      @zk.mkdir_p(@zk_path)
      @full_key = @zk.create(@key_prefix, :data => @data, :mode => :ephemeral_sequential)
    end

    # Creates the node on first use; afterwards rewrites its data, and
    # re-creates the node if it has disappeared in the meantime.
    def zk_save
      return zk_create unless @full_key

      begin
        @zk.set(@full_key, @data)
      rescue ZK::Exceptions::NoNode
        zk_create
      end
    end
  end
end
|
101
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'nerve/utils'
|
2
|
+
require 'nerve/log'
|
3
|
+
require 'nerve/reporter/base'
|
4
|
+
|
5
|
+
module Nerve
  class Reporter
    # Instantiates the reporter implementation named by the service
    # configuration.
    #
    # The type (default 'zookeeper') is lower-cased to find the file under
    # nerve/reporter/ and capitalized to find the class inside Reporter.
    #
    # @param service [Hash] string-keyed service configuration; the optional
    #   'reporter_type' key selects the implementation
    # @return [Object] a new instance of the selected reporter class, built
    #   with the service hash
    # @raise [ArgumentError] if the reporter file or class cannot be loaded
    def self.new_from_service(service)
      type = service['reporter_type'] || 'zookeeper'
      reporter = begin
        require "nerve/reporter/#{type.downcase}"
        const_get(type.downcase.capitalize)
      rescue StandardError, ScriptError => e
        # LoadError and SyntaxError are ScriptErrors, not StandardErrors, so
        # both families are listed. Signals, exits and out-of-memory
        # conditions are deliberately left alone.
        raise ArgumentError, "specified a reporter_type of #{type}, which could not be found: #{e}"
      end
      reporter.new(service)
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Nerve
  # Array subclass that is created with a fixed number of slots and drops
  # its oldest entry when a new one is pushed onto a full buffer.
  class RingBuffer < Array
    alias_method :array_push, :push
    alias_method :array_element, :[]

    # @param size [#to_i] number of slots; the buffer starts out filled with
    #   that many nils
    def initialize(size)
      @ring_size = size.to_i
      super(@ring_size)
    end

    # @return [Float] arithmetic mean of the stored elements
    def average
      total = 0.0
      each { |el| total += el }
      total / size
    end

    # Appends an element, discarding the oldest one first when the buffer
    # is at capacity.
    def push(element)
      shift if length == @ring_size # lose the oldest element
      array_push(element)
    end

    # Access elements in the RingBuffer relative to the newest entry:
    # 0 is the most recent element, -1 the one before it, and so on.
    #
    # offset will be typically negative!
    def [](offset = 0)
      array_element(offset - 1)
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'nerve/ring_buffer'
|
2
|
+
|
3
|
+
module Nerve
  module ServiceCheck
    # Base class for service checks. Subclasses supply a `check` method;
    # this class turns its raw results into a debounced up/down state using
    # rise/fall thresholds.
    class BaseServiceCheck
      include Utils
      include Logging

      # @param opts [Hash] string-keyed options:
      #   'timeout' (converted with to_f, default 0.1),
      #   'rise' (converted with to_i, default 1),
      #   'fall' (converted with to_i, default 1),
      #   'name' (default "undefined")
      def initialize(opts={})
        @timeout = opts['timeout'] ? opts['timeout'].to_f : 0.1
        @rise    = opts['rise']    ? opts['rise'].to_i    : 1
        @fall    = opts['fall']    ? opts['fall'].to_i    : 1
        @name    = opts['name']    ? opts['name']         : "undefined"

        # large enough to hold the longer of the two thresholds
        @check_buffer = RingBuffer.new([@rise, @fall].max)
        @last_result = nil
      end

      # Runs the check once and returns the debounced state.
      #
      # @return [Boolean] true once the last @rise results were all
      #   successes, false once the last @fall results were all failures,
      #   otherwise the previously reported state
      def up?
        # do the check; errors raised by it count as a failure
        check_result = !!catch_errors { check }

        # this is the first check -- initialize buffer
        if @last_result.nil?
          @last_result = check_result
          @check_buffer.size.times {@check_buffer.push check_result}
          log.info "nerve: service check #{@name} initial check returned #{check_result}"
        end

        log.debug "nerve: service check #{@name} returned #{check_result}"
        @check_buffer.push(check_result)

        # we've failed if the last @fall times are false
        unless @check_buffer.last(@fall).reduce(:|)
          log.info "nerve: service check #{@name} transitions to down after #{@fall} failures" if @last_result
          @last_result = false
        end

        # we've succeeded if the last @rise times is true
        if @check_buffer.last(@rise).reduce(:&)
          log.info "nerve: service check #{@name} transitions to up after #{@rise} successes" unless @last_result
          @last_result = true
        end

        # otherwise return the last result
        return @last_result
      end

      # Yields to the given block and converts errors into a false result.
      #
      # Only StandardError and ScriptError are rescued, so signals
      # (Interrupt), SystemExit and NoMemoryError still propagate instead of
      # being reported as a failed check.
      #
      # @return [Object, false] the block's value, or false if it raised
      def catch_errors(&block)
        yield
      rescue StandardError, ScriptError => error
        log.info "nerve: service check #{@name} got error #{error.inspect}"
        false
      end
    end

    CHECKS ||= {}
    CHECKS['base'] = BaseServiceCheck
  end
end
|
65
|
+
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'nerve/service_watcher/base'
|
2
|
+
|
3
|
+
module Nerve
  module ServiceCheck
    require 'net/http'

    # Health check that issues an HTTP GET and inspects the status code and,
    # optionally, the response body.
    class HttpServiceCheck < BaseServiceCheck
      # @param opts [Hash] string-keyed options. 'port' and 'uri' are
      #   required; 'host', 'ssl', 'read_timeout', 'open_timeout',
      #   'ssl_timeout', 'headers' and 'expect' are optional.
      # @raise [ArgumentError] if 'port' or 'uri' is missing
      def initialize(opts={})
        super

        %w{port uri}.each do |key|
          value = opts[key]
          raise ArgumentError, "missing required argument #{key} in http check" unless value
          instance_variable_set("@#{key}", value)
        end

        @host = opts['host'] || '127.0.0.1'
        @ssl  = opts['ssl']  || false

        # the read timeout falls back to the generic check timeout
        @read_timeout = opts['read_timeout'] || @timeout
        @open_timeout = opts['open_timeout'] || 0.2
        @ssl_timeout  = opts['ssl_timeout']  || 0.2

        @headers = opts['headers'] || {}
        @expect  = opts['expect']

        @name = "http-#{@host}:#{@port}#{@uri}"
      end

      # Performs the request.
      #
      # @return [Boolean] true when the status is 2xx/3xx and, if an
      #   expected substring is configured, the body contains it
      def check
        log.debug "running health check #{@name}"

        response = get_connection.get(@uri, @headers)
        code = response.code.to_i
        body = response.body

        # Any 2xx or 3xx code should be considered healthy. This is standard
        # practice in HAProxy, nginx, etc ...
        healthy = code >= 200 && code < 400 && (@expect.nil? || body.include?(@expect))

        unless healthy
          log.warn "nerve: check #{@name} got response code #{code} with body \"#{body}\""
          return false
        end

        log.debug "nerve: check #{@name} got response code #{code} with body \"#{body}\""
        true
      end

      private

      # Builds a Net::HTTP object with the configured timeouts. When SSL is
      # enabled, certificate verification is turned off (VERIFY_NONE).
      def get_connection
        Net::HTTP.new(@host, @port).tap do |http|
          http.read_timeout = @read_timeout
          http.open_timeout = @open_timeout

          if @ssl
            http.use_ssl = true
            http.ssl_timeout = @ssl_timeout
            http.verify_mode = OpenSSL::SSL::VERIFY_NONE
          end
        end
      end

    end

    CHECKS ||= {}
    CHECKS['http'] = HttpServiceCheck
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'nerve/service_watcher/base'
|
2
|
+
require 'bunny'
|
3
|
+
|
4
|
+
module Nerve
  module ServiceCheck
    # Health check that publishes a message to a RabbitMQ queue and reads it
    # back.
    class RabbitMQServiceCheck < BaseServiceCheck
      require 'socket'
      include Socket::Constants

      # @param opts [Hash] string-keyed options. 'port' is required; 'host',
      #   'username' and 'password' are optional.
      # @raise [ArgumentError] if 'port' is missing
      def initialize(opts={})
        super

        raise ArgumentError, "missing required argument 'port' in rabbitmq check" unless opts['port']

        @port = opts['port']
        @host = opts['host'] || '127.0.0.1'
        @user = opts['username'] || 'guest'
        @pass = opts['password'] || 'guest'
      end

      # Publishes a test message to the 'nerve' queue and fetches one back.
      #
      # @return [Boolean] true if a payload was received and acknowledged,
      #   false if the fetch returned no payload
      def check
        # the idea for this check was taken from the one in rabbitmq management
        # -- the aliveness_test:
        # https://github.com/rabbitmq/rabbitmq-management/blob/9a8e3d1ab5144e3f6a1cb9a4639eb738713b926d/src/rabbit_mgmt_wm_aliveness_test.erl
        log.debug "nerve: running rabbitmq health check #{@name}"

        settings = {
          :host => @host,
          :port => @port,
          :user => @user,
          :pass => @pass,
          :log_file => STDERR,
          :continuation_timeout => @timeout,
          :automatically_recover => false,
          :heartbeat => false,
          :threaded => false
        }
        conn = Bunny.new(settings)

        begin
          conn.start
          channel = conn.create_channel

          # create a queue, publish to it
          log.debug "nerve: publishing to rabbitmq"
          channel.queue('nerve')
          channel.basic_publish('nerve test message', '', 'nerve', :mandatory => true, :expiration => 2 * 1000)

          # read and ack the message
          log.debug "nerve: consuming from rabbitmq"
          delivery_info, _properties, payload = channel.basic_get('nerve', :ack => true)

          unless payload
            log.debug "nerve: rabbitmq consumption returned no payload"
            return false
          end

          channel.acknowledge(delivery_info.delivery_tag)
          true
        ensure
          # always close the connection, whatever the outcome
          conn.close
        end
      end
    end

    CHECKS ||= {}
    CHECKS['rabbitmq'] = RabbitMQServiceCheck
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'nerve/service_watcher/base'
|
2
|
+
|
3
|
+
module Nerve
  module ServiceCheck
    # Health check that succeeds when a TCP connection to host:port can be
    # established within the check timeout.
    class TcpServiceCheck < BaseServiceCheck
      require 'socket'
      include Socket::Constants

      # @param opts [Hash] string-keyed options. 'port' is required; 'host'
      #   is optional.
      # @raise [ArgumentError] if 'port' is missing
      def initialize(opts={})
        super

        raise ArgumentError, "missing required argument 'port' in tcp check" unless opts['port']

        @port = opts['port']
        @host = opts['host'] || '127.0.0.1'

        # packed socket address, built once and reused for every check
        @address = Socket.sockaddr_in(@port, @host)
      end

      # Attempts a non-blocking connect. Errors other than the ones handled
      # here are raised to the caller.
      def check
        log.debug "nerve: running TCP health check #{@name}"

        # create a TCP socket
        sock = Socket.new(AF_INET, SOCK_STREAM, 0)

        begin
          # open a non-blocking connection
          sock.connect_nonblock(@address)
        rescue Errno::EINPROGRESS
          # opening a non-blocking socket will usually raise
          # this exception. it's just connect returning immediately,
          # so it's not really an exception, but ruby makes it into
          # one.
          finish_connect(sock)
        else
          # we managed to connect REALLY REALLY FAST
          log.debug "nerve: connected to non-blocking socket without an exception"
          true
        ensure
          sock.close
        end
      end

      private

      # Waits up to the timeout for the socket to become writeable, then
      # retries the connect. EISCONN on the retry means the connection was
      # established; any other exception is allowed through.
      def finish_connect(sock)
        IO.select(nil, [sock], nil, @timeout)
        sock.connect_nonblock(@address)
      rescue Errno::EISCONN
        true
      end
    end

    CHECKS ||= {}
    CHECKS['tcp'] = TcpServiceCheck
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'nerve/service_watcher/tcp'
|
2
|
+
require 'nerve/service_watcher/http'
|
3
|
+
require 'nerve/service_watcher/rabbitmq'
|
4
|
+
|
5
|
+
module Nerve
  # Watches one service: periodically runs its checks in a background
  # thread and tells the reporter whenever the up/down state changes.
  class ServiceWatcher
    include Utils
    include Logging

    # Last state handed to the reporter: true, false, or nil when a fresh
    # report is pending.
    attr_reader :was_up

    # @param service [Hash] string-keyed service configuration. Requires
    #   'name', 'instance_id', 'host' and 'port'; 'checks' (array of check
    #   hashes) and 'check_interval' are optional. Note that the hash and
    #   its check hashes are filled in with defaults in place.
    # @raise [ArgumentError] if a required key is missing or a check type
    #   is not registered after loading its module
    def initialize(service={})
      log.debug "nerve: creating service watcher object"

      # check that we have all of the required arguments
      %w{name instance_id host port}.each do |required|
        raise ArgumentError, "missing required argument #{required} for new service watcher" unless service[required]
      end

      @name = service['name']

      # configure the reporter, which we use for reporting status to the registry
      @reporter = Reporter.new_from_service(service)

      # instantiate the checks for this service
      @service_checks = []
      service['checks'] ||= []
      service['checks'].each do |check|
        # checks inherit attributes from the service overall
        check['host'] ||= service['host']
        check['port'] ||= service['port']

        # make sure a type is set; this must happen before the name is
        # generated because the name embeds the type
        check['type'] ||= "undefined"

        # generate a nice readable name for each check
        check['name'] ||= "#{@name} #{check['type']}-#{check['host']}:#{check['port']}"

        # require a 3rd-party module if necessary for external checkers
        unless ServiceCheck::CHECKS[check['type']]
          m = check['module'] ? check['module'] : "nerve-watcher-#{check['type']}"
          require m
        end

        # instantiate the check object
        service_check_class = ServiceCheck::CHECKS[check['type']]
        if service_check_class.nil?
          raise ArgumentError,
            "invalid service check type #{check['type']}; valid types: #{ServiceCheck::CHECKS.keys.join(',')}"
        end

        # save the check object
        @service_checks << service_check_class.new(check)
      end

      # how often do we initiate service checks?
      @check_interval = service['check_interval'] || 0.5

      # force an initial report on startup
      @was_up = nil

      # when this watcher is started it will store the
      # thread here
      @run_thread = nil
      @should_finish = false

      log.debug "nerve: created service watcher for #{@name} with #{@service_checks.size} checks"
    end

    # Starts the watch loop in a new thread; logs an error if one is
    # already recorded.
    def start()
      if @run_thread
        log.error "nerve: tried to double start a watcher"
      else
        @run_thread = Thread.new { self.run() }
      end
    end

    # Asks the watch loop to finish and waits up to 10 seconds for it.
    #
    # @return [Boolean] true if the thread ended on its own (or was never
    #   started), false if it had to be killed
    def stop()
      log.info "nerve: stopping service watch #{@name}"
      @should_finish = true
      return true if @run_thread.nil?

      # join returns nil when the timeout expires first
      unclean_shutdown = @run_thread.join(10).nil?
      if unclean_shutdown
        log.error "nerve: unclean shutdown of #{@name}, killing thread"
        Thread.kill(@run_thread)
      end
      @run_thread = nil
      !unclean_shutdown
    end

    # @return [Boolean] whether the watch thread exists and is running
    def alive?()
      !@run_thread.nil? && @run_thread.alive?
    end

    # Body of the watch thread: starts the reporter, then checks and
    # reports until asked to exit. The reporter is always stopped on the
    # way out; StandardErrors are logged and re-raised.
    def run()
      log.info "nerve: starting service watch #{@name}"
      @reporter.start()

      until watcher_should_exit?
        check_and_report

        # wait to run more checks but make sure to exit if $EXIT
        # we avoid sleeping for the entire check interval at once
        # so that nerve can exit promptly if required
        responsive_sleep(@check_interval) { watcher_should_exit? }
      end
    rescue StandardError => e
      log.error "nerve: error in service watcher #{@name}: #{e.inspect}"
      raise
    ensure
      log.info "nerve: stopping reporter for #{@name}"
      @reporter.stop
    end

    # Runs one round of checks and notifies the reporter if the state
    # differs from the last one reported.
    def check_and_report
      if !@reporter.ping?
        # If the reporter can't ping, then we do not know the status
        # and must force a new report.
        @was_up = nil
      end

      # what is the status of the service?
      is_up = check?
      log.debug "nerve: current service status for #{@name} is #{is_up.inspect}"

      if is_up != @was_up
        if is_up
          @reporter.report_up
          log.info "nerve: service #{@name} is now up"
        else
          @reporter.report_down
          log.warn "nerve: service #{@name} is now down"
        end
        @was_up = is_up
      end
    end

    # @return [Boolean] true when every configured check reports up (also
    #   true when there are no checks); stops at the first failing check
    def check?
      @service_checks.each do |check|
        return false unless check.up?
      end
      return true
    end

    private

    # Exit when the process-wide $EXIT flag is set or stop was requested.
    def watcher_should_exit?
      $EXIT || @should_finish
    end

  end
end
|