arborist 0.2.0.pre20170519125456 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/ChangeLog +670 -1
- data/History.md +67 -0
- data/Manifest.txt +9 -6
- data/README.md +1 -3
- data/Rakefile +39 -4
- data/TODO.md +22 -31
- data/lib/arborist.rb +9 -2
- data/lib/arborist/cli.rb +67 -85
- data/lib/arborist/client.rb +125 -59
- data/lib/arborist/command/ack.rb +86 -0
- data/lib/arborist/command/reset.rb +48 -0
- data/lib/arborist/command/start.rb +11 -1
- data/lib/arborist/command/summary.rb +173 -0
- data/lib/arborist/command/tree.rb +215 -0
- data/lib/arborist/command/watch.rb +22 -22
- data/lib/arborist/dependency.rb +24 -4
- data/lib/arborist/event.rb +18 -2
- data/lib/arborist/event/node.rb +6 -2
- data/lib/arborist/event/node_warn.rb +16 -0
- data/lib/arborist/manager.rb +179 -48
- data/lib/arborist/mixins.rb +11 -0
- data/lib/arborist/monitor.rb +29 -17
- data/lib/arborist/monitor/connection_batching.rb +293 -0
- data/lib/arborist/monitor/socket.rb +101 -167
- data/lib/arborist/monitor_runner.rb +101 -24
- data/lib/arborist/node.rb +297 -68
- data/lib/arborist/node/ack.rb +1 -1
- data/lib/arborist/node/host.rb +26 -5
- data/lib/arborist/node/resource.rb +14 -5
- data/lib/arborist/node/root.rb +12 -3
- data/lib/arborist/node/service.rb +29 -26
- data/lib/arborist/node_subscription.rb +65 -0
- data/lib/arborist/observer.rb +8 -0
- data/lib/arborist/observer/action.rb +6 -0
- data/lib/arborist/subscription.rb +22 -16
- data/lib/arborist/tree_api.rb +7 -2
- data/spec/arborist/client_spec.rb +157 -51
- data/spec/arborist/dependency_spec.rb +21 -0
- data/spec/arborist/event/node_spec.rb +5 -0
- data/spec/arborist/event_spec.rb +3 -3
- data/spec/arborist/manager_spec.rb +626 -347
- data/spec/arborist/mixins_spec.rb +19 -0
- data/spec/arborist/monitor/socket_spec.rb +1 -2
- data/spec/arborist/monitor_runner_spec.rb +81 -29
- data/spec/arborist/monitor_spec.rb +89 -14
- data/spec/arborist/node/host_spec.rb +68 -0
- data/spec/arborist/node/resource_spec.rb +2 -0
- data/spec/arborist/node/root_spec.rb +13 -0
- data/spec/arborist/node/service_spec.rb +9 -0
- data/spec/arborist/node_spec.rb +673 -111
- data/spec/arborist/node_subscription_spec.rb +54 -0
- data/spec/arborist/observer/action_spec.rb +6 -0
- data/spec/arborist/observer_runner_spec.rb +8 -1
- data/spec/arborist/tree_api_spec.rb +111 -8
- data/spec/data/monitors/pings.rb +0 -11
- data/spec/data/monitors/port_checks.rb +0 -9
- data/spec/data/nodes/sidonie.rb +1 -0
- data/spec/data/nodes/vhosts.rb +23 -0
- data/spec/data/nodes/yevaud.rb +4 -2
- data/spec/spec_helper.rb +71 -1
- metadata +91 -28
- metadata.gz.sig +0 -0
- data/Events.md +0 -35
- data/Monitors.md +0 -155
- data/Nodes.md +0 -70
- data/Observers.md +0 -72
- data/Protocol.md +0 -276
- data/Tutorial.md +0 -8
data/lib/arborist/mixins.rb
CHANGED
@@ -86,6 +86,17 @@ module Arborist
|
|
86
86
|
attr_predicate( attrname )
|
87
87
|
end
|
88
88
|
|
89
|
+
|
90
|
+
### Create a method that is both a reader and a writer for an instance
|
91
|
+
### variable. If called with a (non-nil) argument, it will set the variable to
|
92
|
+
### the new value. It returns whatever the instance variable is set to.
|
93
|
+
def dsl_accessor( attrname )
|
94
|
+
define_method( attrname ) do |arg=nil|
|
95
|
+
instance_variable_set( "@#{attrname}", arg ) unless arg.nil?
|
96
|
+
return instance_variable_get( "@#{attrname}" )
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
89
100
|
end # module MethodUtilities
|
90
101
|
|
91
102
|
|
data/lib/arborist/monitor.rb
CHANGED
@@ -10,8 +10,9 @@ using Arborist::TimeRefinements
|
|
10
10
|
|
11
11
|
# A declaration of an action to run against Manager nodes to update their state.
|
12
12
|
class Arborist::Monitor
|
13
|
-
extend
|
14
|
-
|
13
|
+
extend Configurability,
|
14
|
+
Loggability,
|
15
|
+
Arborist::MethodUtilities
|
15
16
|
|
16
17
|
# Loggability API -- write logs to the Arborist log host
|
17
18
|
log_to :arborist
|
@@ -30,9 +31,16 @@ class Arborist::Monitor
|
|
30
31
|
# The default monitoring interval, in seconds
|
31
32
|
DEFAULT_INTERVAL = 5.minutes
|
32
33
|
|
33
|
-
|
34
|
-
#
|
35
|
-
|
34
|
+
|
35
|
+
# Configurability API -- use the 'arborist.monitor' section
|
36
|
+
configurability( 'arborist.monitor' ) do
|
37
|
+
|
38
|
+
##
|
39
|
+
# A default splay to apply to all Monitors.
|
40
|
+
setting :splay, default: 0 do |value|
|
41
|
+
Float( value )
|
42
|
+
end
|
43
|
+
end
|
36
44
|
|
37
45
|
|
38
46
|
Arborist.add_dsl_constructor( self ) do |description=nil, key=nil, &block|
|
@@ -58,7 +66,8 @@ class Arborist::Monitor
|
|
58
66
|
nodes.each do |(identifier, data)|
|
59
67
|
self.log.debug "Serializing node properties for %s" % [ identifier ]
|
60
68
|
prop_map = data.collect do |key, val|
|
61
|
-
|
69
|
+
val = val.join( ',' ) if val.is_a?( Array )
|
70
|
+
"%s=%s" % [ key, Shellwords.escape(val) ]
|
62
71
|
end
|
63
72
|
|
64
73
|
self.log.debug " writing %d properties to %p" % [ prop_map.size, io ]
|
@@ -145,13 +154,13 @@ class Arborist::Monitor
|
|
145
154
|
### returned.
|
146
155
|
def initialize( description=nil, key=nil, &block )
|
147
156
|
@key = key
|
148
|
-
@description = description
|
157
|
+
@description = description || self.class.name
|
149
158
|
@interval = DEFAULT_INTERVAL
|
150
|
-
@splay =
|
159
|
+
@splay = Arborist::Monitor.splay
|
151
160
|
|
152
161
|
@positive_criteria = {}
|
153
162
|
@negative_criteria = {}
|
154
|
-
@
|
163
|
+
@exclude_down = false
|
155
164
|
@node_properties = []
|
156
165
|
|
157
166
|
@exec_command = nil
|
@@ -198,7 +207,7 @@ class Arborist::Monitor
|
|
198
207
|
##
|
199
208
|
# Flag for whether the monitor will exclude downed hosts from its search. Defaults
|
200
209
|
# to +false+.
|
201
|
-
attr_predicate :
|
210
|
+
attr_predicate :exclude_down
|
202
211
|
|
203
212
|
##
|
204
213
|
# The list of node properties to include when running the monitor.
|
@@ -224,7 +233,7 @@ class Arborist::Monitor
|
|
224
233
|
|
225
234
|
### Return a string representation of the object suitable for debugging.
|
226
235
|
def inspect
|
227
|
-
return "#<%p:%#x %s (every %ds
|
236
|
+
return "#<%p:%#x %s (every %ds +-%ds)>" % [
|
228
237
|
self.class,
|
229
238
|
self.object_id * 2,
|
230
239
|
self.description || "(no description)",
|
@@ -330,7 +339,7 @@ class Arborist::Monitor
|
|
330
339
|
### for nodes it will run against.
|
331
340
|
def match( criteria )
|
332
341
|
self.positive_criteria.merge!( criteria )
|
333
|
-
@
|
342
|
+
@exclude_down = self.exclude_down &&
|
334
343
|
Arborist::Node::UNREACHABLE_STATES.include?( self.positive_criteria[:status] )
|
335
344
|
end
|
336
345
|
|
@@ -344,9 +353,9 @@ class Arborist::Monitor
|
|
344
353
|
|
345
354
|
### Specify that the monitor should (or should not) exclude nodes which have been
|
346
355
|
### marked 'down'.
|
347
|
-
def
|
348
|
-
@
|
349
|
-
return @
|
356
|
+
def exclude_down( flag=nil )
|
357
|
+
@exclude_down = flag unless flag.nil?
|
358
|
+
return @exclude_down
|
350
359
|
end
|
351
360
|
|
352
361
|
|
@@ -363,9 +372,11 @@ class Arborist::Monitor
|
|
363
372
|
self.log.warn "Ignored block with exec %s (%p)" % [ command.first, block ] if block
|
364
373
|
|
365
374
|
if command.first.respond_to?( :run )
|
366
|
-
|
375
|
+
runner = command.first
|
376
|
+
@exec_block = runner.method( :run )
|
377
|
+
@node_properties |= runner.node_properties if runner.respond_to?( :node_properties )
|
367
378
|
else
|
368
|
-
@exec_command = command
|
379
|
+
@exec_command = command.map( &:to_s )
|
369
380
|
end
|
370
381
|
|
371
382
|
return
|
@@ -408,6 +419,7 @@ class Arborist::Monitor
|
|
408
419
|
### external command.
|
409
420
|
def exec_callbacks( mod )
|
410
421
|
self.log.info "Setting exec callbacks handler to: %p" % [ mod.name ]
|
422
|
+
@node_properties |= mod.node_properties if mod.respond_to?( :node_properties )
|
411
423
|
self.exec_callbacks_mod = mod
|
412
424
|
end
|
413
425
|
|
@@ -0,0 +1,293 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'loggability'
|
5
|
+
|
6
|
+
require 'arborist/monitor' unless defined?( Arborist::Monitor )
|
7
|
+
require 'arborist/mixins'
|
8
|
+
|
9
|
+
using Arborist::TimeRefinements
|
10
|
+
|
11
|
+
|
12
|
+
# A mixin for adding batched connections for socket-based monitors.
|
13
|
+
module Arborist::Monitor::ConnectionBatching
|
14
|
+
|
15
|
+
# The default number of connections to have open -- this should be well under
|
16
|
+
# the RLIMIT_NOFILE of the current process.
|
17
|
+
DEFAULT_BATCH_SIZE = 150
|
18
|
+
|
19
|
+
# The default connection timeout
|
20
|
+
DEFAULT_TIMEOUT = 2.0
|
21
|
+
|
22
|
+
|
23
|
+
# An object that manages batching of connections and gathering results.
|
24
|
+
class BatchRunner
|
25
|
+
extend Loggability
|
26
|
+
|
27
|
+
# Loggability API -- log to the Arborist logger
|
28
|
+
log_to :arborist
|
29
|
+
|
30
|
+
|
31
|
+
### Create a new BatchRunner for the specified +enum+ (an Enumerator)
|
32
|
+
def initialize( enum, batch_size, timeout )
|
33
|
+
@enum = enum
|
34
|
+
@results = {}
|
35
|
+
@current_batch = []
|
36
|
+
@connection_hashes = {}
|
37
|
+
@start = nil
|
38
|
+
@batch_size = batch_size || DEFAULT_BATCH_SIZE
|
39
|
+
@timeout = timeout || DEFAULT_TIMEOUT
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
######
|
44
|
+
public
|
45
|
+
######
|
46
|
+
|
47
|
+
##
|
48
|
+
# The Enumerator that yields connection hashes
|
49
|
+
attr_accessor :enum
|
50
|
+
|
51
|
+
##
|
52
|
+
# The results hash
|
53
|
+
attr_reader :results
|
54
|
+
|
55
|
+
##
|
56
|
+
# The batch of connection hashes that are currently being selected, ordered from
|
57
|
+
# oldest to newest.
|
58
|
+
attr_reader :current_batch
|
59
|
+
|
60
|
+
##
|
61
|
+
# An index of the current batch's connection hashes by connection.
|
62
|
+
attr_reader :connection_hashes
|
63
|
+
|
64
|
+
##
|
65
|
+
# The Time the batch runner started.
|
66
|
+
attr_accessor :start
|
67
|
+
|
68
|
+
##
|
69
|
+
# The maximum number of connections to have running at any time.
|
70
|
+
attr_reader :batch_size
|
71
|
+
|
72
|
+
##
|
73
|
+
# The connection timeout from the monitor, in seconds
|
74
|
+
attr_reader :timeout
|
75
|
+
|
76
|
+
|
77
|
+
### Returns +true+ if the runner has been run and all connections have been
|
78
|
+
### handled.
|
79
|
+
def finished?
|
80
|
+
return self.start && self.enum.nil? && self.current_batch.empty?
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
### Returns +true+ if the current batch is at capacity.
|
85
|
+
def batch_full?
|
86
|
+
return self.current_batch.length >= self.batch_size
|
87
|
+
end
|
88
|
+
|
89
|
+
|
90
|
+
### Fetch the next connection from the Enumerator, unsetting the enumerator and
|
91
|
+
### returning +nil+ when it reaches the end.
|
92
|
+
def next_connection
|
93
|
+
conn_hash = self.enum.next
|
94
|
+
conn_hash[:start] = Time.now
|
95
|
+
conn_hash[:timeout_at] = conn_hash[:start] + self.timeout
|
96
|
+
|
97
|
+
return conn_hash
|
98
|
+
rescue StopIteration
|
99
|
+
self.log.debug "Reached the end of the connections enum."
|
100
|
+
self.enum = nil
|
101
|
+
return nil
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
### Add a new conn_hash to the current batch. If the +conn_hash+'s connection
|
106
|
+
### is an exception, don't add it and just add an error status for it built from
|
107
|
+
### the exception.
|
108
|
+
def add_connection( conn_hash )
|
109
|
+
if conn_hash[:conn].is_a?( ::Exception )
|
110
|
+
self.log.debug "Adding an error result for %{identifier}." % conn_hash
|
111
|
+
self.results[ conn_hash[:identifier] ] = { error: conn_hash[:conn].message }
|
112
|
+
else
|
113
|
+
self.log.debug "Added connection for %{identifier} to the batch." % conn_hash
|
114
|
+
self.current_batch.push( conn_hash )
|
115
|
+
self.connection_hashes[ conn_hash[:conn] ] = conn_hash
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
|
120
|
+
### Remove the specified +conn_hash+ from the current batch.
|
121
|
+
def remove_connection( conn_hash )
|
122
|
+
self.current_batch.delete( conn_hash )
|
123
|
+
self.connection_hashes.delete( conn_hash[:conn] )
|
124
|
+
end
|
125
|
+
|
126
|
+
|
127
|
+
### Remove the connection hash for the specified +socket+ from the current
|
128
|
+
### batch and return it (if it was in the batch).
|
129
|
+
def remove_socket( socket )
|
130
|
+
conn_hash = self.connection_hashes.delete( socket )
|
131
|
+
self.current_batch.delete( conn_hash )
|
132
|
+
|
133
|
+
return conn_hash
|
134
|
+
end
|
135
|
+
|
136
|
+
|
137
|
+
### Fill the #current_batch if it's not yet at capacity and there are more
|
138
|
+
### connections to be made.
|
139
|
+
def fill_batch
|
140
|
+
# If the enum is not nil and the array isn't full, fetch a new connection
|
141
|
+
while self.enum && !self.batch_full?
|
142
|
+
self.log.debug "Adding connections to the queue."
|
143
|
+
conn_hash = self.next_connection or break
|
144
|
+
self.add_connection( conn_hash )
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
|
149
|
+
### Shift any connections which have timed out off of the current batch and
|
150
|
+
### return the timeout of the oldest non-timed-out connection.
|
151
|
+
def remove_timedout_connections
|
152
|
+
expired = self.current_batch.take_while do |conn_hash|
|
153
|
+
conn_hash[ :timeout_at ].past?
|
154
|
+
end
|
155
|
+
|
156
|
+
wait_seconds = if self.current_batch.empty?
|
157
|
+
1
|
158
|
+
else
|
159
|
+
self.current_batch.first[:timeout_at] - Time.now
|
160
|
+
end
|
161
|
+
|
162
|
+
expired.each do |conn_hash|
|
163
|
+
self.remove_connection( conn_hash )
|
164
|
+
self.log.debug "Discarding timed-out socket for %{identifier}." % conn_hash
|
165
|
+
|
166
|
+
elapsed = conn_hash[:timeout_at] - conn_hash[:start]
|
167
|
+
self.results[ conn_hash[:identifier] ] = {
|
168
|
+
error: "Timeout after %0.3fs" % [ elapsed ]
|
169
|
+
}
|
170
|
+
end
|
171
|
+
|
172
|
+
return wait_seconds.abs
|
173
|
+
end
|
174
|
+
|
175
|
+
|
176
|
+
### Wait at most +wait_seconds+ for one of the sockets in the current batch
|
177
|
+
### to become ready. If any are ready before the +wait_seconds+ have elapsed,
|
178
|
+
### returns them as an Array. If +wait_seconds+ goes by without any sockets becoming
|
179
|
+
### ready, or if there were no sockets to wait on, returns +nil+.
|
180
|
+
def wait_for_ready_connections( wait_seconds )
|
181
|
+
sockets = self.connection_hashes.keys
|
182
|
+
ready = nil
|
183
|
+
|
184
|
+
self.log.debug "Selecting on %d sockets." % [ sockets.length ]
|
185
|
+
_, ready, _ = IO.select( nil, sockets, nil, wait_seconds ) unless sockets.empty?
|
186
|
+
|
187
|
+
return ready
|
188
|
+
end
|
189
|
+
|
190
|
+
|
191
|
+
### Run the batch runner, yielding to the specified +block+ as each connection
|
192
|
+
### becomes ready.
|
193
|
+
def run( &block )
|
194
|
+
self.start = Time.now
|
195
|
+
|
196
|
+
until self.finished?
|
197
|
+
self.log.debug "Getting the status of %d connections." %
|
198
|
+
[ self.current_batch.length ]
|
199
|
+
|
200
|
+
self.fill_batch
|
201
|
+
wait_seconds = self.remove_timedout_connections
|
202
|
+
ready = self.wait_for_ready_connections( wait_seconds )
|
203
|
+
|
204
|
+
# If the select returns ready sockets
|
205
|
+
# Build successful status for each ready socket
|
206
|
+
now = Time.now
|
207
|
+
ready.each do |sock|
|
208
|
+
conn_hash = self.remove_socket( sock ) or
|
209
|
+
raise "Ready socket %p was not in the current batch!" % [ sock ]
|
210
|
+
|
211
|
+
identifier, start = conn_hash.values_at( :identifier, :start )
|
212
|
+
duration = now - start
|
213
|
+
|
214
|
+
results[ identifier ] = block.call( conn_hash, duration )
|
215
|
+
end if ready
|
216
|
+
end
|
217
|
+
|
218
|
+
return Time.now - self.start
|
219
|
+
end
|
220
|
+
|
221
|
+
end # class BatchRunner
|
222
|
+
|
223
|
+
|
224
|
+
### Inclusion callback -- add the #timeout and #batch_size attributes to including monitors.
|
225
|
+
def self::included( mod )
|
226
|
+
mod.attr_accessor :timeout
|
227
|
+
mod.attr_accessor :batch_size
|
228
|
+
|
229
|
+
super
|
230
|
+
end
|
231
|
+
|
232
|
+
|
233
|
+
### Return a clone of the receiving monitor with its batch size set to
|
234
|
+
### +new_size+.
|
235
|
+
def with_batch_size( new_size )
|
236
|
+
copy = self.clone
|
237
|
+
copy.batch_size = new_size
|
238
|
+
return copy
|
239
|
+
end
|
240
|
+
|
241
|
+
|
242
|
+
### Return a clone of receiving monitor with its timeout set to +new_timeout+.
|
243
|
+
def with_timeout( new_timeout )
|
244
|
+
copy = self.clone
|
245
|
+
copy.timeout = new_timeout
|
246
|
+
return copy
|
247
|
+
end
|
248
|
+
|
249
|
+
|
250
|
+
### Run the monitor, batching connections for the specified +nodes+ so the
|
251
|
+
### monitor doesn't exhaust its file descriptors.
|
252
|
+
def run( nodes )
|
253
|
+
connections = self.make_connections_enum( nodes )
|
254
|
+
return self.handle_connections( connections )
|
255
|
+
end
|
256
|
+
|
257
|
+
|
258
|
+
### Return an Enumerator that yields Hashes that describe the connections to be
|
259
|
+
### made. They must contain, at a minimum, the following keys:
|
260
|
+
###
|
261
|
+
### +conn+:: The Socket (or other IO object) that is used to communicate with the
|
262
|
+
### monitored host. This should be created using non-blocking connection.
|
263
|
+
### +identifier+:: The node identifier associated with the +conn+.
|
264
|
+
###
|
265
|
+
### You can add any other members to each Hash that you require to actually use
|
266
|
+
### the connection when it becomes available.
|
267
|
+
def make_connections_enum( nodes )
|
268
|
+
raise "%p does not provide a %s method!" % [ __method__ ]
|
269
|
+
end
|
270
|
+
|
271
|
+
|
272
|
+
### Called when a socket becomes ready. It should generate a status update for
|
273
|
+
### the node that corresponds to the given +conn_hash+ and return it as a Hash.
|
274
|
+
### The +duration+ is how long it took for the connection to be ready, in
|
275
|
+
### seconds.
|
276
|
+
def status_for_conn( conn_hash, duration )
|
277
|
+
raise "%p does not provide a %s method!" % [ __method__ ]
|
278
|
+
end
|
279
|
+
|
280
|
+
|
281
|
+
### Fetch connections from +connections_enum+ and build a Hash of node updates
|
282
|
+
### keyed by identifier based on the results.
|
283
|
+
def handle_connections( connections_enum )
|
284
|
+
runner = BatchRunner.new( connections_enum, self.batch_size, self.timeout )
|
285
|
+
runner.run do |conn_hash, duration|
|
286
|
+
self.status_for_conn( conn_hash, duration )
|
287
|
+
end
|
288
|
+
return runner.results
|
289
|
+
end
|
290
|
+
|
291
|
+
end # module Arborist::Monitor::ConnectionBatching
|
292
|
+
|
293
|
+
|
@@ -7,23 +7,36 @@ require 'timeout'
|
|
7
7
|
require 'socket'
|
8
8
|
|
9
9
|
require 'arborist/monitor' unless defined?( Arborist::Monitor )
|
10
|
-
|
11
|
-
using Arborist::TimeRefinements
|
10
|
+
require 'arborist/monitor/connection_batching'
|
12
11
|
|
13
12
|
|
14
13
|
# Socket-related Arborist monitor logic
|
15
14
|
module Arborist::Monitor::Socket
|
15
|
+
extend Configurability
|
16
|
+
|
17
|
+
|
18
|
+
configurability( 'arborist.monitors.socket' ) do
|
19
|
+
|
20
|
+
##
|
21
|
+
# The default timeout employed by the socket monitors, in floating-point
|
22
|
+
# seconds.
|
23
|
+
setting :default_timeout, default: 2.0 do |val|
|
24
|
+
Float( val )
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
16
29
|
|
17
30
|
# Arborist TCP socket monitor logic
|
18
31
|
class TCP
|
19
32
|
extend Loggability
|
33
|
+
include Arborist::Monitor::ConnectionBatching
|
34
|
+
|
20
35
|
log_to :arborist
|
21
36
|
|
22
37
|
|
23
|
-
#
|
24
|
-
|
25
|
-
timeout: 2.seconds
|
26
|
-
}
|
38
|
+
# Always request the node addresses and port.
|
39
|
+
USED_PROPERTIES = [ :addresses, :port ].freeze
|
27
40
|
|
28
41
|
|
29
42
|
### Instantiate a monitor check and run it for the specified +nodes+.
|
@@ -32,16 +45,18 @@ module Arborist::Monitor::Socket
|
|
32
45
|
end
|
33
46
|
|
34
47
|
|
48
|
+
### Return the properties used by this monitor.
|
49
|
+
def self::node_properties
|
50
|
+
return USED_PROPERTIES
|
51
|
+
end
|
52
|
+
|
53
|
+
|
35
54
|
### Create a new TCP monitor with the specified +options+. Valid options are:
|
36
55
|
###
|
37
56
|
### +:timeout+
|
38
57
|
### Set the number of seconds to wait for a connection for each node.
|
39
|
-
def initialize(
|
40
|
-
|
41
|
-
|
42
|
-
options.each do |name, value|
|
43
|
-
self.public_send( "#{name}=", value )
|
44
|
-
end
|
58
|
+
def initialize( timeout: Arborist::Monitor::Socket.default_timeout )
|
59
|
+
self.timeout = timeout
|
45
60
|
end
|
46
61
|
|
47
62
|
|
@@ -49,33 +64,11 @@ module Arborist::Monitor::Socket
|
|
49
64
|
public
|
50
65
|
######
|
51
66
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
### updates for them based on trying to connect to them.
|
58
|
-
def run( nodes )
|
59
|
-
self.log.debug "Got nodes to TCP check: %p" % [ nodes ]
|
60
|
-
|
61
|
-
connections = self.make_connections( nodes )
|
62
|
-
return self.wait_for_connections( connections )
|
63
|
-
end
|
64
|
-
|
65
|
-
|
66
|
-
### Return a clone of this object with its timeout set to +new_timeout+.
|
67
|
-
def with_timeout( new_timeout )
|
68
|
-
copy = self.clone
|
69
|
-
copy.timeout = new_timeout
|
70
|
-
return copy
|
71
|
-
end
|
72
|
-
|
73
|
-
|
74
|
-
### Open a socket for each of the specified nodes using non-blocking connect(2), and
|
75
|
-
### return a Hash of the sockets (or the error from the connection attempt) keyed by
|
76
|
-
### node identifier.
|
77
|
-
def make_connections( nodes )
|
78
|
-
return nodes.each_with_object( {} ) do |(identifier, node_data), accum|
|
67
|
+
### Return an Enumerator that lazily yields Hashes of the form expected by the
|
68
|
+
### ConnectionBatching mixin for each of the specified +nodes+.
|
69
|
+
def make_connections_enum( nodes )
|
70
|
+
return nodes.lazy.map do |identifier, node_data|
|
71
|
+
self.log.debug "Creating a socket for %s" % [ identifier ]
|
79
72
|
|
80
73
|
# :TODO: Should this try all the addresses? Should you be able to specify an
|
81
74
|
# address for a Service?
|
@@ -89,6 +82,7 @@ module Arborist::Monitor::Socket
|
|
89
82
|
conn = begin
|
90
83
|
sockaddr = Socket.sockaddr_in( port, address )
|
91
84
|
sock.connect_nonblock( sockaddr )
|
85
|
+
sock
|
92
86
|
rescue Errno::EINPROGRESS
|
93
87
|
self.log.debug " connection started"
|
94
88
|
sock
|
@@ -97,74 +91,40 @@ module Arborist::Monitor::Socket
|
|
97
91
|
err
|
98
92
|
end
|
99
93
|
|
100
|
-
|
94
|
+
{ conn: conn, identifier: identifier }
|
101
95
|
end
|
102
96
|
end
|
103
97
|
|
104
98
|
|
105
|
-
###
|
106
|
-
###
|
107
|
-
def
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
until connections.empty? || wait_seconds <= 0
|
122
|
-
self.log.debug "Waiting on %d connections for %0.3fs..." %
|
123
|
-
[ connections.values.length, wait_seconds ]
|
124
|
-
|
125
|
-
_, ready, _ = IO.select( nil, connections.keys, nil, wait_seconds )
|
126
|
-
|
127
|
-
now = Time.now
|
128
|
-
ready.each do |sock|
|
129
|
-
identifier, sockaddr = *connections.delete( sock )
|
130
|
-
|
131
|
-
begin
|
132
|
-
res = sock.getpeername
|
133
|
-
self.log.debug "connected to %s" % [ identifier ]
|
134
|
-
results[ identifier ] = {
|
135
|
-
tcp_socket_connect: { time: now.iso8601, duration: now - start }
|
136
|
-
}
|
137
|
-
rescue SocketError, SystemCallError => err
|
138
|
-
begin
|
139
|
-
sock.read( 1 )
|
140
|
-
rescue => err
|
141
|
-
self.log.debug "read: %p: %s" % [ err.class, err.message ]
|
142
|
-
results[ identifier ] = { error: err.message }
|
143
|
-
end
|
144
|
-
ensure
|
145
|
-
sock.close
|
146
|
-
end
|
147
|
-
|
148
|
-
end if ready
|
149
|
-
|
150
|
-
wait_seconds = timeout_at - Time.now
|
151
|
-
end
|
152
|
-
|
153
|
-
# Anything left is a timeout
|
154
|
-
connections.each do |sock, (identifier, _)|
|
155
|
-
self.log.debug "%s: timeout (no connection in %0.3ds)" % [ identifier, self.timeout ]
|
156
|
-
results[ identifier ] = { error: "Timeout after %0.3fs" % [self.timeout] }
|
157
|
-
sock.close
|
99
|
+
### Build a status for the specified +conn_hash+ after its :conn has indicated
|
100
|
+
### it is ready.
|
101
|
+
def status_for_conn( conn_hash, duration )
|
102
|
+
sock = conn_hash[:conn]
|
103
|
+
# Why getpeername? Testing socket success without read()ing, I think?
|
104
|
+
# FreeBSD source?
|
105
|
+
res = sock.getpeername
|
106
|
+
return {
|
107
|
+
tcp_socket_connect: { duration: duration }
|
108
|
+
}
|
109
|
+
rescue SocketError, SystemCallError => err
|
110
|
+
self.log.debug "Got %p while connecting to %s" % [ err.class, conn_hash[:identifier] ]
|
111
|
+
begin
|
112
|
+
sock.read( 1 )
|
113
|
+
rescue => err
|
114
|
+
return { error: err.message }
|
158
115
|
end
|
159
|
-
|
160
|
-
|
116
|
+
ensure
|
117
|
+
sock.close if sock
|
161
118
|
end
|
119
|
+
|
162
120
|
end # class TCP
|
163
121
|
|
164
122
|
|
165
123
|
# Arborist UDP socket monitor logic
|
166
124
|
class UDP
|
167
125
|
extend Loggability
|
126
|
+
include Arborist::Monitor::ConnectionBatching
|
127
|
+
|
168
128
|
log_to :arborist
|
169
129
|
|
170
130
|
|
@@ -173,6 +133,9 @@ module Arborist::Monitor::Socket
|
|
173
133
|
timeout: 0.001
|
174
134
|
}
|
175
135
|
|
136
|
+
# Always request the node addresses and port.
|
137
|
+
USED_PROPERTIES = [ :addresses, :port ].freeze
|
138
|
+
|
176
139
|
|
177
140
|
### Instantiate a monitor check and run it for the specified +nodes+.
|
178
141
|
def self::run( nodes )
|
@@ -180,16 +143,18 @@ module Arborist::Monitor::Socket
|
|
180
143
|
end
|
181
144
|
|
182
145
|
|
146
|
+
### Return the properties used by this monitor.
|
147
|
+
def self::node_properties
|
148
|
+
return USED_PROPERTIES
|
149
|
+
end
|
150
|
+
|
151
|
+
|
183
152
|
### Create a new UDP monitor with the specified +options+. Valid options are:
|
184
153
|
###
|
185
154
|
### +:timeout+
|
186
155
|
### Set the number of seconds to wait for a connection for each node.
|
187
|
-
def initialize(
|
188
|
-
|
189
|
-
|
190
|
-
options.each do |name, value|
|
191
|
-
self.public_send( "#{name}=", value )
|
192
|
-
end
|
156
|
+
def initialize( timeout: Arborist::Monitor::Socket.default_timeout )
|
157
|
+
self.timeout = timeout
|
193
158
|
end
|
194
159
|
|
195
160
|
|
@@ -197,26 +162,11 @@ module Arborist::Monitor::Socket
|
|
197
162
|
public
|
198
163
|
######
|
199
164
|
|
200
|
-
# The timeout for connecting, in seconds.
|
201
|
-
attr_accessor :timeout
|
202
|
-
|
203
|
-
|
204
|
-
### Run the UDP check for each of the specified Hash of +nodes+ and return a Hash of
|
205
|
-
### updates for them based on trying to connect to them.
|
206
|
-
def run( nodes )
|
207
|
-
self.log.debug "Got nodes to UDP check: %p" % [ nodes ]
|
208
|
-
|
209
|
-
connections = self.make_connections( nodes )
|
210
|
-
return self.wait_for_connections( connections )
|
211
|
-
end
|
212
|
-
|
213
|
-
|
214
165
|
### Open a socket for each of the specified nodes and return a Hash of
|
215
166
|
### the sockets (or the error from the connection attempt) keyed by
|
216
167
|
### node identifier.
|
217
|
-
def
|
218
|
-
return nodes.
|
219
|
-
|
168
|
+
def make_connections_enum( nodes )
|
169
|
+
return nodes.lazy.map do |identifier, node_data|
|
220
170
|
address = node_data['addresses'].first
|
221
171
|
port = node_data['port']
|
222
172
|
|
@@ -224,61 +174,45 @@ module Arborist::Monitor::Socket
|
|
224
174
|
sock = Socket.new( :INET, :DGRAM )
|
225
175
|
|
226
176
|
conn = begin
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
### For any elements of +connections+ that are sockets, wait on them to complete or error
|
242
|
-
### and then return a Hash of node updates keyed by identifier based on the results.
|
243
|
-
def wait_for_connections( connections )
|
244
|
-
results = {}
|
245
|
-
start = Time.now
|
177
|
+
sockaddr = Socket.sockaddr_in( port, address )
|
178
|
+
sock.connect( sockaddr )
|
179
|
+
sock.send( '', 0 )
|
180
|
+
sock.recvfrom_nonblock( 1 )
|
181
|
+
sock
|
182
|
+
rescue Errno::EAGAIN
|
183
|
+
self.log.debug " connection started"
|
184
|
+
sock
|
185
|
+
rescue => err
|
186
|
+
self.log.error " %p setting up connection: %s" % [ err.class, err.message ]
|
187
|
+
err
|
188
|
+
end
|
246
189
|
|
247
|
-
|
248
|
-
|
249
|
-
next false if sock.respond_to?( :recvfrom_nonblock ) # Keep sockets
|
250
|
-
self.log.debug " removing connect error for node %s" % [ identifier ]
|
251
|
-
results[ identifier ] = { error: sock.message }
|
190
|
+
self.log.debug "UDP connection object is: %p" % [ conn ]
|
191
|
+
{ conn: conn, identifier: identifier }
|
252
192
|
end
|
193
|
+
end
|
253
194
|
|
254
|
-
# Test all connections
|
255
|
-
connections.each do |sock, (identifier, _)|
|
256
|
-
begin
|
257
|
-
sock.recvfrom_nonblock( 1 )
|
258
|
-
|
259
|
-
rescue IO::WaitReadable
|
260
|
-
ready, _, _ = IO.select( [sock], [], [], self.timeout )
|
261
|
-
if ready.nil?
|
262
|
-
now = Time.now
|
263
|
-
results[ identifier ] = {
|
264
|
-
udp_socket_connect: { time: now.iso8601, duration: now - start }
|
265
|
-
}
|
266
|
-
self.log.debug " connection successful"
|
267
|
-
else
|
268
|
-
retry
|
269
|
-
end
|
270
195
|
|
271
|
-
|
272
|
-
|
273
|
-
|
196
|
+
### Build a status for the specified +conn_hash+ after its :conn has indicated
|
197
|
+
### it is ready.
|
198
|
+
def status_for_conn( conn_hash, duration )
|
199
|
+
sock = conn_hash[:conn]
|
200
|
+
sock.recvfrom_nonblock( 1 )
|
201
|
+
return {
|
202
|
+
udp_socket_connect: { duration: duration }
|
203
|
+
}
|
204
|
+
rescue Errno::EAGAIN
|
205
|
+
return {
|
206
|
+
udp_socket_connect: { duration: duration }
|
207
|
+
}
|
208
|
+
rescue SocketError, SystemCallError => err
|
209
|
+
self.log.debug "Got %p while connecting to %s" % [ err.class, conn_hash[:identifier] ]
|
210
|
+
return { error: err.message }
|
211
|
+
ensure
|
212
|
+
sock.close if sock
|
213
|
+
end
|
274
214
|
|
275
|
-
ensure
|
276
|
-
sock.close
|
277
|
-
end
|
278
|
-
end
|
279
215
|
|
280
|
-
return results
|
281
|
-
end
|
282
216
|
end # class UDP
|
283
217
|
|
284
218
|
|