arborist 0.0.1.pre20160106113421

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.document +4 -0
  3. data/.simplecov +9 -0
  4. data/ChangeLog +417 -0
  5. data/Events.md +20 -0
  6. data/History.md +4 -0
  7. data/LICENSE +29 -0
  8. data/Manifest.txt +72 -0
  9. data/Monitors.md +141 -0
  10. data/Nodes.md +0 -0
  11. data/Observers.md +72 -0
  12. data/Protocol.md +214 -0
  13. data/README.md +75 -0
  14. data/Rakefile +81 -0
  15. data/TODO.md +24 -0
  16. data/bin/amanagerd +10 -0
  17. data/bin/amonitord +12 -0
  18. data/bin/aobserverd +12 -0
  19. data/lib/arborist.rb +182 -0
  20. data/lib/arborist/client.rb +191 -0
  21. data/lib/arborist/event.rb +61 -0
  22. data/lib/arborist/event/node_acked.rb +18 -0
  23. data/lib/arborist/event/node_delta.rb +20 -0
  24. data/lib/arborist/event/node_matching.rb +34 -0
  25. data/lib/arborist/event/node_update.rb +19 -0
  26. data/lib/arborist/event/sys_reloaded.rb +15 -0
  27. data/lib/arborist/exceptions.rb +21 -0
  28. data/lib/arborist/manager.rb +508 -0
  29. data/lib/arborist/manager/event_publisher.rb +97 -0
  30. data/lib/arborist/manager/tree_api.rb +207 -0
  31. data/lib/arborist/mixins.rb +363 -0
  32. data/lib/arborist/monitor.rb +377 -0
  33. data/lib/arborist/monitor/socket.rb +163 -0
  34. data/lib/arborist/monitor_runner.rb +217 -0
  35. data/lib/arborist/node.rb +700 -0
  36. data/lib/arborist/node/host.rb +87 -0
  37. data/lib/arborist/node/root.rb +60 -0
  38. data/lib/arborist/node/service.rb +112 -0
  39. data/lib/arborist/observer.rb +176 -0
  40. data/lib/arborist/observer/action.rb +125 -0
  41. data/lib/arborist/observer/summarize.rb +105 -0
  42. data/lib/arborist/observer_runner.rb +181 -0
  43. data/lib/arborist/subscription.rb +82 -0
  44. data/spec/arborist/client_spec.rb +282 -0
  45. data/spec/arborist/event/node_update_spec.rb +71 -0
  46. data/spec/arborist/event_spec.rb +64 -0
  47. data/spec/arborist/manager/event_publisher_spec.rb +66 -0
  48. data/spec/arborist/manager/tree_api_spec.rb +458 -0
  49. data/spec/arborist/manager_spec.rb +442 -0
  50. data/spec/arborist/mixins_spec.rb +195 -0
  51. data/spec/arborist/monitor/socket_spec.rb +195 -0
  52. data/spec/arborist/monitor_runner_spec.rb +152 -0
  53. data/spec/arborist/monitor_spec.rb +251 -0
  54. data/spec/arborist/node/host_spec.rb +104 -0
  55. data/spec/arborist/node/root_spec.rb +29 -0
  56. data/spec/arborist/node/service_spec.rb +98 -0
  57. data/spec/arborist/node_spec.rb +552 -0
  58. data/spec/arborist/observer/action_spec.rb +205 -0
  59. data/spec/arborist/observer/summarize_spec.rb +294 -0
  60. data/spec/arborist/observer_spec.rb +146 -0
  61. data/spec/arborist/subscription_spec.rb +71 -0
  62. data/spec/arborist_spec.rb +146 -0
  63. data/spec/data/monitors/pings.rb +80 -0
  64. data/spec/data/monitors/port_checks.rb +27 -0
  65. data/spec/data/monitors/system_resources.rb +30 -0
  66. data/spec/data/monitors/web_services.rb +17 -0
  67. data/spec/data/nodes/duir.rb +20 -0
  68. data/spec/data/nodes/localhost.rb +15 -0
  69. data/spec/data/nodes/sidonie.rb +29 -0
  70. data/spec/data/nodes/yevaud.rb +26 -0
  71. data/spec/data/observers/auditor.rb +23 -0
  72. data/spec/data/observers/webservices.rb +18 -0
  73. data/spec/spec_helper.rb +117 -0
  74. metadata +368 -0
@@ -0,0 +1,377 @@
1
+ # -*- ruby -*-
2
+ #encoding: utf-8
3
+
4
+ require 'shellwords'
5
+ require 'arborist' unless defined?( Arborist )
6
+ require 'arborist/mixins'
7
+
8
+ using Arborist::TimeRefinements
9
+
10
+
11
+ # A declaration of an action to run against Manager nodes to update their state.
12
+ class Arborist::Monitor
13
+ extend Loggability,
14
+ Arborist::MethodUtilities
15
+
16
+ # Loggability API -- write logs to the Arborist log host
17
+ log_to :arborist
18
+
19
+
20
+ ##
21
+ # The key for the thread local that is used to track instances as they're
22
+ # loaded.
23
+ LOADED_INSTANCE_KEY = :loaded_monitor_instances
24
+
25
+ ##
26
+ # The glob pattern to use for searching for monitors
27
+ MONITOR_FILE_PATTERN = '**/*.rb'
28
+
29
+ ##
30
+ # The default monitoring interval, in seconds
31
+ DEFAULT_INTERVAL = 5.minutes
32
+
33
+ ##
34
+ # The default number of seconds to defer startup to splay common intervals
35
+ DEFAULT_SPLAY = 0
36
+
37
+
38
+ Arborist.add_dsl_constructor( :Monitor ) do |description, &block|
39
+ Arborist::Monitor.new( description, &block )
40
+ end
41
+
42
+
43
+ # The module that contains the default logic for invoking an external program
44
+ # to do the work of a Monitor.
45
+ module DefaultCallbacks
46
+
### Build the extra arguments to append to the external command for the
### given +nodes+. This default adds nothing, so it always answers with an
### empty Array regardless of which nodes are passed.
def exec_arguments( nodes )
  Array.new
end
52
+
53
+
### Serialize each of the given +nodes+ to +io+, one line per node, in the form
###
###   identifier key1=val1 key2=val2
###
### Property values are shell-escaped. Nothing is written if +io+ has
### already been closed.
def exec_input( nodes, io )
  unless io.closed?
    nodes.each do |node|
      self.log.debug "Serializing node properties for %s" % [ node.identifier ]
      pairs = node.properties.map {|key, val| "%s=%s" % [key, Shellwords.escape(val)] }

      self.log.debug " writing %d properties to %p" % [ pairs.size, io ]
      io.puts "%s %s" % [ node.identifier, pairs.join(' ') ]
      self.log.debug " wrote the node to FD %d" % [ io.fileno ]
    end

    self.log.debug "done writing to FD %d" % [ io.fileno ]
  end
end
71
+
72
+
### Collect the results of the external command running as +pid+.
###
### The child's +err+ stream is closed without being read, then every line of
### +out+ is parsed as
###
###   identifier key1=val1 key2=val2
###
### with shell-style quoting honored in the attribute list. Blank lines are
### skipped, and a line that carries only an identifier maps to an empty Hash.
### +out+ is always closed, even if parsing raises. Waits for +pid+ to exit and
### returns a Hash of attribute Hashes keyed by identifier.
def handle_results( pid, out, err )
  err.flush
  err.close
  self.log.debug "Closed child's stderr."

  results = {}
  begin
    out.each_line do |line|
      # Stripping first means a blank line yields no identifier at all,
      # instead of a nil attribute string that Shellwords can't split.
      identifier, attributes = line.strip.split( ' ', 2 )
      next unless identifier

      pairs = Shellwords.shellsplit( attributes.to_s )
      results[ identifier ] = pairs.each_with_object( {} ) do |pair, hash|
        key, val = pair.split( '=', 2 )
        hash[ key ] = val
      end
    end
  ensure
    out.close
  end

  self.log.debug "Waiting on PID %d" % [ pid ]
  Process.waitpid( pid )

  return results
end
96
+
97
+ end # module DefaultCallbacks
98
+
99
+
# A throwaway object in which the exec callbacks are run for a single
# invocation of an external command. It mixes in the DefaultCallbacks and
# writes its logs to the Arborist log host.
class RunContext
  include DefaultCallbacks

  extend Loggability
  log_to :arborist
end # class RunContext
107
+
108
+
109
+
### Overridden so that every Monitor that gets created is handed to
### ::add_loaded_instance before being returned, which lets ::load collect the
### monitors declared via the DSL.
def self::new( * )
  super.tap {|monitor| Arborist::Monitor.add_loaded_instance(monitor) }
end
116
+
117
+
### Append +new_instance+ to the list of loaded instances kept in the
### Thread-local, if one has been set up. Does nothing (and returns +nil+) when
### no load is being tracked.
def self::add_loaded_instance( new_instance )
  tracked = Thread.current[ LOADED_INSTANCE_KEY ]
  return unless tracked

  tracked << new_instance
end
124
+
125
+
### Load the specified +file+ and return whatever instances were recorded in
### the tracking Thread-local while it was being evaluated. The Thread-local
### is always reset to +nil+ afterwards, even if loading raises.
def self::load( file )
  self.log.info "Loading monitor file %s..." % [ file ]

  thread = Thread.current
  thread[ LOADED_INSTANCE_KEY ] = []
  Kernel.load( file )

  thread[ LOADED_INSTANCE_KEY ]
ensure
  Thread.current[ LOADED_INSTANCE_KEY ] = nil
end
135
+
136
+
### Load all the monitor files found under the specified +directory+ (a String
### or a Pathname) and return the resulting monitors as a flat Array. If
### +directory+ is not a directory, it is treated as a single monitor file.
### Each returned monitor has its +source+ set to the `file://` URL of the
### file it was loaded from.
def self::each_in( directory )
  path = Pathname( directory )
  paths = if path.directory?
      # Join via the Pathname so a path separator is inserted; concatenating
      # onto a String directory would glob "dir**/*.rb" and miss nested files.
      Pathname.glob( (path + MONITOR_FILE_PATTERN).to_s )
    else
      [ path ]
    end

  return paths.flat_map do |file|
    file_url = "file://%s" % [ file.expand_path ]
    monitors = self.load( file )
    self.log.debug "Loaded monitors %p..." % [ monitors ]
    monitors.each do |monitor|
      monitor.source = file_url
    end
    monitors
  end
end
156
+
157
+
### Set up a new Monitor with the given +description+ and default settings:
### the default interval and splay, empty match/exclude criteria, no node
### properties, nothing to execute, and a fresh Module for exec callbacks. If a
### +block+ is given it is evaluated in the context of the new object once the
### defaults are in place.
def initialize( description, &block )
  @description = description
  @source      = nil

  @interval, @splay = DEFAULT_INTERVAL, DEFAULT_SPLAY

  @include_down      = false
  @node_properties   = []
  @positive_criteria = {}
  @negative_criteria = {}

  @exec_block         = nil
  @exec_command       = nil
  @exec_callbacks_mod = Module.new

  self.instance_exec( &block ) if block
end
179
+
180
+
181
+ ######
182
+ public
183
+ ######
184
+
185
+ ##
186
+ # The object's description
187
+ attr_accessor :description
188
+
189
+ ##
190
+ # The interval between runs in seconds, as set by `every`.
191
+ attr_writer :interval
192
+
193
+ ##
194
+ # The number of seconds of splay to use when running the monitor.
195
+ attr_writer :splay
196
+
197
+ ##
198
+ # A Hash of criteria to pass to the Manager when searching for nodes to monitor.
199
+ attr_reader :positive_criteria
200
+
201
+ ##
202
+ # A Hash of criteria to pass to the Manager to filter out nodes to monitor.
203
+ attr_reader :negative_criteria
204
+
205
+ ##
206
+ # Flag for whether the monitor will include downed hosts in its search. Defaults
207
+ # to +false+.
208
+ attr_predicate :include_down
209
+
210
+ ##
211
+ # The list of node properties to include when running the monitor.
212
+ attr_reader :node_properties
213
+
214
+ ##
215
+ # The shell command to exec when running the monitor (if any). This can be
216
+ # any valid arguments to the `Kernel.spawn` method.
217
+ attr_accessor :exec_command
218
+
219
+ ##
220
+ # The callback to invoke when the monitor is run.
221
+ attr_accessor :exec_block
222
+
223
+ ##
224
+ # The monitor's execution callbacks contained in a Module
225
+ attr_accessor :exec_callbacks_mod
226
+
227
+ ##
228
+ # The path to the source this Monitor was loaded from, if applicable
229
+ attr_accessor :source
230
+
231
+
### Run the monitor against the given +nodes+. The exec block is preferred if
### one is set; otherwise the external exec command is run. Returns +nil+ if
### neither has been declared.
def run( nodes )
  return self.exec_block.call( nodes ) if self.exec_block
  return nil unless self.exec_command

  external = self.exec_command
  self.run_external_command( external, nodes )
end
241
+
242
+
### Run the external +command+ against the specified +nodes+.
###
### A fresh RunContext (extended with the monitor's callbacks module, if any)
### supplies the extra arguments, writes the child's input, and parses its
### output. Returns whatever the context's #handle_results returns.
###
### All pipe ends that are still open are closed on the way out, and the child
### is reaped if it was spawned, whether or not an exception was raised.
def run_external_command( command, nodes )
  self.log.debug "Running external command %p for %d nodes" % [ command, nodes.size ]
  context = Arborist::Monitor::RunContext.new
  context.extend( self.exec_callbacks_mod ) if self.exec_callbacks_mod

  arguments = Array( context.exec_arguments(nodes) )
  command += arguments.flatten( 1 )
  self.log.debug " command after adding arguments: %p" % [ command ]

  child_stdin, parent_writer = IO.pipe
  parent_reader, child_stdout = IO.pipe
  parent_err_reader, child_stderr = IO.pipe

  self.log.debug "Spawning command: %s" % [ Shellwords.join(command) ]
  pid = Process.spawn( *command, out: child_stdout, in: child_stdin, err: child_stderr )

  # The child has its own copies now; holding ours open would keep the
  # pipes from ever reaching EOF.
  child_stdout.close
  child_stdin.close
  child_stderr.close

  context.exec_input( nodes, parent_writer )
  parent_writer.close

  return context.handle_results( pid, parent_reader, parent_err_reader )
ensure
  # Close anything left open by a failed spawn or a callback that raised
  # (or that simply didn't close its streams).
  pipe_ends = [
    child_stdin, child_stdout, child_stderr,
    parent_writer, parent_reader, parent_err_reader
  ]
  pipe_ends.each do |io|
    io.close if io && !io.closed?
  end

  if pid
    begin
      Process.kill( 0, pid ) # waitpid if it's still alive
      Process.waitpid( pid )
    rescue Errno::ESRCH, Errno::ECHILD
      # Already exited and reaped; nothing left to wait for.
    end
  end
end
277
+
278
+
### Get or set the monitor's run interval. With a truthy +seconds+ the interval
### is replaced by it; either way the current interval is returned.
def every( seconds=nil )
  return @interval unless seconds

  @interval = seconds
end
284
+ alias_method :interval, :every
285
+
286
+
### Get or set the number of seconds of interval splay used when running the
### monitor. A truthy +seconds+ replaces the current value; the value in
### effect is returned in both cases.
def splay( seconds=nil )
  seconds ? ( @splay = seconds ) : @splay
end
293
+
294
+
### Add the given +criteria+ to the set the monitor uses when searching for
### nodes to run against. Keys that are already present are overwritten.
### Returns the updated positive criteria Hash.
def match( criteria )
  @positive_criteria.update( criteria )
end
300
+
301
+
### Add the given +criteria+ to the set used to filter nodes out of the
### monitor's search. Keys that are already present are overwritten. Returns
### the updated negative criteria Hash.
def exclude( criteria )
  @negative_criteria.update( criteria )
end
307
+
308
+
### Get or set whether the monitor should include nodes which have been marked
### 'down'. Any non-nil +flag+ (including +false+) replaces the current
### setting; the setting in effect is returned.
def include_down( flag=nil )
  return @include_down if flag.nil?

  @include_down = flag
end
315
+
316
+
### Declare which +properties+ of each node should be provided to the monitor.
### Replaces any previously-declared list and returns the new one.
def use( *properties )
  self.instance_variable_set( :@node_properties, properties )
end
321
+
322
+
### Declare what does the actual monitoring. Accepts either:
###
### * one or more +command+ arguments, which are kept as the external command;
### * an object that responds to #run as the first argument, whose #run method
###   becomes the exec block; or
### * a +block+, which becomes the exec block.
###
### A block given along with a command is ignored with a warning.
def exec( *command, &block )
  return @exec_block = block if command.empty?

  self.log.warn "Ignored block with exec %s (%p)" % [ command.first, block ] if block

  runner = command.first
  if runner.respond_to?( :run )
    @exec_block = runner.method( :run )
  else
    @exec_command = command
  end

  return nil
end
339
+
340
+
### Declare an argument-building callback for the command run by 'exec' by
### defining +block+ as the #exec_arguments method of the monitor's callbacks
### module. The +block+ should accept an Array of nodes and return an Array of
### arguments for the command.
def exec_arguments( &block )
  callbacks = self.exec_callbacks_mod
  callbacks.send( :define_method, :exec_arguments, &block )
end
348
+
349
+
### Declare an input-building callback for the command run by 'exec' by
### defining +block+ as the #exec_input method of the monitor's callbacks
### module. The +block+ should accept an Array of nodes and a writable IO
### object, and should write whatever input the command needs to the IO.
def exec_input( &block )
  callbacks = self.exec_callbacks_mod
  callbacks.send( :define_method, :exec_input, &block )
end
358
+
359
+
### Declare a results handler +block+ that will be used to parse the results for
### external commands. The block should accept 2 or 3 arguments: a PID, an IO that will
### be opened to the command's STDOUT, and optionally an IO that will be opened to the
### command's STDERR.
###
### The handler is always invoked with all three arguments, and methods defined
### from blocks check their arity strictly, so a two-argument block is wrapped:
### the wrapper closes the unused STDERR stream and passes only the PID and the
### STDOUT IO along. Any other block is installed as-is.
def handle_results( &block )
  self.exec_callbacks_mod.instance_exec( block ) do |method_body|
    if method_body && method_body.arity == 2
      define_method( :handle_results_without_stderr, &method_body )
      define_method( :handle_results ) do |pid, out, err|
        err.close unless err.closed?
        handle_results_without_stderr( pid, out )
      end
    else
      define_method( :handle_results, &method_body )
    end
  end
end
369
+
370
+
### Use +mod+ as the module that provides the callbacks for interacting with
### the executed external command, replacing the current callbacks module.
### Returns +mod+.
def exec_callbacks( mod )
  self.exec_callbacks_mod = mod
  return mod
end
376
+
377
+ end # class Arborist::Monitor
@@ -0,0 +1,163 @@
1
+ # -*- ruby -*-
2
+ #encoding: utf-8
3
+
4
+ require 'loggability'
5
+ require 'timeout'
6
+
7
+ require 'arborist/monitor' unless defined?( Arborist::Monitor )
8
+
9
+ using Arborist::TimeRefinements
10
+
11
+
12
+ # Socket-related Arborist monitor logic
13
+ module Arborist::Monitor::Socket
14
+
15
+
16
+ # Arborist TCP socket monitor logic
17
+ class TCP
18
+ extend Loggability
19
+ log_to :arborist
20
+
21
+
22
+ # Defaults for instances of this monitor
23
+ DEFAULT_OPTIONS = {
24
+ timeout: 2.seconds
25
+ }
26
+
27
+
### Convenience entry point: create a monitor check with the default options
### and run it for the specified +nodes+, returning its results.
def self::run( nodes )
  checker = self.new
  checker.run( nodes )
end
32
+
33
+
### Create a new TCP monitor with the specified +options+, which are layered
### over the defaults and then applied through the matching writer methods.
### Passing +nil+ is the same as passing no options. Valid options are:
###
### +:timeout+
###   Set the number of seconds to wait for a connection for each node.
def initialize( options=DEFAULT_OPTIONS )
  settings = DEFAULT_OPTIONS.merge( options || {} )
  settings.each_pair {|name, value| self.public_send("#{name}=", value) }
end
45
+
46
+
47
+ ######
48
+ public
49
+ ######
50
+
51
+ # The timeout for connecting, in seconds.
52
+ attr_accessor :timeout
53
+
54
+
### Run the TCP check for each of the specified Hash of +nodes+: start a
### connection for every node, wait for them to finish, and return the
### resulting Hash of updates.
def run( nodes )
  self.log.debug "Got nodes to TCP check: %p" % [ nodes ]
  self.wait_for_connections( self.make_connections(nodes) )
end
63
+
64
+
### Return a clone of this object with its timeout set to +new_timeout+. The
### receiver's own timeout is left alone.
def with_timeout( new_timeout )
  self.clone.tap {|copy| copy.timeout = new_timeout }
end
71
+
72
+
### Open a socket for each of the specified nodes using non-blocking connect(2), and
### return a Hash of `[ identifier, sockaddr ]` pairs keyed by the socket (or by the
### error from the connection attempt).
###
### Each node is handled independently: one that has no address or port, or whose
### connection can't be started, is recorded under its error instead of
### aborting the rest. The socket of a failed attempt is closed before the error
### is recorded. A connect that completes immediately is still keyed by its socket.
def make_connections( nodes )
  connections = {}

  nodes.each do |identifier, node_data|
    sockaddr = nil
    sock = nil

    conn = begin
        # :TODO: Should this try all the addresses? Should you be able to specify an
        # address for a Service?
        address = Array( node_data['addresses'] ).first
        port = node_data['port']
        raise ArgumentError, "no address to connect to" unless address
        raise ArgumentError, "no port to connect to" unless port

        self.log.debug "Creating TCP connection for %s:%s" % [ address, port ]

        # Build the address before opening the socket so a bad address never
        # leaves a descriptor behind.
        sockaddr = ::Socket.sockaddr_in( port, address )
        sock = ::Socket.new( :INET, :STREAM )

        # connect_nonblock returns 0 if the connection completes right away;
        # the socket itself is what needs to be tracked either way.
        sock.connect_nonblock( sockaddr )
        sock
      rescue Errno::EINPROGRESS
        self.log.debug " connection started"
        sock
      rescue => err
        self.log.error " %p setting up connection: %s" % [ err.class, err.message ]
        sock.close if sock && !sock.closed?
        err
      end

    connections[ conn ] = [ identifier, sockaddr ]
  end

  return connections
end
102
+
103
+
### For any elements of +connections+ that are sockets, wait on them to complete or error
### and then return a Hash of node updates keyed by identifier based on the results.
###
### +connections+ is a Hash of `[ identifier, sockaddr ]` pairs keyed by either a
### socket or the error raised while starting its connection. Error keys are
### removed from +connections+ and reported as `{ error: message }`. Sockets that
### become writable are tested with a second connect: success is reported as
### `{ tcp_socket_connect: { time:, duration: } }` and failure as `{ error: message }`.
### Sockets still pending when the timeout expires are reported as timeouts.
### Every socket is closed before returning.
def wait_for_connections( connections )
  results = {}

  # Measure the deadline on the monotonic clock so wall-clock adjustments
  # can't stretch or shorten it.
  now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  start = now.call
  deadline = start + self.timeout

  # First strip out all the ones that failed in the first #connect_nonblock
  connections.delete_if do |sock, (identifier, _)|
    next false if sock.respond_to?( :connect_nonblock ) # Keep sockets
    self.log.debug " removing connect error for node %s" % [ identifier ]
    results[ identifier ] = { error: sock.message }
    true
  end

  # Now wait for connections to complete
  until connections.empty?
    # Computed once per pass so IO.select is never handed a negative timeout
    remaining = deadline - now.call
    break unless remaining > 0

    self.log.debug "Waiting on %d connections for %0.3fs..." % [ connections.length, remaining ]
    _, ready, _ = IO.select( nil, connections.keys, nil, remaining )

    self.log.debug " select returned: %p" % [ ready ]
    next unless ready

    ready.each do |sock|
      self.log.debug " %p is ready" % [ sock ]
      identifier, sockaddr = *connections.delete( sock )
      self.log.debug "%p became writable: testing connection state" % [ sock ]

      begin
        begin
          self.log.debug " trying another connection to %p" % [ sockaddr ]
          sock.connect_nonblock( sockaddr )
        rescue Errno::EISCONN
          # Already connected, which is the usual way success is reported
        end

        # Returning normally from the second connect also means connected
        self.log.debug " connection successful"
        results[ identifier ] = {
          tcp_socket_connect: { time: Time.now.to_s, duration: now.call - start }
        }
      rescue SocketError, SystemCallError => err
        self.log.debug "%p during connection: %s" % [ err.class, err.message ]
        results[ identifier ] = { error: err.message }
      ensure
        sock.close
      end
    end
  end

  # Anything left is a timeout
  connections.each do |sock, (identifier, _)|
    self.log.debug "%s: timeout (no connection in %0.3fs)" % [ identifier, self.timeout ]
    results[ identifier ] = { error: "Timeout after %0.3fs" % [self.timeout] }
    sock.close
  end

  return results
end
157
+
158
+ end # class TCP
159
+
160
+
161
+ end # module Arborist::Monitor::Socket
162
+
163
+