arborist 0.0.1.pre20160106113421

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.document +4 -0
  3. data/.simplecov +9 -0
  4. data/ChangeLog +417 -0
  5. data/Events.md +20 -0
  6. data/History.md +4 -0
  7. data/LICENSE +29 -0
  8. data/Manifest.txt +72 -0
  9. data/Monitors.md +141 -0
  10. data/Nodes.md +0 -0
  11. data/Observers.md +72 -0
  12. data/Protocol.md +214 -0
  13. data/README.md +75 -0
  14. data/Rakefile +81 -0
  15. data/TODO.md +24 -0
  16. data/bin/amanagerd +10 -0
  17. data/bin/amonitord +12 -0
  18. data/bin/aobserverd +12 -0
  19. data/lib/arborist.rb +182 -0
  20. data/lib/arborist/client.rb +191 -0
  21. data/lib/arborist/event.rb +61 -0
  22. data/lib/arborist/event/node_acked.rb +18 -0
  23. data/lib/arborist/event/node_delta.rb +20 -0
  24. data/lib/arborist/event/node_matching.rb +34 -0
  25. data/lib/arborist/event/node_update.rb +19 -0
  26. data/lib/arborist/event/sys_reloaded.rb +15 -0
  27. data/lib/arborist/exceptions.rb +21 -0
  28. data/lib/arborist/manager.rb +508 -0
  29. data/lib/arborist/manager/event_publisher.rb +97 -0
  30. data/lib/arborist/manager/tree_api.rb +207 -0
  31. data/lib/arborist/mixins.rb +363 -0
  32. data/lib/arborist/monitor.rb +377 -0
  33. data/lib/arborist/monitor/socket.rb +163 -0
  34. data/lib/arborist/monitor_runner.rb +217 -0
  35. data/lib/arborist/node.rb +700 -0
  36. data/lib/arborist/node/host.rb +87 -0
  37. data/lib/arborist/node/root.rb +60 -0
  38. data/lib/arborist/node/service.rb +112 -0
  39. data/lib/arborist/observer.rb +176 -0
  40. data/lib/arborist/observer/action.rb +125 -0
  41. data/lib/arborist/observer/summarize.rb +105 -0
  42. data/lib/arborist/observer_runner.rb +181 -0
  43. data/lib/arborist/subscription.rb +82 -0
  44. data/spec/arborist/client_spec.rb +282 -0
  45. data/spec/arborist/event/node_update_spec.rb +71 -0
  46. data/spec/arborist/event_spec.rb +64 -0
  47. data/spec/arborist/manager/event_publisher_spec.rb +66 -0
  48. data/spec/arborist/manager/tree_api_spec.rb +458 -0
  49. data/spec/arborist/manager_spec.rb +442 -0
  50. data/spec/arborist/mixins_spec.rb +195 -0
  51. data/spec/arborist/monitor/socket_spec.rb +195 -0
  52. data/spec/arborist/monitor_runner_spec.rb +152 -0
  53. data/spec/arborist/monitor_spec.rb +251 -0
  54. data/spec/arborist/node/host_spec.rb +104 -0
  55. data/spec/arborist/node/root_spec.rb +29 -0
  56. data/spec/arborist/node/service_spec.rb +98 -0
  57. data/spec/arborist/node_spec.rb +552 -0
  58. data/spec/arborist/observer/action_spec.rb +205 -0
  59. data/spec/arborist/observer/summarize_spec.rb +294 -0
  60. data/spec/arborist/observer_spec.rb +146 -0
  61. data/spec/arborist/subscription_spec.rb +71 -0
  62. data/spec/arborist_spec.rb +146 -0
  63. data/spec/data/monitors/pings.rb +80 -0
  64. data/spec/data/monitors/port_checks.rb +27 -0
  65. data/spec/data/monitors/system_resources.rb +30 -0
  66. data/spec/data/monitors/web_services.rb +17 -0
  67. data/spec/data/nodes/duir.rb +20 -0
  68. data/spec/data/nodes/localhost.rb +15 -0
  69. data/spec/data/nodes/sidonie.rb +29 -0
  70. data/spec/data/nodes/yevaud.rb +26 -0
  71. data/spec/data/observers/auditor.rb +23 -0
  72. data/spec/data/observers/webservices.rb +18 -0
  73. data/spec/spec_helper.rb +117 -0
  74. metadata +368 -0
@@ -0,0 +1,377 @@
1
+ # -*- ruby -*-
2
+ #encoding: utf-8
3
+
4
+ require 'shellwords'
5
+ require 'arborist' unless defined?( Arborist )
6
+ require 'arborist/mixins'
7
+
8
+ using Arborist::TimeRefinements
9
+
10
+
11
+ # A declaration of an action to run against Manager nodes to update their state.
12
+ class Arborist::Monitor
13
+ extend Loggability,
14
+ Arborist::MethodUtilities
15
+
16
+ # Loggability API -- write logs to the Arborist log host
17
+ log_to :arborist
18
+
19
+
20
+ ##
21
+ # The key for the thread local that is used to track instances as they're
22
+ # loaded.
23
+ LOADED_INSTANCE_KEY = :loaded_monitor_instances
24
+
25
+ ##
26
+ # The glob pattern to use for searching for monitors
27
+ MONITOR_FILE_PATTERN = '**/*.rb'
28
+
29
+ ##
30
+ # The default monitoring interval, in seconds
31
+ DEFAULT_INTERVAL = 5.minutes
32
+
33
+ ##
34
+ # The default number of seconds to defer startup to splay common intervals
35
+ DEFAULT_SPLAY = 0
36
+
37
+
38
+ Arborist.add_dsl_constructor( :Monitor ) do |description, &block|
39
+ Arborist::Monitor.new( description, &block )
40
+ end
41
+
42
+
43
+ # The module that contains the default logic for invoking an external program
44
+ # to do the work of a Monitor.
45
+ module DefaultCallbacks
46
+
47
### Build the extra command-line arguments for the external command from the
### given +nodes+. This default implementation contributes none, so it always
### returns an empty Array.
def exec_arguments( nodes )
  []
end
52
+
53
+
54
### Serialize each of the given +nodes+ to +io+, one node per line: the node's
### identifier followed by its properties as shell-escaped <tt>key=value</tt>
### pairs separated by spaces. Does nothing if +io+ has already been closed.
def exec_input( nodes, io )
  return if io.closed?

  nodes.each do |node|
    self.log.debug( "Serializing node properties for %s" % [node.identifier] )
    pairs = node.properties.map do |name, value|
      format( "%s=%s", name, Shellwords.escape(value) )
    end

    self.log.debug( " writing %d properties to %p" % [pairs.size, io] )
    io.puts( format("%s %s", node.identifier, pairs.join(' ')) )
    self.log.debug( " wrote the node to FD %d" % [io.fileno] )
  end

  self.log.debug( "done writing to FD %d" % [io.fileno] )
end
71
+
72
+
73
+ ### Return the results of running the external command
74
+ def handle_results( pid, out, err )
75
+ err.flush
76
+ err.close
77
+ self.log.debug "Closed child's stderr."
78
+
79
+ # identifier key1=val1 key2=val2
80
+ results = out.each_line.with_object({}) do |line, accum|
81
+ identifier, attributes = line.split( ' ', 2 )
82
+ attrhash = Shellwords.shellsplit( attributes ).each_with_object({}) do |pair, hash|
83
+ key, val = pair.split( '=', 2 )
84
+ hash[ key ] = val
85
+ end
86
+
87
+ accum[ identifier ] = attrhash
88
+ end
89
+ out.close
90
+
91
+ self.log.debug "Waiting on PID %d" % [ pid ]
92
+ Process.waitpid( pid )
93
+
94
+ return results
95
+ end
96
+
97
+ end # module DefaultCallbacks
98
+
99
+
100
# A throwaway object class whose instances provide the binding in which the exec
# callbacks are run. It mixes in the DefaultCallbacks and logs to the Arborist
# log host.
class RunContext
  include DefaultCallbacks

  extend Loggability
  log_to :arborist
end # class RunContext
107
+
108
+
109
+
110
### Overridden so that every newly-created monitor is handed to
### ::add_loaded_instance, which lets the DSL loader track what it created.
### Returns the new instance.
def self::new( * )
  super.tap do |monitor|
    Arborist::Monitor.add_loaded_instance( monitor )
  end
end
116
+
117
+
118
### Append +new_instance+ to the list of loaded instances kept in the current
### Thread's LOADED_INSTANCE_KEY thread-local. If that thread-local isn't set,
### nothing is recorded and +nil+ is returned.
def self::add_loaded_instance( new_instance )
  tracked = Thread.current[ LOADED_INSTANCE_KEY ]
  return unless tracked

  tracked << new_instance
end
124
+
125
+
126
### Load the specified +file+ and return whatever instances were recorded in the
### current Thread's LOADED_INSTANCE_KEY thread-local while it was loading. The
### thread-local is always reset to +nil+ afterwards, even if loading raises.
def self::load( file )
  thread = Thread.current
  self.log.info( "Loading monitor file %s..." % [file] )

  thread[ LOADED_INSTANCE_KEY ] = []
  Kernel.load( file )

  thread[ LOADED_INSTANCE_KEY ]
ensure
  Thread.current[ LOADED_INSTANCE_KEY ] = nil
end
135
+
136
+
137
### Load all the monitor files found under the specified +directory+ and return
### the monitors they declare as a flat Array. If +directory+ is not a directory,
### it is treated as the path to a single monitor file. Each returned monitor has
### its +source+ set to the <tt>file://</tt> URL of the file it was loaded from.
###
### The +directory+ may be either a String or a Pathname.
def self::each_in( directory )
  path = Pathname( directory )
  paths = if path.directory?
      # Join via the Pathname rather than the raw argument: if +directory+ is a
      # String, `directory + pattern` would concatenate without a separator and
      # the glob would match nothing.
      Pathname.glob( (path + MONITOR_FILE_PATTERN).to_s )
    else
      [ path ]
    end

  paths.flat_map do |file|
    file_url = "file://%s" % [ file.expand_path ]
    monitors = self.load( file )
    self.log.debug "Loaded monitors %p..." % [ monitors ]
    monitors.each do |monitor|
      monitor.source = file_url
    end
    monitors
  end
end
156
+
157
+
158
### Set up a new Monitor with the specified +description+ and default settings:
### the default interval and splay, empty positive and negative criteria, no node
### properties, no exec command or block, and a fresh Module for exec callbacks.
### If a +block+ is given, it is evaluated in the context of the new Monitor
### after the defaults have been set.
def initialize( description, &block )
  @description = description
  @source = nil

  # Scheduling
  @interval = DEFAULT_INTERVAL
  @splay = DEFAULT_SPLAY

  # Node selection
  @positive_criteria = {}
  @negative_criteria = {}
  @include_down = false
  @node_properties = []

  # What to run
  @exec_command = nil
  @exec_block = nil
  @exec_callbacks_mod = Module.new

  self.instance_exec( &block ) if block
end
179
+
180
+
181
+ ######
182
+ public
183
+ ######
184
+
185
+ ##
186
+ # The object's description
187
+ attr_accessor :description
188
+
189
+ ##
190
+ # The interval between runs in seconds, as set by `every`.
191
+ attr_writer :interval
192
+
193
+ ##
194
+ # The number of seconds of splay to use when running the monitor.
195
+ attr_writer :splay
196
+
197
+ ##
198
+ # A Hash of criteria to pass to the Manager when searching for nodes to monitor.
199
+ attr_reader :positive_criteria
200
+
201
+ ##
202
+ # A Hash of criteria to pass to the Manager to filter out nodes to monitor.
203
+ attr_reader :negative_criteria
204
+
205
+ ##
206
+ # Flag for whether the monitor will include downed hosts in its search. Defaults
207
+ # to +false+.
208
+ attr_predicate :include_down
209
+
210
+ ##
211
+ # The list of node properties to include when running the monitor.
212
+ attr_reader :node_properties
213
+
214
+ ##
215
+ # The shell command to exec when running the monitor (if any). This can be
216
+ # any valid arguments to the `Kernel.spawn` method.
217
+ attr_accessor :exec_command
218
+
219
+ ##
220
+ # The callback to invoke when the monitor is run.
221
+ attr_accessor :exec_block
222
+
223
+ ##
224
+ # The monitor's execution callbacks contained in a Module
225
+ attr_accessor :exec_callbacks_mod
226
+
227
+ ##
228
+ # The path to the source this Monitor was loaded from, if applicable
229
+ attr_accessor :source
230
+
231
+
232
### Run the monitor against the specified +nodes+. If an exec block is set, it is
### called with the +nodes+ and its result is returned; otherwise, if an exec
### command is set, it is run via #run_external_command. Returns +nil+ if neither
### is set.
def run( nodes )
  callback = self.exec_block
  return callback.call( nodes ) if callback

  command = self.exec_command
  return self.run_external_command( command, nodes ) if command

  nil
end
241
+
242
+
243
### Run the external +command+ against the specified +nodes+ and return whatever
### the run context's #handle_results returns.
###
### A new RunContext is created (and extended with the monitor's exec callbacks
### module if there is one), and is used to:
###
### * build extra arguments, which are appended to +command+ (#exec_arguments)
### * write input to the child's stdin (#exec_input)
### * read the child's stdout/stderr and reap it (#handle_results)
###
### Whatever happens, every pipe end created here is closed and the child (if
### one was spawned) is reaped before this method returns or raises.
def run_external_command( command, nodes )
  self.log.debug "Running external command %p for %d nodes" % [ command, nodes.size ]
  context = Arborist::Monitor::RunContext.new
  context.extend( self.exec_callbacks_mod ) if self.exec_callbacks_mod

  arguments = Array( context.exec_arguments(nodes) )
  command += arguments.flatten( 1 )
  self.log.debug " command after adding arguments: %p" % [ command ]

  child_stdin, parent_writer = IO.pipe
  parent_reader, child_stdout = IO.pipe
  parent_err_reader, child_stderr = IO.pipe

  self.log.debug "Spawning command: %s" % [ Shellwords.join(command) ]
  pid = Process.spawn( *command, out: child_stdout, in: child_stdin, err: child_stderr )

  # The child has its own copies of these now
  child_stdout.close
  child_stdin.close
  child_stderr.close

  context.exec_input( nodes, parent_writer )
  parent_writer.close

  return context.handle_results( pid, parent_reader, parent_err_reader )
ensure
  # Close anything that's still open. This has to happen before waiting on the
  # child: if something raised before the child's stdin was closed, the child
  # could otherwise block forever waiting for input. It also keeps descriptors
  # from leaking when the spawn itself fails.
  pipes = [
    child_stdin, parent_writer,
    parent_reader, child_stdout,
    parent_err_reader, child_stderr
  ]
  pipes.each do |io|
    io.close if io && !io.closed?
  end

  if pid
    begin
      Process.kill( 0, pid ) # waitpid if it's still alive
      Process.waitpid( pid )
    rescue Errno::ESRCH, Errno::ECHILD
      # Already reaped (normally by #handle_results)
    end
  end
end
277
+
278
+
279
### Get or set the run interval. With +seconds+, set the interval to that value;
### with no argument, leave it as-is. Returns the current interval either way.
def every( seconds=nil )
  return @interval unless seconds

  @interval = seconds
end
284
+ alias_method :interval, :every
285
+
286
+
287
### Get or set the interval splay. With +seconds+, set the splay to that value;
### with no argument, leave it as-is. Returns the current splay either way.
def splay( seconds=nil )
  return @splay unless seconds

  @splay = seconds
end
293
+
294
+
295
### Merge the specified +criteria+ into the monitor's positive criteria, i.e.,
### the ones used to search for the nodes it will run against. Returns the
### updated criteria Hash.
def match( criteria )
  @positive_criteria.update( criteria )
end
300
+
301
+
302
### Merge the specified +criteria+ into the monitor's negative criteria, i.e.,
### the ones used to filter nodes out of the set it will run against. Returns the
### updated criteria Hash.
def exclude( criteria )
  @negative_criteria.update( criteria )
end
307
+
308
+
309
### Get or set whether the monitor includes nodes which have been marked 'down'.
### With a non-nil +flag+, set the setting to it; with no argument (or +nil+),
### leave it as-is. Returns the current setting either way.
def include_down( flag=nil )
  return @include_down if flag.nil?

  @include_down = flag
end
315
+
316
+
317
### Replace the list of node properties to include when running the monitor with
### the given +properties+. Returns the new list.
def use( *properties )
  return @node_properties = properties
end
321
+
322
+
323
### Specify what should be run to do the actual monitoring. Accepts either:
###
### * an object that responds to #run, whose #run method becomes the exec block
### * one or more command arguments, which are stored as the exec command
### * a block, which becomes the exec block
###
### If arguments are given, any block is ignored (with a warning) and +nil+ is
### returned; otherwise the block is returned.
def exec( *command, &block )
  return @exec_block = block if command.empty?

  self.log.warn "Ignored block with exec %s (%p)" % [ command.first, block ] if block

  runner = command.first
  if runner.respond_to?( :run )
    @exec_block = runner.method( :run )
  else
    @exec_command = command
  end

  return nil
end
339
+
340
+
341
### Declare an argument-building callback for the command run by 'exec' by
### defining the +block+ as the #exec_arguments method of the monitor's exec
### callbacks module. The +block+ should accept an Array of nodes and return an
### Array of arguments for the command.
def exec_arguments( &block )
  self.exec_callbacks_mod.send( :define_method, :exec_arguments, &block )
end
348
+
349
+
350
### Declare an input-building callback for the command run by 'exec' by defining
### the +block+ as the #exec_input method of the monitor's exec callbacks module.
### The +block+ should accept an Array of nodes and a writable IO object, and
### should write whatever input the command needs to the IO.
def exec_input( &block )
  self.exec_callbacks_mod.send( :define_method, :exec_input, &block )
end
358
+
359
+
360
### Declare a results handler for external commands by defining the +block+ as
### the #handle_results method of the monitor's exec callbacks module. The block
### should accept 2 or 3 arguments: a PID, an IO that will be opened to the
### command's STDOUT, and optionally an IO that will be opened to the command's
### STDERR.
def handle_results( &block )
  self.exec_callbacks_mod.send( :define_method, :handle_results, &block )
end
369
+
370
+
371
### Use +mod+ as the module that provides the callbacks used when interacting
### with the executed external command, replacing the monitor's current one.
### Returns +mod+.
def exec_callbacks( mod )
  self.exec_callbacks_mod = mod
  return mod
end
376
+
377
+ end # class Arborist::Monitor
@@ -0,0 +1,163 @@
1
+ # -*- ruby -*-
2
+ #encoding: utf-8
3
+
4
+ require 'loggability'
5
+ require 'timeout'
6
+
7
+ require 'arborist/monitor' unless defined?( Arborist::Monitor )
8
+
9
+ using Arborist::TimeRefinements
10
+
11
+
12
+ # Socket-related Arborist monitor logic
13
+ module Arborist::Monitor::Socket
14
+
15
+
16
+ # Arborist TCP socket monitor logic
17
+ class TCP
18
+ extend Loggability
19
+ log_to :arborist
20
+
21
+
22
+ # Defaults for instances of this monitor
23
+ DEFAULT_OPTIONS = {
24
+ timeout: 2.seconds
25
+ }
26
+
27
+
28
### Convenience entry point: create a monitor check with the default options, run
### it against the specified +nodes+, and return its results.
def self::run( nodes )
  checker = self.new
  checker.run( nodes )
end
32
+
33
+
34
### Create a new TCP monitor with the specified +options+, which are merged over
### the DEFAULT_OPTIONS (a +nil+ is treated as no options). Each resulting option
### is applied by calling the public writer of the same name. Valid options are:
###
### +:timeout+
###   Set the number of seconds to wait for a connection for each node.
def initialize( options=DEFAULT_OPTIONS )
  settings = DEFAULT_OPTIONS.merge( options || {} )

  settings.each_pair do |name, value|
    self.public_send( "#{name}=", value )
  end
end
45
+
46
+
47
+ ######
48
+ public
49
+ ######
50
+
51
+ # The timeout for connecting, in seconds.
52
+ attr_accessor :timeout
53
+
54
+
55
### Run the TCP check against the specified Hash of +nodes+: start a connection
### for each of them (#make_connections), then wait for them to finish and
### return the resulting Hash of updates (#wait_for_connections).
def run( nodes )
  self.log.debug( "Got nodes to TCP check: %p" % [nodes] )

  pending = self.make_connections( nodes )
  self.wait_for_connections( pending )
end
63
+
64
+
65
### Return a clone of this object whose timeout is +new_timeout+. The receiver's
### own timeout is left alone.
def with_timeout( new_timeout )
  self.clone.tap do |copy|
    copy.timeout = new_timeout
  end
end
71
+
72
+
73
### Open a socket for each of the specified +nodes+ using non-blocking connect(2),
### and return a Hash whose keys are the sockets (or, for nodes whose connection
### couldn't be started, the exception that was raised) and whose values are
### two-element Arrays of the node's identifier and the sockaddr that was used.
###
### Each node's data is expected to have an 'addresses' Array (only the first
### address is tried) and a 'port'.
def make_connections( nodes )
  nodes.each_with_object( {} ) do |(identifier, node_data), accum|

    # :TODO: Should this try all the addresses? Should you be able to specify an
    #        address for a Service?
    address = node_data['addresses'].first
    port = node_data['port']
    sockaddr = nil

    self.log.debug "Creating TCP connection for %s:%d" % [ address, port ]
    sock = ::Socket.new( :INET, :STREAM )

    conn = begin
        sockaddr = ::Socket.sockaddr_in( port, address )
        sock.connect_nonblock( sockaddr )

        # The connect completed immediately. #connect_nonblock returns 0 in this
        # case, so hand back the socket itself rather than that return value; it
        # will be reported as writable straight away when it's waited on.
        self.log.debug " connected immediately"
        sock
      rescue Errno::EINPROGRESS
        self.log.debug " connection started"
        sock
      rescue => err
        self.log.error " %p setting up connection: %s" % [ err.class, err.message ]
        # The socket is no use to anyone now, so don't leak it
        sock.close
        err
      end

    accum[ conn ] = [ identifier, sockaddr ]
  end
end
102
+
103
+
104
### For any elements of +connections+ that are sockets, wait on them to complete or
### error, and then return a Hash of node updates keyed by identifier based on the
### results. The +connections+ Hash is of the form returned by #make_connections
### and is emptied of everything but timed-out sockets as a side-effect.
###
### The update for each node is one of:
###
### * <tt>{ tcp_socket_connect: { time: ..., duration: ... } }</tt> if the
###   connection succeeded
### * <tt>{ error: message }</tt> if it failed, or didn't complete before the
###   #timeout elapsed
###
### Every socket in +connections+ is closed before returning.
def wait_for_connections( connections )
  results = {}
  start = Time.now
  timeout_at = start + self.timeout

  # First strip out all the ones that failed in the first #connect_nonblock
  connections.delete_if do |sock, (identifier, _)|
    next false if sock.respond_to?( :connect_nonblock ) # Keep sockets
    self.log.debug " removing connect error for node %s" % [ identifier ]
    results[ identifier ] = { error: sock.message }
    true
  end

  # Now wait for connections to complete
  until connections.empty?
    # Work out the time left once per pass so select is never handed a negative
    # timeout.
    remaining = timeout_at - Time.now
    break unless remaining > 0

    self.log.debug "Waiting on %d connections for %0.3fs..." %
      [ connections.length, remaining ]
    _, ready, _ = IO.select( nil, connections.keys, nil, remaining )

    self.log.debug " select returned: %p" % [ ready ]
    next unless ready # timed out; the loop condition will notice

    ready.each do |sock|
      self.log.debug " %p is ready" % [ sock ]
      identifier, sockaddr = *connections.delete( sock )
      self.log.debug "%p became writable: testing connection state" % [ sock ]

      begin
        self.log.debug " trying another connection to %p" % [ sockaddr ]
        begin
          sock.connect_nonblock( sockaddr )
        rescue Errno::EISCONN
          # Already connected. Depending on the platform, a second connect on an
          # established socket either raises this or just returns normally; both
          # mean success.
        end

        self.log.debug " connection successful"
        results[ identifier ] = {
          tcp_socket_connect: { time: Time.now.to_s, duration: Time.now - start }
        }
      rescue SocketError, SystemCallError => err
        self.log.debug "%p during connection: %s" % [ err.class, err.message ]
        results[ identifier ] = { error: err.message }
      ensure
        sock.close
      end
    end
  end

  # Anything left is a timeout
  connections.each do |sock, (identifier, _)|
    self.log.debug "%s: timeout (no connection in %0.3fs)" % [ identifier, self.timeout ]
    results[ identifier ] = { error: "Timeout after %0.3fs" % [self.timeout] }
    sock.close
  end

  return results
end
157
+
158
+ end # class TCP
159
+
160
+
161
+ end # module Arborist::Monitor::Socket
162
+
163
+