bfire 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/LICENSE +0 -0
  2. data/README.md +90 -0
  3. data/bin/bfire +120 -0
  4. data/examples/benchmark.rb +18 -0
  5. data/examples/dag.rb +26 -0
  6. data/examples/elasticity.rb +105 -0
  7. data/examples/ibbt.rb +125 -0
  8. data/examples/mine.rb +40 -0
  9. data/examples/modules/apache2/manifests/init.pp +44 -0
  10. data/examples/modules/app/files/app/app.rb +29 -0
  11. data/examples/modules/app/files/app/config.ru +2 -0
  12. data/examples/modules/app/files/app.phtml +4 -0
  13. data/examples/modules/app/manifests/init.pp +19 -0
  14. data/examples/modules/common/manifests/init.pp +8 -0
  15. data/examples/modules/haproxy/files/default +4 -0
  16. data/examples/modules/haproxy/files/haproxy.rsyslog.conf +2 -0
  17. data/examples/modules/haproxy/manifests/init.pp +21 -0
  18. data/examples/modules/mysql/manifests/init.pp +40 -0
  19. data/examples/modules/rsyslog/files/rsyslog.conf +116 -0
  20. data/examples/modules/rsyslog/manifests/init.pp +15 -0
  21. data/examples/modules/sinatra/manifests/init.pp +9 -0
  22. data/examples/modules/web/files/monitor/app.rb +55 -0
  23. data/examples/modules/web/files/monitor/config.ru +2 -0
  24. data/examples/modules/web/files/monitor/haproxy.cfg.erb +50 -0
  25. data/examples/modules/web/manifests/init.pp +26 -0
  26. data/examples/simple.rb +58 -0
  27. data/lib/bfire/aggregator/zabbix.rb +55 -0
  28. data/lib/bfire/engine.rb +546 -0
  29. data/lib/bfire/group.rb +241 -0
  30. data/lib/bfire/metric.rb +36 -0
  31. data/lib/bfire/provider/puppet.rb +58 -0
  32. data/lib/bfire/pub_sub/publisher.rb +40 -0
  33. data/lib/bfire/rule.rb +110 -0
  34. data/lib/bfire/template.rb +142 -0
  35. data/lib/bfire/version.rb +3 -0
  36. data/lib/bfire.rb +10 -0
  37. metadata +241 -0
@@ -0,0 +1,546 @@
1
+ require 'restfully'
2
+ require 'restfully/media_type/application_vnd_bonfire_xml'
3
+ require 'thread'
4
+ require 'thwait'
5
+
6
+ require 'net/ssh'
7
+ require 'net/scp'
8
+ require 'net/sftp'
9
+ require 'net/ssh/gateway'
10
+ require 'net/ssh/multi'
11
+
12
+ # Ruby Graph Library
13
+ require 'rgl/adjacency'
14
+ require 'rgl/topsort'
15
+
16
+ require 'bfire/group'
17
+ require 'bfire/aggregator/zabbix'
18
+ require 'bfire/metric'
19
+
20
+ module Bfire
21
+ class Engine
22
+ include PubSub::Publisher
23
+
24
+ DEBUG = Logger::DEBUG
25
+ INFO = Logger::INFO
26
+ WARN = Logger::WARN
27
+ ERROR = Logger::ERROR
28
+ UNKNOWN = Logger::UNKNOWN
29
+
30
+ # Engine configuration hash:
31
+ attr_reader :properties
32
+ # A Restfully::Session object:
33
+ attr_reader :session
34
+
35
# Creates an engine with empty group/network/storage/location registries,
# no experiment, and the default configuration applied by #reset.
#
# @param opts [Hash] only <tt>:root</tt> is read: the base directory used by
#   #path_to (falls back to the current working directory).
def initialize(opts = {})
  @mutex = Mutex.new
  @experiment = nil
  @properties = {}
  @vmgroups, @networks, @storages, @locations = {}, {}, {}, {}

  # The group of all master threads.
  @tg_master = ThreadGroup.new
  # The group of all threads related to a Group.
  @tg_groups = ThreadGroup.new

  @root = opts[:root] || Dir.pwd

  # Must come last: #reset writes into @properties.
  reset
end
52
+
53
# Resolves <tt>path</tt> against the engine's root directory.
#
# @param path [String] a relative or absolute path.
# @return [String] the absolute path; an already-absolute <tt>path</tt> is
#   returned as is (File.expand_path semantics).
def path_to(path)
  base_directory = @root
  File.expand_path(path, base_directory)
end
56
+
57
# Fills in every configuration entry that is still unset (nil or false)
# with its default value. Entries that already hold a value are untouched.
#
# The SSH key entries default to whatever #keychain returns (nil for both
# when no key pair is found).
def reset
  settings = conf

  # Defaults are wrapped in lambdas so that a default is only built when it
  # is actually needed (e.g. no Logger is created if one is configured).
  [
    [:name,             lambda { "Bfire experiment" }],
    [:description,      lambda { "Anonymous description" }],
    [:walltime,         lambda { 3600 }],
    [:logger,           lambda { Logger.new(STDOUT) }],
    [:logging,          lambda { INFO }],
    [:user,             lambda { ENV['USER'] }],
    [:ssh_max_attempts, lambda { 3 }]
  ].each do |property, default|
    settings[property] ||= default.call
  end

  public_key, private_key = keychain
  settings[:key] ||= private_key
  settings[:authorized_keys] ||= public_key
end
69
+
70
# Looks in ~/.ssh for the first public key (*.pub) whose private
# counterpart (same path without the ".pub" suffix) exists.
#
# @return [Array<String>, nil] <tt>[public_key_path, private_key_path]</tt>,
#   or nil when no such pair is found.
def keychain
  pattern = File.expand_path("~/.ssh/*.pub")
  Dir[pattern].each do |candidate|
    secret = candidate.gsub(/\.pub$/, "")
    return [candidate, secret] if File.exist?(secret)
  end
  nil
end
82
+
83
+ # Returns the directed acyclic graph for the given group names, based on
84
+ # their declared dependencies.
85
# Builds the dependency graph of the given groups: one vertex per group and
# per dependency, and one edge from each dependency to the group that
# declares it.
#
# @param nodes [Enumerable] names of declared groups.
# @return [RGL::DirectedAdjacencyGraph]
# @raise [Error] if the resulting graph contains a cycle.
def dag(nodes)
  graph = RGL::DirectedAdjacencyGraph.new

  nodes.each do |node|
    graph.add_vertex(node)
    group(node).dependencies.each do |dependency, _block|
      graph.add_vertex(dependency)
      graph.add_edge(dependency, node)
    end
  end

  if graph.acyclic?
    graph
  else
    raise Error, "Your dependency graph is not acyclic!"
  end
end
98
+
99
+ # Launch procedure. Will execute each group in a separate thread,
100
+ # and launch a thread to monitor experiment status.
101
# Entry point of an experiment run:
#
# 1. registers #cleanup! on the :error and :terminated events;
# 2. starts the experiment monitor in a thread of the master thread group;
# 3. either reloads an existing running experiment (dev mode, when one with
#    the configured name is found) or deploys the groups from scratch;
# 4. marks the experiment as "running", then launches the group monitors
#    if step 3 returned a truthy value, or cleans up otherwise;
# 5. waits for the master threads.
#
# Any exception (including non-StandardError ones, since Exception is
# rescued) is logged and turned into an :error event.
def run!
  # call #session to initiate Restfully::Session object outside of threads
  logger.info "#{banner}Using bonfire-api/#{session.root['version']}"

  [:error, :terminated].each do |event|
    on(event) { cleanup! }
  end

  watcher = Thread.new do
    Thread.current.abort_on_exception = true
    monitor
  end
  @tg_master.add(watcher)

  resumable = dev? && experiment(conf[:name])
  initialized = resumable ? resuscitate! : deploy!

  experiment.update(:status => "running")

  if initialized
    launch!
  else
    cleanup!
  end

  ThreadsWait.all_waits(*@tg_master.list)
rescue Exception => e
  logger.error "#{banner}#{e.class.name}: #{e.message}"
  logger.debug e.backtrace.join("; ")
  trigger :error
end
133
+
134
# Computes the topological order of all declared groups, logs it, and
# launches the groups in that order.
#
# @return [Boolean] the result of #launch_waiting_groups.
def deploy!
  ordering = dag(@vmgroups.keys).topsort_iterator
  # Iterate over a clone so that logging does not consume the iterator.
  planned = ordering.clone.to_a
  logger.info "#{banner}Launching groups in the following topological order: #{planned.inspect}."

  launch_waiting_groups(ordering)
end
141
+
142
+ # This launches the group in the topological order,
143
+ # and waits for the end of that initialization procedure.
144
# Walks the topological iterator one batch at a time. For every batch, the
# initial resources of each group are launched (each in its own thread,
# joined immediately), then the iterator is advanced past the batch.
#
# @param topsort_iterator the iterator returned by
#   <tt>RGL::DirectedAdjacencyGraph#topsort_iterator</tt>.
# @return [Boolean] true once the iterator is exhausted, false as soon as
#   the engine reports an error before starting a batch.
def launch_waiting_groups(topsort_iterator)
  until topsort_iterator.at_end?
    return false if error?

    # The iterator does not expose its waiting list, hence the
    # instance_variable_get.
    batch = topsort_iterator.instance_variable_get("@waiting")
    logger.info "#{banner}Launching #{batch.inspect}"

    # The iterator must not be advanced while walking over its own
    # waiting list, so forwarding is done after the loop.
    batch.each do |group_name|
      vmgroup = group(group_name)
      # in case that group was error'ed by the engine...
      next if vmgroup.error?
      worker = Thread.new do
        Thread.current.abort_on_exception = true
        vmgroup.launch_initial_resources
      end
      worker.join
    end

    batch.length.times { topsort_iterator.forward }
  end

  true
end
165
+
166
+
167
+ # Launch a monitor for each group, and waits for their termination before
168
+ # saying "ready".
169
# Starts one monitoring thread per group (registered in the groups thread
# group), polls every 5 seconds until every group has triggered :ready,
# triggers the engine's own :ready event, then waits for the group threads.
# An :error event is triggered for each group thread that ends with a nil
# status, an "aborting" status, or a truthy <tt>:ko</tt> thread-local.
def launch!
  @vmgroups.each_value do |vmgroup|
    group_thread = Thread.new do
      Thread.current.abort_on_exception = true
      vmgroup.monitor
    end
    @tg_groups.add(group_thread)
  end

  sleep 5 until @vmgroups.values.all? { |vmgroup|
    vmgroup.triggered_events.include?(:ready)
  }

  logger.info "#{banner}All groups are now READY: #{groups.inspect}."

  trigger :ready
  ThreadsWait.all_waits(*@tg_groups.list) do |finished|
    # http://apidock.com/ruby/Thread/status
    state = finished.status
    trigger :error if state.nil? || state == "aborting" || finished[:ko]
  end
end
191
+
192
+ # Reloads vmgroups, networks and storages linked to an experiment.
193
# Rebuilds the engine state from the current experiment: registers its
# networks and storages by name, attaches each compute to the template of
# the group encoded in its name ("group--template--guid"), then merges and
# checks the templates of every declared group.
#
# @return [true]
# @raise [Error] if a compute refers to a group that is not declared.
def resuscitate!
  experiment.networks.each { |network| @networks[network['name']] = network }
  experiment.storages.each { |storage| @storages[storage['name']] = storage }

  experiment.computes.each do |compute|
    group_name, template_name, _guid = compute['name'].split("--")
    vmgroup = group(group_name)
    raise Error, "Group #{group_name} is not declared in the DSL." if vmgroup.nil?
    vmgroup.template(template_name).instances.push(compute)
  end

  @vmgroups.each_value do |vmgroup|
    vmgroup.merge_templates!
    vmgroup.check!
  end

  true
end
215
+
216
+ # Define a new group (if block given), or return the group corresponding
217
+ # to the given <tt>name</tt>.
218
# Looks up or declares a group.
#
# Without a block, returns the group registered under <tt>name</tt> (nil if
# none). With a block, creates the group if needed (<tt>options</tt> are
# only used at creation time), evaluates the block in the context of the
# group, and returns the value of the block.
#
# @param name [#to_sym] the group name.
# @param options [Hash] group options, passed to Group.new with symbolized keys.
def group(name, options = {}, &block)
  key = name.to_sym
  return @vmgroups[key] unless block

  @vmgroups[key] ||= Group.new(self, key, options.symbolize_keys)
  @vmgroups[key].instance_eval(&block)
end
230
+
231
+ # =================================================
232
+ # = Resource declaration/finding/creation methods =
233
+ # =================================================
234
+
235
+ # Returns the Restfully::Session object
236
# Returns the Restfully::Session of this engine, building it on first use
# from the <tt>:restfully_config</tt> configuration entry and the engine
# logger.
def session
  return @session if @session

  @session = Restfully::Session.new(
    :configuration_file => conf[:restfully_config],
    :logger => logger
  )
end
242
+
243
+ # Define a network. A network is location dependent.
244
# Registers <tt>block</tt> as the general (location independent) declaration
# of the network called <tt>name</tt>. <tt>options</tt> is accepted but not
# used.
#
# @return the registered block.
def network(name, options = {}, &block)
  @networks.store(name.to_s, block)
end
247
+
248
# Resolves the network called <tt>name</tt> at <tt>location</tt> and caches
# the result under the key "<location name>.<name>".
#
# Resolution order for the cached entry:
# * a Restfully::Resource is returned as is;
# * a Proc (declared with #network) is called with
#   <tt>(name, location, experiment)</tt> and its result is cached;
# * otherwise the networks of the location are searched, by regexp match
#   if <tt>name</tt> is a Regexp, by exact name otherwise.
#
# The per-location entry is seeded from the general declaration only when
# it is still empty, so a declared block runs once per location instead of
# on every fetch.
#
# @param name [String, Symbol, Regexp] network name or pattern.
# @param location a location resource (must respond to <tt>['name']</tt>
#   and <tt>networks</tt>).
# @return the resolved network, or nil if nothing matches.
def fetch_network(name, location)
  sname = name.to_s
  key = [location['name'], sname].join(".")
  logger.debug "#{banner}Looking for network #{name.inspect} at #{location['name'].inspect}. key=#{key.inspect}"
  # Fetched before taking the lock: #experiment synchronizes on the same mutex.
  exp = experiment
  synchronize do
    # Duplicate general networks if present, unless already resolved.
    @networks[key] ||= @networks[sname].clone if @networks[sname]

    entry = @networks[key]
    @networks[key] = case entry
    when Restfully::Resource
      entry
    when Proc
      entry.call(name, location, exp)
    else
      location.networks.find do |n|
        if name.kind_of?(Regexp)
          n['name'] =~ name
        else
          n['name'] == sname
        end
      end
    end
  end
end
274
+
275
+ # Define a storage. A storage is location dependent.
276
# Registers <tt>block</tt> as the general (location independent) declaration
# of the storage called <tt>name</tt>. <tt>options</tt> is accepted but not
# used.
#
# @return the registered block.
def storage(name, options = {}, &block)
  @storages.store(name.to_s, block)
end
279
+
280
# Resolves the storage called <tt>name</tt> at <tt>location</tt> and caches
# the result under the key "<location name>.<name>".
#
# Resolution order for the cached entry:
# * a Restfully::Resource is returned as is;
# * a Proc (declared with #storage) is called with
#   <tt>(name, location, experiment)</tt> and its result is cached;
# * otherwise the storages of the location are searched, by regexp match
#   if <tt>name</tt> is a Regexp, by exact name otherwise.
#
# The per-location entry is seeded from the general declaration only when
# it is still empty, so a declared block runs once per location instead of
# on every fetch.
#
# @param name [String, Symbol, Regexp] storage name or pattern.
# @param location a location resource (must respond to <tt>['name']</tt>
#   and <tt>storages</tt>).
# @return the resolved storage, or nil if nothing matches.
def fetch_storage(name, location)
  sname = name.to_s
  key = [location['name'], sname].join(".")
  logger.debug "#{banner}Looking for storage #{name.inspect} at #{location['name'].inspect}. key=#{key.inspect}"
  # Fetched before taking the lock: #experiment synchronizes on the same mutex.
  exp = experiment
  synchronize do
    # Duplicate general storages if present, unless already resolved.
    @storages[key] ||= @storages[sname].clone if @storages[sname]

    entry = @storages[key]
    @storages[key] = case entry
    when Restfully::Resource
      entry
    when Proc
      entry.call(name, location, exp)
    else
      location.storages.find do |n|
        if name.kind_of?(Regexp)
          n['name'] =~ name
        else
          n['name'] == sname
        end
      end
    end
  end
end
306
+
307
# Finds a location by name and remembers it in the engine's location cache.
#
# With the special name <tt>:any</tt>, a location is picked at random among
# those exposed by the API root (nil is returned if there is none). For any
# other name, the cache is consulted first, then the API root.
#
# @param name [#to_sym] a location name, or <tt>:any</tt>.
# @return the location resource, or nil (only for <tt>:any</tt> with no
#   location available).
# @raise [Error] if the location cannot be found.
def fetch_location(name)
  name = name.to_sym

  if name == :any
    choices = session.root.locations
    return nil if choices.length == 0
    location = choices[rand(choices.length)]
  else
    location = @locations[name] || session.root.locations[name]
  end

  raise Error, "#{banner}Can't find #{name.inspect} location" if location.nil?

  synchronize { @locations[location['name'].to_sym] ||= location }
  location
end
322
+
323
+ # Launch a number of compute resources based on the given
324
+ # <tt>template</tt>.
325
# Submits <tt>count</tt> compute resources to the experiment, all built
# from the same template description.
#
# @param template an object responding to <tt>to_h</tt>.
# @param count [Integer] number of computes to submit.
# @return [Array] the results of the submissions, in submission order.
def launch_compute(template, count = 1)
  payload = template.to_h
  (1..count).map do |rank|
    logger.debug "#{banner}#{rank}/#{count} - Launching compute with the following data: #{payload.inspect}"
    experiment.computes.submit(payload)
  end
end
332
+
333
+ # If given a name, attempts to find an existing running experiment with
334
+ # the same name.
335
+ # If name is nil or omitted, creates a new experiment.
336
+ #
337
+ # Returns a Restfully::Resource object, or nil.
338
# Returns the experiment attached to this engine, resolving it on first use
# (under the engine mutex):
#
# * without a name, a new experiment is submitted in the "waiting" status,
#   using the configured name, description and walltime;
# * with a name, the first experiment that is running and has that name is
#   looked up (nil if none, in which case nothing is memoized).
#
# Once an experiment is memoized, it is returned whatever <tt>name</tt> is.
#
# @param name [String, nil]
# @return a Restfully::Resource, or nil.
def experiment(name = nil)
  # Build the session outside of the lock.
  connection = session
  synchronize do
    @experiment ||= begin
      experiments = connection.root.experiments
      if name.nil?
        experiments.submit(
          :name => conf[:name],
          :description => conf[:description],
          :walltime => conf[:walltime],
          :status => "waiting"
        )
      else
        experiments.find do |exp|
          exp['status'] == 'running' && exp['name'] == name
        end
      end
    end
  end
end
355
+
356
# Fetches the last hour of history of the metric <tt>name</tt> from the
# Zabbix aggregator and wraps it in a Metric.
#
# Only the first host of <tt>:hosts</tt> is used to look up the items, and
# only the first two matching items are queried.
#
# @param name [#to_s] the item key of the metric.
# @param options [Hash] <tt>:hosts</tt> (a host or an array of hosts, each
#   responding to <tt>['name']</tt> and <tt>['id']</tt>) is consumed here;
#   every other entry is handed to Metric.new. The caller's hash is left
#   untouched.
# @return [Metric]
def metric(name, options = {})
  # Work on a copy: removing :hosts must not alter the caller's hash.
  options = options.dup
  hosts = [options.delete(:hosts) || []].flatten.map do |h|
    [h['name'], h['id']].join("-")
  end
  @zabbix ||= Aggregator::Zabbix.new(session, experiment)

  items = @zabbix.request("item.get", {
    :filter => {
      "host" => hosts[0],
      "key_" => name.to_s
    },
    "output" => "extend"
  }).map { |i| i['itemid'] }

  # Most recent last
  now = Time.now.to_i
  results = @zabbix.request("history.get", {
    "itemids" => items[0..1],
    # FIX once we can correctly specify metric type
    "history" => 1, # STRING
    "output" => "extend",
    "time_from" => now - 3600,
    "time_till" => now
  })

  Metric.new(name, results, options)
end
383
+
384
+ # =========================
385
+ # = Configuration methods =
386
+ # =========================
387
+
388
+ # Sets the given <tt>property</tt> to the given <tt>value</tt>.
389
# Stores <tt>value</tt> in the configuration under the symbolized
# <tt>property</tt>.
#
# @param property [#to_sym]
# @param value [Object]
# @return the stored value.
def set(property, value)
  @properties.store(property.to_sym, value)
end

# Returns the configuration Hash (the very object that #set writes to).
def conf
  @properties
end
397
+
398
+ # =====================
399
+ # = Cleanup procedure =
400
+ # =====================
401
+
402
# Kills every thread of the groups thread group (under the engine mutex),
# then deletes the experiment if #cleanup? allows it and an experiment
# exists. The deletion is announced and delayed by 5 seconds; in every
# other case a "not cleaning up" warning is logged.
def cleanup!
  synchronize { @tg_groups.list.each(&:kill) } unless @tg_groups.list.empty?

  removable = cleanup? && !@experiment.nil?
  if removable
    logger.warn "#{banner}Cleaning up in 5 seconds. Hit CTRL-C now to keep your experiment running."
    sleep 5
    @experiment.delete
  else
    logger.warn "#{banner}Not cleaning up experiment."
  end
end
416
+
417
# Tells whether the experiment should be deleted on cleanup.
#
# @return [Boolean] false in dev mode or when <tt>:no_cancel</tt> is set;
#   when <tt>:no_cleanup</tt> is set, true only if the engine is in error;
#   true otherwise.
def cleanup?
  if dev? || conf[:no_cancel]
    false
  elsif conf[:no_cleanup]
    # :no_cleanup only protects experiments that did not fail.
    error? ? true : false
  else
    true
  end
end
422
+
423
+ # ===================
424
+ # = Helpers methods =
425
+ # ===================
426
+
427
# Returns the engine itself.
def engine
  self
end

# Returns the Hash of declared groups, keyed by group name.
def groups
  @vmgroups
end

# Prefix put in front of the engine's log messages.
def banner
  "[BFIRE] "
end

# True when the <tt>:dev</tt> configuration entry is truthy.
def dev?
  conf[:dev] ? true : false
end
442
+
443
+ # Returns the logger for the engine.
444
# Returns the engine logger. On first use, the configured <tt>:logger</tt>
# has its level set to the configured <tt>:logging</tt> level and is then
# memoized.
def logger
  return @logger if @logger

  configured = conf[:logger]
  configured.level = conf[:logging]
  @logger = configured
end
451
+
452
+ # Synchronization primitive
453
# Runs the given block while holding the engine mutex.
#
# @return the value of the block.
def synchronize(&block)
  lock = @mutex
  lock.synchronize do
    block.call
  end
end
456
+
457
+ # ===============
458
+ # = SSH methods =
459
+ # ===============
460
+
461
+ # Setup an SSH connection as <tt>username</tt> to <tt>fqdn</tt>.
462
+ # @param [String] fqdn the fully qualified domain name of the host to connect to.
463
+ # @param [String] username the login to use to connect to the host.
464
+ # @param [Hash] options a hash of additional options to pass.
465
+ # @yield [Net::SSH::Connection::Session] ssh a SSH handler.
466
+ #
467
+ # By default, the SSH connection will be retried at most <tt>ssh_max_attempts</tt> times if the host is unreachable. You can overwrite that default locally by passing a different <tt>ssh_max_attempts</tt> option.
468
+ # Same for <tt>:timeout</tt> and <tt>:keys</tt> options.
469
+ #
470
+ # If option <tt>:multi</tt> is given and true, then an instance of Net::SSH::Multi::Session is yielded. See <http://net-ssh.github.com/multi/v1/api/index.html> for more information.
471
# Opens an SSH connection as <tt>username</tt> to <tt>fqdn</tt> and yields
# the SSH handler to the block.
#
# Options understood by the engine itself (they are NOT forwarded to
# Net::SSH):
# * <tt>:log</tt> - log connection attempts when truthy;
# * <tt>:multi</tt> - when truthy, <tt>fqdn</tt> must be a list of hosts and
#   a Net::SSH::Multi session is yielded;
# * <tt>:concurrent_connections</tt> - multi mode only, defaults to 10;
# * <tt>:max_attempts</tt> (or <tt>:ssh_max_attempts</tt>) - overrides the
#   configured <tt>ssh_max_attempts</tt>.
#
# Every other option is passed to Net::SSH. <tt>:timeout</tt> defaults to
# 10, <tt>:keys</tt> defaults to the configured key, and password
# authentication defaults to the 'keyboard-interactive' method.
#
# When the host is unreachable (Errno::EHOSTUNREACH), the connection is
# retried after 5 seconds, up to the maximum number of attempts after the
# first try; the error is re-raised once that budget is spent.
#
# The caller's <tt>options</tt> hash is never modified. When a gateway is
# configured, it is shut down even if the connection or the block fails.
#
# @param fqdn [String, Array<String>] host (or hosts in multi mode).
# @param username [String] the login to use.
# @param options [Hash] see above.
# @yield the SSH handler.
# @return the value returned by the underlying start/ssh call.
# @raise [ArgumentError] if no block is given.
def ssh(fqdn, username, options = {}, &block)
  raise ArgumentError, "You MUST provide a block when calling #ssh" if block.nil?

  # Work on a copy, and strip engine-only keys so they never reach Net::SSH.
  options = options.dup
  log = !!options.delete(:log)
  multi = options.delete(:multi)
  concurrent_connections = options.delete(:concurrent_connections) || 10
  # Both names are removed; :max_attempts wins when both are given.
  local_attempts = [
    options.delete(:max_attempts),
    options.delete(:ssh_max_attempts)
  ].compact.first
  max_attempts = local_attempts || conf[:ssh_max_attempts]

  options[:timeout] ||= 10
  if options.has_key?(:password)
    options[:auth_methods] ||= ['keyboard-interactive']
  else
    options[:keys] ||= [conf[:key]].compact
  end

  logger.info "#{banner}SSHing to #{username}@#{fqdn.inspect}..." if log
  attempts = 0
  begin
    attempts += 1
    if multi
      Net::SSH::Multi.start(:concurrent_connections => concurrent_connections) do |s|
        s.via conf[:gateway], conf[:user] unless conf[:gateway].nil?
        fqdn.each { |h| s.use "#{username}@#{h}" }
        block.call(s)
      end
    elsif conf[:gateway]
      gw_handler = Net::SSH::Gateway.new(conf[:gateway], conf[:user], :forward_agent => true)
      begin
        gw_handler.ssh(fqdn, username, options, &block)
      ensure
        # Always release the gateway, including before a retry.
        gw_handler.shutdown!
      end
    else
      Net::SSH.start(fqdn, username, options, &block)
    end
  rescue Errno::EHOSTUNREACH => e
    if attempts <= max_attempts
      logger.info "#{banner}No route to host #{fqdn}. Retrying in 5 secs..." if log
      sleep 5
      retry
    else
      logger.info "#{banner}No route to host #{fqdn}. Won't retry." if log
      raise e
    end
  end
end
515
+
516
+ protected
517
# Polls the experiment until it reaches a final (or unknown) status.
#
# On every poll the experiment is reloaded and, when its status differs
# from the one seen at the previous poll, the matching event is triggered:
# * 'running'                 -> :running, next poll in 30 seconds;
# * 'terminating', 'canceling' -> :stopped, next poll in 10 seconds;
# * 'terminated', 'canceled'   -> :terminated, polling stops;
# * 'waiting' triggers nothing, next poll in 10 seconds;
# * any other status stops the polling without triggering anything.
#
# Implemented as a loop rather than by self-recursion, so that the stack
# does not grow with the number of polls on long experiments.
#
# @return [nil]
def monitor
  @experiment_state ||= nil

  loop do
    sleep_time = nil
    logger.info "#{banner}Monitoring experiment..."
    experiment.reload
    has_changed = (@experiment_state != experiment['status'])

    case experiment['status']
    when 'waiting'
      logger.info "#{banner}Experiment is waiting. Nothing to do..."
      sleep_time = 10
    when 'running'
      logger.info "#{banner}Experiment is running."
      trigger :running if has_changed
      sleep_time = 30
    when 'terminating', 'canceling'
      trigger :stopped if has_changed
      sleep_time = 10
    when 'terminated', 'canceled'
      trigger :terminated if has_changed
    end
    @experiment_state = experiment['status']

    # No delay means a final or unknown status: stop polling.
    break if sleep_time.nil?
    sleep sleep_time
  end
end
544
+
545
+ end
546
+ end