bfire 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/LICENSE +0 -0
  2. data/README.md +90 -0
  3. data/bin/bfire +120 -0
  4. data/examples/benchmark.rb +18 -0
  5. data/examples/dag.rb +26 -0
  6. data/examples/elasticity.rb +105 -0
  7. data/examples/ibbt.rb +125 -0
  8. data/examples/mine.rb +40 -0
  9. data/examples/modules/apache2/manifests/init.pp +44 -0
  10. data/examples/modules/app/files/app/app.rb +29 -0
  11. data/examples/modules/app/files/app/config.ru +2 -0
  12. data/examples/modules/app/files/app.phtml +4 -0
  13. data/examples/modules/app/manifests/init.pp +19 -0
  14. data/examples/modules/common/manifests/init.pp +8 -0
  15. data/examples/modules/haproxy/files/default +4 -0
  16. data/examples/modules/haproxy/files/haproxy.rsyslog.conf +2 -0
  17. data/examples/modules/haproxy/manifests/init.pp +21 -0
  18. data/examples/modules/mysql/manifests/init.pp +40 -0
  19. data/examples/modules/rsyslog/files/rsyslog.conf +116 -0
  20. data/examples/modules/rsyslog/manifests/init.pp +15 -0
  21. data/examples/modules/sinatra/manifests/init.pp +9 -0
  22. data/examples/modules/web/files/monitor/app.rb +55 -0
  23. data/examples/modules/web/files/monitor/config.ru +2 -0
  24. data/examples/modules/web/files/monitor/haproxy.cfg.erb +50 -0
  25. data/examples/modules/web/manifests/init.pp +26 -0
  26. data/examples/simple.rb +58 -0
  27. data/lib/bfire/aggregator/zabbix.rb +55 -0
  28. data/lib/bfire/engine.rb +546 -0
  29. data/lib/bfire/group.rb +241 -0
  30. data/lib/bfire/metric.rb +36 -0
  31. data/lib/bfire/provider/puppet.rb +58 -0
  32. data/lib/bfire/pub_sub/publisher.rb +40 -0
  33. data/lib/bfire/rule.rb +110 -0
  34. data/lib/bfire/template.rb +142 -0
  35. data/lib/bfire/version.rb +3 -0
  36. data/lib/bfire.rb +10 -0
  37. metadata +241 -0
@@ -0,0 +1,546 @@
1
+ require 'restfully'
2
+ require 'restfully/media_type/application_vnd_bonfire_xml'
3
+ require 'thread'
4
+ require 'thwait'
5
+
6
+ require 'net/ssh'
7
+ require 'net/scp'
8
+ require 'net/sftp'
9
+ require 'net/ssh/gateway'
10
+ require 'net/ssh/multi'
11
+
12
+ # Ruby Graph Library
13
+ require 'rgl/adjacency'
14
+ require 'rgl/topsort'
15
+
16
+ require 'bfire/group'
17
+ require 'bfire/aggregator/zabbix'
18
+ require 'bfire/metric'
19
+
20
+ module Bfire
21
+ class Engine
22
+ include PubSub::Publisher
23
+
24
+ DEBUG = Logger::DEBUG
25
+ INFO = Logger::INFO
26
+ WARN = Logger::WARN
27
+ ERROR = Logger::ERROR
28
+ UNKNOWN = Logger::UNKNOWN
29
+
30
+ # Engine configuration hash:
31
+ attr_reader :properties
32
+ # A Restfully::Session object:
33
+ attr_reader :session
34
+
35
# Builds a new engine with empty registries and default configuration.
#
# opts[:root] is the base directory used by #path_to; when it is missing,
# the current working directory is used instead.
def initialize(opts = {})
  # The group of all master threads.
  @tg_master = ThreadGroup.new
  # The group of all threads related to a Group.
  @tg_groups = ThreadGroup.new

  # Lock used by #synchronize.
  @mutex = Mutex.new

  # Registries, all empty at construction time.
  @properties = {}
  @vmgroups   = {}
  @networks   = {}
  @storages   = {}
  @locations  = {}

  @experiment = nil
  @root       = opts[:root] || Dir.pwd

  # Fill @properties with the default configuration values.
  reset
end
52
+
53
# Expands +path+ relative to the engine root directory (@root) and returns
# the resulting absolute path. Absolute paths are returned unchanged by
# File.expand_path.
def path_to(path)
  base_dir = @root
  File.expand_path(path, base_dir)
end
56
+
57
# Fills in every configuration entry that has no truthy value yet with its
# default. Entries that were already set are left alone.
#
# The SSH key defaults come from #keychain; when no key pair is found both
# :key and :authorized_keys end up nil.
def reset
  settings = conf
  settings[:name]             ||= "Bfire experiment"
  settings[:description]      ||= "Anonymous description"
  settings[:walltime]         ||= 3600
  settings[:logger]           ||= Logger.new(STDOUT)
  settings[:logging]          ||= INFO
  settings[:user]             ||= ENV['USER']
  settings[:ssh_max_attempts] ||= 3

  # keychain returns nil or [public, private]; destructuring nil yields
  # two nils.
  pub, priv = keychain
  settings[:key]             ||= priv
  settings[:authorized_keys] ||= pub
end
69
+
70
# Looks in ~/.ssh for the first public key (*.pub) whose private counterpart
# (same path without the ".pub" suffix) exists on disk.
#
# Returns [public_key_path, private_key_path], or nil when no such pair
# exists.
def keychain
  Dir[File.expand_path("~/.ssh/*.pub")].each do |candidate|
    counterpart = candidate.gsub(/\.pub$/,"")
    return [candidate, counterpart] if File.exist?(counterpart)
  end
  nil
end
82
+
83
# Builds the dependency graph for the given group names.
#
# Every name in +nodes+ becomes a vertex, and so does every dependency a
# group declares; edges point from the dependency to the group that depends
# on it.
#
# Returns the RGL::DirectedAdjacencyGraph.
# Raises Error when the resulting graph contains a cycle.
def dag(nodes)
  graph = RGL::DirectedAdjacencyGraph.new

  nodes.each do |node|
    graph.add_vertex(node)
    group(node).dependencies.each do |dependency, _block|
      graph.add_vertex(dependency)
      graph.add_edge(dependency, node)
    end
  end

  raise Error, "Your dependency graph is not acyclic!" unless graph.acyclic?
  graph
end
98
+
99
# Launch procedure: starts a thread that monitors the experiment status,
# then either reloads an existing experiment (dev mode, and a running
# experiment with the configured name exists) or deploys the groups, and
# finally waits for the master threads.
#
# Any exception, including the non-StandardError ones, is logged and turned
# into an :error event, whose handler registered here runs #cleanup!.
def run!
  # Call #session here so the Restfully::Session object is created outside
  # of threads.
  logger.info "#{banner}Using bonfire-api/#{session.root['version']}"

  [:error, :terminated].each do |event|
    on(event) { cleanup! }
  end

  watcher = Thread.new do
    Thread.current.abort_on_exception = true
    monitor
  end
  @tg_master.add(watcher)

  resumable   = dev? && experiment(conf[:name])
  initialized = resumable ? resuscitate! : deploy!

  experiment.update(:status => "running")

  initialized ? launch! : cleanup!

  ThreadsWait.all_waits(*@tg_master.list)
rescue Exception => e
  logger.error "#{banner}#{e.class.name}: #{e.message}"
  logger.debug e.backtrace.join("; ")
  trigger :error
end
133
+
134
# Computes the dependency graph of all declared groups, logs the
# topological order, and launches the groups in that order.
#
# Returns the result of #launch_waiting_groups.
def deploy!
  iterator = dag(@vmgroups.keys).topsort_iterator
  # Enumerate a clone so the iterator handed to the launcher is untouched.
  planned_order = iterator.clone.to_a
  logger.info "#{banner}Launching groups in the following topological order: #{planned_order.inspect}."

  launch_waiting_groups(iterator)
end
141
+
142
# Launches the groups level by level, following the topological iterator,
# and waits for each group's initial resources before moving on (every
# thread is joined right after it is started).
#
# Returns true once the iterator is exhausted, or false as soon as the
# engine is found in error before starting a level.
def launch_waiting_groups(topsort_iterator)
  until topsort_iterator.at_end?
    return false if error?

    # The iterator does not expose its pending vertices, so read its
    # internal @waiting list directly.
    pending = topsort_iterator.instance_variable_get("@waiting")
    logger.info "#{banner}Launching #{pending.inspect}"

    # Do not advance the iterator inside this loop: that would have
    # side-effects on the list being walked.
    pending.each do |group_name|
      current = group(group_name)
      # Skip groups the engine has already flagged as failed.
      next if current.error?
      worker = Thread.new do
        Thread.current.abort_on_exception = true
        current.launch_initial_resources
      end
      worker.join
    end

    # Step past every vertex handled above.
    pending.length.times { topsort_iterator.forward }
  end

  true
end
165
+
166
+
167
# Starts one monitoring thread per group, polls every 5 seconds until all
# groups have triggered :ready, then triggers :ready on the engine and
# waits for the group threads to finish.
#
# A group thread that ended with an exception, is aborting, or carries the
# thread-local :ko flag makes the engine trigger :error.
def launch!
  @vmgroups.each_value do |vmgroup|
    worker = Thread.new do
      Thread.current.abort_on_exception = true
      vmgroup.monitor
    end
    @tg_groups.add(worker)
  end

  sleep 5 until @vmgroups.values.all? { |vmgroup|
    vmgroup.triggered_events.include?(:ready)
  }

  logger.info "#{banner}All groups are now READY: #{groups.inspect}."

  trigger :ready
  ThreadsWait.all_waits(*@tg_groups.list) do |finished|
    # Thread#status is nil when the thread terminated with an exception.
    failed = finished.status.nil? || finished.status == "aborting" || finished[:ko]
    trigger :error if failed
  end
end
191
+
192
# Reloads the networks, storages and computes attached to the current
# experiment into the engine's registries.
#
# Compute names are split on "--" into group name, template name and a
# trailing identifier; each compute is appended to the instances of the
# matching template. Every group then merges its templates and is checked.
#
# Returns true.
# Raises Error when a compute refers to a group that is not declared.
def resuscitate!
  experiment.networks.each { |net| @networks[net['name']] = net }
  experiment.storages.each { |sto| @storages[sto['name']] = sto }

  experiment.computes.each do |compute|
    group_name, template_name, _guid = compute['name'].split("--")
    owner = group(group_name)
    raise Error, "Group #{group_name} is not declared in the DSL." if owner.nil?
    owner.template(template_name).instances.push(compute)
  end

  @vmgroups.each_value do |vmgroup|
    vmgroup.merge_templates!
    vmgroup.check!
  end

  true
end
215
+
216
# With a block: registers the group called +name+ (creating it with the
# symbolized +options+ unless it already exists) and evaluates the block in
# the context of that group, returning the block's result.
#
# Without a block: returns the group registered under +name+, or nil.
def group(name, options = {}, &block)
  key = name.to_sym
  return @vmgroups[key] unless block

  @vmgroups[key] ||= Group.new(self, key, options.symbolize_keys)
  @vmgroups[key].instance_eval(&block)
end
230
+
231
+ # =================================================
232
+ # = Resource declaration/finding/creation methods =
233
+ # =================================================
234
+
235
# Returns the Restfully::Session object, creating it on first use from the
# :restfully_config configuration entry and the engine logger.
def session
  return @session if @session

  @session = Restfully::Session.new(
    :configuration_file => conf[:restfully_config],
    :logger => logger
  )
end
242
+
243
# Defines a network. A network is location dependent: the given block is
# stored under the stringified +name+ and later called by #fetch_network
# with the name, the location and the experiment.
#
# +options+ is accepted but not used. Returns the stored block.
def network(name, options = {}, &block)
  @networks.store(name.to_s, block)
end
247
+
248
# Resolves the network called +name+ at the given +location+.
#
# Resolution order for the per-location slot (key "location.name"):
# * a Restfully::Resource already stored there is returned as is;
# * a Proc (network definition) is called with name, location and the
#   experiment, and its result is stored;
# * otherwise the location's networks are searched by name; +name+ may be
#   a Regexp, in which case the first matching network is used.
#
# Returns the resolved network, or nil when nothing matches.
def fetch_network(name, location)
  sname = name.to_s
  key = [location['name'], sname].join(".")
  logger.debug "#{banner}Looking for network #{name.inspect} at #{location['name'].inspect}. key=#{key.inspect}"
  # Fetched before taking the lock: #experiment synchronizes on the same
  # Mutex, which cannot be re-acquired by the thread that holds it.
  exp = experiment
  synchronize {
    # Seed the per-location slot from the general definition only when the
    # slot is still empty. Overwriting it on every call discarded the
    # resource resolved earlier and ran the definition block again.
    @networks[key] ||= @networks[sname].clone if @networks[sname]

    @networks[key] = case @networks[key]
    when Restfully::Resource
      @networks[key]
    when Proc
      @networks[key].call(name, location, exp)
    else
      location.networks.find{|n|
        if name.kind_of?(Regexp)
          n['name'] =~ name
        else
          n['name'] == sname
        end
      }
    end
  }
  @networks[key]
end
274
+
275
# Defines a storage. A storage is location dependent: the given block is
# stored under the stringified +name+ and later called by #fetch_storage
# with the name, the location and the experiment.
#
# +options+ is accepted but not used. Returns the stored block.
def storage(name, options = {}, &block)
  @storages.store(name.to_s, block)
end
279
+
280
# Resolves the storage called +name+ at the given +location+.
#
# Resolution order for the per-location slot (key "location.name"):
# * a Restfully::Resource already stored there is returned as is;
# * a Proc (storage definition) is called with name, location and the
#   experiment, and its result is stored;
# * otherwise the location's storages are searched by name; +name+ may be
#   a Regexp, in which case the first matching storage is used.
#
# Returns the resolved storage, or nil when nothing matches.
def fetch_storage(name, location)
  sname = name.to_s
  key = [location['name'], sname].join(".")
  logger.debug "#{banner}Looking for storage #{name.inspect} at #{location['name'].inspect}. key=#{key.inspect}"
  # Fetched before taking the lock: #experiment synchronizes on the same
  # Mutex, which cannot be re-acquired by the thread that holds it.
  exp = experiment
  synchronize {
    # Seed the per-location slot from the general definition only when the
    # slot is still empty. Overwriting it on every call discarded the
    # resource resolved earlier and ran the definition block again.
    @storages[key] ||= @storages[sname].clone if @storages[sname]

    @storages[key] = case @storages[key]
    when Restfully::Resource
      @storages[key]
    when Proc
      @storages[key].call(name, location, exp)
    else
      location.storages.find{|n|
        if name.kind_of?(Regexp)
          n['name'] =~ name
        else
          n['name'] == sname
        end
      }
    end
  }
  @storages[key]
end
306
+
307
# Finds the location called +name+.
#
# The special name :any picks one of the locations listed by the API root
# at random, and returns nil when that list is empty. Any other name is
# looked up in the engine's own registry first, then through the API root.
# The location found is remembered in the registry under its own name.
#
# Raises Error when a named location cannot be found.
def fetch_location(name)
  name = name.to_sym

  if name == :any
    candidates = session.root.locations
    return nil if candidates.length == 0
    location = candidates[rand(candidates.length)]
  else
    location = @locations[name] || session.root.locations[name]
  end

  raise Error, "#{banner}Can't find #{name.inspect} location" if location.nil?

  synchronize do
    @locations[location['name'].to_sym] ||= location
  end
  location
end
322
+
323
# Launches +count+ compute resources based on the given +template+.
#
# The template is converted to a hash once, and that same hash is submitted
# to the experiment's computes collection +count+ times.
#
# Returns an array holding the result of each submission.
def launch_compute(template, count = 1)
  payload = template.to_h
  count.times.map do |index|
    position = index + 1
    logger.debug "#{banner}#{position}/#{count} - Launching compute with the following data: #{payload.inspect}"
    experiment.computes.submit(payload)
  end
end
332
+
333
# Returns the experiment attached to this engine, resolving it on first use.
#
# Once an experiment has been resolved it is returned as is, whatever
# +name+ says. Otherwise:
# * with a +name+, looks for an existing experiment with that name whose
#   status is 'running' (the result may be nil);
# * with no name, submits a new experiment built from the :name,
#   :description and :walltime configuration entries, with status "waiting".
#
# Returns a Restfully::Resource object, or nil.
def experiment(name = nil)
  connection = session
  synchronize do
    next @experiment if @experiment

    experiments = connection.root.experiments
    @experiment =
      if name.nil?
        experiments.submit(
          :name => conf[:name],
          :description => conf[:description],
          :walltime => conf[:walltime],
          :status => "waiting"
        )
      else
        experiments.find do |exp|
          exp['status'] == 'running' && exp['name'] == name
        end
      end
  end
end
355
+
356
# Fetches the last hour of values for the metric +name+ from the Zabbix
# aggregator and wraps them in a Metric.
#
# options[:hosts] (one host or a list of hosts) selects where to look;
# each host is identified as "name-id" and only the first one is queried.
# The remaining options are handed to Metric.new.
#
# The caller's +options+ hash is left untouched.
def metric(name, options = {})
  # Work on a copy so removing :hosts does not alter the caller's hash.
  options = options.dup
  hosts = [options.delete(:hosts) || []].flatten.map{|h|
    [h['name'], h['id']].join("-")
  }
  @zabbix ||= Aggregator::Zabbix.new(session, experiment)

  items = @zabbix.request("item.get", {
    :filter => {
      "host" => hosts[0],
      "key_" => name.to_s
    },
    "output" => "extend"
  }).map{|i| i['itemid']}

  # Most recent last
  now = Time.now.to_i
  results = @zabbix.request("history.get", {
    # Only the first two matching items are queried.
    "itemids" => items[0..1],
    # FIX once we can correctly specify metric type
    "history" => 1, # STRING
    "output" => "extend",
    "time_from" => now-3600,
    "time_till" => now
  })

  Metric.new(name, results, options)
end
383
+
384
+ # =========================
385
+ # = Configuration methods =
386
+ # =========================
387
+
388
# Stores +value+ in the configuration hash under the symbolized
# +property+ name, and returns +value+.
def set(property, value)
  @properties.store(property.to_sym, value)
end
392
+
393
# Returns the configuration Hash itself (not a copy), so changes made
# through the returned object are visible to the engine.
def conf
  @properties
end
397
+
398
+ # =====================
399
+ # = Cleanup procedure =
400
+ # =====================
401
+
402
# Stops every group thread, then deletes the experiment when cleanup is
# allowed (see #cleanup?) and an experiment has been resolved.
#
# The deletion is announced and delayed by 5 seconds. When nothing is
# deleted, a warning saying so is logged instead.
def cleanup!
  synchronize { @tg_groups.list.each(&:kill) } unless @tg_groups.list.empty?

  keep = !cleanup? || @experiment.nil?
  return logger.warn("#{banner}Not cleaning up experiment.") if keep

  logger.warn "#{banner}Cleaning up in 5 seconds. Hit CTRL-C now to keep your experiment running."
  sleep 5
  @experiment.delete
end
416
+
417
# Tells whether the experiment may be deleted on cleanup.
#
# Returns false in dev mode or when :no_cancel is set, and false when
# :no_cleanup is set and the engine is not in error; true otherwise.
def cleanup?
  if dev? || conf[:no_cancel]
    false
  elsif conf[:no_cleanup] && !error?
    false
  else
    true
  end
end
422
+
423
+ # ===================
424
+ # = Helpers methods =
425
+ # ===================
426
+
427
# Returns the engine itself.
def engine
  self
end
430
+
431
# Returns the Hash of declared groups (the registry itself, not a copy).
def groups
  @vmgroups
end
434
+
435
# Returns the prefix put in front of the engine's log messages.
def banner
  "[BFIRE] "
end
438
+
439
# Returns true when the :dev configuration entry is truthy, false
# otherwise.
def dev?
  conf[:dev] ? true : false
end
442
+
443
# Returns the logger for the engine.
#
# On first use, takes the logger from the :logger configuration entry and
# sets its level to the :logging entry; the same object is returned on
# later calls.
def logger
  return @logger if @logger

  @logger = conf[:logger].tap { |log| log.level = conf[:logging] }
end
451
+
452
# Synchronization primitive: runs the given block while holding the
# engine's mutex and returns the block's result.
def synchronize(&block)
  @mutex.synchronize do
    block.call
  end
end
456
+
457
+ # ===============
458
+ # = SSH methods =
459
+ # ===============
460
+
461
# Setup an SSH connection as +username+ to +fqdn+.
# @param [String] fqdn the fully qualified domain name of the host to connect to (a list of names when :multi is set).
# @param [String] username the login to use to connect to the host.
# @param [Hash] options a hash of additional options to pass. It is never modified.
# @yield [Net::SSH::Connection::Session] ssh a SSH handler.
#
# Options handled by the engine itself (not forwarded to Net::SSH):
# * :log - when truthy, connection attempts are logged.
# * :max_attempts (or :ssh_max_attempts) - overrides the :ssh_max_attempts
#   configuration entry, i.e. how many times the connection is retried
#   when the host is unreachable.
# * :multi - when truthy, a Net::SSH::Multi::Session is yielded.
# * :concurrent_connections - used with :multi, defaults to 10.
#
# :timeout defaults to 10. Unless a :password is given (in which case
# :auth_methods defaults to keyboard-interactive), :keys defaults to the
# :key configuration entry.
#
# When a :gateway is configured, single connections go through it, and the
# gateway is shut down whatever the outcome of the block.
def ssh(fqdn, username, options = {}, &block)
  raise ArgumentError, "You MUST provide a block when calling #ssh" if block.nil?

  # Work on a copy: defaults are filled in and engine-only keys are taken
  # out below, and none of that should leak into the caller's hash.
  options = options.dup
  log = !!options.delete(:log)
  multi = options.delete(:multi)
  concurrent_connections = options.delete(:concurrent_connections) || 10
  # Both spellings are always removed: Net::SSH rejects option keys it
  # does not know.
  local_max_attempts = [
    options.delete(:max_attempts),
    options.delete(:ssh_max_attempts)
  ].compact.first
  max_attempts = local_max_attempts || conf[:ssh_max_attempts]

  options[:timeout] ||= 10
  if options.has_key?(:password)
    options[:auth_methods] ||= ['keyboard-interactive']
  else
    options[:keys] ||= [conf[:key]].compact
  end

  logger.info "#{banner}SSHing to #{username}@#{fqdn.inspect}..." if log
  attempts = 0
  begin
    attempts += 1
    if multi
      # NOTE(review): the connection options (:keys, :timeout, ...) are not
      # applied to the hosts of a multi session - confirm whether intended.
      Net::SSH::Multi.start(
        :concurrent_connections => concurrent_connections
      ) do |s|
        s.via conf[:gateway], conf[:user] unless conf[:gateway].nil?
        fqdn.each {|h| s.use "#{username}@#{h}"}
        block.call(s)
      end
    elsif conf[:gateway]
      gw_handler = Net::SSH::Gateway.new(conf[:gateway], conf[:user], :forward_agent => true)
      begin
        gw_handler.ssh(fqdn, username, options, &block)
      ensure
        # Always release the gateway, including when the block raises or
        # the host is unreachable and the connection is retried.
        gw_handler.shutdown!
      end
    else
      Net::SSH.start(fqdn, username, options, &block)
    end
  rescue Errno::EHOSTUNREACH => e
    if attempts <= max_attempts
      logger.info "#{banner}No route to host #{fqdn}. Retrying in 5 secs..." if log
      sleep 5
      retry
    else
      logger.info "#{banner}No route to host #{fqdn}. Won't retry." if log
      raise e
    end
  end
end
515
+
516
+ protected
517
# Polls the experiment status until it reaches a state that needs no
# further polling, triggering an event each time the status changes:
# * 'waiting': nothing to do, polled again after 10 seconds;
# * 'running': triggers :running, polled again after 30 seconds;
# * 'terminating' / 'canceling': triggers :stopped, polled again after 10
#   seconds;
# * 'terminated' / 'canceled': triggers :terminated and stops polling;
# * any other status stops polling without triggering anything.
#
# Implemented as a loop rather than by calling itself again after each
# sleep, so the call stack does not grow with the experiment's lifetime.
def monitor
  @experiment_state ||= nil

  loop do
    logger.info "#{banner}Monitoring experiment..."
    experiment.reload
    status = experiment['status']
    has_changed = (@experiment_state != status)

    # Delay before the next poll; nil means polling is over.
    sleep_time = case status
    when 'waiting'
      logger.info "#{banner}Experiment is waiting. Nothing to do..."
      10
    when 'running'
      logger.info "#{banner}Experiment is running."
      trigger :running if has_changed
      30
    when 'terminating', 'canceling'
      trigger :stopped if has_changed
      10
    when 'terminated', 'canceled'
      trigger :terminated if has_changed
      nil
    end
    @experiment_state = status

    break if sleep_time.nil?
    sleep sleep_time
  end
end
544
+
545
+ end
546
+ end