xpflow 0.1b

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. data/bin/xpflow +96 -0
  2. data/lib/colorado.rb +198 -0
  3. data/lib/json/add/core.rb +243 -0
  4. data/lib/json/add/rails.rb +8 -0
  5. data/lib/json/common.rb +423 -0
  6. data/lib/json/editor.rb +1369 -0
  7. data/lib/json/ext.rb +28 -0
  8. data/lib/json/pure/generator.rb +442 -0
  9. data/lib/json/pure/parser.rb +320 -0
  10. data/lib/json/pure.rb +15 -0
  11. data/lib/json/version.rb +8 -0
  12. data/lib/json.rb +62 -0
  13. data/lib/mime/types.rb +881 -0
  14. data/lib/mime-types.rb +3 -0
  15. data/lib/restclient/abstract_response.rb +106 -0
  16. data/lib/restclient/exceptions.rb +193 -0
  17. data/lib/restclient/net_http_ext.rb +55 -0
  18. data/lib/restclient/payload.rb +235 -0
  19. data/lib/restclient/raw_response.rb +34 -0
  20. data/lib/restclient/request.rb +316 -0
  21. data/lib/restclient/resource.rb +169 -0
  22. data/lib/restclient/response.rb +24 -0
  23. data/lib/restclient.rb +174 -0
  24. data/lib/xpflow/bash.rb +341 -0
  25. data/lib/xpflow/bundle.rb +113 -0
  26. data/lib/xpflow/cmdline.rb +249 -0
  27. data/lib/xpflow/collection.rb +122 -0
  28. data/lib/xpflow/concurrency.rb +79 -0
  29. data/lib/xpflow/data.rb +393 -0
  30. data/lib/xpflow/dsl.rb +816 -0
  31. data/lib/xpflow/engine.rb +574 -0
  32. data/lib/xpflow/ensemble.rb +135 -0
  33. data/lib/xpflow/events.rb +56 -0
  34. data/lib/xpflow/experiment.rb +65 -0
  35. data/lib/xpflow/exts/facter.rb +30 -0
  36. data/lib/xpflow/exts/g5k.rb +931 -0
  37. data/lib/xpflow/exts/g5k_use.rb +50 -0
  38. data/lib/xpflow/exts/gui.rb +140 -0
  39. data/lib/xpflow/exts/model.rb +155 -0
  40. data/lib/xpflow/graph.rb +1603 -0
  41. data/lib/xpflow/graph_xpflow.rb +251 -0
  42. data/lib/xpflow/import.rb +196 -0
  43. data/lib/xpflow/library.rb +349 -0
  44. data/lib/xpflow/logging.rb +153 -0
  45. data/lib/xpflow/manager.rb +147 -0
  46. data/lib/xpflow/nodes.rb +1250 -0
  47. data/lib/xpflow/runs.rb +773 -0
  48. data/lib/xpflow/runtime.rb +125 -0
  49. data/lib/xpflow/scope.rb +168 -0
  50. data/lib/xpflow/ssh.rb +186 -0
  51. data/lib/xpflow/stat.rb +50 -0
  52. data/lib/xpflow/stdlib.rb +381 -0
  53. data/lib/xpflow/structs.rb +369 -0
  54. data/lib/xpflow/taktuk.rb +193 -0
  55. data/lib/xpflow/templates/ssh-config.basic +14 -0
  56. data/lib/xpflow/templates/ssh-config.inria +18 -0
  57. data/lib/xpflow/templates/ssh-config.proxy +13 -0
  58. data/lib/xpflow/templates/taktuk +6590 -0
  59. data/lib/xpflow/templates/utils/batch +4 -0
  60. data/lib/xpflow/templates/utils/bootstrap +12 -0
  61. data/lib/xpflow/templates/utils/hostname +3 -0
  62. data/lib/xpflow/templates/utils/ping +3 -0
  63. data/lib/xpflow/templates/utils/rsync +12 -0
  64. data/lib/xpflow/templates/utils/scp +17 -0
  65. data/lib/xpflow/templates/utils/scp_many +8 -0
  66. data/lib/xpflow/templates/utils/ssh +3 -0
  67. data/lib/xpflow/templates/utils/ssh-interactive +4 -0
  68. data/lib/xpflow/templates/utils/taktuk +19 -0
  69. data/lib/xpflow/threads.rb +187 -0
  70. data/lib/xpflow/utils.rb +569 -0
  71. data/lib/xpflow/visual.rb +230 -0
  72. data/lib/xpflow/with_g5k.rb +7 -0
  73. data/lib/xpflow.rb +349 -0
  74. metadata +135 -0
@@ -0,0 +1,1250 @@
1
+
2
+ require 'erb'
3
+ require 'ostruct'
4
+ require 'xpflow/exts/g5k'
5
+ require 'yaml'
6
+ require 'thread'
7
+ require 'shellwords'
8
+
9
# Returns the Grid'5000 username stored in the global $g5k_user.
# Raises a RuntimeError when that global has not been set.
def get_g5k_username
  username = $g5k_user
  raise "No G5K username!" if username.nil?
  username
end
13
+
14
+ module XPFlow
15
+
16
+ # manages all nodes
17
+
18
# Manages all nodes: allocates one uniquely named sub-directory per node and
# delegates the actual construction of the node to a factory.
class NodesManager

  # directory - parent directory object; must respond to subdir(name).
  def initialize(directory)
    @directory = directory
    @mutex = Mutex.new
    @node_counter = 0
  end

  # Runs the block while holding the manager's mutex and returns its value.
  def synchronize(&block)
    @mutex.synchronize(&block)
  end

  # Returns the sub-directory called +name+ of the managed directory.
  def subdir(name)
    @directory.subdir(name)
  end

  # Builds a node for user@host with the given factory.
  #
  # The node directory is named "<host>--<user>--<factory name>--<counter>";
  # the counter is incremented under the mutex so names never collide.
  # The factory receives a copy of +opts+ extended with :factory, so a hash
  # that the caller reuses for several nodes is never modified and never
  # shared between nodes.
  def get_node(user, host, factory, opts = {})
    synchronize do
      @node_counter += 1
      node_directory = subdir("#{host}--#{user}--#{factory.name}--#{@node_counter}")
      factory.build(user, host, node_directory, opts.merge(:factory => factory))
    end
  end

end
44
+
45
# Factory that builds directly reachable hosts (SimpleNode instances).
class SimpleNodeFactory

  # Name of this factory; also used as part of the node directory name.
  def name
    "normal"
  end

  # Forwards all arguments unchanged to SimpleNode.new.
  def build(*args)
    SimpleNode.new(*args)
  end

end
57
+
58
# Factory that builds Grid'5000 nodes (G5KNode instances).
class G5KNodeFactory

  # Name of this factory; also used as part of the node directory name.
  def name
    "grid5000"
  end

  # Builds a G5KNode. A :group option derived from the host name is supplied
  # as a default; a :group given explicitly in +opts+ takes precedence.
  def build(user, host, node_directory, opts)
    defaults = { :group => _get_group(host) }
    G5KNode.new(user, host, node_directory, defaults.merge(opts))
  end

  # Extracts the leading "<cluster>" part of a "<cluster>-<nodeid>..." host
  # name; returns nil when the host name does not have that shape.
  def _get_group(host)
    matched = /^(\w+)-(\d+).+$/.match(host) # <cluster>-<nodeid> ...
    matched ? matched.captures.first : nil
  end

end
79
+
80
# Factory that builds nodes reachable only through another node (the proxy).
class ProxiedFactory

  # node - the node through which the built nodes are reached.
  def initialize(node)
    @node = node
  end

  # Name of this factory; also used as part of the node directory name.
  def name
    "proxy"
  end

  # Builds a ProxiedNode whose options carry :proxy => the proxy node.
  # The option is added to a copy of +opts+: writing it into the caller's
  # hash would make every node built from a reused hash share (and
  # overwrite) the same :proxy entry.
  def build(user, host, directory, opts)
    ProxiedNode.new(user, host, directory, opts.merge(:proxy => @node))
  end

end
96
+
97
# Base class of every node: a remote machine addressed as user@host together
# with a local directory object through which all commands are issued.
# The constructor calls __setup__, which this class does not define itself;
# subclasses supply it.
class AbstractNode

  attr_reader :user, :host, :directory

  # opts is stored as given and exposed through #options. This class reads
  # the keys :factory (see #domain) and :group (see #group).
  def initialize(user, host, directory, opts = {})
    @user = user
    @host = host
    @directory = directory
    @mutex = Mutex.new
    @opts = opts

    __setup__()
  end

  # The option hash this node was built with.
  def options
    @opts
  end

  # Accessibility domain of the node, i.e. the name of the factory stored in
  # the options (nodes from within one domain are pairwise accessible and
  # reachable).
  def domain
    @opts[:factory].name
  end

  # Runs the block while holding this node's mutex; returns the block's value.
  def synchronize(&block)
    @mutex.synchronize(&block)
  end

  # Value of the :group option (nil when absent).
  def group
    @opts[:group]
  end

  # "user@host" string for this node.
  def userhost
    "#{@user}@#{@host}"
  end

  # Runs "mktemp" on the node and returns the stripped output.
  def tmpfile
    execute("mktemp").stdout.strip
  end

  def to_s
    "#{self.class}('#{userhost}')"
  end

  # Path of the node's local directory.
  def path
    @directory.path
  end

  # Runs md5sum on the node for the given files and returns a hash mapping
  # each reported file name to its digest.
  def md5sum(files)
    listing = execute("md5sum #{files.join(' ')}").stdout.strip
    listing.lines.each_with_object({}) do |line, sums|
      digest, filename = line.split
      sums[filename] = digest
    end
  end

  # Delegates to the directory's run with unchanged arguments.
  def run(*args)
    @directory.run(*args)
  end

  # Runs cmd on the node over SSH. Only env entries whose value is a String
  # are passed on (keys are converted to strings); opts[:wd] is forwarded
  # as :wd.
  def execute(cmd, env = {}, opts = {})
    string_env = {}
    env.each_pair do |key, value|
      string_env[key.to_s] = value if value.is_a?(String)
    end
    @directory.run_ssh(cmd, :env => string_env, :wd => opts[:wd])
  end

  # Runs cmd over SSH, passing +out+ and +err+ as the :out and :err options.
  def execute_with_files(cmd, out, err)
    @directory.run_ssh(cmd, :out => out, :err => err)
  end

  def ping
    @directory.run("ping")
  end

  def hostname
    @directory.run("hostname")
  end

  def scp(from, to)
    @directory.run("scp #{from} #{to}")
  end

  # Copies several files into to_dir with the directory's scp_many utility.
  def scp_many(files, to_dir)
    file_args = files.join(" ") # TODO: escaping?
    @directory.run("scp_many #{to_dir} #{file_args}")
  end

  # Creates a local temporary file (the block is handed to the directory's
  # mktemp) and then copies it to +path+ with scp.
  def file(path, &block)
    local_copy = @directory.mktemp(&block)
    scp(local_copy, path)
  end

  # Generates a node factory for nodes proxied through this one.
  def proxy_factory
    ProxiedFactory.new(self)
  end

  # Values made available to the templates: user, host and local path.
  def context
    { :user => user, :host => host, :path => path }
  end

end
212
+
213
# A node that is installed from a set of templates. Subclasses name the
# template set by defining template_name.
class TemplateNode < AbstractNode

  # Installs the templates selected by template_name into the node's
  # directory, using the node's context. Raises when template_name is nil.
  def __setup__
    if template_name().nil?
      raise "No template!"
    end
    NodeUtils.install_templates(path(), template_name(), context)
  end

end
221
+
222
# A simple, directly reachable node; installed from the "basic" templates.
class SimpleNode < TemplateNode

  # Name of the template set used for this kind of node.
  def template_name
    return "basic"
  end

end
230
+
231
# A node reached through another node; installed from the "proxy" templates.
class ProxiedNode < TemplateNode

  # Name of the template set used for this kind of node.
  def template_name
    return "proxy"
  end

  # The inherited context extended with :proxy, the path of the "ssh" entry
  # inside the directory of the proxy node (taken from the :proxy option).
  def context
    base = super
    base.merge(:proxy => "#{@opts[:proxy].path}/ssh")
  end

end
242
+
243
# A Grid'5000 node. Its directory is installed from different templates
# depending on where the program itself is running (see __setup__).
class G5KNode < AbstractNode

  # Labels of the node: always :g5k, followed by the first known site name
  # that occurs in the host name (if any).
  def labels
    # TODO: I don't know if it is a good way to do it
    sites = %w{bordeaux grenoble lille luxembourg lyon nancy reims rennes sophia toulouse}
    site = sites.select { |x| @host.include?(x) }.map(&:to_sym)
    [ :g5k ] + site[0..0]
  end

  # True when the local hostname ($hostname) ends with 'grid5000.fr'.
  def self.inside_g5k
    $hostname.end_with?('grid5000.fr')
  end

  # Chooses how to install the node; there are 3 cases:
  # 1. Inside grid5000, we make efficient bootstrap using basic-templates
  # 2. Inside inria, we have efficient access to G5K
  # 3. Really outside (``chez moi'' for example)
  def __setup__
    return install_inside_g5k() if G5KNode.inside_g5k()

    if File.exist?("/etc/resolv.conf")
      # the first "domain"/"search" line decides which gateway (if any) applies
      resolv = IO.read("/etc/resolv.conf").lines.
        select { |line| line.start_with?("domain ") || line.start_with?("search ") }
      entry = resolv.first
      unless entry.nil?
        gw = inria_gateway(entry.split[1])
        return install_inside_inria(gw) unless gw.nil?
      end
    end

    install_generic()
  end

  # Returns the gateway to use from the given DNS domain, or nil when the
  # domain is not a known one.
  def inria_gateway(domain)
    # more stuff has to be implemented here
    return "grid5000.loria.fr" if domain == "loria.fr"
    nil
  end

  def install_inside_g5k
    NodeUtils.install_templates(path(), "basic",
      context.merge({ :g5k_user => get_g5k_username() })
    )
  end

  def install_inside_inria(gw)
    NodeUtils.install_templates(path(), "inria",
      context.merge({ :g5k_user => get_g5k_username(), :gw => gw })
    )
  end

  def install_generic
    NodeUtils.install_templates(path(), "inria",
      context.merge({ :g5k_user => get_g5k_username(), :gw => "access.grid5000.fr" })
    )
  end

  # Returns root nodes for the machines of +job+.
  #
  # The host list reported by "kavlan -l -j <uid>" on the site frontend is
  # preferred; when that command fails or prints nothing, the job's
  # "assigned_nodes" are used instead (best effort).
  def self.kavlan(job, manager)
    link = job["links"].select { |x| x["rel"] == "parent" }.first["href"]
    site = link.split("/").last
    front = manager.get_node(get_g5k_username(), "#{site}.grid5000.fr", G5KNodeFactory.new)
    ns = job["assigned_nodes"]
    begin
      # the command output lives in #stdout of the execution result; calling
      # #lines on the result object itself raised and was silently rescued
      listed = front.execute("kavlan -l -j #{job["uid"]}").stdout.strip.lines.map(&:strip)
      ns = listed unless listed.empty?
    rescue StandardError
      # deliberate fallback to the assigned nodes
    end
    ns.map { |x| manager.get_node("root", x, G5KNodeFactory.new) }
  end

  # Path of the user's SSH configuration file (tilde expanded by the shell).
  def self.get_ssh_config
    %x(echo ~/.ssh/config).strip
  end

  # Path of the user's RSA public key (tilde expanded by the shell).
  def self.obtain_ssh_pubkey_path
    %x(echo ~/.ssh/id_rsa.pub).strip
  end

end
323
+
324
+ ### TYPES OF RESULTS ###
325
+
326
# Result of a remote command whose standard output and standard error are
# held directly in this object.
class BasicRemoteResult

  attr_reader :opts, :node, :stdout, :stderr

  def initialize(node, cmd, stdout, stderr, opts = {})
    @node, @cmd = node, cmd
    @stdout, @stderr = stdout, stderr
    @opts = opts
  end

  # The command that produced this result.
  def command
    @cmd
  end

  # Writes the captured standard output to the local file +filename+.
  def save_stdout(filename)
    IO.write(filename, @stdout)
  end

  # Writes the captured standard error to the local file +filename+.
  def save_stderr(filename)
    IO.write(filename, @stderr)
  end

  def to_s
    "BasicRemoteResult('#{@cmd}' on #{@node})"
  end

end
364
+
365
# Result of a remote command whose output was left in files on the node:
# the stdout/stderr values given to the constructor are *paths* of those
# files, not the contents. The contents are fetched with "cat" on demand.
class FileRemoteResult < BasicRemoteResult

  # Contents of the remote standard output file.
  def stdout
    fetched = @node.execute("cat #{@stdout}")
    fetched.stdout
  end

  # Contents of the remote standard error file.
  def stderr
    fetched = @node.execute("cat #{@stderr}")
    fetched.stdout
  end

  # Path of the remote standard output file.
  def stdout_file
    @stdout
  end

  # Path of the remote standard error file.
  def stderr_file
    @stderr
  end

  # Runs "cat" on the remote standard output file with +filename+ as the
  # output target and /dev/null as the error target.
  def save_stdout(filename)
    @node.execute_with_files("cat #{@stdout}", filename, "/dev/null")
  end

  # Same as save_stdout, for the remote standard error file.
  def save_stderr(filename)
    @node.execute_with_files("cat #{@stderr}", filename, "/dev/null")
  end

  def to_s
    "FileRemoteResult('#{@cmd}' on #{@node}, out => #{@stdout}, err => #{@stderr})"
  end

end
401
+
402
# A remote file (but usually already local): pairs the file's path with the
# result of the command that read it (probably a LocalExecutionResult).
class FileRemote

  attr_reader :path, :result

  def initialize(path, result)
    @path, @result = path, result
  end

end
415
+
416
# Results of one command executed on many nodes: a list of per-node results
# plus the command itself.
class ManyExecutionResult

  def initialize(list, cmd)
    @list, @command = list, cmd
  end

  # The underlying list of per-node results.
  def to_list
    @list
  end

  # Number of per-node results.
  def length
    to_list.length
  end

  def to_s
    "ManyResult('#{@command}' on #{length} nodes)"
  end

  # Iterates over the per-node results; returns what the list's each returns.
  def each(&block)
    to_list.each(&block)
  end

end
440
+
441
+
442
+
443
# Process-wide values computed once at load time: the hostname of the local
# machine (read by G5KNode.inside_g5k) and the path of the user's SSH public key.
$hostname = `hostname`.strip
$ssh_key = G5KNode.obtain_ssh_pubkey_path
446
+
447
+ ## XPFLOW LIBRARY
448
+
449
+ class NodesLibrary < ActivityLibrary
450
+
451
+ activities :node_list, :execute, :copy, :run_script, :check_node, :file,
452
+ :g5k_get_avail, :proxy_node, :broadcast, :g5k_site, :g5k_job,
453
+ :g5k_nodes, :monitor_node,
454
+ :g5k_kavlan_id, :g5k_kavlan_nodes_file, :g5k_frontend_from_job,
455
+ :g5k_kavlan_nodes, :nodes_file, :execute_funny,
456
+ :g5k_node, :execute_many, :execute_many_local, :all_prefixes, :execute_one, :distribute_one,
457
+ :execute_many_ignore_errors, :g5k_kadeploy, :execute_many_here,
458
+ :bootstrap_taktuk, :simple_node, :node_range, :taktuk_raw, :test_connectivity,
459
+ :nodes_from_file, :nodes_from_result, :distribute, :chain_copy, :ssh_key,
460
+ :nodes_from_machinefile, :g5k_deploy_keys, :localhost, :file_consistency,
461
+ :ping_localhost, :ping_node, :g5k_reserve_nodes
462
+
463
# Library setup hook; there is nothing to initialise, so it returns nil.
def setup
end
466
+
467
# Sends one ping from +node+ to +target+ and returns the number found after
# "time=" in the output as a Float (0.0 when nothing matches).
def ping_node(node, target)
  output = execute_one(node, "ping #{target} -c 1").stdout
  measured = output[/time=(\d+.*) /, 1]
  measured.to_f
end
471
+
472
# Pings "localhost" from +node+; when no node is given the node returned by
# localhost() is used.
def ping_localhost(node = nil)
  source = node.nil? ? localhost() : node
  ping_node(source, "localhost")
end
476
+
477
# Builds a temporary file name of the form
# "/tmp/.<prefix>-<g5k username>-<16 random hex digits>".
# Only the name is generated; no file is created.
def get_g5k_tmpfile(prefix = "tmp")
  suffix = Array.new(16) { (rand * 16).to_i.to_s(16) }.join
  "/tmp/.#{prefix}-#{get_g5k_username()}-#{suffix}"
end
481
+
482
# Returns the prefixes of +nodes+ whose lengths are inc, 2*inc, 3*inc, ...
# (as long as they do not exceed nodes.length).
#
#   all_prefixes([a, b, c])          # => [[a], [a, b], [a, b, c]]
#   all_prefixes([a, b, c, d, e], 2) # => [[a, b], [a, b, c, d]]
#
# Raises ArgumentError when inc is not positive; such a value used to make
# the method loop forever.
def all_prefixes(nodes, inc = 1)
  raise ArgumentError, "inc must be positive (got #{inc.inspect})" unless inc > 0
  # index of the last element of each prefix: inc-1, 2*inc-1, ...
  (inc - 1).step(nodes.length - 1, inc).map { |last| nodes[0..last] }
end
491
+
492
# The value stored under :__nodes__ in the current scope.
def nodes
  scope = Scope.current
  scope[:__nodes__]
end
495
+
496
# Normalises a node description recursively:
# * a String is split on whitespace into a list of names,
# * a Hash keeps its (stripped) keys and has its values normalised,
# * an Array has each element normalised.
# Anything else raises "Error!".
def _transform_nodes(x)
  case x
  when String
    x.strip.split
  when Hash
    pairs = x.map { |key, value| [ key.strip, _transform_nodes(value) ] }
    Hash[pairs]
  when Array
    x.map { |item| _transform_nodes(item) }
  else
    raise "Error!"
  end
end
508
+
509
# Turns a textual node name into a node, optionally reached through +parent+.
# * "g5k" is shorthand for "nancy.g5k";
# * "<site>.g5k" yields the G5K site node and is only allowed without parent;
# * anything else is "user@host": a simple node when there is no parent,
#   otherwise a node proxied through the parent.
def _get_node_via_proxy(name, parent = nil)
  name = name.strip
  name = "nancy.g5k" if name == "g5k"

  if /^(.+)\.g5k$/.match(name)
    # special syntax for G5K
    raise "G5K proxies must be topmost" if !parent.nil?
    return g5k_site(name.split(".").first)
  end

  user, host = name.split("@")
  parent.nil? ? simple_node(name) : proxy_node(parent, user, host)
end
528
+
529
# Walks a normalised node description and collects the nodes it names.
# * a String is a leaf: the node (reached through +proxy+) goes to nodes[:nodes];
# * an Array is walked element by element with the same proxy;
# * a Hash maps a proxy name to a sub-structure: the proxy node goes to
#   nodes[:proxies] and the sub-structure is walked through it.
# Anything else raises "Error!".
def __transform_with_proxy(structure, proxy, nodes)
  case structure
  when String
    nodes[:nodes].push(_get_node_via_proxy(structure, proxy))
  when Array
    structure.each { |item| __transform_with_proxy(item, proxy, nodes) }
  when Hash
    structure.each_pair do |proxy_name, children|
      via = _get_node_via_proxy(proxy_name, proxy)
      nodes[:proxies].push(via)
      __transform_with_proxy(children, via, nodes)
    end
  else
    raise "Error!"
  end
end
546
+
547
# Collects all nodes and proxies named in +tree+, starting without a proxy.
# Returns a hash with the keys :nodes and :proxies.
def _transform_with_proxy(tree)
  collected = { :nodes => [], :proxies => [] }
  __transform_with_proxy(tree, nil, collected)
  collected
end
552
+
553
# Reads whitespace-separated host names from +filename+ and returns a simple
# node for each of them, logging in as opts[:user].
def nodes_from_machinefile(filename, opts = {})
  hosts = IO.read(filename).strip.split
  userhosts = hosts.map { |host| "#{opts[:user]}@#{host}" }
  userhosts.map { |userhost| simple_node(userhost) }
end
558
+
559
# Loads a YAML node description from +filename+ and returns the list of
# nodes it names (proxies used on the way are not part of the result).
# +opts+ is accepted but not used.
def nodes_from_file(filename, opts = {})
  # NOTE(review): YAML.load is applied to the file contents as-is; if the
  # file can come from an untrusted source, a restricted loader should be
  # considered.
  description = YAML.load(IO.read(filename))
  tree = _transform_nodes(description)
  _transform_with_proxy(tree)[:nodes]
end
566
+
567
# Parses a list of "key=value" strings into a hash with symbol keys.
#
# Only the first "=" separates key and value, so values may themselves
# contain "=" ("expr=a=b" gives :expr => "a=b"; previously everything after
# the second "=" was dropped). An entry without "=" or with nothing after
# it maps to nil, as before.
def _parse_opts(array)
  array.each_with_object({}) do |option, parsed|
    key, value = option.split("=", 2)
    value = nil if value == "" # keep the historical nil for "key="
    parsed[key.to_sym] = value
  end
end
575
+
576
# Builds nodes from the standard output of +result+. Every line has the form
# "user@host [key=value ...]"; the key=value pairs become node options.
# Without +proxy+ simple nodes are built, otherwise nodes proxied through it.
def nodes_from_result(result, proxy = nil)
  entries = result.stdout.strip.lines.map(&:strip)
  entries.map do |entry|
    fields = entry.split
    userhost = fields.first
    opts = _parse_opts(fields[1..-1])
    if proxy.nil?
      simple_node(userhost, opts)
    else
      login, machine = userhost.split("@")
      proxy_node(proxy, login, machine, opts)
    end
  end
end
590
+
591
+ # activities
592
+
593
# Activity returning the node list of the current scope (see #nodes).
def node_list
  nodes
end
596
+
597
# Extracts [nodes, command] from an argument list:
# * one argument: the command, to be run on node_list();
# * two arguments: node(s) followed by the command.
# Any other number of arguments raises.
def get_node_list(args)
  case args.length
  when 1
    [ node_list(), args.first ]
  when 2
    [ arrayize(args.first), args.last ]
  else
    raise "Wrong number of arguments"
  end
end
608
+
609
# Turns the argument into a list of nodes: an Array is returned unchanged,
# anything else is wrapped in a one-element Array.
def arrayize(nodes)
  nodes.is_a?(Array) ? nodes : [ nodes ]
end
616
+
617
# Runs +cmd+ on one node or a list of nodes, one after another, and returns
# the list of results.
#
# env - environment for the command; the special key :wd is not part of
#       the environment but names the working directory and is passed to
#       the node as the :wd option.
#
# The caller's env hash is left untouched (:wd used to be deleted from it,
# so a hash reused for a second call silently lost its working directory).
def execute(nodes, cmd, env = {})
  env = env.dup
  wd = env.delete(:wd)
  arrayize(nodes).map { |node| node.execute(cmd, env, :wd => wd) }
end
627
+
628
# Runs +cmd+ through #execute and returns only the first result.
def execute_one(node, cmd, env = {})
  results = execute(node, cmd, env)
  results.first
end
631
+
632
# Splits the arguments of the execute_many family into [nodes, cmd, opts].
# A trailing Hash is taken (and removed from +args+) as the options; the
# remaining arguments are interpreted by get_node_list.
def _execute_many_parse_args(args)
  opts = args.last.is_a?(Hash) ? args.pop : {}
  nodes, cmd = get_node_list(args)
  [ nodes, cmd, opts ]
end
638
+
639
# Prepares a TakTukRun that is launched through the first node (the master).
#
# options - merged over the defaults :stdout, :stderr and :filename, each a
#           fresh temporary file of the engine's main directory. With
#           :exclude_master the master itself is not among the targets.
#
# (A check that all nodes belong to one domain exists only as commented-out
# code in the original and is not performed.)
def _get_taktuk(nodes, options = {})
  master = nodes.first
  targets = options[:exclude_master] ? nodes.tail : nodes
  taktuk = File.join(master.directory.path, "ssh taktuk")
  workdir = proxy.engine.main_directory
  defaults = {
    :stdout => workdir.mktemp(),
    :stderr => workdir.mktemp(),
    :filename => workdir.mktemp()
  }
  TakTukRun.new(taktuk, targets, defaults.merge(options))
end
654
+
655
# Makes sure TakTuk is installed: first on the master (the first node),
# executed directly, then on all nodes through execute_many.
# Accepts a single node or a list; returns nil for an empty list.
def bootstrap_taktuk(nodes)
  nodes = [ nodes ] if nodes.is_a?(AbstractNode)
  return if nodes.length == 0

  cmd = "(dpkg -l | grep taktuk) || apt-get install -y --force-yes taktuk"
  master = nodes.first
  # the master receives the shell-escaped form of the command
  master.execute(Shellwords.escape(cmd))

  proxy.log("#{master} has TakTuk now")

  execute_many(nodes, cmd, :propagate => true)
end
674
+
675
# Runs a command on many nodes with TakTuk and keeps the outputs in memory
# (BasicRemoteResult). Raises when not every node succeeded.
# Arguments: [nodes,] cmd [, options hash].
def execute_many_here(*args)
  nodes, cmd, opts = _execute_many_parse_args(args)
  return ManyExecutionResult.new([], cmd) if nodes.length == 0

  taktuk = _get_taktuk(nodes, opts)

  # command has to be escaped 2 times: local shell and remote shell
  succ, _failed = taktuk.execute(cmd, :escape => 2)

  unless succ.length == nodes.length
    raise "TakTuk: Some nodes failed (success: #{succ.length}/#{nodes.length}). See #{taktuk.stdout}"
  end

  outcomes = succ.map do |entry|
    BasicRemoteResult.new(entry[:node], cmd, entry[:stdout], entry[:stderr])
  end
  ManyExecutionResult.new(outcomes, cmd)
end
694
+
695
# Runs a command on many nodes with TakTuk, leaving the outputs in remote
# files (FileRemoteResult). Raises when not every node succeeded; the
# message points at the first failure's output file (if any) and at the
# TakTuk output.
# Arguments: [nodes,] cmd [, options hash].
def execute_many(*args)
  nodes, cmd, opts = _execute_many_parse_args(args)
  return ManyExecutionResult.new([], cmd) if nodes.length == 0

  taktuk = _get_taktuk(nodes, opts)
  succ, failures = taktuk.execute_remote(cmd, :escape => 2)

  unless succ.length == nodes.length
    msg = "TakTuk: Some nodes failed (success: #{succ.length}/#{nodes.length})."
    msg += " See #{failures.first[:stdout_file]}" if failures.length > 0
    msg += " See #{taktuk.stdout}"
    raise msg
  end

  outcomes = succ.map do |entry|
    FileRemoteResult.new(entry[:node], cmd, entry[:stdout_file], entry[:stderr_file])
  end
  ManyExecutionResult.new(outcomes, cmd)
end
718
+
719
# Copies one file to all nodes: scp to the first node (the master), then
# TakTuk "put" from there to the others, then an md5sum check on all nodes.
#
# f     - a local path, a LocalExecutionResult or a FileRemote (for the last
#         two the file holding the standard output is distributed).
# dest  - destination; when it ends with "/" the file keeps its base name
#         inside that directory.
# opts  - accepted but not used.
#
# Always returns nil; raises when a node does not answer or hashes differ.
def distribute_one(f, nodes, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)

  f = case f
      when String then f
      when LocalExecutionResult then f.stdout_file
      when FileRemote then f.result.stdout_file
      else raise "I don't know how to distribute #{f.class}"
      end

  # a trailing slash marks a directory; otherwise dest is the file name itself
  dest = File.join(dest, File.basename(f)) if dest.end_with?("/")

  proxy.log("Saving to: #{dest}")

  return nil if nodes.length == 0

  nodes.first.scp_many([ f ], dest)

  return nil if nodes.length == 1

  taktuk = _get_taktuk(nodes, :exclude_master => true)
  results = taktuk.put(dest, dest, :escape => 2)

  raise "Some nodes did not respond." if results.length + 1 != nodes.length

  put_hashes = results.map { |x| x[:hash] }.uniq
  raise "Some hashes were different." if put_hashes.length != 1

  # final check over every node, master included
  all_hashes = md5sum(nodes, [ dest ]).map { |x| x[:hash] }.uniq
  raise "Hashes could not be verified." if all_hashes.length != 1

  nil
end
768
+
769
# Checks that every file in +fs+ (one path or a list) has the same MD5 sum
# on all nodes; raises "File <name> is not consistent" otherwise.
def file_consistency(nodes, fs)
  fs = [ fs ] unless fs.is_a?(Array)

  digests_by_file = Hash.new { |h, k| h[k] = [] }
  md5sum(nodes, fs).each do |entry|
    digests_by_file[entry[:filename]].push(entry[:hash])
  end

  digests_by_file.each_pair do |f, digests|
    raise "File #{f} is not consistent" if digests.uniq.length != 1
  end
end
782
+
783
# Runs md5sum for +files+ on all +nodes+ and returns one
# { :filename => ..., :hash => ... } entry per output line of every node.
def md5sum(nodes, files)
  results = execute_many_here(nodes, "md5sum #{files.join(' ')}")
  results.to_list.flat_map do |r|
    r.stdout.strip.lines.map do |line|
      digest, filename = line.split
      { :filename => filename, :hash => digest }
    end
  end
end
794
+
795
# Copies a set of files into directory +dest+ on all nodes: scp to the first
# node (the master), TakTuk "put" from there to the others (one file at a
# time), and a comparison of the hashes reported by TakTuk with the md5sums
# computed on the master.
#
# glob - a glob pattern of local files, a LocalExecutionResult or a
#        FileRemote (for the last two the standard output file is
#        distributed); of an Array only the first element is used.
# opts - accepted but not used.
def distribute(glob, nodes, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)
  glob = glob.first if glob.is_a?(Array)

  files = case glob
          when String then Dir[glob] # get a list of files
          when LocalExecutionResult then [ glob.stdout_file ]
          when FileRemote then [ glob.result.stdout_file ]
          else raise "I don't know how to distribute #{glob.class}"
          end
  remote_files = files.map { |f| File.join(dest, File.basename(f)) }

  proxy.log("Found #{files.length} files to distribute.")

  return nil if nodes.length == 0

  master = nodes.first
  master.scp_many(files, dest)

  proxy.log("Files copied to #{master}:#{dest}")

  return nil if nodes.length == 1

  # reference digests, computed on the master
  listing = master.execute("md5sum #{remote_files.join(' ')}").stdout
  orig_hashes = {}
  listing.strip.each_line do |line|
    digest, filename = line.strip.split
    orig_hashes[filename] = digest
  end

  hashes = {}
  remote_files.each do |filepath|
    proxy.log("Distributing #{filepath}")
    taktuk = _get_taktuk(nodes, :exclude_master => true)
    results = taktuk.put(filepath, filepath, :escape => 2)

    raise "Some nodes did not respond." if results.length + 1 != nodes.length

    reported = results.map { |x| x[:hash] }.uniq
    raise "Some hashes were different." if reported.length != 1

    hashes[filepath] = reported.first
  end

  raise "Weird?" if hashes.keys.sort != orig_hashes.keys.sort

  orig_hashes.each_key do |filepath|
    if orig_hashes[filepath] != hashes[filepath]
      raise "Hash for #{filepath} does not match."
    end
  end

  proxy.log("MD5s match.")
end
857
+
858
# Activity returning the value of the global $ssh_key.
def ssh_key
  $ssh_key
end
861
+
862
# Runs the no-op command "true" on all nodes through execute_many.
def test_connectivity(nodes)
  execute_many(nodes, "true")
end
865
+
866
# Copies the local file +src+ into directory +dest+ on all nodes by chaining
# them: the file is copied to the first node with scp and then streamed
# from node to node over ssh (every node gets a small /tmp/.chainsend
# script). Finally the MD5 sums on all nodes must be identical.
# +opts+ is accepted but not used. Always returns nil.
def chain_copy(nodes, src, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)

  raise "'#{src}' does not exist!" unless File.exist?(src)

  dest = File.join(dest, File.basename(src))

  return nil if nodes.length == 0

  master = nodes.first
  laster = nodes.last

  master.scp(src, dest)
  proxy.log("#{src} copied to #{master}:#{dest}")

  return nil if nodes.length == 1

  # the digest itself is not used afterwards; the command is still run on the master
  _orig_hash = master.execute("md5sum #{dest}").stdout.strip.split.first

  execute_many(nodes, "apt-get install -y mbuffer")

  # mbuffer creates a buffer of size 2% * memory (by default)
  # we do chainsend thing

  second = nodes[1].host
  ssh_opts = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

  # head of the chain: sends the file to the second node
  master.file("/tmp/.chainsend") do |f|
    f.puts "set -eu"
    f.puts "cat $1 | ssh #{ssh_opts} #{second} \"bash /tmp/.chainsend $1\""
  end
  # tail of the chain: only stores what it receives
  laster.file("/tmp/.chainsend") do |f|
    f.puts "set -eu"
    f.puts "cat > $1"
  end
  # inner nodes: store the stream and pass it on to their successor
  nodes.each_cons(2).drop(1).each do |node, successor|
    next_host = successor.host
    node.file("/tmp/.chainsend") do |f|
      f.puts "set -eu"
      f.puts "tee $1 | mbuffer -q | ssh #{ssh_opts} #{next_host} \"bash /tmp/.chainsend $1\""
    end
  end

  proxy.log("Chain prepared.")

  master.execute("bash /tmp/.chainsend #{dest}")
  results = execute_many_here(nodes, "md5sum #{dest}")

  digests = results.to_list.map { |x| x.stdout.split.first }.uniq
  raise "Hashes differ." if digests.length != 1

  nil
end
924
+
925
# Reads +path+ on +node+ with "cat" and returns a FileRemote pairing the
# path with the result of that command.
def file(node, path)
  contents = execute_one(node, "cat #{path}")
  FileRemote.new(path, contents)
end
929
+
930
# Copies a file to path +where+ on each of +nodes+ (one node or a list) with
# scp and returns the list of scp results. +name+ can be:
# * a path (at local fs)
# * a LocalExecutionResult (its standard output file is copied)
# * a FileRemote (the standard output file of its result is copied)
# Of an Array only the first element is used.
def copy(name, nodes, where)
  name = name.first if name.is_a?(Array)

  source = case name
           when String then name
           when LocalExecutionResult then name.stdout_file
           when FileRemote then name.result.stdout_file
           else raise "Unknown file source: #{name.class}"
           end

  arrayize(nodes).map { |node| node.scp(source, where) }
end
955
+
956
# Runs the script registered under +name+ in $files on every node: the
# script is copied to a temporary file on the node, made executable (mode
# 700), executed and removed again.
#
# NOTE(review): node_list is defined without parameters in this file, so
# the node_list(label) call below looks like it cannot succeed as written;
# likewise no chmod/rm methods are visible on the node classes in this
# file. Confirm against the rest of the project before relying on this.
def run_script(name, label = :all)
  script = $files[name]
  node_list(label).each do |node|
    remote = node.tmpfile
    node.scp(script, remote)
    node.chmod(remote, "700")
    node.execute(remote)
    node.rm(remote)
  end
end
966
+
967
# Pings +node+; returns true on success and false when the ping raises an
# ExecutionError.
def check_node(node)
  debug "Checking #{node}..."
  reachable = true
  begin
    node.ping()
  rescue ExecutionError
    reachable = false
  end
  reachable
end
977
+
978
# Reserves nodes through a fresh G5K::Library whose log messages go to the
# engine's log; all arguments are forwarded to reserve_nodes.
def g5k_reserve_nodes(*args)
  activity_proxy = proxy()
  g5k = G5K::Library.new
  g5k.logging = proc { |x| activity_proxy.engine.log(x) }
  g5k.proxy = proxy
  g5k.reserve_nodes(*args)
end
985
+
986
# Picks a reservation through a fresh G5K::Library whose log messages go to
# the engine's log; +opts+ is forwarded to pick_reservation.
def g5k_get_avail(opts = {})
  activity_proxy = proxy()
  g5k = G5K::Library.new
  g5k.logging = proc { |x| activity_proxy.engine.log(x) }
  g5k.proxy = proxy
  g5k.pick_reservation(opts)
end
994
+
995
# Reads one host name per line from +filepath+ and returns a simple node
# "user@host" for each line.
def nodes_file(user, filepath)
  hosts = IO.read(filepath).chomp.lines.map(&:chomp)
  hosts.map { |host| simple_node("#{user}@#{host}") }
end
1002
+
1003
# Looks up a job through a fresh G5K::Library (log messages are printed
# with puts). Reads opts[:site] and opts[:id].
def g5k_job(opts = {})
  g5k = G5K::Library.new
  g5k.logging = proc { |x| puts x }
  g5k.job(opts[:site], opts[:id])
end
1009
+
1010
# Returns a G5K node for each host listed under "assigned_nodes" of +job+,
# logging in as the configured G5K user.
def g5k_nodes(job)
  login = get_g5k_username()
  job["assigned_nodes"].map { |machine| g5k_node(login, machine) }
end
1015
+
1016
# Returns the site of +job+: the last path component of the "href" of the
# job's first link whose "rel" is "parent".
def g5k_site_from_job(job)
  parent = job["links"].find { |link| link["rel"] == "parent" }
  parent["href"].split("/").last
end
1022
+
1023
# Keeps those entries of +vlan_nodes+ whose name before "-kavlan" equals the
# first dot-separated component of some entry of +ok_nodes+.
def _filter_vlan(ok_nodes, vlan_nodes)
  short_names = {}
  ok_nodes.each { |fqdn| short_names[fqdn.split(".").first] = true }
  vlan_nodes.select { |vlan| short_names.key?(vlan.split("-kavlan").first) }
end
1031
+
1032
# Generates a fresh passphrase-less SSH key pair on the frontend of +site+,
# installs it on every node (private key as .ssh/id_rsa, public key via
# ssh-copy-id) and removes the pair from the frontend again.
def g5k_deploy_keys(nodes, site)
  # assumes you are on the frontend
  key = get_g5k_tmpfile("ssh_key")
  frontend = g5k_site(site)
  frontend.execute("rm -f #{key} #{key}.pub; ssh-keygen -f #{key} -q -N ''")
  nodes.each do |n|
    target = n.userhost
    proxy.log "Sending key to: #{target}"
    frontend.execute("scp -o 'BatchMode=yes' -o 'UserKnownHostsFile=/dev/null' #{key} #{n.userhost}:.ssh/id_rsa")
    frontend.execute("ssh-copy-id -i #{key}.pub #{n.userhost}")
  end
  frontend.execute("rm -f #{key} #{key}.pub")
end
1044
+
1045
# Deploys environment +env+ with kadeploy3 on the machines of +job+ and
# returns root G5K nodes for the machines that are usable afterwards.
#
# custom - extra text placed after "-k" on the kadeploy3 command line.
# opts   - :count      => use only the first N assigned nodes;
#          :real_nodes => list of (kavlan) host names; when given, the result
#                         is this list restricted by _filter_vlan to the
#                         machines that were deployed successfully.
#
# A fresh passphrase-less SSH key pair is generated on the frontend and
# installed on every resulting machine.
def g5k_kadeploy(job, env, custom = "", opts = {})

  site = g5k_site_from_job(job)
  frontend = g5k_site(site)
  nodes = job["assigned_nodes"]

  nodes = opts.key?(:count) ? nodes[0...opts[:count]] : nodes
  final_nodes = opts[:real_nodes]

  proxy.log("Using #{nodes.length} machines.")

  machinefile = get_g5k_tmpfile("machines")
  nodes_ok = get_g5k_tmpfile("good_nodes")

  begin
    IO.write(machinefile, nodes.join("\n"))
    frontend.scp(machinefile, machinefile)
    kadeploy = "kadeploy3 -f #{machinefile} -e #{env} -k #{custom} -o #{nodes_ok}"
    proxy.log("Running deployment: #{kadeploy}")
    frontend.execute(kadeploy)
    frontend.execute("rm -f #{machinefile}")
  ensure
    # the machine list is also written locally; remove that copy as well
    # (it may already be gone when the frontend is the local machine)
    File.delete(machinefile) if File.exist?(machinefile)
  end

  key = get_g5k_tmpfile("ssh_key")
  frontend.execute("rm -f #{key} #{key}.pub; ssh-keygen -f #{key} -q -N ''")

  # kadeploy3 wrote the successfully deployed machines to nodes_ok
  ok_nodes = frontend.execute("sort -V #{nodes_ok}").stdout.split
  frontend.execute("rm -f #{nodes_ok}")

  proxy.log("Nodes that survived: #{ok_nodes.length}/#{nodes.length}")
  proxy.log("Final nodes: #{final_nodes}")

  if final_nodes.nil?
    final_nodes = ok_nodes
  else
    final_nodes = _filter_vlan(ok_nodes, final_nodes)
  end

  # we have to install SSH keys
  final_nodes.each do |n|
    host = "root@#{n}"
    frontend.execute("scp -o 'BatchMode=yes' -o 'UserKnownHostsFile=/dev/null' #{key} #{host}:.ssh/id_rsa")
    frontend.execute("ssh-copy-id -i #{key}.pub #{host}")
  end
  frontend.execute("rm -f #{key} #{key}.pub")

  final_nodes.map { |x| g5k_node("root", x) }
end
1092
+
1093
# Maps +job+ to its site (g5k_site_from_job) and returns what g5k_site
# yields for that site.
def g5k_frontend_from_job(job)
  g5k_site(g5k_site_from_job(job))
end
1098
+
1099
# Runs `kavlan -V -j <uid>` on the job's frontend and returns the command's
# standard output, stripped and converted with to_i.
def g5k_kavlan_id(job)
  job_id = job['uid']
  result = g5k_frontend_from_job(job).execute("kavlan -V -j #{job_id}")
  result.stdout.strip.to_i
end
1105
+
1106
# Stores the output of `kavlan -l -j <uid>` in a temporary file, copies that
# file to the same path on the job's frontend, and returns the path.
def g5k_kavlan_nodes_file(job)
  # TODO (marker kept from the original code; it does not say what is pending)
  job_id = job['uid']
  listing_path = get_g5k_tmpfile("kavlan_nodes")
  frontend = g5k_frontend_from_job(job)
  listing = frontend.execute("kavlan -l -j #{job_id}")
  IO.write(listing_path, listing.stdout)
  frontend.scp(listing_path, listing_path)
  listing_path
end
1116
+
1117
# Runs `kavlan -l -j <uid>` on the job's frontend and returns one "root"
# node object per line of its output (each line stripped of whitespace).
def g5k_kavlan_nodes(job)
  job_id = job['uid']
  listing = g5k_frontend_from_job(job).execute("kavlan -l -j #{job_id}")
  hostnames = listing.stdout.strip.lines.map { |line| line.strip }
  hostnames.map { |hostname| g5k_node("root", hostname) }
end
1125
+
1126
# Asks the engine's nodes manager for the node of the Grid'5000 user
# (get_g5k_username) on host "<site>.grid5000.fr", using a G5KNodeFactory.
def g5k_site(site)
  proxy.engine.nodes_manager.get_node(
    get_g5k_username(),
    "#{site}.grid5000.fr",
    G5KNodeFactory.new
  )
end
1130
+
1131
# Asks the engine's nodes manager for the node +user+@+host+, using a
# G5KNodeFactory.
def g5k_node(user, host)
  proxy.engine.nodes_manager.get_node(user, host, G5KNodeFactory.new)
end
1135
+
1136
# Asks the engine's nodes manager for the node +user+@+host+, using a
# ProxiedFactory built from +via+; +opts+ is forwarded unchanged.
def proxy_node(via, user, host, opts = {})
  proxy.engine.nodes_manager.get_node(user, host, ProxiedFactory.new(via), opts)
end
1140
+
1141
# Splits a host specification of the form "user@host" or "host".
#
# pattern - the specification; surrounding whitespace is ignored
#
# Returns a two-element array [user, host]. When no user is given, a warning
# is logged and Etc.getlogin is used as the user.
#
# Raises RuntimeError when the specification is empty, contains more than one
# "@", or has an empty user or host part (e.g. "root@" or "@node").
def split(pattern)
  spec = pattern.strip
  raise "Empty host specification" if spec == ""

  # A limit of -1 keeps trailing empty fields, so "root@" yields ["root", ""]
  # and can be rejected instead of being mistaken for a bare host name.
  parts = spec.split("@", -1)
  if parts.length > 2 || parts.any? { |part| part.empty? }
    raise "Invalid host specification: #{spec}"
  end
  return parts if parts.length == 2

  proxy.log "User not specified. This is not reproducible."
  [ Etc.getlogin, spec ]
end
1156
+
1157
# Finds hosts with NodeUtils.broadcast_ping on the address part of +pattern+
# ("user@address") and returns one node per responding address, obtained
# from the engine's nodes manager for the user part of +pattern+.
def broadcast(pattern)
  registry = proxy.engine.nodes_manager
  user, address = split(pattern)
  responders = NodeUtils.broadcast_ping(address)
  proxy.log("Found #{responders.length} hosts via ICMP broadcast")
  responders.map { |ip| registry.get_node(user, ip) }
end
1164
+
1165
# Builds nodes for +count+ consecutive IPv4 addresses.
#
# ip_start - "user@a.b.c.d" (or "a.b.c.d"), parsed by split; the first address
# count    - number of addresses; zero or a negative number yields []
#
# Returns an array with one simple_node per address. Incrementing carries
# across octets, e.g. 10.0.0.255 is followed by 10.0.1.0.
#
# Raises RuntimeError when the host part is not a dotted-quad IPv4 address
# or when the requested range would run past 255.255.255.255.
def node_range(ip_start, count)
  user, host = split(ip_start)

  octets = host.split(".", -1)
  well_formed = octets.length == 4 &&
                octets.all? { |o| o =~ /\A\d{1,3}\z/ && o.to_i <= 255 }
  raise "Invalid IPv4 address: #{host}" unless well_formed

  # Treat the address as one 32-bit number so that carrying between octets
  # is plain integer addition.
  first = octets.inject(0) { |acc, o| (acc << 8) | o.to_i }
  if count > 0 && first + count - 1 > 0xFFFFFFFF
    raise "Address range starting at #{host} exceeds the IPv4 address space"
  end

  ips = (0...count).map do |offset|
    value = first + offset
    [24, 16, 8, 0].map { |shift| (value >> shift) & 0xFF }.join(".")
  end
  ips.map { |ip| simple_node("#{user}@#{ip}") }
end
1182
+
1183
# Parses +pattern+ with split and asks the engine's nodes manager for the
# resulting user/host pair, using a SimpleNodeFactory; +opts+ is forwarded.
def simple_node(pattern, opts = {})
  user, host = split(pattern)
  proxy.engine.nodes_manager.get_node(user, host, SimpleNodeFactory.new, opts)
end
1188
+
1189
# Returns the simple_node for the current user on 127.0.0.1.
#
# The user name is read from the USER environment variable; when that is
# unset or empty, Etc.getlogin is used instead, so the specification handed
# to simple_node does not start with a bare "@".
def localhost()
  user = ENV["USER"]
  user = Etc.getlogin if user.nil? || user.empty?
  simple_node("#{user}@127.0.0.1")
end
1193
+
1194
+ end
1195
+
1196
# Helpers for rendering ERB templates to disk and for discovering hosts
# with a broadcast ping.
module NodeUtils

  # Joins +name+ onto the directory of this source file and passes the
  # result through realpath (a helper that is not defined in this module).
  def self.get_templates_dir(name)
    realpath(File.join(File.dirname(__FILE__), name))
  end

  # Renders the ERB template stored in file +t+ with the values of +ctx+
  # and writes the result to +path+, whose mode is set to 0700.
  def self.render_file(t, path, ctx)
    rendered = Erb.render(IO.read(t), ctx)
    File.open(path, "wb") do |file|
      file.write(rendered)
      file.chmod(0700)
    end
  end

  # Renders every regular file of "templates/utils" into directory +path+
  # (same file names) and "templates/ssh-config.<name>" into
  # "<path>/ssh-config". Templates see +ctx+ plus :templates, the templates
  # directory itself.
  def self.install_templates(path, name, ctx)
    ctx = ctx.merge({ :templates => get_templates_dir("templates") })
    utils_dir = get_templates_dir("templates/utils")
    Dir.entries(utils_dir).each do |entry|
      source = File.join(utils_dir, entry)
      # Skips "." and ".." as well as any subdirectory.
      next unless File.file?(source)
      render_file(source, File.join(path, entry), ctx)
    end
    config_template = get_templates_dir("templates/ssh-config.#{name}")
    render_file(config_template, File.join(path, "ssh-config"), ctx)
  end

  # uses a broadcast ping to get a list of nodes that respond
  #
  # Returns the sorted, de-duplicated addresses found in ping's output.
  # Raises RuntimeError ("No nodes found") when there are none; ping's own
  # exit status is deliberately not checked.
  def self.broadcast_ping(address, timeout = 2)
    reply = `ping -c 3 -n -b #{address} -w #{timeout} 2> /dev/null`
    found = reply.scan(/from ([\.0-9]+): icmp_seq=/).flatten.uniq.sort
    raise "No nodes found" if found.empty?
    found
  end

  # OpenStruct whose attributes are visible to ERB templates as bare names.
  class Erb < OpenStruct

    # Evaluates the template source +hash+ in this object's binding.
    # The parameter name is kept as is: it is a local of that binding and
    # therefore part of what a template can see.
    def render(hash)
      ERB.new(hash).result(binding)
    end

    # Renders the source +template+ with the keys of +hash+ as attributes.
    def self.render(template, hash)
      Erb.new(hash).render(template)
    end

  end

end
1249
+
1250
+ end