xpflow 0.1b

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. data/bin/xpflow +96 -0
  2. data/lib/colorado.rb +198 -0
  3. data/lib/json/add/core.rb +243 -0
  4. data/lib/json/add/rails.rb +8 -0
  5. data/lib/json/common.rb +423 -0
  6. data/lib/json/editor.rb +1369 -0
  7. data/lib/json/ext.rb +28 -0
  8. data/lib/json/pure/generator.rb +442 -0
  9. data/lib/json/pure/parser.rb +320 -0
  10. data/lib/json/pure.rb +15 -0
  11. data/lib/json/version.rb +8 -0
  12. data/lib/json.rb +62 -0
  13. data/lib/mime/types.rb +881 -0
  14. data/lib/mime-types.rb +3 -0
  15. data/lib/restclient/abstract_response.rb +106 -0
  16. data/lib/restclient/exceptions.rb +193 -0
  17. data/lib/restclient/net_http_ext.rb +55 -0
  18. data/lib/restclient/payload.rb +235 -0
  19. data/lib/restclient/raw_response.rb +34 -0
  20. data/lib/restclient/request.rb +316 -0
  21. data/lib/restclient/resource.rb +169 -0
  22. data/lib/restclient/response.rb +24 -0
  23. data/lib/restclient.rb +174 -0
  24. data/lib/xpflow/bash.rb +341 -0
  25. data/lib/xpflow/bundle.rb +113 -0
  26. data/lib/xpflow/cmdline.rb +249 -0
  27. data/lib/xpflow/collection.rb +122 -0
  28. data/lib/xpflow/concurrency.rb +79 -0
  29. data/lib/xpflow/data.rb +393 -0
  30. data/lib/xpflow/dsl.rb +816 -0
  31. data/lib/xpflow/engine.rb +574 -0
  32. data/lib/xpflow/ensemble.rb +135 -0
  33. data/lib/xpflow/events.rb +56 -0
  34. data/lib/xpflow/experiment.rb +65 -0
  35. data/lib/xpflow/exts/facter.rb +30 -0
  36. data/lib/xpflow/exts/g5k.rb +931 -0
  37. data/lib/xpflow/exts/g5k_use.rb +50 -0
  38. data/lib/xpflow/exts/gui.rb +140 -0
  39. data/lib/xpflow/exts/model.rb +155 -0
  40. data/lib/xpflow/graph.rb +1603 -0
  41. data/lib/xpflow/graph_xpflow.rb +251 -0
  42. data/lib/xpflow/import.rb +196 -0
  43. data/lib/xpflow/library.rb +349 -0
  44. data/lib/xpflow/logging.rb +153 -0
  45. data/lib/xpflow/manager.rb +147 -0
  46. data/lib/xpflow/nodes.rb +1250 -0
  47. data/lib/xpflow/runs.rb +773 -0
  48. data/lib/xpflow/runtime.rb +125 -0
  49. data/lib/xpflow/scope.rb +168 -0
  50. data/lib/xpflow/ssh.rb +186 -0
  51. data/lib/xpflow/stat.rb +50 -0
  52. data/lib/xpflow/stdlib.rb +381 -0
  53. data/lib/xpflow/structs.rb +369 -0
  54. data/lib/xpflow/taktuk.rb +193 -0
  55. data/lib/xpflow/templates/ssh-config.basic +14 -0
  56. data/lib/xpflow/templates/ssh-config.inria +18 -0
  57. data/lib/xpflow/templates/ssh-config.proxy +13 -0
  58. data/lib/xpflow/templates/taktuk +6590 -0
  59. data/lib/xpflow/templates/utils/batch +4 -0
  60. data/lib/xpflow/templates/utils/bootstrap +12 -0
  61. data/lib/xpflow/templates/utils/hostname +3 -0
  62. data/lib/xpflow/templates/utils/ping +3 -0
  63. data/lib/xpflow/templates/utils/rsync +12 -0
  64. data/lib/xpflow/templates/utils/scp +17 -0
  65. data/lib/xpflow/templates/utils/scp_many +8 -0
  66. data/lib/xpflow/templates/utils/ssh +3 -0
  67. data/lib/xpflow/templates/utils/ssh-interactive +4 -0
  68. data/lib/xpflow/templates/utils/taktuk +19 -0
  69. data/lib/xpflow/threads.rb +187 -0
  70. data/lib/xpflow/utils.rb +569 -0
  71. data/lib/xpflow/visual.rb +230 -0
  72. data/lib/xpflow/with_g5k.rb +7 -0
  73. data/lib/xpflow.rb +349 -0
  74. metadata +135 -0
@@ -0,0 +1,1250 @@
1
+
2
+ require 'erb'
3
+ require 'ostruct'
4
+ require 'xpflow/exts/g5k'
5
+ require 'yaml'
6
+ require 'thread'
7
+ require 'shellwords'
8
+
9
# returns the Grid'5000 user name kept in the global $g5k_user;
# raises a RuntimeError when that global is nil
def get_g5k_username
  username = $g5k_user
  raise "No G5K username!" if username.nil?
  username
end
13
+
14
+ module XPFlow
15
+
16
+ # manages all nodes
17
+
18
# creates nodes through factories; every node gets its own numbered
# subdirectory of the manager's directory
class NodesManager

  # directory - object responding to subdir(name)
  def initialize(directory)
    @directory = directory
    @mutex = Mutex.new
    @node_counter = 0
  end

  # runs the block while holding the manager's mutex and returns its value
  def synchronize(&block)
    @mutex.synchronize(&block)
  end

  # delegates to the underlying directory
  def subdir(name)
    @directory.subdir(name)
  end

  # builds a node for user@host with the given factory.
  # The factory receives a copy of opts extended with :factory => factory,
  # so the hash passed by the caller is left untouched (it used to be
  # modified in place, which leaked :factory into hashes reused by callers).
  # Returns whatever factory.build returns.
  def get_node(user, host, factory, opts = {})
    synchronize do
      @node_counter += 1
      node_directory = subdir("#{host}--#{user}--#{factory.name}--#{@node_counter}")
      factory.build(user, host, node_directory, opts.merge(:factory => factory))
    end
  end

end
44
+
45
# factory of directly-reachable hosts
class SimpleNodeFactory

  # name of the factory
  def name
    "normal"
  end

  # passes all arguments unchanged to SimpleNode.new
  def build(*node_args)
    SimpleNode.new(*node_args)
  end

end
57
+
58
# factory of G5KNode objects
class G5KNodeFactory

  # name of the factory
  def name
    "grid5000"
  end

  # builds a G5KNode; :group defaults to the prefix extracted from the
  # host name and can be overridden through opts
  def build(user, host, node_directory, opts)
    defaults = { :group => _get_group(host) }
    G5KNode.new(user, host, node_directory, defaults.merge(opts))
  end

  # returns the <cluster> part of a "<cluster>-<nodeid>..." host name,
  # or nil when the name does not have that shape
  def _get_group(host)
    matched = /^(\w+)-(\d+).+$/.match(host) # <cluster>-<nodeid> ...
    matched ? matched.captures.first : nil
  end

end
79
+
80
# factory of nodes that are reached through another node (the proxy)
class ProxiedFactory

  # node - the node used as the proxy for everything this factory builds
  def initialize(node)
    @node = node
  end

  # name of the factory
  def name
    "proxy"
  end

  # builds a ProxiedNode whose options carry :proxy => the proxy node.
  # The options are copied first so that the caller's hash is not modified
  # (it used to be updated in place).
  def build(user, host, directory, opts)
    ProxiedNode.new(user, host, directory, opts.merge(:proxy => @node))
  end

end
96
+
97
# base class of all nodes: a (user, host) pair bound to a directory object
# through which every command below is run; initialize calls __setup__,
# which is not defined in this class
class AbstractNode

  attr_reader :user, :host, :directory

  def initialize(user, host, directory, opts = {})
    @user = user
    @host = host
    @directory = directory
    @mutex = Mutex.new
    @opts = opts

    __setup__
  end

  # the option hash given at construction time
  def options
    @opts
  end

  # accessibility domain of a node (nodes from within one domain
  # are pairwise accessible and reachable): the name of its factory
  def domain
    @opts[:factory].name
  end

  # runs the block while holding this node's mutex
  def synchronize(&block)
    @mutex.synchronize(&block)
  end

  def group
    @opts[:group]
  end

  def userhost
    "#{@user}@#{@host}"
  end

  # runs mktemp through execute and returns the stripped output
  def tmpfile
    created = execute("mktemp")
    created.stdout.strip
  end

  def to_s
    "#{self.class}('#{userhost}')"
  end

  def path
    @directory.path
  end

  # runs md5sum on the given files and returns a hash: filename => digest
  def md5sum(files)
    listing = execute("md5sum #{files.join(' ')}").stdout.strip
    listing.lines.each_with_object({}) do |line, sums|
      digest, filename = line.split
      sums[filename] = digest
    end
  end

  def run(*args)
    @directory.run(*args)
  end

  # runs cmd via the directory's run_ssh; only env entries whose value is
  # a String are passed on (keys converted to strings); opts[:wd] is
  # forwarded as :wd
  def execute(cmd, env = {}, opts = {})
    string_env = {}
    env.each_pair do |key, value|
      string_env[key.to_s] = value if value.is_a?(String)
    end
    @directory.run_ssh(cmd, :env => string_env, :wd => opts[:wd])
  end

  # like execute, but hands :out and :err over to run_ssh
  def execute_with_files(cmd, out, err)
    @directory.run_ssh(cmd, :out => out, :err => err)
  end

  def ping
    @directory.run("ping")
  end

  def hostname
    @directory.run("hostname")
  end

  def scp(from, to)
    @directory.run("scp #{from} #{to}")
  end

  def scp_many(files, to_dir)
    file_args = files.join(" ") # TODO: escaping?
    @directory.run("scp_many #{to_dir} #{file_args}")
  end

  # creates a file here (via the directory's mktemp, which receives the
  # block) and then does scp to the node
  def file(path, &block)
    local_copy = @directory.mktemp(&block)
    scp(local_copy, path)
  end

  # generates a node factory for nodes proxied through this one
  def proxy_factory
    ProxiedFactory.new(self)
  end

  def context
    { :user => user, :host => host, :path => path }
  end

end
212
+
213
# a node that is installed using a set of templates; subclasses supply
# template_name
class TemplateNode < AbstractNode

  # installs the templates named by template_name into the node's path;
  # raises when no template name is available
  def __setup__
    if template_name.nil?
      raise "No template!"
    end
    NodeUtils.install_templates(path, template_name, context)
  end

end
221
+
222
# a simple, directly reachable node
class SimpleNode < TemplateNode

  # name of the template set this node is installed with
  def template_name
    return "basic"
  end

end
230
+
231
# a node installed with the "proxy" templates; its :proxy option holds
# the node it is reached through
class ProxiedNode < TemplateNode

  def template_name
    return "proxy"
  end

  # the inherited context plus :proxy, the "ssh" entry inside the proxy
  # node's path
  def context
    proxy_ssh = "#{@opts[:proxy].path}/ssh"
    super.merge(:proxy => proxy_ssh)
  end

end
242
+
243
# a Grid'5000 node; the templates it is installed with depend on where
# the program is running (see __setup__)
class G5KNode < AbstractNode

  # returns [ :g5k ] followed by at most one site symbol: the first site
  # from the list below whose name occurs in the host name
  def labels
    # TODO: I don't know if it is a good way to do it
    sites = %w{bordeaux grenoble lille luxembourg lyon nancy reims rennes sophia toulouse}
    site = sites.select { |x| @host.include?(x) }.map(&:to_sym)
    [ :g5k ] + site[0..0]
  end

  # true when the local hostname ($hostname) ends with 'grid5000.fr'
  def self.inside_g5k
    $hostname.end_with?('grid5000.fr')
  end

  # here we make some magic; there are 3 cases:
  #   1. Inside grid5000, we make efficient bootstrap using basic-templates
  #   2. Inside inria, we have efficient access to G5K
  #   3. Really outside (``chez moi'' for example)
  def __setup__
    return install_inside_g5k if G5KNode.inside_g5k

    if File.exist?("/etc/resolv.conf")
      # the first "domain"/"search" line of resolv.conf tells the local domain
      entry = IO.read("/etc/resolv.conf").lines.find do |line|
        line.start_with?("domain ") || line.start_with?("search ")
      end
      unless entry.nil?
        gw = inria_gateway(entry.split[1])
        return install_inside_inria(gw) unless gw.nil?
      end
    end
    install_generic
  end

  # returns the gateway to use from the given local domain, or nil
  def inria_gateway(domain)
    # more stuff has to be implemented here
    return "grid5000.loria.fr" if domain == "loria.fr"
    nil
  end

  def install_inside_g5k
    NodeUtils.install_templates(path, "basic",
      context.merge({ :g5k_user => get_g5k_username() })
    )
  end

  def install_inside_inria(gw)
    NodeUtils.install_templates(path, "inria",
      context.merge({ :g5k_user => get_g5k_username(), :gw => gw })
    )
  end

  def install_generic
    NodeUtils.install_templates(path, "inria",
      context.merge({ :g5k_user => get_g5k_username(), :gw => "access.grid5000.fr" })
    )
  end

  # returns root nodes for the hosts of a job: the hosts listed by
  # "kavlan -l" on the job's site frontend when that command yields any,
  # otherwise the job's "assigned_nodes".
  # The listing is read from the command result's stdout, like every other
  # caller of execute in this file does; it used to call .lines on the
  # result object itself, with the resulting error swallowed by a bare rescue.
  def self.kavlan(job, manager)
    link = job["links"].select { |x| x["rel"] == "parent" }.first["href"]
    site = link.split("/").last
    front = manager.get_node(get_g5k_username(), "#{site}.grid5000.fr", G5KNodeFactory.new)
    ns = job["assigned_nodes"]
    begin
      vlan_hosts = front.execute("kavlan -l -j #{job["uid"]}").stdout.strip.lines.map(&:strip)
      ns = vlan_hosts unless vlan_hosts.empty?
    rescue StandardError
      # best effort: keep the assigned nodes when kavlan cannot be queried
    end
    ns.map { |x| manager.get_node("root", x, G5KNodeFactory.new) }
  end

  # ~/.ssh/config with the tilde expanded by the shell
  def self.get_ssh_config
    %x(echo ~/.ssh/config).strip
  end

  # ~/.ssh/id_rsa.pub with the tilde expanded by the shell
  def self.obtain_ssh_pubkey_path
    %x(echo ~/.ssh/id_rsa.pub).strip
  end

end
323
+
324
+ ### TYPES OF RESULTS ###
325
+
326
# result of a command run on a node, with both outputs kept in memory
class BasicRemoteResult

  attr_reader :opts, :node, :stdout, :stderr

  def initialize(node, cmd, stdout, stderr, opts = {})
    @node, @cmd = node, cmd
    @stdout, @stderr = stdout, stderr
    @opts = opts
  end

  # the command this result belongs to
  def command
    @cmd
  end

  # writes the stored standard output to a local file
  def save_stdout(filename)
    IO.write(filename, @stdout)
  end

  # writes the stored standard error to a local file
  def save_stderr(filename)
    IO.write(filename, @stderr)
  end

  def to_s
    "BasicRemoteResult('#{@cmd}' on #{@node})"
  end

end
364
+
365
# result of a command whose outputs are kept in files:
# here the stored stdout & stderr are *paths*, read with "cat" on the node
class FileRemoteResult < BasicRemoteResult

  # contents of the stdout file
  def stdout
    remote_cat(@stdout)
  end

  # contents of the stderr file
  def stderr
    remote_cat(@stderr)
  end

  # path of the stdout file
  def stdout_file
    @stdout
  end

  # path of the stderr file
  def stderr_file
    @stderr
  end

  # cats the stdout file on the node, passing filename as the :out
  # target of the execution
  def save_stdout(filename)
    @node.execute_with_files("cat #{@stdout}", filename, "/dev/null")
  end

  # cats the stderr file on the node, passing filename as the :out
  # target of the execution
  def save_stderr(filename)
    @node.execute_with_files("cat #{@stderr}", filename, "/dev/null")
  end

  def to_s
    "FileRemoteResult('#{@cmd}' on #{@node}, out => #{@stdout}, err => #{@stderr})"
  end

  private

  # output of "cat <path>" executed on the node
  def remote_cat(path)
    @node.execute("cat #{path}").stdout
  end

end
401
+
402
# a remote file (but usually already local): a path together with the
# result object it was obtained from
# result is probably: LocalExecutionResult
class FileRemote

  attr_reader :path, :result

  def initialize(path, result)
    @path, @result = path, result
  end

end
415
+
416
# results of one command executed on many nodes
class ManyExecutionResult

  # list - the per-node results, cmd - the command they belong to
  def initialize(list, cmd)
    @list, @command = list, cmd
  end

  # the underlying list of results
  def to_list
    @list
  end

  # number of results
  def length
    to_list.length
  end

  def to_s
    "ManyResult('#{@command}' on #{length} nodes)"
  end

  # iterates over the results
  def each(&block)
    to_list.each(&block)
  end

end
440
+
441
+
442
+
443
# globals computed when this file is loaded: the output of the local
# "hostname" command and the path of the user's public SSH key
$hostname = `hostname`.strip
$ssh_key = G5KNode.obtain_ssh_pubkey_path
446
+
447
+ ## XPFLOW LIBRARY
448
+
449
+ class NodesLibrary < ActivityLibrary
450
+
451
# names declared as activities of this library
activities(
  :node_list, :execute, :copy, :run_script, :check_node, :file,
  :g5k_get_avail, :proxy_node, :broadcast, :g5k_site, :g5k_job,
  :g5k_nodes, :monitor_node,
  :g5k_kavlan_id, :g5k_kavlan_nodes_file, :g5k_frontend_from_job,
  :g5k_kavlan_nodes, :nodes_file, :execute_funny,
  :g5k_node, :execute_many, :execute_many_local, :all_prefixes,
  :execute_one, :distribute_one,
  :execute_many_ignore_errors, :g5k_kadeploy, :execute_many_here,
  :bootstrap_taktuk, :simple_node, :node_range, :taktuk_raw,
  :test_connectivity,
  :nodes_from_file, :nodes_from_result, :distribute, :chain_copy,
  :ssh_key,
  :nodes_from_machinefile, :g5k_deploy_keys, :localhost,
  :file_consistency,
  :ping_localhost, :ping_node, :g5k_reserve_nodes
)
462
+
463
# does nothing and returns nil
def setup
  return nil
end
466
+
467
# runs a single ping of target on node and returns the number following
# "time=" in its output as a float (0.0 when the output has no such field)
def ping_node(node, target)
  output = execute_one(node,"ping #{target} -c 1").stdout
  latency = output[/time=(\d+.*) /, 1]
  latency.to_f
end
471
+
472
# pings "localhost" from the given node; when node is nil the node
# returned by localhost() is used
def ping_localhost(node = nil)
  source = node.nil? ? self.localhost() : node
  ping_node(source, "localhost")
end
476
+
477
# returns a path of the form /tmp/.<prefix>-<g5k user>-<16 random hex digits>;
# no file is created
def get_g5k_tmpfile(prefix = "tmp")
  digits = Array.new(16) { (rand * 16).to_i.to_s(16) }
  "/tmp/.#{prefix}-#{get_g5k_username()}-#{digits.join}"
end
481
+
482
# returns the prefixes of nodes whose lengths are inc, 2*inc, 3*inc, ...
# (as long as they fit), e.g. all_prefixes([a, b, c]) => [[a], [a, b], [a, b, c]].
#
# nodes - an array
# inc   - step between consecutive prefix lengths; must be at least 1
#
# Raises ArgumentError when inc is smaller than 1 (such a value used to
# make the method loop forever).
def all_prefixes(nodes, inc = 1)
  raise ArgumentError, "inc must be at least 1 (got #{inc.inspect})" if inc < 1
  (inc - 1).step(nodes.length - 1, inc).map { |last| nodes[0..last] }
end
491
+
492
# the :__nodes__ entry of the current scope
def nodes
  scope = Scope.current
  scope[:__nodes__]
end
495
+
496
# normalizes a nodes description recursively: strings are split on
# whitespace, hash keys are stripped and hash values / array items are
# normalized in turn; anything else raises "Error!"
def _transform_nodes(x)
  case x
  when String
    x.strip.split
  when Hash
    x.each_with_object({}) do |(key, value), normalized|
      normalized[key.strip] = _transform_nodes(value)
    end
  when Array
    x.map { |item| _transform_nodes(item) }
  else
    raise "Error!"
  end
end
508
+
509
# turns a node name into a node:
#   "g5k" (alias of "nancy.g5k") or "<site>.g5k" - the node of that G5K
#       site; allowed only without a parent
#   anything else - a simple node when there is no parent, otherwise a
#       node proxied through parent (the name is split at "@" into user
#       and host)
def _get_node_via_proxy(name, parent = nil)
  spec = name.strip
  spec = "nancy.g5k" if spec == "g5k"

  if /^(.+)\.g5k$/.match(spec)
    # special syntax for G5K
    raise "G5K proxies must be topmost" unless parent.nil?
    return g5k_site(spec.split(".").first)
  end

  user, host = spec.split("@")
  return simple_node(spec) if parent.nil?
  proxy_node(parent, user, host)
end
528
+
529
# walks a normalized nodes description and fills the collector hash:
#   String - a node reached via proxy, appended to nodes[:nodes]
#   Array  - every item is processed with the same proxy
#   Hash   - every key is a new proxy (reached via the current one,
#            appended to nodes[:proxies]) for the description stored under it
# anything else raises "Error!"
def __transform_with_proxy(structure, proxy, nodes)
  case structure
  when String
    nodes[:nodes].push(_get_node_via_proxy(structure, proxy))
  when Array
    structure.each { |item| __transform_with_proxy(item, proxy, nodes) }
  when Hash
    structure.each_pair do |proxy_name, nested|
      via = _get_node_via_proxy(proxy_name, proxy)
      nodes[:proxies].push(via)
      __transform_with_proxy(nested, via, nodes)
    end
  else
    raise "Error!"
  end
end
546
+
547
# processes a whole nodes description (no initial proxy) and returns
# { :nodes => [...], :proxies => [...] }
def _transform_with_proxy(tree)
  collected = { :nodes => [], :proxies => [] }
  __transform_with_proxy(tree, nil, collected)
  collected
end
552
+
553
# builds simple nodes from a machinefile (host names separated by whitespace).
#
# filename - path of the machinefile
# opts     - :user => user name put in front of every host ("user@host")
#
# When :user is not given the bare host name is passed to simple_node
# (the previous code produced "@host", i.e. an empty user name).
# Returns the list of nodes, one per entry of the file.
def nodes_from_machinefile(filename, opts = {})
  user = opts[:user]
  IO.read(filename).split.map do |host|
    simple_node(user.nil? ? host : "#{user}@#{host}")
  end
end
558
+
559
# reads a YAML nodes description and returns the nodes it describes
# (the proxies found on the way are not returned); opts is not used
# NOTE(review): YAML.load is applied to the file as is - confirm that
# node files always come from a trusted source
def nodes_from_file(filename, opts = {})
  description = YAML.load(IO.read(filename))
  found = _transform_with_proxy(_transform_nodes(description))
  found[:nodes]
end
566
+
567
# parses a list of "key=value" strings into a hash with symbol keys.
# Only the first "=" separates the key from the value, so values may
# themselves contain "=" (they used to be cut at the second "=").
# An entry without a value ("key" or "key=") maps to nil, as before.
def _parse_opts(array)
  array.each_with_object({}) do |option, parsed|
    key, value = option.split("=", 2)
    value = nil if value == ""   # keep the historical result for "key="
    parsed[key.to_sym] = value
  end
end
575
+
576
# builds nodes from the standard output of a result: every line is
# "user@host" optionally followed by key=value options; with a proxy the
# nodes are proxied through it, otherwise they are simple nodes
def nodes_from_result(result, proxy = nil)
  result.stdout.strip.lines.map(&:strip).map do |line|
    tokens = line.split
    userhost = tokens.first
    opts = _parse_opts(tokens[1..-1])
    next simple_node(userhost, opts) if proxy.nil?

    u, h = userhost.split("@")
    proxy_node(proxy, u, h, opts)
  end
end
590
+
591
+ # activities
592
+
593
# the nodes of the current scope (same as nodes)
def node_list
  nodes
end
596
+
597
# extracts nodes and a command from arguments:
#   [cmd]        - the command runs on node_list()
#   [nodes, cmd] - the command runs on the given node(s)
# returns [ nodes, cmd ]; any other number of arguments raises
def get_node_list(args)
  case args.length
  when 1
    [ node_list(), args.first ]
  when 2
    [ arrayize(args.first), args.last ]
  else
    raise "Wrong number of arguments"
  end
end
608
+
609
# turns the argument into a list of nodes: arrays are returned as they
# are, anything else is wrapped in a one-element array
def arrayize(nodes)
  nodes.is_a?(Array) ? nodes : [ nodes ]
end
616
+
617
# executes cmd on one node or on a list of nodes, one after another.
#
# nodes - a node or an array of nodes
# cmd   - the command to run
# env   - environment passed to every node; its :wd entry, if any, is not
#         part of the environment but is passed on as the :wd option
#
# The env hash is copied before :wd is removed, so the caller's hash is
# no longer modified. Returns the array of per-node results.
def execute(nodes, cmd, env = {})
  env = env.dup
  wd = env.delete(:wd)
  arrayize(nodes).map { |node| node.execute(cmd, env, :wd => wd) }
end
627
+
628
# executes cmd via execute and returns the first result
def execute_one(node, cmd, env = {})
  results = execute(node, cmd, env)
  results.first
end
631
+
632
# splits the arguments of execute_many & co. into [ nodes, cmd, opts ];
# a trailing Hash is taken (and removed from args) as the options
def _execute_many_parse_args(args)
  opts = args.last.is_a?(Hash) ? args.pop : {}
  nodes, cmd = get_node_list(args)
  [ nodes, cmd, opts ]
end
638
+
639
# prepares a TakTukRun for the given nodes; the first node is the master,
# whose "ssh taktuk" entry is the taktuk command. With :exclude_master
# the master is left out of the node list. Fresh temporary files from the
# engine's main directory are the defaults for :stdout, :stderr and
# :filename; the given options take precedence.
def _get_taktuk(nodes, options = {})
  # domains = nodes.map(&:domain).uniq
  # raise "TakTuk: nodes span different domains" if domains.length != 1

  master = nodes.first
  targets = options[:exclude_master] ? nodes.tail : nodes
  taktuk = File.join(master.directory.path, "ssh taktuk")
  directory = proxy.engine.main_directory
  defaults = {
    :stdout => directory.mktemp(),
    :stderr => directory.mktemp(),
    :filename => directory.mktemp()
  }
  TakTukRun.new(taktuk, targets, defaults.merge(options))
end
654
+
655
# makes sure taktuk is installed: first on the first node (through a
# direct execute of the shell-escaped command), then on all the nodes
# through execute_many; accepts a single node as well as a list and does
# nothing for an empty list
def bootstrap_taktuk(nodes)
  nodes = [ nodes ] if nodes.is_a?(AbstractNode)
  return if nodes.length == 0

  cmd = "(dpkg -l | grep taktuk) || apt-get install -y --force-yes taktuk"
  master = nodes.first
  master.execute(Shellwords.escape(cmd))

  proxy.log("#{master} has TakTuk now")

  execute_many(nodes, cmd, :propagate => true)
end
674
+
675
# executes a command on many nodes with TakTuk and returns a
# ManyExecutionResult of BasicRemoteResult objects (outputs kept in
# memory); raises when not every node succeeded
def execute_many_here(*args)
  nodes, cmd, opts = _execute_many_parse_args(args)
  return ManyExecutionResult.new([], cmd) if nodes.length == 0

  taktuk = _get_taktuk(nodes, opts)

  # command has to be escaped 2 times: local shell and remote shell
  succeeded, _failed = taktuk.execute(cmd, :escape => 2)

  unless succeeded.length == nodes.length
    raise "TakTuk: Some nodes failed (success: #{succeeded.length}/#{nodes.length}). See #{taktuk.stdout}"
  end

  results = succeeded.map do |entry|
    BasicRemoteResult.new(entry[:node], cmd, entry[:stdout], entry[:stderr])
  end
  ManyExecutionResult.new(results, cmd)
end
694
+
695
# executes a command on many nodes with TakTuk (execute_remote) and
# returns a ManyExecutionResult of FileRemoteResult objects (outputs kept
# in files); raises when not every node succeeded, pointing at the
# output file of the first failure, if any, and at TakTuk's own output
def execute_many(*args)
  nodes, cmd, opts = _execute_many_parse_args(args)
  return ManyExecutionResult.new([], cmd) if nodes.length == 0

  taktuk = _get_taktuk(nodes, opts)
  succeeded, failed = taktuk.execute_remote(cmd, :escape => 2)

  unless succeeded.length == nodes.length
    parts = [ "TakTuk: Some nodes failed (success: #{succeeded.length}/#{nodes.length})." ]
    parts.push(" See #{failed.first[:stdout_file]}") if failed.length > 0
    parts.push(" See #{taktuk.stdout}")
    raise parts.join
  end

  results = succeeded.map do |entry|
    FileRemoteResult.new(entry[:node], cmd, entry[:stdout_file], entry[:stderr_file])
  end
  ManyExecutionResult.new(results, cmd)
end
718
+
719
# copies one file to all the nodes: scp to the first node, then a TakTuk
# "put" from there to the others, then an md5sum check over all nodes.
#
# f     - a local path, a LocalExecutionResult or a FileRemote (for the
#         last two the stdout file of the result is used)
# nodes - a node or a list of nodes
# dest  - destination; when it ends with "/" the file keeps its base name
#         inside that directory
# opts  - not used
#
# Always returns nil; raises when nodes do not respond or hashes differ.
def distribute_one(f, nodes, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)

  source = case f
           when String then f
           when LocalExecutionResult then f.stdout_file
           when FileRemote then f.result.stdout_file
           else raise "I don't know how to distribute #{f.class}"
           end

  # a trailing slash marks a directory, otherwise dest is the file itself
  dest = File.join(dest, File.basename(source)) if dest.end_with?("/")

  proxy.log("Saving to: #{dest}")
  return nil if nodes.length == 0

  nodes.first.scp_many([ source ], dest)
  return nil if nodes.length == 1

  taktuk = _get_taktuk(nodes, :exclude_master => true)
  results = taktuk.put(dest, dest, :escape => 2)
  raise "Some nodes did not respond." if results.length + 1 != nodes.length

  put_hashes = results.map { |x| x[:hash] }.uniq
  raise "Some hashes were different." if put_hashes.length != 1

  verified = md5sum(nodes, [ dest ]).map { |x| x[:hash] }.uniq
  raise "Hashes could not be verified." if verified.length != 1

  nil
end
768
+
769
# checks that every given file has one and the same md5 hash on all the
# nodes; raises "File ... is not consistent" otherwise.
# fs may be a single path or a list of paths.
def file_consistency(nodes, fs)
  fs = [ fs ] unless fs.is_a?(Array)

  digests_by_file = Hash.new { |h, k| h[k] = [] }
  md5sum(nodes, fs).each do |entry|
    digests_by_file[entry[:filename]].push(entry[:hash])
  end

  digests_by_file.each_pair do |filename, digests|
    raise "File #{filename} is not consistent" if digests.uniq.length != 1
  end
end
782
+
783
# runs md5sum for the given files on all the nodes (execute_many_here)
# and returns a flat list of { :filename => ..., :hash => ... } entries,
# one per output line of every node.
# The output is walked with each_line; passing a block to String#lines
# is deprecated.
def md5sum(nodes, files)
  entries = []
  results = execute_many_here nodes, "md5sum #{files.join(' ')}"
  results.to_list.each do |result|
    result.stdout.strip.each_line do |line|
      hash, filename = line.split
      entries.push({ :filename => filename, :hash => hash })
    end
  end
  entries
end
794
+
795
# copies a set of files to the directory dest of all the nodes: scp_many
# to the first node (the master), then one TakTuk "put" per file from the
# master to the others; the hashes reported by the puts are compared with
# md5sum run on the master.
#
# glob  - a glob pattern (String), a LocalExecutionResult or a FileRemote
#         (the stdout file of the result is used); of an Array only the
#         first element is considered
# nodes - a node or a list of nodes
# dest  - destination directory on the nodes
# opts  - not used
#
# Returns nil when there are fewer than two nodes; raises when a node does
# not respond or hashes differ.
def distribute(glob, nodes, dest, opts = {})

  nodes = [ nodes ] unless nodes.is_a?(Array)
  glob = glob.first if glob.is_a?(Array)

  if glob.is_a?(String)
    files = Dir[glob] # get a list of files
  elsif glob.is_a?(LocalExecutionResult)
    files = [ glob.stdout_file ]
  elsif glob.is_a?(FileRemote)
    files = [ glob.result.stdout_file ]
  else
    raise "I don't know how to distribute #{glob.class}"
  end
  remote_files = files.map { |f| File.join(dest, File.basename(f)) }

  proxy.log("Found #{files.length} files to distribute.")

  return nil if nodes.length == 0

  master = nodes.first
  master.scp_many(files, dest)

  proxy.log("Files copied to #{master}:#{dest}")

  return nil if nodes.length == 1

  # hashes of the files as they are on the master; the local is no longer
  # called md5sum, which shadowed the method of the same name, and the
  # output is walked with each_line (the block form of String#lines is
  # deprecated)
  master_output = master.execute("md5sum #{remote_files.join(' ')}").stdout
  orig_hashes = {}
  master_output.strip.each_line do |line|
    hash, filename = line.strip.split
    orig_hashes[filename] = hash
  end

  hashes = {}
  remote_files.each do |filepath|
    proxy.log("Distributing #{filepath}")
    taktuk = _get_taktuk(nodes, :exclude_master => true)
    results = taktuk.put(filepath, filepath, :escape => 2)

    if results.length + 1 != nodes.length
      raise "Some nodes did not respond."
    end

    thelist = results.map { |x| x[:hash] }.uniq
    raise "Some hashes were different." if thelist.length != 1

    hashes[filepath] = thelist.first
  end

  raise "Weird?" if hashes.keys.sort != orig_hashes.keys.sort

  orig_hashes.keys.each do |filepath|
    raise "Hash for #{filepath} does not match." \
      if orig_hashes[filepath] != hashes[filepath]
  end

  proxy.log("MD5s match.")

end
857
+
858
# the value of the global $ssh_key
def ssh_key
  $ssh_key
end
861
+
862
# runs "true" on all the nodes through execute_many
def test_connectivity(nodes)
  execute_many(nodes, "true")
end
865
+
866
# copies a local file to all the nodes through a chain of ssh pipes:
# the file is scp'ed to the first node, every node gets a
# /tmp/.chainsend script that forwards its input to the next node (the
# last one just stores it), and the chain is started on the first node.
# Finally md5sum is run on all the nodes and the hashes must be equal.
#
# nodes - a node or a list of nodes
# src   - local path (must exist)
# dest  - destination directory; the file keeps its base name
# opts  - not used
#
# Always returns nil.
def chain_copy(nodes, src, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)
  raise "'#{src}' does not exist!" unless File.exist?(src)

  dest = File.join(dest, File.basename(src))
  return nil if nodes.length == 0

  master, laster = nodes.first, nodes.last
  master.scp(src, dest)
  proxy.log("#{src} copied to #{master}:#{dest}")
  return nil if nodes.length == 1

  # NOTE(review): the hash of the master copy is computed here but the
  # value is not used by the final check - confirm whether a comparison
  # with it was intended
  _orig_hash = master.execute("md5sum #{dest}").stdout.strip.split.first

  execute_many(nodes, "apt-get install -y mbuffer")

  # mbuffer creates a buffer of size 2% * memory (by default)
  # we do chainsend thing

  second = nodes[1].host
  ssh_opts = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
  # the ssh hop that feeds the chain script of the given host
  hop = lambda { |host| "ssh #{ssh_opts} #{host} \"bash /tmp/.chainsend $1\"" }

  master.file("/tmp/.chainsend") do |f|
    f.puts "set -eu"
    f.puts "cat $1 | #{hop.call(second)}"
  end
  laster.file("/tmp/.chainsend") do |f|
    f.puts "set -eu"
    f.puts "cat > $1"
  end
  # nodes strictly between the first and the last one store and forward
  nodes[1...-1].each_with_index do |node, offset|
    next_host = nodes[offset + 2].host
    node.file("/tmp/.chainsend") do |f|
      f.puts "set -eu"
      f.puts "tee $1 | mbuffer -q | #{hop.call(next_host)}"
    end
  end

  proxy.log("Chain prepared.")

  master.execute("bash /tmp/.chainsend #{dest}")
  results = execute_many_here(nodes, "md5sum #{dest}")
  hashes = results.to_list.map { |x| x.stdout.split.first }.uniq
  raise "Hashes differ." if hashes.length != 1

  nil
end
924
+
925
# cats a file on the node and wraps the path together with the result of
# that command in a FileRemote
def file(node, path)
  contents = execute_one(node, "cat #{path}")
  FileRemote.new(path, contents)
end
929
+
930
# make a copy of a file named "name" to "nodes"
# at path "where"; name can be:
#   * a path (at local fs)
#   * a LocalExecutionResult (its stdout file is copied)
#   * a FileRemote (the stdout file of its result is copied)
# of an Array only the first element is considered.
# Returns the list of the per-node scp results.
def copy(name, nodes, where)
  name = name.first if name.is_a?(Array)

  source = case name
           when String then name
           when LocalExecutionResult then name.stdout_file
           when FileRemote then name.result.stdout_file
           else raise "Unknown file source: #{name.class}"
           end

  arrayize(nodes).map { |node| node.scp(source, where) }
end
955
+
956
# for every node: copies the script registered under name in $files to a
# temporary file on the node, makes it executable, runs it and removes it
# NOTE(review): node_list is called with an argument here and nodes are
# expected to respond to chmod and rm; neither matches the definitions
# visible in this file - confirm that this method is still usable
def run_script(name, label = :all)
  script = $files[name]
  node_list(label).each do |node|
    remote = node.tmpfile
    node.scp(script, remote)
    node.chmod(remote, "700")
    node.execute(remote)
    node.rm(remote)
  end
end
966
+
967
# pings the node; returns true when the ping went through and false
# when it raised an ExecutionError
def check_node(node)
  debug "Checking #{node}..."
  begin
    node.ping()
  rescue ExecutionError
    return false
  end
  true
end
977
+
978
# reserves nodes through a fresh G5K::Library that logs to the engine;
# all arguments are passed on to its reserve_nodes
def g5k_reserve_nodes(*args)
  engine_proxy = proxy()
  library = G5K::Library.new
  library.logging = proc { |message| engine_proxy.engine.log(message) }
  library.proxy = proxy
  library.reserve_nodes(*args)
end
985
+
986
# picks a reservation through a fresh G5K::Library that logs to the
# engine; opts are passed on to its pick_reservation
def g5k_get_avail(opts = {})
  engine_proxy = proxy()
  library = G5K::Library.new
  library.logging = proc { |message| engine_proxy.engine.log(message) }
  library.proxy = proxy
  library.pick_reservation(opts)
end
994
+
995
# builds simple nodes "user@host" for every line of the given file
def nodes_file(user, filepath)
  hosts = IO.read(filepath).chomp.lines.map(&:chomp)
  hosts.map { |host| simple_node("#{user}@#{host}") }
end
1002
+
1003
# fetches the job opts[:id] of site opts[:site] through a fresh
# G5K::Library that logs with puts
def g5k_job(opts = {})
  library = G5K::Library.new
  library.logging = proc { |message| puts message }
  library.job(opts[:site], opts[:id])
end
1009
+
1010
# G5K nodes, under the G5K user name, for every assigned node of the job
def g5k_nodes(job)
  username = get_g5k_username()
  job["assigned_nodes"].map { |host| g5k_node(username, host) }
end
1015
+
1016
# name of the site of a job: the last path segment of the href of the
# job's first "parent" link
def g5k_site_from_job(job)
  parent = job["links"].find { |link| link["rel"] == "parent" }
  parent["href"].split("/").last
end
1022
+
1023
# keeps the vlan host names whose part before "-kavlan" equals the short
# name (the part before the first dot) of one of the ok nodes
def _filter_vlan(ok_nodes, vlan_nodes)
  short_names = {}
  ok_nodes.each { |fqdn| short_names[fqdn.split(".").first] = true }
  vlan_nodes.select do |vlan_host|
    short_names.key?(vlan_host.split("-kavlan").first)
  end
end
1031
+
1032
# generates a fresh SSH key pair on the frontend of the site, installs it
# on every node (private key as .ssh/id_rsa, public key via ssh-copy-id)
# and removes the pair from the frontend
# (original note: assumes you are on the frontend)
def g5k_deploy_keys(nodes, site)
  key = get_g5k_tmpfile("ssh_key")
  frontend = g5k_site(site)
  remove_pair = "rm -f #{key} #{key}.pub"

  frontend.execute("#{remove_pair}; ssh-keygen -f #{key} -q -N ''")
  nodes.each do |n|
    proxy.log "Sending key to: #{n.userhost}"
    frontend.execute("scp -o 'BatchMode=yes' -o 'UserKnownHostsFile=/dev/null' #{key} #{n.userhost}:.ssh/id_rsa")
    frontend.execute("ssh-copy-id -i #{key}.pub #{n.userhost}")
  end
  frontend.execute(remove_pair)
end
1044
+
1045
# deploys an environment on the nodes of a job with kadeploy3 (run on the
# frontend of the job's site) and installs a fresh SSH key on the nodes
# that were deployed successfully.
#
# job    - the job description (its "assigned_nodes" are deployed)
# env    - value of kadeploy3's -e option
# custom - extra arguments inserted into the kadeploy3 command line
# opts   - :count => use only the first <count> assigned nodes,
#          :real_nodes => host names to return instead of the deployed
#          ones, filtered by _filter_vlan against the deployed ones
#
# Returns G5K nodes (user root) for the final host names.
def g5k_kadeploy(job, env, custom = "", opts = {})
  site = g5k_site_from_job(job)
  frontend = g5k_site(site)

  nodes = job["assigned_nodes"]
  nodes = nodes[0...opts[:count]] if opts.key?(:count)
  final_nodes = opts[:real_nodes]

  proxy.log("Using #{nodes.length} machines.")

  machinefile = get_g5k_tmpfile("machines")
  nodes_ok = get_g5k_tmpfile("good_nodes")

  # the machine list is written locally and copied to the same path
  # on the frontend
  IO.write(machinefile, nodes.join("\n"))
  frontend.scp(machinefile, machinefile)
  kadeploy = "kadeploy3 -f #{machinefile} -e #{env} -k #{custom} -o #{nodes_ok}"
  proxy.log("Running deployment: #{kadeploy}")
  frontend.execute(kadeploy)
  frontend.execute("rm -f #{machinefile}")

  key = get_g5k_tmpfile("ssh_key")
  remove_pair = "rm -f #{key} #{key}.pub"
  frontend.execute("#{remove_pair}; ssh-keygen -f #{key} -q -N ''")

  # kadeploy3 wrote the successfully deployed nodes to nodes_ok
  ok_nodes = frontend.execute("sort -V #{nodes_ok}").stdout.split
  frontend.execute("rm -f #{nodes_ok}")

  proxy.log("Nodes that survived: #{ok_nodes.length}/#{nodes.length}")
  proxy.log("Final nodes: #{final_nodes}")

  final_nodes = final_nodes.nil? ? ok_nodes : _filter_vlan(ok_nodes, final_nodes)

  # we have to install SSH keys
  final_nodes.each do |n|
    host = "root@#{n}"
    frontend.execute("scp -o 'BatchMode=yes' -o 'UserKnownHostsFile=/dev/null' #{key} #{host}:.ssh/id_rsa")
    frontend.execute("ssh-copy-id -i #{key}.pub #{host}")
  end
  frontend.execute(remove_pair)

  final_nodes.map { |x| g5k_node("root", x) }
end
1092
+
1093
# Returns the frontend node (as produced by g5k_site) of the site that
# g5k_site_from_job reports for the given job.
def g5k_frontend_from_job(job)
  g5k_site(g5k_site_from_job(job))
end
1098
+
1099
# Runs "kavlan -V -j <uid>" on the job's frontend and returns its stdout,
# stripped and converted with to_i (so non-numeric output yields 0).
def g5k_kavlan_id(job)
  job_id = job['uid']
  result = g5k_frontend_from_job(job).execute("kavlan -V -j #{job_id}")
  result.stdout.strip.to_i
end
1105
+
1106
# Runs "kavlan -l -j <uid>" on the job's frontend, stores its stdout in a
# file obtained from get_g5k_tmpfile, passes that file to frontend.scp with
# the same path on both sides, and returns the path.
def g5k_kavlan_nodes_file(job)
  # TODO
  job_id = job['uid']
  path = get_g5k_tmpfile("kavlan_nodes")
  frontend = g5k_frontend_from_job(job)
  listing = frontend.execute("kavlan -l -j #{job_id}")
  IO.write(path, listing.stdout)
  frontend.scp(path, path)
  path
end
1116
+
1117
# Runs "kavlan -l -j <uid>" on the job's frontend and returns one
# g5k_node("root", host) per line of its (stripped) output.
def g5k_kavlan_nodes(job)
  job_id = job['uid']
  listing = g5k_frontend_from_job(job).execute("kavlan -l -j #{job_id}")
  listing.stdout.strip.lines.map { |line| g5k_node("root", line.strip) }
end
1125
+
1126
# Asks the engine's nodes manager for the node of user get_g5k_username at
# "<site>.grid5000.fr", built with a G5KNodeFactory.
def g5k_site(site)
  proxy.engine.nodes_manager.get_node(get_g5k_username(), "#{site}.grid5000.fr", G5KNodeFactory.new)
end
1130
+
1131
# Asks the engine's nodes manager for the node user@host, built with a
# G5KNodeFactory.
def g5k_node(user, host)
  proxy.engine.nodes_manager.get_node(user, host, G5KNodeFactory.new)
end
1135
+
1136
# Asks the engine's nodes manager for the node user@host, built with a
# ProxiedFactory constructed from +via+; +opts+ is forwarded unchanged.
def proxy_node(via, user, host, opts = {})
  proxy.engine.nodes_manager.get_node(user, host, ProxiedFactory.new(via), opts)
end
1140
+
1141
# Splits a host specification of the form "user@host" or "host".
#
# pattern - the specification; surrounding whitespace is ignored.
#
# Returns [user, host]. When no user is given, Etc.getlogin is used and a
# warning is logged through proxy.log.
# Raises RuntimeError when the specification is empty, contains more than one
# "@", or has an empty user or host part (e.g. "user@" or "@host").
def split(pattern)
  pattern = pattern.strip
  raise "Empty host specification" if pattern == ""

  # A negative limit keeps trailing empty fields, so "user@" is seen as two
  # parts (with an empty host) instead of collapsing to a single "user".
  parts = pattern.split("@", -1)
  case parts.length
  when 1
    proxy.log "User not specified. This is not reproducible."
    [ Etc.getlogin, pattern ]
  when 2
    raise "Invalid host specification: #{pattern}" if parts.any? { |part| part.empty? }
    parts
  else
    raise "Invalid host specification: #{pattern}"
  end
end
1156
+
1157
# Finds hosts answering a broadcast ping and returns a node for each.
#
# pattern - "user@address" or "address" (parsed by split); the address is
#           passed to NodeUtils.broadcast_ping.
#
# Returns one manager.get_node(user, ip) per responding address.
def broadcast(pattern)
  manager = proxy.engine.nodes_manager
  user, address = split(pattern)
  found = NodeUtils.broadcast_ping(address)
  proxy.log("Found #{found.length} hosts via ICMP broadcast")
  found.map { |ip| manager.get_node(user, ip) }
end
1164
+
1165
# Builds nodes for +count+ consecutive IPv4 addresses.
#
# ip_start - "user@a.b.c.d" or "a.b.c.d" (parsed by split): the first address.
# count    - how many consecutive addresses to produce; zero or a negative
#            value yields an empty list.
#
# Returns an array of simple_node("user@ip") results, in ascending order.
# Raises RuntimeError when the host is not a dotted-quad IPv4 address with
# octets in 0..255, or when the range would run past 255.255.255.255.
def node_range(ip_start, count)
  user, host = split(ip_start)

  octets = host.split(".")
  valid = octets.length == 4 && octets.all? { |o| o =~ /\A\d+\z/ && o.to_i <= 255 }
  raise "Invalid IPv4 address: #{host}" unless valid

  # Work on the 32-bit integer value so carries between octets are implicit.
  first = octets.inject(0) { |acc, o| (acc << 8) | o.to_i }
  if count > 0 && first + count - 1 > 0xFFFFFFFF
    raise "Address range starting at #{host} exceeds the IPv4 address space"
  end

  (0...count).map do |offset|
    value = first + offset
    ip = [24, 16, 8, 0].map { |shift| (value >> shift) & 0xFF }.join(".")
    simple_node("#{user}@#{ip}")
  end
end
1182
+
1183
# Returns the node described by +pattern+ ("user@host" or "host", parsed by
# split), obtained from the engine's nodes manager with a SimpleNodeFactory;
# +opts+ is forwarded unchanged.
def simple_node(pattern, opts = {})
  user, host = split(pattern)
  proxy.engine.nodes_manager.get_node(user, host, SimpleNodeFactory.new, opts)
end
1188
+
1189
# Returns a simple node for the current user at 127.0.0.1.
#
# The user name is taken from ENV["USER"]; when that is unset or empty,
# Etc.getlogin is used instead, so that an empty user ("@127.0.0.1") is never
# handed to simple_node.
# Raises RuntimeError when no user name can be determined.
def localhost()
  user = ENV["USER"]
  user = Etc.getlogin if user.nil? || user.empty?
  raise "Cannot determine the current user" if user.nil? || user.empty?
  simple_node("#{user}@127.0.0.1")
end
1193
+
1194
+ end
1195
+
1196
# Helpers for rendering the ERB templates shipped next to this file and for
# discovering hosts with a broadcast ping.
module NodeUtils

  # Resolves +name+ relative to the directory containing this source file.
  # NOTE(review): relies on a realpath helper that is not defined in this
  # module - confirm where it comes from.
  def self.get_templates_dir(name)
    here = File.dirname(__FILE__)
    return realpath(File.join(here, name))
  end

  # Renders the ERB template file +t+ with the values of the hash +ctx+ and
  # writes the result to +path+ (binary mode), setting its mode to 0700.
  def self.render_file(t, path, ctx)
    out = Erb.render(IO.read(t), ctx)
    File.open(path, "wb") do |f|
      f.write(out)
      f.chmod(0700)
    end
  end

  # Renders every regular file of "templates/utils" into the directory +path+
  # (keeping the file names) and renders "templates/ssh-config.<name>" to
  # "<path>/ssh-config". The template context is +ctx+ plus a :templates
  # entry holding the resolved "templates" directory.
  def self.install_templates(path, name, ctx)
    templates = get_templates_dir("templates")
    ctx = ctx.merge({ :templates => templates })
    tdir = get_templates_dir("templates/utils")
    Dir.entries(tdir).each do |f|
      template = File.join(tdir, f)
      output = File.join(path, f)
      # File.file? also filters out the "." and ".." entries.
      render_file(template, output, ctx) if File.file?(template)
    end
    config = get_templates_dir("templates/ssh-config.#{name}")
    ssh_config = File.join(path, "ssh-config")
    render_file(config, ssh_config, ctx)
  end

  # uses a broadcast ping to get a list of nodes that respond
  #
  # address - broadcast address handed to ping -b.
  # timeout - value for ping's -w option (default 2).
  #
  # Returns the unique responding addresses in numeric order.
  # Raises RuntimeError ("No nodes found") when nobody answered. The exit
  # status of ping itself is not checked.
  def self.broadcast_ping(address, timeout = 2)
    output = %x(ping -c 3 -n -b #{address} -w #{timeout} 2> /dev/null)
    addresses = parse_ping_replies(output)
    raise "No nodes found" if addresses.length == 0
    return addresses
  end

  # Extracts the sender addresses from the reply lines of ping output.
  #
  # Returns the unique addresses ordered by the numeric value of their octets,
  # so that "10.0.0.2" comes before "10.0.0.10" (a plain string sort would put
  # them the other way round).
  def self.parse_ping_replies(output)
    replies = output.scan(/from ([\.0-9]+): icmp_seq=/).flatten.uniq
    return replies.sort_by { |ip| ip.split(".").map(&:to_i) }
  end

  # An OpenStruct whose fields are visible as bare names inside ERB templates.
  class Erb < OpenStruct

    # Renders an ERB template with this object's binding.
    # +hash+ is, despite its name, the template source text.
    def render(hash)
      ERB.new(hash).result(binding)
    end

    # Renders the +template+ text with the keys of +hash+ available as names.
    def self.render(template, hash)
      x = Erb.new(hash)
      return x.render(template)
    end

  end

end
1249
+
1250
+ end