xpflow 0.1b
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/xpflow +96 -0
- data/lib/colorado.rb +198 -0
- data/lib/json/add/core.rb +243 -0
- data/lib/json/add/rails.rb +8 -0
- data/lib/json/common.rb +423 -0
- data/lib/json/editor.rb +1369 -0
- data/lib/json/ext.rb +28 -0
- data/lib/json/pure/generator.rb +442 -0
- data/lib/json/pure/parser.rb +320 -0
- data/lib/json/pure.rb +15 -0
- data/lib/json/version.rb +8 -0
- data/lib/json.rb +62 -0
- data/lib/mime/types.rb +881 -0
- data/lib/mime-types.rb +3 -0
- data/lib/restclient/abstract_response.rb +106 -0
- data/lib/restclient/exceptions.rb +193 -0
- data/lib/restclient/net_http_ext.rb +55 -0
- data/lib/restclient/payload.rb +235 -0
- data/lib/restclient/raw_response.rb +34 -0
- data/lib/restclient/request.rb +316 -0
- data/lib/restclient/resource.rb +169 -0
- data/lib/restclient/response.rb +24 -0
- data/lib/restclient.rb +174 -0
- data/lib/xpflow/bash.rb +341 -0
- data/lib/xpflow/bundle.rb +113 -0
- data/lib/xpflow/cmdline.rb +249 -0
- data/lib/xpflow/collection.rb +122 -0
- data/lib/xpflow/concurrency.rb +79 -0
- data/lib/xpflow/data.rb +393 -0
- data/lib/xpflow/dsl.rb +816 -0
- data/lib/xpflow/engine.rb +574 -0
- data/lib/xpflow/ensemble.rb +135 -0
- data/lib/xpflow/events.rb +56 -0
- data/lib/xpflow/experiment.rb +65 -0
- data/lib/xpflow/exts/facter.rb +30 -0
- data/lib/xpflow/exts/g5k.rb +931 -0
- data/lib/xpflow/exts/g5k_use.rb +50 -0
- data/lib/xpflow/exts/gui.rb +140 -0
- data/lib/xpflow/exts/model.rb +155 -0
- data/lib/xpflow/graph.rb +1603 -0
- data/lib/xpflow/graph_xpflow.rb +251 -0
- data/lib/xpflow/import.rb +196 -0
- data/lib/xpflow/library.rb +349 -0
- data/lib/xpflow/logging.rb +153 -0
- data/lib/xpflow/manager.rb +147 -0
- data/lib/xpflow/nodes.rb +1250 -0
- data/lib/xpflow/runs.rb +773 -0
- data/lib/xpflow/runtime.rb +125 -0
- data/lib/xpflow/scope.rb +168 -0
- data/lib/xpflow/ssh.rb +186 -0
- data/lib/xpflow/stat.rb +50 -0
- data/lib/xpflow/stdlib.rb +381 -0
- data/lib/xpflow/structs.rb +369 -0
- data/lib/xpflow/taktuk.rb +193 -0
- data/lib/xpflow/templates/ssh-config.basic +14 -0
- data/lib/xpflow/templates/ssh-config.inria +18 -0
- data/lib/xpflow/templates/ssh-config.proxy +13 -0
- data/lib/xpflow/templates/taktuk +6590 -0
- data/lib/xpflow/templates/utils/batch +4 -0
- data/lib/xpflow/templates/utils/bootstrap +12 -0
- data/lib/xpflow/templates/utils/hostname +3 -0
- data/lib/xpflow/templates/utils/ping +3 -0
- data/lib/xpflow/templates/utils/rsync +12 -0
- data/lib/xpflow/templates/utils/scp +17 -0
- data/lib/xpflow/templates/utils/scp_many +8 -0
- data/lib/xpflow/templates/utils/ssh +3 -0
- data/lib/xpflow/templates/utils/ssh-interactive +4 -0
- data/lib/xpflow/templates/utils/taktuk +19 -0
- data/lib/xpflow/threads.rb +187 -0
- data/lib/xpflow/utils.rb +569 -0
- data/lib/xpflow/visual.rb +230 -0
- data/lib/xpflow/with_g5k.rb +7 -0
- data/lib/xpflow.rb +349 -0
- metadata +135 -0
data/lib/xpflow/nodes.rb
ADDED
@@ -0,0 +1,1250 @@
|
|
1
|
+
|
2
|
+
require 'erb'
|
3
|
+
require 'ostruct'
|
4
|
+
require 'xpflow/exts/g5k'
|
5
|
+
require 'yaml'
|
6
|
+
require 'thread'
|
7
|
+
require 'shellwords'
|
8
|
+
|
9
|
+
# Returns the Grid'5000 username stored in the global $g5k_user.
# Raises RuntimeError ("No G5K username!") when the global is nil.
def get_g5k_username
  if $g5k_user.nil?
    raise "No G5K username!"
  end
  $g5k_user
end
|
13
|
+
|
14
|
+
module XPFlow
|
15
|
+
|
16
|
+
# manages all nodes
|
17
|
+
|
18
|
+
# Builds nodes through factories and gives every node its own,
# uniquely named subdirectory of the manager's directory.
class NodesManager

  # directory - object responding to subdir(name); node directories are
  #             obtained from it.
  def initialize(directory)
    @directory = directory
    @mutex = Mutex.new
    @node_counter = 0
  end

  # Runs the block while holding the manager's mutex and returns its value.
  def synchronize(&block)
    @mutex.synchronize(&block)
  end

  # Delegates to the underlying directory object.
  def subdir(name)
    @directory.subdir(name)
  end

  # Builds a node for user@host with the given factory.
  #
  # The node directory name is "<host>--<user>--<factory name>--<counter>",
  # where the counter is incremented on every call (under the lock).
  # The factory receives opts plus :factory => factory. The caller's hash
  # is left untouched (it used to be modified in place).
  def get_node(user, host, factory, opts = {})
    synchronize do
      @node_counter += 1
      node_directory = subdir("#{host}--#{user}--#{factory.name}--#{@node_counter}")
      factory.build(user, host, node_directory, opts.merge(:factory => factory))
    end
  end

end
|
44
|
+
|
45
|
+
# Factory for directly reachable hosts; produces SimpleNode instances.
class SimpleNodeFactory

  # Name of this factory (used in node directory names and as the
  # accessibility domain of the nodes it builds).
  def name
    "normal"
  end

  # Passes every argument through to SimpleNode.new unchanged.
  def build(*node_args)
    SimpleNode.new(*node_args)
  end

end
|
57
|
+
|
58
|
+
# Factory producing G5KNode instances.
class G5KNodeFactory

  def name
    "grid5000"
  end

  # Builds a G5KNode. A :group derived from the host name is supplied as
  # a default; a :group already present in opts takes precedence.
  def build(user, host, node_directory, opts)
    defaults = { :group => _get_group(host) }
    G5KNode.new(user, host, node_directory, defaults.merge(opts))
  end

  # Extracts the cluster part of a "<cluster>-<nodeid>..." host name.
  # Returns nil when the host does not match. Note that the pattern
  # requires at least one more character after the first digit.
  def _get_group(host)
    matched = /^(\w+)-(\d+).+$/.match(host)
    matched ? matched[1] : nil
  end

end
|
79
|
+
|
80
|
+
# Factory for nodes that are reached through another node (the proxy).
class ProxiedFactory

  # node - the node used as the proxy for everything this factory builds.
  def initialize(node)
    @node = node
  end

  def name
    "proxy"
  end

  # Builds a ProxiedNode whose options carry :proxy => the proxy node.
  # The options are merged into a new hash so the caller's hash is not
  # modified (the same approach G5KNodeFactory#build uses).
  def build(user, host, directory, opts)
    ProxiedNode.new(user, host, directory, opts.merge(:proxy => @node))
  end

end
|
96
|
+
|
97
|
+
# Base class of all nodes (a user@host pair plus a working directory
# object). Every remote operation is delegated to the directory object
# via run / run_ssh. Subclasses must define __setup__, which the
# constructor calls after storing its arguments.
class AbstractNode

  attr_reader :user, :host, :directory

  def initialize(user, host, directory, opts = {})
    @user, @host, @directory = user, host, directory
    @mutex = Mutex.new
    @opts = opts

    __setup__()
  end

  # The options hash given at construction time.
  def options
    @opts
  end

  # Accessibility domain of the node (nodes within one domain are
  # pairwise accessible and reachable): the name of the factory stored
  # under :factory in the options.
  def domain
    @opts[:factory].name
  end

  # Runs the block while holding this node's mutex.
  def synchronize(&block)
    @mutex.synchronize(&block)
  end

  def group
    @opts[:group]
  end

  def userhost
    "#{@user}@#{@host}"
  end

  # Runs "mktemp" on the node and returns the stripped output.
  def tmpfile
    execute("mktemp").stdout.strip
  end

  def to_s
    "#{self.class}('#{userhost}')"
  end

  def path
    @directory.path
  end

  # Runs md5sum on the node for the given files and returns a hash
  # mapping each reported file name to its checksum.
  def md5sum(files)
    listing = execute("md5sum #{files.join(' ')}").stdout.strip
    listing.lines.each_with_object({}) do |line, sums|
      digest, filename = line.split
      sums[filename] = digest
    end
  end

  def run(*args)
    @directory.run(*args)
  end

  # Runs cmd through the directory's run_ssh.
  # env  - only String values are forwarded; keys are converted to strings.
  # opts - only :wd is read and passed on.
  def execute(cmd, env = {}, opts = {})
    wd = opts[:wd]
    string_env = {}
    env.each_pair do |key, value|
      string_env[key.to_s] = value if value.is_a?(String)
    end
    @directory.run_ssh(cmd, :env => string_env, :wd => wd)
  end

  # Like execute, but passes out / err to run_ssh as :out / :err.
  def execute_with_files(cmd, out, err)
    @directory.run_ssh(cmd, :out => out, :err => err)
  end

  def ping()
    @directory.run("ping")
  end

  def hostname()
    @directory.run("hostname")
  end

  def scp(from, to)
    @directory.run("scp #{from} #{to}")
  end

  def scp_many(files, to_dir)
    joined = files.join(" ") # TODO: escaping?
    @directory.run("scp_many #{to_dir} #{joined}")
  end

  # Creates a temporary file through the directory's mktemp (the block
  # is handed to mktemp) and then copies it to path with scp.
  def file(path, &block)
    scp(@directory.mktemp(&block), path)
  end

  # Returns a factory for nodes proxied through this node.
  def proxy_factory
    ProxiedFactory.new(self)
  end

  # Values handed to the templates: user, host and directory path.
  def context
    { :user => user, :host => host, :path => path }
  end

end
|
212
|
+
|
213
|
+
# A node that is installed using a set of templates. Subclasses provide
# template_name.
class TemplateNode < AbstractNode

  # Installs the templates named by template_name into the node's
  # directory path, using the node's context.
  # Raises RuntimeError when template_name returns nil.
  def __setup__
    if template_name().nil?
      raise "No template!"
    end
    NodeUtils.install_templates(path(), template_name(), context())
  end

end
|
221
|
+
|
222
|
+
# A simple, directly reachable node.
class SimpleNode < TemplateNode

  # Template set used for this kind of node.
  def template_name
    return "basic"
  end

end
|
230
|
+
|
231
|
+
# A node reached through the proxy node stored under :proxy in its options.
class ProxiedNode < TemplateNode

  # Template set used for this kind of node.
  def template_name
    return "proxy"
  end

  # The inherited context plus :proxy, the "ssh" entry inside the proxy
  # node's directory path.
  def context
    inherited = super
    inherited.merge({ :proxy => "#{@opts[:proxy].path}/ssh" })
  end

end
|
242
|
+
|
243
|
+
# A Grid'5000 node. Which templates get installed depends on where the
# engine itself is running (see __setup__).
class G5KNode < AbstractNode

  # Returns [:g5k], followed by the first known site name contained in
  # the host name (as a symbol) when there is one.
  def labels
    # TODO: I don't know if it is a good way to do it
    sites = %w{bordeaux grenoble lille luxembourg lyon nancy reims rennes sophia toulouse}
    site = sites.find { |candidate| @host.include?(candidate) }
    site.nil? ? [ :g5k ] : [ :g5k, site.to_sym ]
  end

  # True when the local hostname ($hostname) ends with 'grid5000.fr'.
  def self.inside_g5k
    $hostname.end_with?('grid5000.fr')
  end

  # here we make some magic; there are 3 cases:
  #   1. Inside grid5000, we make efficient bootstrap using basic-templates
  #   2. Inside inria, we have efficient access to G5K
  #   3. Really outside (``chez moi'' for example)
  #
  # Case 2 is detected from the first "domain "/"search " line of
  # /etc/resolv.conf, whose second field is passed to inria_gateway.
  def __setup__
    return install_inside_g5k() if G5KNode.inside_g5k()

    if File.exist?("/etc/resolv.conf")
      entry = IO.read("/etc/resolv.conf").lines.find do |line|
        line.start_with?("domain ") || line.start_with?("search ")
      end
      unless entry.nil?
        gw = inria_gateway(entry.split[1])
        return install_inside_inria(gw) unless gw.nil?
      end
    end

    install_generic()
  end

  # Returns the gateway host for a known DNS domain, or nil.
  def inria_gateway(domain)
    # more stuff has to be implemented here
    return "grid5000.loria.fr" if domain == "loria.fr"
    nil
  end

  def install_inside_g5k
    NodeUtils.install_templates(path(), "basic",
      context.merge({ :g5k_user => get_g5k_username() })
    )
  end

  def install_inside_inria(gw)
    NodeUtils.install_templates(path(), "inria",
      context.merge({ :g5k_user => get_g5k_username(), :gw => gw })
    )
  end

  def install_generic
    NodeUtils.install_templates(path(), "inria",
      context.merge({ :g5k_user => get_g5k_username(), :gw => "access.grid5000.fr" })
    )
  end

  # Returns root nodes for a job. The host list is taken from the output
  # of "kavlan -l -j <uid>" run on the site named by the job's "parent"
  # link; if that command raises or prints nothing, the job's
  # "assigned_nodes" are used instead.
  #
  # Fix: the output is read via .stdout (as elsewhere in this file).
  # Previously .lines was called on the result object itself and the
  # resulting error was swallowed by the rescue, so the fallback was
  # always taken.
  def self.kavlan(job, manager)
    link = job["links"].find { |x| x["rel"] == "parent" }["href"]
    site = link.split("/").last
    front = manager.get_node(get_g5k_username(), "#{site}.grid5000.fr", G5KNodeFactory.new)
    ns = job["assigned_nodes"]
    begin
      listed = front.execute("kavlan -l -j #{job["uid"]}").stdout.lines.map(&:strip).reject(&:empty?)
      ns = listed unless listed.empty?
    rescue StandardError
      # best effort: keep the assigned nodes when kavlan cannot be queried
    end
    ns.map { |x| manager.get_node("root", x, G5KNodeFactory.new) }
  end

  # Path of the user's ssh config, with ~ expanded by the shell.
  def self.get_ssh_config
    %x(echo ~/.ssh/config).strip
  end

  # Path of the user's RSA public key, with ~ expanded by the shell.
  def self.obtain_ssh_pubkey_path
    %x(echo ~/.ssh/id_rsa.pub).strip
  end

end
|
323
|
+
|
324
|
+
### TYPES OF RESULTS ###
|
325
|
+
|
326
|
+
# Result of a command run on a node, holding the captured stdout and
# stderr as given to the constructor.
class BasicRemoteResult

  attr_reader :opts, :node, :stdout, :stderr

  def initialize(node, cmd, stdout, stderr, opts = {})
    @node, @cmd = node, cmd
    @stdout, @stderr = stdout, stderr
    @opts = opts
  end

  # The command this result belongs to.
  def command
    @cmd
  end

  # Writes the stored stdout to the local file filename.
  def save_stdout(filename)
    IO.write(filename, @stdout)
  end

  # Writes the stored stderr to the local file filename.
  def save_stderr(filename)
    IO.write(filename, @stderr)
  end

  def to_s
    "BasicRemoteResult('#{@cmd}' on #{@node})"
  end

end
|
364
|
+
|
365
|
+
# Result whose stdout and stderr constructor arguments are *paths* of
# files; the contents are fetched by running "cat" through the node.
class FileRemoteResult < BasicRemoteResult

  def initialize(node, cmd, stdout, stderr, opts = {})
    # however! stdout & stderr are *paths*!
    super
  end

  # Contents of the stdout file, read with "cat" via the node.
  def stdout
    @node.execute("cat #{@stdout}").stdout
  end

  # Contents of the stderr file, read with "cat" via the node.
  def stderr
    @node.execute("cat #{@stderr}").stdout
  end

  # Path of the stdout file.
  def stdout_file
    @stdout
  end

  # Path of the stderr file.
  def stderr_file
    @stderr
  end

  # Cats the stdout file via the node, with filename as the :out target
  # and "/dev/null" as the :err target.
  def save_stdout(filename)
    @node.execute_with_files("cat #{@stdout}", filename, "/dev/null")
  end

  # Same as save_stdout, for the stderr file.
  def save_stderr(filename)
    @node.execute_with_files("cat #{@stderr}", filename, "/dev/null")
  end

  def to_s
    "FileRemoteResult('#{@cmd}' on #{@node}, out => #{@stdout}, err => #{@stderr})"
  end

end
|
401
|
+
|
402
|
+
# a remote file (but usually already local)
# result is probably: LocalExecutionResult
class FileRemote

  attr_reader :path, :result

  # path   - path of the file
  # result - result of the command that read the file
  def initialize(path, result)
    @path, @result = path, result
  end

end
|
415
|
+
|
416
|
+
# The per-node results of one command that was run on many nodes.
class ManyExecutionResult

  # list - the individual results
  # cmd  - the command they belong to
  def initialize(list, cmd)
    @list, @command = list, cmd
  end

  # The underlying list of results.
  def to_list
    @list
  end

  # Number of results.
  def length
    @list.length
  end

  def to_s
    "ManyResult('#{@command}' on #{@list.length} nodes)"
  end

  # Iterates over the results; returns whatever the list's each returns.
  def each(&block)
    @list.each(&block)
  end

end
|
440
|
+
|
441
|
+
|
442
|
+
|
443
|
+
# cache the local hostname and the path to the user's SSH public key in globals
|
444
|
+
$hostname = %x(hostname).strip
|
445
|
+
$ssh_key = G5KNode.obtain_ssh_pubkey_path
|
446
|
+
|
447
|
+
## XPFLOW LIBRARY
|
448
|
+
|
449
|
+
class NodesLibrary < ActivityLibrary
|
450
|
+
|
451
|
+
activities :node_list, :execute, :copy, :run_script, :check_node, :file,
|
452
|
+
:g5k_get_avail, :proxy_node, :broadcast, :g5k_site, :g5k_job,
|
453
|
+
:g5k_nodes, :monitor_node,
|
454
|
+
:g5k_kavlan_id, :g5k_kavlan_nodes_file, :g5k_frontend_from_job,
|
455
|
+
:g5k_kavlan_nodes, :nodes_file, :execute_funny,
|
456
|
+
:g5k_node, :execute_many, :execute_many_local, :all_prefixes, :execute_one, :distribute_one,
|
457
|
+
:execute_many_ignore_errors, :g5k_kadeploy, :execute_many_here,
|
458
|
+
:bootstrap_taktuk, :simple_node, :node_range, :taktuk_raw, :test_connectivity,
|
459
|
+
:nodes_from_file, :nodes_from_result, :distribute, :chain_copy, :ssh_key,
|
460
|
+
:nodes_from_machinefile, :g5k_deploy_keys, :localhost, :file_consistency,
|
461
|
+
:ping_localhost, :ping_node, :g5k_reserve_nodes
|
462
|
+
|
463
|
+
# Nothing to set up for this library; always returns nil.
def setup
  return nil
end
|
466
|
+
|
467
|
+
# Runs "ping <target> -c 1" on node and returns the value following
# "time=" in its output as a Float (0.0 when no such value is found,
# because nil.to_f is 0.0).
def ping_node(node, target)
  output = execute_one(node, "ping #{target} -c 1").stdout
  reported = output[/time=(\d+.*) /, 1]
  reported.to_f
end
|
471
|
+
|
472
|
+
# Pings "localhost" from node; when node is nil, the node returned by
# localhost() is used.
def ping_localhost(node = nil)
  source = node.nil? ? self.localhost() : node
  ping_node(source, "localhost")
end
|
476
|
+
|
477
|
+
# Builds a path of the form "/tmp/.<prefix>-<g5k username>-<16 hex digits>".
# The digits come from Kernel#rand; nothing is created on disk here.
def get_g5k_tmpfile(prefix = "tmp")
  digits = Array.new(16) { (rand * 16).to_i.to_s(16) }
  "/tmp/.#{prefix}-#{get_g5k_username()}-#{digits.join}"
end
|
481
|
+
|
482
|
+
# Returns the prefixes of nodes whose lengths are inc, 2*inc, 3*inc, ...
# (as long as they fit into nodes).
#
#   all_prefixes([a, b, c, d], 2)  # => [[a, b], [a, b, c, d]]
#
# Raises ArgumentError when inc is smaller than 1; such a value used to
# make the method loop forever.
def all_prefixes(nodes, inc = 1)
  raise ArgumentError, "inc must be at least 1 (got #{inc.inspect})" if inc < 1

  (inc - 1).step(nodes.length - 1, inc).map { |last| nodes[0..last] }
end
|
491
|
+
|
492
|
+
# The value stored under :__nodes__ in the current scope.
def nodes
  current_scope = Scope.current
  current_scope[:__nodes__]
end
|
495
|
+
|
496
|
+
# Normalizes a node description recursively:
#   * String - split on whitespace into a list of names
#   * Hash   - keys are stripped, values are normalized recursively
#   * Array  - every element is normalized recursively
# Raises RuntimeError for any other value, naming the offending value.
def _transform_nodes(x)
  case x
  when String
    x.strip.split
  when Hash
    x.each_with_object({}) do |(key, value), out|
      out[key.strip] = _transform_nodes(value)
    end
  when Array
    x.map { |item| _transform_nodes(item) }
  else
    raise "Cannot interpret #{x.inspect} (#{x.class}) in a node description; " \
          "expected a String, an Array or a Hash"
  end
end
|
508
|
+
|
509
|
+
# Resolves one entry of a node description into a node.
#   * "<site>.g5k" (or just "g5k", which stands for "nancy.g5k") is
#     resolved with g5k_site; such entries must not have a parent.
#   * anything else is "user@host": a simple node without a parent,
#     otherwise a node proxied through parent.
def _get_node_via_proxy(name, parent = nil)
  name = name.strip
  name = "nancy.g5k" if name == "g5k"

  if name =~ /^(.+)\.g5k$/
    # special syntax for G5K
    raise "G5K proxies must be topmost" unless parent.nil?
    return g5k_site(name.split(".").first)
  end

  user, host = name.split("@")
  return simple_node(name) if parent.nil?
  proxy_node(parent, user, host)
end
|
528
|
+
|
529
|
+
# Walks a normalized node description and fills the accumulator nodes:
#   * String - resolved through proxy and appended to nodes[:nodes]
#   * Array  - every element is processed with the same proxy
#   * Hash   - each key is resolved into a new proxy (appended to
#              nodes[:proxies]) through which its value is processed
# Raises RuntimeError ("Error!") for any other value.
def __transform_with_proxy(structure, proxy, nodes)
  case structure
  when String
    nodes[:nodes].push(_get_node_via_proxy(structure, proxy))
  when Array
    structure.each { |child| __transform_with_proxy(child, proxy, nodes) }
  when Hash
    structure.each_pair do |gateway, children|
      via = _get_node_via_proxy(gateway, proxy)
      nodes[:proxies].push(via)
      __transform_with_proxy(children, via, nodes)
    end
  else
    raise "Error!"
  end
end
|
546
|
+
|
547
|
+
# Resolves a whole node description, starting without a proxy.
# Returns { :nodes => [...], :proxies => [...] }.
def _transform_with_proxy(tree)
  collected = { :nodes => [], :proxies => [] }
  __transform_with_proxy(tree, nil, collected)
  collected
end
|
552
|
+
|
553
|
+
# Reads whitespace-separated host names from filename and returns a
# simple node "<opts[:user]>@<host>" for each of them.
def nodes_from_machinefile(filename, opts = {})
  hosts = IO.read(filename).strip.split
  userhosts = hosts.map { |host| "#{opts[:user]}@#{host}" }
  userhosts.map { |userhost| simple_node(userhost) }
end
|
558
|
+
|
559
|
+
# Loads a YAML node description from filename and returns the nodes it
# describes (the proxies created on the way are not returned).
#
# NOTE(review): YAML.load is applied to the file contents; if this file
# can come from an untrusted source, consider YAML.safe_load.
def nodes_from_file(filename, opts = {})
  description = YAML.load(IO.read(filename))
  resolved = _transform_with_proxy(_transform_nodes(description))
  resolved[:nodes]
end
|
566
|
+
|
567
|
+
# Turns a list of "key=value" strings into a hash with symbol keys.
#
# Only the first "=" separates key and value, so values may themselves
# contain "=" (they used to be cut at the second "="). An entry without
# a value ("key" or "key=") maps to nil, as before.
def _parse_opts(array)
  array.each_with_object({}) do |option, parsed|
    key, value = option.split("=", 2)
    parsed[key.to_sym] = (value == "" ? nil : value)
  end
end
|
575
|
+
|
576
|
+
# Builds one node per line of result.stdout. The first word of a line
# is "user@host"; the remaining words are "key=value" options. Without
# a proxy simple nodes are created, otherwise nodes proxied through it.
def nodes_from_result(result, proxy = nil)
  result.stdout.strip.lines.map(&:strip).map do |line|
    words = line.split
    userhost = words.first
    opts = _parse_opts(words[1..-1])
    next simple_node(userhost, opts) if proxy.nil?

    u, h = userhost.split("@")
    proxy_node(proxy, u, h, opts)
  end
end
|
590
|
+
|
591
|
+
# activities
|
592
|
+
|
593
|
+
# Activity: the list of nodes of the current scope (see nodes).
def node_list()
  nodes()
end
|
596
|
+
|
597
|
+
# Extracts nodes and a command from arguments:
#   [cmd]        - the command runs on node_list()
#   [nodes, cmd] - nodes is turned into a list with arrayize
# Returns [nodes, cmd]; raises RuntimeError for any other length.
def get_node_list(args)
  case args.length
  when 1
    [ node_list(), args.first ]
  when 2
    [ arrayize(args.first), args.last ]
  else
    raise "Wrong number of arguments"
  end
end
|
608
|
+
|
609
|
+
# Returns nodes itself when it is an Array, otherwise a one-element
# array containing it.
def arrayize(nodes)
  nodes.is_a?(Array) ? nodes : [ nodes ]
end
|
616
|
+
|
617
|
+
# Runs cmd on every given node, one after another, and returns the list
# of results.
#
# nodes - a node or an array of nodes
# env   - environment for the command; the :wd entry, if present, is
#         taken out and passed to the node as the :wd option instead.
#         The caller's hash is not modified (the :wd entry used to be
#         deleted from it, so it was lost when the hash was reused).
def execute(nodes, cmd, env = {})
  env = env.dup
  wd = env.delete(:wd)
  arrayize(nodes).map { |node| node.execute(cmd, env, :wd => wd) }
end
|
627
|
+
|
628
|
+
# Runs cmd through execute and returns the first result.
def execute_one(node, cmd, env = {})
  results = execute(node, cmd, env)
  results.first
end
|
631
|
+
|
632
|
+
# Splits the arguments of the execute_many family into
# [nodes, cmd, opts]. A trailing Hash is removed from args and used as
# opts (default {}); the rest is interpreted by get_node_list.
def _execute_many_parse_args(args)
  opts = args.last.is_a?(Hash) ? args.pop : {}
  nodes, cmd = get_node_list(args)
  [ nodes, cmd, opts ]
end
|
638
|
+
|
639
|
+
# Prepares a TakTukRun over nodes, launched through the first node
# (the master).
#
# options - merged over fresh temporary :stdout, :stderr and :filename
#           paths from the engine's main directory;
#           :exclude_master leaves the master out of the target nodes.
def _get_taktuk(nodes, options = {})
  # domains = nodes.map(&:domain).uniq
  # raise "TakTuk: nodes span different domains" if domains.length != 1

  master = nodes.first
  targets = options[:exclude_master] ? nodes.tail : nodes
  taktuk = File.join(master.directory.path, "ssh taktuk")
  workdir = proxy.engine.main_directory
  defaults = {
    :stdout => workdir.mktemp(),
    :stderr => workdir.mktemp(),
    :filename => workdir.mktemp()
  }
  TakTukRun.new(taktuk, targets, defaults.merge(options))
end
|
654
|
+
|
655
|
+
# Makes sure TakTuk is installed: first on the first node (through a
# plain execute with the shell-escaped command), then on all nodes
# through execute_many. Accepts a single node or a list; returns nil
# for an empty list.
def bootstrap_taktuk(nodes)
  nodes = [ nodes ] if nodes.is_a?(AbstractNode)
  return if nodes.length == 0

  cmd = "(dpkg -l | grep taktuk) || apt-get install -y --force-yes taktuk"
  master = nodes.first
  master.execute(Shellwords.escape(cmd))

  proxy.log("#{master} has TakTuk now")

  execute_many(nodes, cmd, :propagate => true)
end
|
674
|
+
|
675
|
+
# Runs a command on many nodes via TakTuk's execute and returns a
# ManyExecutionResult of BasicRemoteResult objects (stdout and stderr
# are taken from the TakTuk results). Arguments are parsed by
# _execute_many_parse_args.
# Raises RuntimeError unless every node succeeded.
def execute_many_here(*args)
  nodes, cmd, opts = _execute_many_parse_args(args)
  return ManyExecutionResult.new([], cmd) if nodes.length == 0

  taktuk = _get_taktuk(nodes, opts)

  # command has to be escape 2 times: local shell and remote shell
  succeeded, _failed = taktuk.execute(cmd, :escape => 2)

  unless succeeded.length == nodes.length
    raise "TakTuk: Some nodes failed (success: #{succeeded.length}/#{nodes.length}). See #{taktuk.stdout}"
  end

  results = succeeded.map do |entry|
    BasicRemoteResult.new(entry[:node], cmd, entry[:stdout], entry[:stderr])
  end
  ManyExecutionResult.new(results, cmd)
end
|
694
|
+
|
695
|
+
# Runs a command on many nodes via TakTuk's execute_remote and returns
# a ManyExecutionResult of FileRemoteResult objects (referring to the
# :stdout_file / :stderr_file of the TakTuk results). Arguments are
# parsed by _execute_many_parse_args.
# Raises RuntimeError unless every node succeeded; the message points
# to the first failure's stdout file (if any) and to TakTuk's stdout.
def execute_many(*args)
  nodes, cmd, opts = _execute_many_parse_args(args)
  return ManyExecutionResult.new([], cmd) if nodes.length == 0

  taktuk = _get_taktuk(nodes, opts)
  succeeded, failed = taktuk.execute_remote(cmd, :escape => 2)

  unless succeeded.length == nodes.length
    msg = "TakTuk: Some nodes failed (success: #{succeeded.length}/#{nodes.length})."
    msg += " See #{failed.first[:stdout_file]}" if failed.length > 0
    msg += " See #{taktuk.stdout}"
    raise msg
  end

  results = succeeded.map do |entry|
    FileRemoteResult.new(entry[:node], cmd, entry[:stdout_file], entry[:stderr_file])
  end
  ManyExecutionResult.new(results, cmd)
end
|
718
|
+
|
719
|
+
# Copies one file to all nodes: scp to the first node, then TakTuk put
# from there to the remaining ones, followed by an md5sum comparison
# over all nodes.
#
# f    - a local path, a LocalExecutionResult or a FileRemote
# dest - target path; when it ends with "/", the file's basename is
#        appended
# opts - not used
# Returns nil; raises RuntimeError on unknown sources, missing answers
# or differing hashes.
def distribute_one(f, nodes, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)

  f = case f
      when String then f
      when LocalExecutionResult then f.stdout_file
      when FileRemote then f.result.stdout_file
      else raise "I don't know how to distribute #{f.class}"
      end

  # a trailing slash marks a directory; anything else is the file itself
  dest = File.join(dest, File.basename(f)) if dest.end_with?("/")

  proxy.log("Saving to: #{dest}")

  return nil if nodes.length == 0

  master = nodes.first
  master.scp_many([ f ], dest)

  return nil if nodes.length == 1

  taktuk = _get_taktuk(nodes, :exclude_master => true)
  results = taktuk.put(dest, dest, :escape => 2)

  raise "Some nodes did not respond." if results.length + 1 != nodes.length
  raise "Some hashes were different." if results.map { |x| x[:hash] }.uniq.length != 1

  verified = md5sum(nodes, [ dest ]).map { |x| x[:hash] }.uniq
  raise "Hashes could not be verified." if verified.length != 1

  nil
end
|
768
|
+
|
769
|
+
# Checks that each of the files fs (one path or a list) has the same
# md5 checksum on all nodes.
# Raises RuntimeError ("File <name> is not consistent") otherwise.
def file_consistency(nodes, fs)
  fs = [ fs ] unless fs.is_a?(Array)

  by_file = Hash.new { |table, filename| table[filename] = [] }
  md5sum(nodes, fs).each do |entry|
    by_file[entry[:filename]].push(entry[:hash])
  end

  by_file.each_pair do |filename, digests|
    raise "File #{filename} is not consistent" if digests.uniq.length != 1
  end
end
|
782
|
+
|
783
|
+
# Runs md5sum for files on all nodes (through execute_many_here) and
# returns a flat list of { :filename => ..., :hash => ... } entries,
# one per output line of every node.
#
# The output is iterated with each_line; the block form of String#lines
# used before is deprecated.
def md5sum(nodes, files)
  entries = []
  results = execute_many_here(nodes, "md5sum #{files.join(' ')}")
  results.to_list.each do |result|
    result.stdout.strip.each_line do |line|
      hash, filename = line.split
      entries.push({ :filename => filename, :hash => hash })
    end
  end
  entries
end
|
794
|
+
|
795
|
+
# Copies a set of files into the directory dest on all nodes: scp to
# the first node, then TakTuk put of every file from there to the
# remaining nodes, comparing the reported hashes with the md5sum
# computed on the first node.
#
# glob - a glob pattern (expanded locally with Dir[]), a
#        LocalExecutionResult or a FileRemote; for an Array only the
#        first element is used
# opts - not used
#
# Changes: nothing is copied or checksummed when the file list is empty
# (previously scp_many and a bare "md5sum" were still issued), the
# md5sum output is iterated with each_line instead of the deprecated
# block form of String#lines, and the local holding that output no
# longer shadows the md5sum method.
def distribute(glob, nodes, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)
  glob = glob.first if glob.is_a?(Array)

  files = case glob
          when String then Dir[glob] # get a list of files
          when LocalExecutionResult then [ glob.stdout_file ]
          when FileRemote then [ glob.result.stdout_file ]
          else raise "I don't know how to distribute #{glob.class}"
          end
  remote_files = files.map { |f| File.join(dest, File.basename(f)) }

  proxy.log("Found #{files.length} files to distribute.")

  return nil if files.empty?
  return nil if nodes.length == 0

  master = nodes.first
  master.scp_many(files, dest)

  proxy.log("Files copied to #{master}:#{dest}")

  return nil if nodes.length == 1

  # reference checksums, as computed on the first node
  listing = master.execute("md5sum #{remote_files.join(' ')}").stdout
  orig_hashes = {}
  listing.strip.each_line do |line|
    hash, filename = line.strip.split
    orig_hashes[filename] = hash
  end

  hashes = {}
  remote_files.each do |filepath|
    proxy.log("Distributing #{filepath}")
    taktuk = _get_taktuk(nodes, :exclude_master => true)
    results = taktuk.put(filepath, filepath, :escape => 2)

    raise "Some nodes did not respond." if results.length + 1 != nodes.length

    thelist = results.map { |x| x[:hash] }.uniq
    raise "Some hashes were different." if thelist.length != 1

    hashes[filepath] = thelist.first
  end

  raise "Weird?" if hashes.keys.sort != orig_hashes.keys.sort

  orig_hashes.keys.each do |filepath|
    raise "Hash for #{filepath} does not match." \
      if orig_hashes[filepath] != hashes[filepath]
  end

  proxy.log("MD5s match.")
end
|
857
|
+
|
858
|
+
# Activity: the value of the global $ssh_key.
def ssh_key
  $ssh_key
end
|
861
|
+
|
862
|
+
# Runs the command "true" on all nodes through execute_many and
# returns its result.
def test_connectivity(nodes)
  execute_many(nodes, "true")
end
|
865
|
+
|
866
|
+
# Copies the local file src into the directory dest on all nodes by
# chaining them: the file is scp'ed to the first node, and every node
# forwards the stream over ssh to its successor using a small script
# written to /tmp/.chainsend. Finally the md5 sums of all nodes are
# compared.
#
# opts - not used
# Returns nil; raises RuntimeError when src does not exist locally or
# when the resulting hashes differ.
def chain_copy(nodes, src, dest, opts = {})
  nodes = [ nodes ] unless nodes.is_a?(Array)

  raise "'#{src}' does not exist!" unless File.exist?(src)

  dest = File.join(dest, File.basename(src))

  return nil if nodes.length == 0

  master = nodes.first
  laster = nodes.last

  master.scp(src, dest)

  proxy.log("#{src} copied to #{master}:#{dest}")

  return nil if nodes.length == 1

  # the value is not used afterwards, but the command is still run on the first node
  orig_hash = master.execute("md5sum #{dest}").stdout.strip.split.first

  execute_many(nodes, "apt-get install -y mbuffer")

  # mbuffer creates a buffer of size 2% * memory (by default)
  # we do chainsend thing

  second = nodes[1].host
  ssh_opts = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

  # first node: send the file to the second one
  master.file("/tmp/.chainsend") do |f|
    f.puts "set -eu"
    f.puts "cat $1 | ssh #{ssh_opts} #{second} \"bash /tmp/.chainsend $1\""
  end

  # last node: just store the stream
  laster.file("/tmp/.chainsend") do |f|
    f.puts "set -eu"
    f.puts "cat > $1"
  end

  # nodes in between: store the stream and pass it on to the successor
  nodes.each_cons(2).drop(1).each do |node, successor|
    next_host = successor.host
    node.file("/tmp/.chainsend") do |f|
      f.puts "set -eu"
      f.puts "tee $1 | mbuffer -q | ssh #{ssh_opts} #{next_host} \"bash /tmp/.chainsend $1\""
    end
  end

  proxy.log("Chain prepared.")

  master.execute("bash /tmp/.chainsend #{dest}")
  results = execute_many_here(nodes, "md5sum #{dest}")

  hashes = results.to_list.map { |x| x.stdout.split.first }.uniq
  raise "Hashes differ." if hashes.length != 1

  nil
end
|
924
|
+
|
925
|
+
# Runs "cat <path>" on node and wraps the path together with the
# command's result in a FileRemote.
def file(node, path)
  FileRemote.new(path, execute_one(node, "cat #{path}"))
end
|
929
|
+
|
930
|
+
# Copies a file to path "where" on every given node using the node's
# scp, and returns the list of scp results. name can be:
#   * a path (at local fs)
#   * a LocalExecutionResult (its stdout_file is copied)
#   * a FileRemote (the stdout_file of its result is copied)
# For an Array only the first element is used.
# Raises RuntimeError for any other kind of source.
def copy(name, nodes, where)
  name = name.first if name.is_a?(Array)

  source = case name
           when String then name
           when LocalExecutionResult then name.stdout_file
           when FileRemote then name.result.stdout_file
           else raise "Unknown file source: #{name.class}"
           end

  arrayize(nodes).map { |node| node.scp(source, where) }
end
|
955
|
+
|
956
|
+
# For every node of node_list(label): copies the script registered
# under name in the global $files to a fresh temporary file on the
# node, makes it executable (mode 700), executes it and removes it.
#
# NOTE(review): node_list as defined in this file takes no arguments,
# and AbstractNode in this file defines no chmod or rm - confirm this
# activity still works.
def run_script(name, label = :all)
  script = $files[name]
  node_list(label).each do |node|
    remote = node.tmpfile
    node.scp(script, remote)
    node.chmod(remote, "700")
    node.execute(remote)
    node.rm(remote)
  end
end
|
966
|
+
|
967
|
+
# Pings the node; returns true when the ping succeeds and false when
# it raises ExecutionError.
def check_node(node)
  debug "Checking #{node}..."
  reachable = true
  begin
    node.ping()
  rescue ExecutionError
    reachable = false
  end
  reachable
end
|
977
|
+
|
978
|
+
# Builds a G5K::Library whose log output goes to the engine of the current
# proxy, then forwards all arguments to its reserve_nodes.
#
# Returns the value of G5K::Library#reserve_nodes.
def g5k_reserve_nodes(*args)
  log_target = proxy
  library = G5K::Library.new
  library.logging = proc { |message| log_target.engine.log(message) }
  library.proxy = proxy
  library.reserve_nodes(*args)
end
|
985
|
+
|
986
|
+
# Builds a G5K::Library whose log output goes to the engine of the current
# proxy and asks it to pick a reservation using +opts+.
#
# Returns the value of G5K::Library#pick_reservation.
def g5k_get_avail(opts = {})
  log_target = proxy
  library = G5K::Library.new
  library.logging = proc { |message| log_target.engine.log(message) }
  library.proxy = proxy
  library.pick_reservation(opts)
end
|
994
|
+
|
995
|
+
# Builds one node per host listed in the file at +filepath+.
#
# The file is read line by line; surrounding whitespace (including "\r")
# is stripped from each line, and empty lines are skipped so that they do
# not turn into host-less specifications such as "user@".
#
# user     - login name prepended to every host as "user@host"
# filepath - path of a local text file with one host per line
#
# Returns an array with the value of simple_node for each host, in file
# order.
def nodes_file(user, filepath)
  hosts = IO.read(filepath).lines.map(&:strip).reject(&:empty?)
  hosts.map { |host| simple_node("#{user}@#{host}") }
end
|
1002
|
+
|
1003
|
+
# Looks up a job through a fresh G5K::Library that logs with puts.
#
# opts - hash read for :site and :id, which are passed to
#        G5K::Library#job in that order
#
# Returns the value of G5K::Library#job.
def g5k_job(opts = {})
  library = G5K::Library.new
  library.logging = proc { |line| puts line }
  library.job(opts[:site], opts[:id])
end
|
1009
|
+
|
1010
|
+
# Turns every entry of job["assigned_nodes"] into a node via g5k_node,
# using the login returned by get_g5k_username.
def g5k_nodes(job)
  assigned = job["assigned_nodes"]
  login = get_g5k_username
  assigned.map { |hostname| g5k_node(login, hostname) }
end
|
1015
|
+
|
1016
|
+
# Extracts the site name from a job description: takes the first entry of
# job["links"] whose "rel" is "parent" and returns the last "/"-separated
# segment of its "href".
def g5k_site_from_job(job)
  parent_link = job["links"].find { |entry| entry["rel"] == "parent" }
  parent_link["href"].split("/").last
end
|
1022
|
+
|
1023
|
+
# Keeps the entries of +vlan_nodes+ whose prefix before "-kavlan" equals
# the first "."-separated label of some entry of +ok_nodes+.
#
# Returns the selected entries of +vlan_nodes+ in their original order.
def _filter_vlan(ok_nodes, vlan_nodes)
  short_names = ok_nodes.each_with_object({}) do |fqdn, seen|
    seen[fqdn.split(".").first] = true
  end
  vlan_nodes.select { |vlan_host| short_names.key?(vlan_host.split("-kavlan").first) }
end
|
1031
|
+
|
1032
|
+
# Generates a throw-away SSH key pair on the frontend of +site+ and installs
# it on every node: the private key is copied to .ssh/id_rsa and the public
# key is added with ssh-copy-id. The key pair is removed from the frontend
# afterwards.
#
# Original author's note: assumes you are on the frontend.
#
# nodes - objects responding to userhost
# site  - site name passed to g5k_site
#
# Returns the result of the final cleanup command.
def g5k_deploy_keys(nodes, site)
  key = get_g5k_tmpfile("ssh_key")
  frontend = g5k_site(site)

  generate_pair = "rm -f #{key} #{key}.pub; ssh-keygen -f #{key} -q -N \'\'"
  frontend.execute(generate_pair)

  nodes.each do |target|
    proxy.log "Sending key to: #{target.userhost}"
    send_private = "scp -o 'BatchMode=yes' -o 'UserKnownHostsFile=/dev/null' #{key} #{target.userhost}:.ssh/id_rsa"
    frontend.execute(send_private)
    authorize_public = "ssh-copy-id -i #{key}.pub #{target.userhost}"
    frontend.execute(authorize_public)
  end

  frontend.execute("rm -f #{key} #{key}.pub")
end
|
1044
|
+
|
1045
|
+
# Deploys environment +env+ with kadeploy3 on the nodes assigned to +job+
# and returns root nodes for the machines that can be used afterwards.
#
# job    - job description; read for "assigned_nodes" and, through
#          g5k_site_from_job, for its site
# env    - value placed after kadeploy3's -e switch
# custom - extra text placed after kadeploy3's -k switch
# opts   - :count      limits the deployment to the first N assigned nodes
#          :real_nodes when given, the returned nodes are these names
#                      filtered by _filter_vlan against the deployed ones
#
# Steps, all executed on the site's frontend: write and upload a machine
# file, run kadeploy3 with an output file for good nodes, read that file
# back (sorted with `sort -V`), then create a temporary SSH key pair and
# install it on each final node for root.
#
# Returns an array of g5k_node("root", host) for every final node.
def g5k_kadeploy(job, env, custom = "", opts = {})
  site = g5k_site_from_job(job)
  frontend = g5k_site(site)

  nodes = job["assigned_nodes"]
  nodes = nodes[0...opts[:count]] if opts.key?(:count)
  final_nodes = opts[:real_nodes]

  proxy.log("Using #{nodes.length} machines.")

  machine_list = get_g5k_tmpfile("machines")
  deployed_list = get_g5k_tmpfile("good_nodes")

  # The machine file is written locally and uploaded under the same path.
  IO.write(machine_list, nodes.join("\n"))
  frontend.scp(machine_list, machine_list)

  command = "kadeploy3 -f #{machine_list} -e #{env} -k #{custom} -o #{deployed_list}"
  proxy.log("Running deployment: #{command}")
  frontend.execute(command)
  frontend.execute("rm -f #{machine_list}")

  key = get_g5k_tmpfile("ssh_key")
  frontend.execute("rm -f #{key} #{key}.pub; ssh-keygen -f #{key} -q -N \'\'")

  ok_nodes = frontend.execute("sort -V #{deployed_list}").stdout.split
  frontend.execute("rm -f #{deployed_list}")

  proxy.log("Nodes that survived: #{ok_nodes.length}/#{nodes.length}")
  proxy.log("Final nodes: #{final_nodes}")

  final_nodes = final_nodes.nil? ? ok_nodes : _filter_vlan(ok_nodes, final_nodes)

  # we have to install SSH keys
  final_nodes.each do |hostname|
    root_at_host = "root@#{hostname}"
    frontend.execute("scp -o 'BatchMode=yes' -o 'UserKnownHostsFile=/dev/null' #{key} #{root_at_host}:.ssh/id_rsa")
    frontend.execute("ssh-copy-id -i #{key}.pub #{root_at_host}")
  end
  frontend.execute("rm -f #{key} #{key}.pub")

  final_nodes.map { |hostname| g5k_node("root", hostname) }
end
|
1092
|
+
|
1093
|
+
# Returns the frontend node (via g5k_site) of the site that
# g5k_site_from_job derives from +job+.
def g5k_frontend_from_job(job)
  g5k_site(g5k_site_from_job(job))
end
|
1098
|
+
|
1099
|
+
# Runs `kavlan -V -j <uid>` on the job's frontend, with the uid taken from
# job['uid'], and returns its stripped stdout converted with to_i.
def g5k_kavlan_id(job)
  job_uid = job['uid']
  reply = g5k_frontend_from_job(job).execute("kavlan -V -j #{job_uid}")
  reply.stdout.strip.to_i
end
|
1105
|
+
|
1106
|
+
# Stores the stdout of `kavlan -l -j <uid>` (run on the job's frontend) in a
# file named by get_g5k_tmpfile("kavlan_nodes"): the file is written
# locally and then copied to the frontend under the same path.
#
# Returns that path.
def g5k_kavlan_nodes_file(job)
  # TODO
  job_uid = job['uid']
  listing_path = get_g5k_tmpfile("kavlan_nodes")
  frontend = g5k_frontend_from_job(job)
  listing = frontend.execute("kavlan -l -j #{job_uid}")
  IO.write(listing_path, listing.stdout)
  frontend.scp(listing_path, listing_path)
  listing_path
end
|
1116
|
+
|
1117
|
+
# Runs `kavlan -l -j <uid>` on the job's frontend and returns a root node
# (g5k_node("root", host)) for every line of its output, with surrounding
# whitespace stripped.
def g5k_kavlan_nodes(job)
  job_uid = job['uid']
  listing = g5k_frontend_from_job(job).execute("kavlan -l -j #{job_uid}")
  listing.stdout.strip.lines.map { |line| g5k_node("root", line.strip) }
end
|
1125
|
+
|
1126
|
+
# Returns the node for host "<site>.grid5000.fr" from the engine's nodes
# manager, using the login from get_g5k_username and a new G5KNodeFactory.
def g5k_site(site)
  nodes_manager = proxy.engine.nodes_manager
  nodes_manager.get_node(get_g5k_username, "#{site}.grid5000.fr", G5KNodeFactory.new)
end
|
1130
|
+
|
1131
|
+
# Returns the node for +user+ at +host+ from the engine's nodes manager,
# passing it a new G5KNodeFactory.
def g5k_node(user, host)
  proxy.engine.nodes_manager.get_node(user, host, G5KNodeFactory.new)
end
|
1135
|
+
|
1136
|
+
# Returns the node for +user+ at +host+ from the engine's nodes manager,
# passing it a ProxiedFactory built from +via+ and forwarding +opts+.
def proxy_node(via, user, host, opts = {})
  proxy.engine.nodes_manager.get_node(user, host, ProxiedFactory.new(via), opts)
end
|
1140
|
+
|
1141
|
+
# Splits a host specification of the form "user@host" or "host".
#
# pattern - the specification; surrounding whitespace is ignored
#
# Returns a two-element array [user, host]. When no "@" is present the
# user defaults to Etc.getlogin and a warning is logged through the proxy,
# because the result then depends on who runs the experiment.
#
# Raises RuntimeError when the specification is empty, contains more than
# one "@", or has an empty user or host part (e.g. "user@" or "@host").
def split(pattern)
  pattern = pattern.strip
  raise "Empty host specification" if pattern.empty?

  # A limit of -1 keeps trailing empty fields, so "user@" yields
  # ["user", ""] rather than ["user"] and can be rejected below.
  parts = pattern.split("@", -1)
  if parts.length > 2 || parts.any?(&:empty?)
    raise "Invalid host specification: #{pattern}"
  end
  return parts if parts.length == 2

  proxy.log "User not specified. This is not reproducible."
  [ Etc.getlogin, pattern ]
end
|
1156
|
+
|
1157
|
+
# Discovers hosts by broadcast ping and returns a node for each responder.
#
# pattern - "user@address" (or just an address) as understood by split;
#           the address is passed to NodeUtils.broadcast_ping
#
# Returns an array of nodes obtained from the engine's nodes manager for
# the given user and each responding address.
def broadcast(pattern)
  nodes_manager = proxy.engine.nodes_manager
  user, address = split(pattern)
  responders = NodeUtils.broadcast_ping(address)
  proxy.log("Found #{responders.length} hosts via ICMP broadcast")
  responders.map { |ip| nodes_manager.get_node(user, ip) }
end
|
1164
|
+
|
1165
|
+
# Builds +count+ nodes with consecutive dotted addresses starting at the
# address in +ip_start+ ("user@a.b.c.d" or "a.b.c.d", parsed by split).
#
# The last component is incremented for each node; a component that reaches
# 256 is reset to 0 and carried into the component on its left. The
# left-most component is never wrapped.
#
# Returns an array with the value of simple_node("user@address") for each
# generated address.
def node_range(ip_start, count)
  user, host = split(ip_start)
  octets = host.split(".").map(&:to_i)

  addresses = count.times.map do
    # Propagate any pending carry from right to left before emitting.
    position = 3
    while position > 0 && octets[position] == 256
      octets[position] = 0
      octets[position - 1] += 1
      position -= 1
    end
    current = octets.join(".")
    octets[3] += 1
    current
  end

  addresses.map { |address| simple_node("#{user}@#{address}") }
end
|
1182
|
+
|
1183
|
+
# Returns the node described by +pattern+ ("user@host" or "host", parsed by
# split) from the engine's nodes manager, passing it a new
# SimpleNodeFactory and forwarding +opts+.
def simple_node(pattern, opts = {})
  user, host = split(pattern)
  proxy.engine.nodes_manager.get_node(user, host, SimpleNodeFactory.new, opts)
end
|
1188
|
+
|
1189
|
+
# Returns a simple node for the current user at 127.0.0.1.
#
# The user name comes from the USER environment variable; when that
# variable is not set, Etc.getlogin is used instead so the specification
# does not degrade to "@127.0.0.1".
def localhost()
  user = ENV["USER"] || Etc.getlogin
  simple_node("#{user}@127.0.0.1")
end
|
1193
|
+
|
1194
|
+
end
|
1195
|
+
|
1196
|
+
# Helpers for rendering ERB templates to disk and for discovering hosts
# with a broadcast ping.
module NodeUtils

  class << self

    # Joins +name+ onto the directory containing this source file and
    # passes the result through realpath (defined outside this module).
    def get_templates_dir(name)
      realpath(File.join(File.dirname(__FILE__), name))
    end

    # Renders the ERB template stored in file +t+ with the values of hash
    # +ctx+ and writes the output to +path+ (binary mode), setting the
    # file's mode to 0700.
    def render_file(t, path, ctx)
      rendered = Erb.render(IO.read(t), ctx)
      File.open(path, "wb") do |io|
        io.write(rendered)
        io.chmod(0700)
      end
    end

    # Renders every regular file of "templates/utils" into directory
    # +path+ under the same file name, then renders
    # "templates/ssh-config.<name>" to "<path>/ssh-config".
    #
    # +ctx+ is the template context; the resolved "templates" directory is
    # added to it under the :templates key.
    def install_templates(path, name, ctx)
      ctx = ctx.merge({ :templates => get_templates_dir("templates") })
      utils_dir = get_templates_dir("templates/utils")
      Dir.entries(utils_dir).each do |entry|
        source = File.join(utils_dir, entry)
        target = File.join(path, entry)
        # Dir.entries also yields directories such as "." and "..".
        render_file(source, target, ctx) if File.file?(source)
      end
      render_file(get_templates_dir("templates/ssh-config.#{name}"), File.join(path, "ssh-config"), ctx)
    end

    # uses a broadcast ping to get a list of nodes that respond
    #
    # Runs the system `ping` with -b against +address+, passing +timeout+
    # to its -w switch, and collects the sender addresses from the reply
    # lines. The exit status of ping is not checked.
    #
    # Returns the distinct responding addresses, sorted as strings.
    # Raises RuntimeError ("No nodes found") when nothing answered.
    def broadcast_ping(address, timeout = 2)
      reply = %x(ping -c 3 -n -b #{address} -w #{timeout} 2> /dev/null)
      responders = reply.scan(/from ([\.0-9]+): icmp_seq=/).flatten.uniq.sort
      raise "No nodes found" if responders.length == 0
      responders
    end

  end

  # An OpenStruct whose fields are visible to an ERB template as plain
  # method calls.
  class Erb < OpenStruct

    # Evaluates the ERB source given in +hash+ (despite its name, this is
    # the template text) in the context of this object.
    def render(hash)
      ERB.new(hash).result(binding)
    end

    # Renders ERB source +template+ with the key/value pairs of +hash+
    # exposed as fields.
    def self.render(template, hash)
      Erb.new(hash).render(template)
    end

  end

end
|
1249
|
+
|
1250
|
+
end
|