ruby-cute 0.12 → 0.24

Sign up to get free protection for your applications and to get access to all the features.
data/bin/grd ADDED
@@ -0,0 +1,701 @@
1
+ #!/usr/bin/ruby
2
+ # For help, see 'grd -h'
3
+ #
4
+ #
5
+ # Grid5000 bugs that would help improve grd:
6
+ # Bug 11547 - l'API OAR génère des fichiers temporaires oarapi.subscript.* dans les homes
7
+ # Bug 13929 - Stocker la sortie des admissions rules dans un événement
8
+ # Bug 13867 - l'API de kadeploy donne les logs de manière non idempotente
9
+ # Random issues with the API that cause error 500:
10
+ # - Bug 13928 - erreur 500 sur l'API lors d'un DELETE
11
+ # - Bug 13742 - erreur 500 sur l'API pour créer un job
12
+ # - Bug 13766 - erreur 500 api
13
+ # - Bug 11697 - api-proxy/g5k-api: erreur "AH01102: error reading status line from remote server localhost:8000"
14
+ #
15
+ # Test cases:
16
+ # grd bs -s ncy
17
+ # grd bs -s ncy -c
18
+ # grd bs -s ncy -q production -l {grappe}/nodes=1+{"type='kavlan'"}/vlan=1 -w 0:10 --armor
19
+ # grd bs -s ncy -q production -l {grappe}/nodes=1+{"type='kavlan'"}/vlan=1 -w 0:10 --armor -d
20
+ # echo 'hostname ; pwd' > setup-script
21
+ # grd bs -s ncy -f setup-script -T
22
+ # grd l
23
+
24
+ gem 'ruby-cute', '>= 0.12'
25
+ $:.unshift File.expand_path("../../lib", File.realpath(__FILE__))
26
+
27
+ require 'cute'
28
+ require 'optparse'
29
+ require 'pp'
30
+ require 'net/ssh/gateway'
31
+ require 'net/ssh'
32
+ require 'net/scp'
33
+ require 'peach'
34
+
35
# Set up the global state shared by every grd sub-command.
#
# Creates $logger (writing to STDOUT), fills $login / $password and builds the
# $g5k API client. Credentials are read from ~/.grid5000_api.yml when that file
# exists; otherwise, when g5k_internal? is true, the current Unix user is used.
# When neither applies, an error is printed and the process exits with status 1.
# The GRD_DEBUG environment variable switches the logger to DEBUG level.
def cute_init
  $logger = Logger.new(STDOUT)
  STDOUT.sync = true
  STDERR.sync = true

  conf = ENV['HOME'] + '/.grid5000_api.yml'
  if File.exist?(conf)
    # Any error while reading or parsing the file yields an empty configuration.
    yconf = begin
      YAML.load(IO.read(conf))
    rescue StandardError
      {}
    end
    $login = yconf['username']
    $password = yconf['password']
    $g5k = Cute::G5K::API.new(:conf_file => conf, :debug => true)
  elsif g5k_internal?
    $login = `whoami`.chomp
    $g5k = Cute::G5K::API.new(:debug => true)
  else
    puts "ERROR: no .grid5000_api.yml found, and outside Grid'5000. Need API credentials."
    exit(1)
  end

  $g5k.rest.user_agent += " grd"
  $debug = true if ENV['GRD_DEBUG']
  $logger.level = $debug ? Logger::DEBUG : Logger::INFO
  # One-line log format: first letter of the severity, timestamp, message.
  $logger.formatter = proc do |severity, datetime, _progname, msg|
    stamp = datetime.strftime("%Y-%m-%d %H:%M:%S")
    "%s, [%s] %s\n" % [severity[0..0], stamp, msg]
  end
  $g5k.logger = $logger
end
62
+
63
# Deploy the environment o[:env] on the nodes of +job+ and wait for the result.
#
# Does nothing (returns nil) when o[:env] is not set. All public keys found in
# ~/.ssh/*.pub are passed to the deployment, together with the job's VLAN id
# when it has one. If the deployment fails, the job is released and the process
# exits with status 1.
#
# site - site name, used only to build the live-log URL
# job  - job hash as returned by the API
# o    - options hash; only o[:env] is read
def do_provisioning(site, job, o)
  env = o[:env]
  return unless env

  pubkeys = Dir.glob(ENV['HOME'] + '/.ssh/*.pub').map { |path| IO.read(path) }.join("\n")
  started = Time.now
  params = { :env => env, :keys => pubkeys }
  vlan = get_job_vlan(job)
  params[:vlan_id] = vlan if vlan

  # The logger is lowered to WARN around API calls unless debugging is on.
  $logger.level = Logger::WARN unless $debug
  job = $g5k.deploy(job, params)
  $logger.level = Logger::INFO unless $debug
  duid = job['deploy'].last['uid']
  $logger.info "Started provisioning of environment #{env}. Live log: https://api.grid5000.fr/3.0/sites/#{site}/internal/kadeployapi/deployment/#{duid}/logs"
  $logger.level = Logger::WARN unless $debug
  job = $g5k.wait_for_deploy(job)
  $logger.level = Logger::INFO unless $debug

  deploy = job['deploy'].last
  # Failed when the deployment itself is in error, or any entry is not 'OK'.
  failed = deploy['status'] == 'error' ||
           deploy['result'].to_a.any? { |entry| entry[1]['state'] != 'OK' }
  if failed
    $logger.error "Provisioning failed. Terminating resources reservation and exiting."
    $g5k.release(job)
    exit(1)
  end
  $logger.info "Provisioning completed after #{(Time.now - started).to_i}s"
end
92
+
93
# Return the first VLAN attached to +job+, or nil when it has none.
#
# job - job hash; VLANs are looked up under job['resources_by_type']['vlans'].
#
# A job hash without a 'resources_by_type' entry is treated as having no VLAN
# instead of raising NoMethodError.
def get_job_vlan(job)
  vlans = (job['resources_by_type'] || {})['vlans']
  return nil if vlans.nil? || vlans.empty?
  vlans.first
end
100
+
101
# Build the name of +node+ inside VLAN +vlan+.
#
# The node name is split on dots: the first label gets a "-kavlan-<vlan>"
# suffix, the second label is kept as the site, and the third and fourth
# labels form the domain.
def nodename_in_vlan(node, vlan)
  labels = node.split('.')
  host = labels[0]
  site = labels[1]
  domain = labels[2..3].join('.')
  "#{host}-kavlan-#{vlan}.#{site}.#{domain}"
end
109
+
110
# Log the nodes assigned to +job+, its VLAN if any, and (when there is more
# than one node) the folded nodeset notation.
def show_nodes(job)
  assigned = job['assigned_nodes']
  $logger.info "Nodes: #{assigned.join(' ')}"
  vlan = get_job_vlan(job)
  $logger.info "VLAN: #{vlan}" if vlan
  $logger.info "Nodeset: #{nodeset(assigned)}" if assigned.length > 1
end
120
+
121
# Process-wide memo for API lookups.
$cache = {}

# Return the list of site identifiers known to the API.
# The list is fetched once through $g5k.site_uids, frozen, and then served
# from $cache['sites'] on later calls.
def get_api_sites
  $cache['sites'] ||= $g5k.site_uids.freeze
end
129
+
130
# Expand a site abbreviation into a full site name.
#
# 'ncy' and 'ny' stand for 'nancy', 'ns' for 'nantes'. Any other value must be
# a prefix of exactly one site returned by get_api_sites.
#
# Raises RuntimeError when no site, or more than one site, matches.
def resolve_site(s)
  s = case s
      when 'ncy', 'ny' then 'nancy'
      when 'ns' then 'nantes'
      else s
      end
  candidates = get_api_sites.select { |uid| uid.start_with?(s) }.sort
  raise "Invalid site: #{s}" if candidates.empty?
  raise "Invalid site: #{s} (matches: #{candidates.join(' ')})" if candidates.length > 1
  candidates.first
end
143
+
144
# Fold a list of node names by piping the comma-joined list to the external
# `nodeset -f` command, and return that command's output without its trailing
# newline.
def nodeset(nodes)
  joined = nodes.join(',')
  folded = `echo #{joined} | nodeset -f`
  folded.chomp
end
147
+
148
# True when the fully qualified name of the local host (as printed by
# `hostname --fqdn`) ends in ".grid5000.fr", false otherwise.
def g5k_internal?
  fqdn = `hostname --fqdn`.chomp
  !(fqdn =~ /\.grid5000\.fr$/).nil?
end
151
+
152
+ if ['bootstrap', 'bs'].include?(ARGV[0])
153
# Drop the sub-command word; what is left in ARGV are its options.
ARGV.shift
# o collects the parsed options; :type accumulates every -t value.
o = { :type => [] }
bs_parser = OptionParser.new
bs_parser.banner = "usage: grd bootstrap [options]"
bs_parser.separator " Reserve, provision, configure and connect to resources (alias: bs)"
bs_parser.separator ""
bs_parser.separator "# Options:"
bs_parser.separator "## (A) Select which resources to reserve, and for how long:"
bs_parser.on("-s", "--site SITE", "Site on which resources will be requested") { |v| o[:site] = v }
bs_parser.on("-l", "--resources DESCRIPTION", "description of the requested resources (using the syntax for oarsub -l). default: nodes=1") { |v| o[:resources] = v }
bs_parser.on("-w", "--walltime DURATION", "maximum duration of the reservation (using the syntax for oarsub -l)") { |v| o[:walltime] = v }
bs_parser.separator ""
bs_parser.separator "## (B) Choose when to reserve resources (default: ASAP, and wait interactively):"
bs_parser.on("-d", "--detach", "Reserve resources ASAP, but do not wait interactively (batch/submission mode)") { o[:detach] = true }
bs_parser.on("-r", "--reservation DATE", "Resources at a specified date and time. Do not wait for the reservation to start.") do |v|
  # A dated reservation is never waited for, so it implies --detach.
  o[:reservation] = v
  o[:detach] = true
end
bs_parser.separator ""
bs_parser.separator "## (C) Set the environment (system image) to provision. If none specified, the 'standard' environment will be used"
bs_parser.on("-e", "--environment ENV", "Kadeploy environment to provision") { |v| o[:env] = v }
bs_parser.separator ""
bs_parser.separator "## (D) Run a file (typically a script) on the first node"
bs_parser.on("-f", "--script FILE", "script") { |v| o[:script] = v }
bs_parser.on("-a", "--script-arg ARG", "argument to pass to the script (can be specified multiple times)") { |v| (o[:script_arg] ||= []) << v }
bs_parser.on("-T", "--terminate-after-script", "Terminate the reservation after the script execution") { o[:terminate_after_script] = true }
bs_parser.separator ""
bs_parser.separator "## (E) Connect interactively to the first node (incompatible with '--reservation' and '--detach')."
bs_parser.on("-c", "--connect") { o[:connect] = true }
bs_parser.separator ""
bs_parser.separator "## Advanced options:"
bs_parser.separator "### Related to reservation"

bs_parser.on("-t", "--type JOB_TYPE", "OAR job type (can be specified multiple times)") { |v| o[:type] << v }
bs_parser.on("-q", "--queue QUEUE", "OAR queue") { |v| o[:queue] = v }
bs_parser.on("-p", "--project PROJECT", "OAR project") { |v| o[:project] = v }
bs_parser.on("-n", "--name NAME", "OAR name") { |v| o[:name] = v }
bs_parser.separator ""
bs_parser.separator "### Related to post-deployment configuration"
bs_parser.on("", "--armor", "Fetch and run g5k-armor-node.py (see https://www.grid5000.fr/w/Armored_Node_for_Sensitive_Data). This implies '--environment debian11-x64-big'") { o[:armor] = true }
bs_parser.separator ""
bs_parser.separator "# Notes:"
bs_parser.separator "* All options are optional except '--site' if running from outside Grid'5000."
bs_parser.separator "* In steps (D) and (E), the connection is done as 'root' if an environment was provisioned (and as the normal user otherwise)."
bs_parser.separator ""
bs_parser.separator "# Examples:"
bs_parser.separator "## Basic usage: reserve one node on the current site, as soon as possible, and for the default walltime, and wait until it is available"
bs_parser.separator " grd bs"
bs_parser.separator ""
bs_parser.separator "## Reserve, provision, execute a script, and connect to the node"
bs_parser.separator " grd bs -s ly -l nodes=3 -w 0:10 -e debian11-x64-min -f setup-script -c"
bs_parser.separator ""
bs_parser.separator "Examples for -l / --resources:"
bs_parser.separator ' nodes=3 {"gpu_count>0"}/nodes=1 {"cluster=\'gros\'"}/nodes=1 {nova}/nodes=3 (see https://www.grid5000.fr/w/OAR_Syntax_simplification)'
bs_parser.separator ""
bs_parser.separator "Examples for -r / --reservation:"
bs_parser.separator ' "2022-03-30 19:30:05" "2022-03-30 19" "19" "2022-03-30 19,2022-04-02 04" "13,15"'
# Consumes the recognised options from ARGV.
bs_parser.parse!
241
+
242
cute_init

# Without -s, take the site from the local host name (<host>.<site>.grid5000.fr).
unless o[:site]
  fqdn_match = /.*\.([^.]+)\.grid5000\.fr$/.match(`hostname --fqdn`.chomp)
  if fqdn_match
    o[:site] = fqdn_match[1]
  else
    $logger.error("Running outside Grid'5000: the target site must be specified using '-s SITE'.")
    exit(1)
  end
end
site = resolve_site(o[:site])

# jo is the job description handed to $g5k.reserve later on.
jo = { :site => site, :resources => o[:resources] || 'nodes=1' }
# These are only set when given on the command line; otherwise OAR's defaults apply.
[:walltime, :reservation, :queue, :project].each do |key|
  jo[key] = o[key] if o[key]
end
o[:name] ||= 'grd'
jo[:name] = o[:name]
jo[:wait] = false
# Provisioning an environment requires the 'deploy' job type.
o[:type] << 'deploy' if o[:env] && !o[:type].include?('deploy')
jo[:type] = o[:type]

if o[:armor]
  if o[:script]
    $logger.error("--armor and --script are incompatible. Exiting.")
    exit(1)
  end
  if o[:env] && o[:env] != 'debian11-x64-big'
    $logger.error("--armor and --environment are incompatible. Exiting.")
    exit(1)
  end
  o[:env] = 'debian11-x64-big'
  # jo[:type] is the same array object as o[:type], so this also updates the job description.
  o[:type] << 'deploy' unless o[:type].include?('deploy')
  # --armor is implemented as a generated script handled like a user-supplied --script.
  armor_script = <<-EOF
#!/bin/bash -xe
wget https://gitlab.inria.fr/grid5000/g5k-armor/-/raw/master/g5k-armor-node.py
chmod a+rx g5k-armor-node.py
./g5k-armor-node.py
  EOF
  tmp = `mktemp /tmp/armor.XXXXXX`.chomp
  File.write(tmp, armor_script)
  o[:script] = tmp
end
290
+
291
# Detached mode: the job itself runs 'grd inner', which does the provisioning
# and script execution from inside the reservation.
if o[:detach]
  if o[:connect]
    $logger.error("--connect and --detach (or --reservation) are incompatible. Exiting.")
    exit(1)
  end
  # Wrap a value in single quotes for the shell, escaping embedded single
  # quotes so that such values cannot break the generated command line.
  sh_quote = lambda { |value| "'" + value.to_s.gsub("'") { "'\\''" } + "'" }

  jo[:cmd] = 'grd inner'
  jo[:cmd] += " -e #{sh_quote.call(o[:env])}" if o[:env]
  if o[:script]
    # upload script to frontend
    ssh = nil
    gateway = nil
    fnode = "#{site}.grid5000.fr"
    if g5k_internal?
      ssh = Net::SSH.start(fnode, $login)
    else
      # From outside, the frontend is reached through the access gateway.
      gateway = Net::SSH::Gateway.new('access.grid5000.fr', $login)
      ssh = gateway.ssh(fnode, $login)
    end
    tmpfile = ssh.exec3!("mkdir -p .cache/grd ; mktemp .cache/grd/script.XXXXXX", { :no_log => true, :no_output => true })[:stdout].chomp
    ssh.scp.upload!(o[:script], tmpfile)
    ssh.exec3!("chmod u+x #{tmpfile}", { :no_log => true, :no_output => true })
    ssh.close
    ssh.shutdown!
    gateway.shutdown! if gateway
    jo[:cmd] += " -f #{sh_quote.call(tmpfile)}"
    (o[:script_arg] || []).each do |a|
      jo[:cmd] += " -a #{sh_quote.call(a)}"
    end
    if o[:terminate_after_script]
      jo[:cmd] += " --terminate-after-script"
    end
  end
  if jo[:cmd] == 'grd inner'
    # since we have nothing specific to do, we just sleep instead of calling 'grd inner'.
    jo[:cmd] = "sleep infinity"
  end
end
332
+
333
# Submit the reservation; a 400 answer is reported together with the message
# of the reservation system, then the process exits.
begin
  job = $g5k.reserve(jo)
rescue Cute::G5K::BadRequest => e
  $logger.error("Reservation failed with error 400 (Bad Request)")
  if e.inner_message
    $logger.error("Error message from reservation system:")
    e.inner_message.each_line { |l| puts " " + l }
  end
  exit(1)
end

$logger.info("OAR job (reservation) ID: #{job['uid']}")

# Poll the job until it runs (or, in detached mode, until its start date is
# known). Ctrl-C during the wait or the provisioning releases the job.
begin
  t = nil
  ts = Time::now
  loop do
    tries = 0
    begin
      job = $g5k.get_job(site, job['uid'])
    rescue Cute::G5K::RequestFailed, Cute::G5K::BadRequest => e
      $logger.info "Fetching reservation status failed due to API error: #{e.message}"
      tries += 1
      if tries < 5
        $logger.info "Retrying.."
        sleep 5
        retry
      else
        $logger.info "Too many attempts, exiting. The job might still be running."
        exit(1)
      end
    end
    break if job['state'] == 'running'
    # A job that already ended will never become 'running': stop polling
    # instead of looping forever.
    if %w{error terminated}.include?(job['state'])
      break if o[:detach]
      $logger.error "Reservation #{job['uid']} is in state '#{job['state']}' and will not start. Exiting."
      exit(1)
    end
    t = job['scheduled_at']
    if !t.nil?
      t = Time.at(t)
      secs = [ t - Time.now, 0 ].max.to_i
      $logger.info "Reservation #{job['uid']} should be available at #{t} (in #{secs} s)"
      break if o[:detach]
    end
    Kernel.sleep(2)
  end

  if o[:detach]
    $logger.info "Your reservation will run in the background. Follow its status:"
    $logger.info " Using the API: https://api.grid5000.fr/3.0/sites/#{site}/jobs/#{job['uid']}?pretty"
    $logger.info " Using the Gantt: https://intranet.grid5000.fr/oar/#{site}/drawgantt-svg/"
    $logger.info " When it will be running, using its output files:"
    $logger.info " #{site}:/home/#{$login}/OAR.#{o[:name]}.#{job['uid']}.stdout"
    $logger.info " #{site}:/home/#{$login}/OAR.#{o[:name]}.#{job['uid']}.stderr"
    $logger.info "Exiting."
    exit(0)
  end

  $logger.info "Resources are available after #{(Time::now - ts).to_i}s"
  show_nodes(job)
  # deployment
  do_provisioning(site, job, o)

rescue Interrupt
  $logger.info "Interrupted. Releasing resources."
  $g5k.release(job)
  exit(1)
end
397
+
398
# Run the user script on the first node of the reservation.
# The connection is made as root when an environment was provisioned, and as
# the regular user otherwise.
tlogin = o[:env] ? 'root' : $login
fnode = job['assigned_nodes'].first
# Inside a VLAN the node is reachable under its kavlan name.
vlan_of_job = get_job_vlan(job)
fnode = nodename_in_vlan(fnode, vlan_of_job) if vlan_of_job
if o[:script]
  via_gateway = !g5k_internal?
  gateway = nil
  ssh = if via_gateway
          gateway = Net::SSH::Gateway.new('access.grid5000.fr', $login)
          gateway.ssh(fnode, tlogin)
        else
          Net::SSH.start(fnode, tlogin)
        end
  # We use a file in /root to avoid issues when unmounting /tmp in the script
  mktemp_cmd = if tlogin == 'root'
                 "mkdir -p /root/.cache/grd && mktemp /root/.cache/grd/grd.XXXXXX"
               else
                 "mktemp /tmp/grd.XXXXXX"
               end
  quiet = { :no_log => true, :no_output => true }
  tmpfile = ssh.exec3!(mktemp_cmd, quiet)[:stdout].chomp
  ssh.scp.upload!(o[:script], tmpfile)
  ssh.exec3!("chmod u+x #{tmpfile}", quiet)
  args = (o[:script_arg] || []).join(' ')
  $logger.info "Running script on #{fnode} ..."
  # A failing script does not abort grd.
  ssh.exec3!("#{tmpfile} #{args}", { :ignore_error => true })
  $logger.info "Script finished."
  ssh.close
  ssh.shutdown!
  gateway.shutdown! if via_gateway
end
432
+
433
# Final step: either release the job right away (-T), or print the ssh command
# for the first node and, with --connect, open the session and offer to
# release the job afterwards.
if o[:terminate_after_script]
  $logger.info("Releasing resources.")
  $g5k.release(job)

else
  jh = g5k_internal? ? "" : "-J #{$login}@access.grid5000.fr "
  cmd = "ssh -o StrictHostKeyChecking=accept-new #{jh}#{tlogin}@#{fnode}"
  $logger.info "Connect to first node using:"
  $logger.info " #{cmd}"

  if o[:connect]
    puts
    system(cmd)
    puts
    s = nil
    loop do
      print "Connection to node terminated. Terminate resources reservation? (Y/N) "
      # Read from $stdin explicitly: Kernel#gets would read from files named
      # by leftover command-line arguments.
      answer = $stdin.gets
      if answer.nil?
        # End of input: keep the reservation rather than crash on nil.
        puts
        s = 'N'
        break
      end
      s = answer.chomp.upcase
      break if s == "Y" or s == "N"
    end
    if s == 'Y'
      $logger.info("Releasing resources.")
      $g5k.release(job)
    end
  end
end
459
+
460
+ elsif ['inner'].include?(ARGV[0])
461
# Drop the sub-command word; what is left in ARGV are its options.
ARGV.shift
# o collects the parsed options of 'grd inner'.
o = { :type => [] }
inner_parser = OptionParser.new
inner_parser.banner = "usage: grd inner [options]"
inner_parser.separator " Do the actions that need to be performed inside a reservation"
inner_parser.on("-e", "--environment ENV", "Kadeploy environment to provision") { |v| o[:env] = v }
inner_parser.on("-f", "--script FILE", "script") { |v| o[:script] = v }
inner_parser.on("-a", "--script-arg ARG", "argument to pass to the script (can be specified multiple times)") { |v| (o[:script_arg] ||= []) << v }
inner_parser.on("-T", "--terminate-after-script", "Terminate the reservation after the script execution (default: keep resources until end of walltime)") { o[:terminate_after_script] = true }
# Consumes the recognised options from ARGV.
inner_parser.parse!
481
+
482
+
483
# cute_init creates $logger, so it has to run before the first log call below
# (the site-detection error path uses $logger).
cute_init

# The site is the second-to-last label before ".grid5000.fr" in the host name.
if `hostname --fqdn`.chomp =~ /.*\.([^.]+)\.grid5000\.fr$/
  o[:site] = $1
  site = $1
else
  $logger.error("ERROR: could not determine site.")
  exit(1)
end

jobid = ENV['OAR_JOB_ID'].to_i

$logger.info "Arguments: #{ARGV.inspect}"
$logger.info "OAR job ID: #{jobid}"
# we use OAR_NODEFILE to avoid an API request that would not work from a node
nodes = IO::readlines(ENV['OAR_NODEFILE']).map { |l| l.chomp }.uniq
$logger.info "Nodes: #{nodes.join(' ')}"
if nodes.length > 1
  $logger.info "Nodeset: #{nodeset(nodes)}"
end

if o[:env]
  job = $g5k.get_job(site, jobid) # check if this works from node
  do_provisioning(site, job, o)
end
508
+
509
# Run the user script: over SSH on the freshly provisioned first node when an
# environment was deployed, locally otherwise.
if o[:script]
  args = (o[:script_arg] || []).join(' ')
  if o[:env]
    # we need to copy the script to the node using ssh
    fnode = nodes.first
    # if in VLAN, adjust node name
    if get_job_vlan(job)
      fnode = nodename_in_vlan(fnode, get_job_vlan(job))
    end
    tlogin = 'root'
    ssh = Net::SSH.start(fnode, tlogin)
    # We use a file in /root to avoid issues when unmounting /tmp in the script
    tmpfile = ssh.exec3!("mkdir -p /root/.cache/grd && mktemp /root/.cache/grd/grd.XXXXXX", { :no_log => true, :no_output => true })[:stdout].chomp
    ssh.scp.upload!(o[:script], tmpfile)
    ssh.exec3!("chmod u+x #{tmpfile}", { :no_log => true, :no_output => true })
    $logger.info "Running script on #{fnode} ..."
    ssh.exec3!("#{tmpfile} #{args}", { :ignore_error => true })
    $logger.info "Script finished."
    ssh.close
    ssh.shutdown!
  else
    # we are already on the node: no SSH session exists on this path, so the
    # script is executed directly.
    system("chmod u+x #{o[:script]}") or raise "could not make #{o[:script]} executable"
    $logger.info "Running script ..."
    # As on the SSH path (:ignore_error), a failing script does not abort grd.
    system("#{o[:script]} #{args}")
    $logger.info "Script finished."
  end
end

if o[:terminate_after_script]
  # Nothing else to do here: the message is logged and the command returns.
  $logger.info("Terminating resources reservation.")
else
  $logger.info("Waiting until end of reservation.")
  sleep() # sleep until end of reservation
end
544
+
545
+ elsif ['list', 'l'].include?(ARGV[0])
546
# cute_init runs first because the -s handler below calls resolve_site,
# which queries the API.
cute_init

# Drop the sub-command word; what is left in ARGV are its options.
ARGV.shift
o = {}
list_parser = OptionParser.new
list_parser.banner = "usage: grd list [options]"
list_parser.separator " List reservations"
list_parser.on("-s", "--site SITE", "Only list reservations on the specified site") { |v| o[:site] = resolve_site(v) }
list_parser.on("-a", "--all", "List all reservations, not just the current user's") { o[:all] = true }
list_parser.on("-r", "--raw", "Raw output (suitable for scripts)") { o[:raw] = true }
# Consumes the recognised options from ARGV.
list_parser.parse!
563
+
564
# Sites to query: the one given with -s, or every site known to the API.
sites = o[:site] ? [ o[:site] ] : get_api_sites.sort
# terminal-table is optional: without it the output falls back to raw mode.
begin
  require 'terminal-table'
rescue LoadError
  STDERR.puts "'terminal-table' library not found, using raw mode"
  o[:raw] = true
end
# a holds the output rows; its first row is the header.
a = []
r = ['site', 'id']
if o[:all]
  r += ['project', 'user']
end
r += ['queue', 'state', 'start', 'end', 'duration', 'name', 'types', 'nodes', 'count' ]
a << r

login = o[:all] ? nil : $login
m = Mutex::new
# The names listed after ';' are declared block-local. Some of them (t, nodes)
# are also assigned elsewhere in the top-level scope of this script; without
# the declaration every iteration would share and overwrite those variables,
# mixing up values between sites processed at the same time.
# NOTE(review): peach presumably runs iterations in parallel threads (hence the
# Mutex around the shared row list) - confirm against the 'peach' gem.
sites.peach do |cursite; jobs|
  # FIXME implement pagination (see https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=13882 about missing doc)
  jobs = $g5k.get_jobs(cursite, login, %w{waiting launching running hold error terminated} - %w{error terminated}, true, false)
  jobs.each do |j; sa, sa_s, endt, endt_s, t, mm, ss, hh, dd, walltime_s, nodes, nodes_count, types, row|
    # Start time: actual start if the job has started, scheduled start otherwise.
    sa = if j['started_at'] and j['started_at'] > 0
           Time::at(j['started_at'])
         elsif j['scheduled_at'] and j['scheduled_at'] > 0
           Time::at(j['scheduled_at'])
         else
           nil
         end
    sa_s = sa.nil? ? '' : sa.strftime("%Y-%m-%d %H:%M:%S")
    endt = sa ? sa + j['walltime'] : nil
    endt_s = endt.nil? ? '' : endt.strftime("%Y-%m-%d %H:%M:%S")

    # convert walltime to d/h/m/s
    t = j['walltime']
    mm, ss = t.divmod(60)
    hh, mm = mm.divmod(60)
    dd, hh = hh.divmod(24)
    walltime_s = "%dd%2dh%2dm%2ds" % [dd, hh, mm, ss]
    nodes = nodeset(j['assigned_nodes'])
    nodes_count = j['assigned_nodes'].uniq.length
    # we hide the default monitor type to avoid useless noise
    types = j['types'].reject { |type| type == 'monitor=prom_.*default_metrics' }.join(',')
    m.synchronize do
      row = [ cursite, j['uid'] ]
      if o[:all]
        row += [ j['project'], j['user'] ]
      end
      row += [ j['queue'], j['state'], sa_s, endt_s, walltime_s, j['name'], types, nodes, nodes_count ]
      a << row
    end
  end
end
# Keep the header first, then order the rows by site and job id.
a = [a[0]] + a[1..-1].sort { |a1, a2| [a1[0],a1[1]] <=> [a2[0], a2[1]] }
if o[:raw]
  a.each do |l|
    puts l.join("\t")
  end
else
  table = Terminal::Table.new
  table.headings = a[0]
  table.rows = a[1..-1]
  puts table
end
635
+
636
+ elsif ['terminate', 't'].include?(ARGV[0])
637
# cute_init runs first because the -s handler below calls resolve_site,
# which queries the API.
cute_init
# Drop the sub-command word; what is left in ARGV are its options.
ARGV.shift
o = {}
term_parser = OptionParser.new
term_parser.banner = "usage: grd terminate [options]"
term_parser.separator " Terminate reservations"
term_parser.on("-s", "--site SITE", "Only terminate reservations on the specified site") { |v| o[:site] = resolve_site(v) }
term_parser.on("-j", "--job JOB_ID", "Only terminate the specified job/reservation (implies -y)") do |v|
  o[:job] = v
  o[:yes] = true
end
term_parser.on("-y", "--yes", "Do not ask for confirmation") { o[:yes] = true }
# Consumes the recognised options from ARGV.
term_parser.parse!
654
+
655
# Walk over the user's jobs that are not yet in the 'error' or 'terminated'
# state on the selected site(s) and release them, asking for confirmation
# unless -y (or -j) was given.
if o[:site]
  sites = [ o[:site] ]
else
  sites = get_api_sites
end
sites.each do |current_site|
  jobs = $g5k.get_jobs(current_site, $login, %w{waiting launching running hold error terminated} - %w{error terminated})
  jobs.each do |j|
    # With -j, skip every job but the requested one.
    next if o[:job] and j['uid'].to_i != o[:job].to_i
    if j['started_at'] and j['started_at'] > 0
      sa = Time::at(j['started_at'])
    else
      sa = ''
    end
    sig = "site=#{current_site} id=#{j['uid']} state=#{j['state']} started_at=#{sa} nodes=#{nodeset(j['assigned_nodes'])}"
    if o[:yes]
      puts "Terminating #{sig}"
      $g5k.release(j)
    else
      print "Terminate #{sig} ? (Y/N) "
      s = nil
      loop do
        # Read from $stdin explicitly: Kernel#gets would read from files named
        # by leftover command-line arguments.
        answer = $stdin.gets
        if answer.nil?
          # End of input: keep the job rather than crash on nil.
          puts
          s = 'N'
          break
        end
        s = answer.chomp.upcase
        break if s == "Y" or s == "N"
        print "Terminate? (Y/N) "
      end
      if s == 'Y'
        $g5k.release(j)
      end
    end
  end
end
687
+
688
+ else
689
# Unknown or missing sub-command: print the general usage and fail.
usage_text = <<-EOF
usage: grd <command> [options]

Available commands:
bootstrap Reserve, provision, configure and connect to resources (alias: bs)
list List resources reservations (alias: l)
terminate Terminate resources reservation(s) (alias: t)

Use 'grd <command> --help' for details.
EOF
puts usage_text
exit(1)
700
+ end
701
+
@@ -0,0 +1,3 @@
1
+ .gitattributes export-ignore
2
+ gbp.conf export-ignore
3
+ salsa-ci.yml export-ignore