ruby-cute 0.12 → 0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/grd ADDED
@@ -0,0 +1,701 @@
1
+ #!/usr/bin/ruby
2
+ # For help, see 'grd -h'
3
+ #
4
+ #
5
+ # Grid5000 bugs that would help improve grd:
6
+ # Bug 11547 - l'API OAR génère des fichiers temporaires oarapi.subscript.* dans les homes
7
+ # Bug 13929 - Stocker la sortie des admissions rules dans un événement
8
+ # Bug 13867 - l'API de kadeploy donne les logs de manière non idempotente
9
+ # Random issues with the API that cause error 500:
10
+ # - Bug 13928 - erreur 500 sur l'API lors d'un DELETE
11
+ # - Bug 13742 - erreur 500 sur l'API pour créer un job
12
+ # - Bug 13766 - erreur 500 api
13
+ # - Bug 11697 - api-proxy/g5k-api: erreur "AH01102: error reading status line from remote server localhost:8000"
14
+ #
15
+ # Test cases:
16
+ # grd bs -s ncy
17
+ # grd bs -s ncy -c
18
+ # grd bs -s ncy -q production -l {grappe}/nodes=1+{"type='kavlan'"}/vlan=1 -w 0:10 --armor
19
+ # grd bs -s ncy -q production -l {grappe}/nodes=1+{"type='kavlan'"}/vlan=1 -w 0:10 --armor -d
20
+ # echo 'hostname ; pwd' > setup-script
21
+ # grd bs -s ncy -f setup-script -T
22
+ # grd l
23
+
24
+ gem 'ruby-cute', '>= 0.12'
25
+ $:.unshift File.expand_path("../../lib", File.realpath(__FILE__))
26
+
27
+ require 'cute'
28
+ require 'optparse'
29
+ require 'pp'
30
+ require 'net/ssh/gateway'
31
+ require 'net/ssh'
32
+ require 'net/scp'
33
+ require 'peach'
34
+
35
# Initialize the global state shared by every grd sub-command.
#
# Sets up:
# * $logger   - Logger writing to STDOUT (DEBUG when GRD_DEBUG is set or
#               $debug is already true, INFO otherwise)
# * $login / $password - read from ~/.grid5000_api.yml when that file exists,
#               otherwise $login is the current Unix user when running on a
#               *.grid5000.fr host
# * $g5k      - the Cute::G5K::API client
#
# Exits with status 1 when there is no configuration file and the host is
# outside Grid'5000.
def cute_init
  $logger = Logger.new(STDOUT)
  STDOUT.sync = true
  STDERR.sync = true
  conf = ENV['HOME']+'/.grid5000_api.yml'
  if File::exist?(conf)
    # Parsing is best-effort (the API client reads the file itself), but a
    # failure is reported instead of being silently swallowed.
    yconf = begin
      YAML::load(IO::read(conf))
    rescue StandardError => e
      STDERR.puts "WARNING: could not read #{conf}: #{e.message}"
      nil
    end
    # An empty file parses to false/nil and a scalar-only file to a String:
    # only a Hash can be indexed by 'username'/'password'.
    yconf = {} unless yconf.is_a?(Hash)
    $login = yconf['username']
    $password = yconf['password']
    $g5k = Cute::G5K::API.new(:conf_file => conf, :debug => true)
  elsif g5k_internal?
    $login = `whoami`.chomp
    $g5k = Cute::G5K::API.new(:debug => true)
  else
    puts "ERROR: no .grid5000_api.yml found, and outside Grid'5000. Need API credentials."
    exit(1)
  end
  $g5k.rest.user_agent += " grd"
  $debug = true if ENV['GRD_DEBUG']
  $logger.level = $debug ? Logger::DEBUG : Logger::INFO
  $logger.formatter = proc { |severity, datetime, _progname, msg| "%s, [%s] %s\n" % [severity[0..0], datetime.strftime("%Y-%m-%d %H:%M:%S"), msg ] }
  $g5k.logger = $logger
end
62
+
63
# Run the given block with the global logger limited to WARN (unless $debug
# is set), and always put it back to INFO afterwards, even when the block
# raises (e.g. Interrupt), so that later INFO messages are not lost.
def with_quiet_logger
  $logger.level = Logger::WARN unless $debug
  yield
ensure
  $logger.level = Logger::INFO unless $debug
end

# Provision (deploy) the environment o[:env] on the resources of +job+.
#
# site - site name, used to build the live-log URL
# job  - job description as returned by the API client
# o    - options hash; nothing is done (nil is returned) unless o[:env] is set
#
# The user's public SSH keys (~/.ssh/*.pub) are passed to the deployment, and
# the job's VLAN, if any, is passed as :vlan_id. If the deployment reports an
# error or any node is not in state 'OK', the reservation is released and the
# process exits with status 1.
def do_provisioning(site, job, o)
  return unless o[:env]

  keys = Dir::glob(ENV['HOME'] + '/.ssh/*.pub').map { |f| IO::read(f) }.join("\n")
  ts = Time::now
  d = {
    :env => o[:env],
    :keys => keys
  }
  vlan = get_job_vlan(job)
  d[:vlan_id] = vlan if vlan

  job = with_quiet_logger { $g5k.deploy(job, d) }
  duid = job['deploy'].last['uid']
  $logger.info "Started provisioning of environment #{o[:env]}. Live log: https://api.grid5000.fr/3.0/sites/#{site}/internal/kadeployapi/deployment/#{duid}/logs"
  job = with_quiet_logger { $g5k.wait_for_deploy(job) }

  deploy = job['deploy'].last
  # deploy['result'] maps each node to its result; any non-OK node is a failure
  failed = deploy['status'] == 'error' ||
           deploy['result'].to_a.any? { |e| e[1]['state'] != 'OK' }
  if failed
    $logger.error "Provisioning failed. Terminating resources reservation and exiting."
    $g5k.release(job)
    exit(1)
  end
  $logger.info "Provisioning completed after #{(Time::now - ts).to_i}s"
end
92
+
93
# Return the first VLAN listed in job['resources_by_type']['vlans'], or nil
# when the job has no VLAN (key missing, nil, or empty list).
def get_job_vlan(job)
  vlans = (job['resources_by_type'] || {})['vlans']
  return nil if vlans.nil? || vlans.empty?

  vlans.first
end
100
+
101
# Build the name of +node+ inside VLAN +vlan+ by appending "-kavlan-<vlan>"
# to the host label, e.g.
#   nodename_in_vlan('grisou-1.nancy.grid5000.fr', 4)
#   #=> 'grisou-1-kavlan-4.nancy.grid5000.fr'
#
# Only the first label is modified; the rest of the name is kept untouched,
# whatever its number of labels (a short host name simply gets the suffix).
def nodename_in_vlan(node, vlan)
  nodename, domain = node.split('.', 2)
  node_vlan_name = "#{nodename}-kavlan-#{vlan}"
  node_vlan_name += ".#{domain}" if domain
  node_vlan_name
end
109
+
110
# Log the nodes assigned to +job+, its VLAN when it has one, and the folded
# nodeset when more than one node is assigned.
def show_nodes(job)
  assigned = job['assigned_nodes']
  $logger.info "Nodes: #{assigned.join(' ')}"

  vlan = get_job_vlan(job)
  $logger.info "VLAN: #{vlan}" if vlan

  $logger.info "Nodeset: #{nodeset(assigned)}" if assigned.length > 1
end
120
+
121
# Process-wide memo for values fetched from the API.
$cache = {}

# Return the list of site identifiers reported by the API client. The list is
# fetched on first use only, then frozen and served from $cache['sites'].
def get_api_sites
  $cache['sites'] ||= $g5k.site_uids.freeze
end
129
+
130
# Resolve a user-supplied site name, alias or prefix to a full site name.
#
# s - full site name, unambiguous prefix of one, or one of the aliases
#     'ncy'/'ny' (nancy) and 'ns' (nantes)
#
# Returns the matching site name from get_api_sites. An exact match always
# wins, so a site whose name is a prefix of another site stays selectable.
# Raises RuntimeError when nothing matches or when the prefix is ambiguous.
def resolve_site(s)
  s = case s
      when 'ncy', 'ny' then 'nancy'
      when 'ns' then 'nantes'
      else s
      end
  sites = get_api_sites
  return s if sites.include?(s)

  l = sites.select { |e| e.start_with?(s) }.sort
  raise "Invalid site: #{s}" if l.empty?
  raise "Invalid site: #{s} (matches: #{l.join(' ')})" if l.length > 1

  l.first
end
143
+
144
# Fold a list of node names into a single string by piping the
# comma-separated names through the external `nodeset -f` command, and return
# that command's output without its trailing newline.
def nodeset(nodes)
  joined = nodes.join(',')
  folded = `echo #{joined} | nodeset -f`
  folded.chomp
end
147
+
148
# Tell whether this machine is inside Grid'5000, i.e. whether the name printed
# by `hostname --fqdn` ends with ".grid5000.fr". Always returns true or false.
def g5k_internal?
  fqdn = `hostname --fqdn`.chomp
  !(fqdn =~ /\.grid5000\.fr$/).nil?
end
151
+
152
+ if ['bootstrap', 'bs'].include?(ARGV[0])
153
+ ARGV.shift
154
+ o = {}
155
+ o[:type] = []
156
+ OptionParser.new do |opts|
157
+ opts.banner = "usage: grd bootstrap [options]"
158
+ opts.separator " Reserve, provision, configure and connect to resources (alias: bs)"
159
+ opts.separator ""
160
+ opts.separator "# Options:"
161
+ opts.separator "## (A) Select which resources to reserve, and for how long:"
162
+ opts.on("-s", "--site SITE", "Site on which resources will be requested") do |d|
163
+ o[:site] = d
164
+ end
165
+ opts.on("-l", "--resources DESCRIPTION", "description of the requested resources (using the syntax for oarsub -l). default: nodes=1") do |d|
166
+ o[:resources] = d
167
+ end
168
+ opts.on("-w", "--walltime DURATION", "maximum duration of the reservation (using the syntax for oarsub -l)") do |d|
169
+ o[:walltime] = d
170
+ end
171
+ opts.separator ""
172
+ opts.separator "## (B) Choose when to reserve resources (default: ASAP, and wait interactively):"
173
+ opts.on("-d", "--detach", "Reserve resources ASAP, but do not wait interactively (batch/submission mode)") do
174
+ o[:detach] = true
175
+ end
176
+ opts.on("-r", "--reservation DATE", "Resources at a specified date and time. Do not wait for the reservation to start.") do |d|
177
+ o[:reservation] = d
178
+ o[:detach] = true
179
+ end
180
+ opts.separator ""
181
+ opts.separator "## (C) Set the environment (system image) to provision. If none specified, the 'standard' environment will be used"
182
+ opts.on("-e", "--environment ENV", "Kadeploy environment to provision") do |d|
183
+ o[:env] = d
184
+ end
185
+ opts.separator ""
186
+ opts.separator "## (D) Run a file (typically a script) on the first node"
187
+ opts.on("-f", "--script FILE", "script") do |d|
188
+ o[:script] = d
189
+ end
190
+ opts.on("-a", "--script-arg ARG", "argument to pass to the script (can be specified multiple times)") do |d|
191
+ o[:script_arg] ||= []
192
+ o[:script_arg] << d
193
+ end
194
+ opts.on("-T", "--terminate-after-script", "Terminate the reservation after the script execution") do
195
+ o[:terminate_after_script] = true
196
+ end
197
+ opts.separator ""
198
+ opts.separator "## (E) Connect interactively to the first node (incompatible with '--reservation' and '--detach')."
199
+ opts.on("-c", "--connect") do
200
+ o[:connect] = true
201
+ end
202
+ opts.separator ""
203
+ opts.separator "## Advanced options:"
204
+ opts.separator "### Related to reservation"
205
+
206
+ opts.on("-t", "--type JOB_TYPE", "OAR job type (can be specified multiple times)") do |d|
207
+ o[:type] << d
208
+ end
209
+ opts.on("-q", "--queue QUEUE", "OAR queue") do |d|
210
+ o[:queue] = d
211
+ end
212
+ opts.on("-p", "--project PROJECT", "OAR project") do |d|
213
+ o[:project] = d
214
+ end
215
+ opts.on("-n", "--name NAME", "OAR name") do |d|
216
+ o[:name] = d
217
+ end
218
+ opts.separator ""
219
+ opts.separator "### Related to post-deployment configuration"
220
+ opts.on("", "--armor", "Fetch and run g5k-armor-node.py (see https://www.grid5000.fr/w/Armored_Node_for_Sensitive_Data). This implies '--environment debian11-x64-big'") do
221
+ o[:armor] = true
222
+ end
223
+ opts.separator ""
224
+ opts.separator "# Notes:"
225
+ opts.separator "* All options are optional except '--site' if running from outside Grid'5000."
226
+ opts.separator "* In steps (D) and (E), the connection is done as 'root' if an environment was provisioned (and as the normal user otherwise)."
227
+ opts.separator ""
228
+ opts.separator "# Examples:"
229
+ opts.separator "## Basic usage: reserve one node on the current site, as soon as possible, and for the default walltime, and wait until it is available"
230
+ opts.separator " grd bs"
231
+ opts.separator ""
232
+ opts.separator "## Reserve, provision, execute a script, and connect to the node"
233
+ opts.separator " grd bs -s ly -l nodes=3 -w 0:10 -e debian11-x64-min -f setup-script -c"
234
+ opts.separator ""
235
+ opts.separator "Examples for -l / --resources:"
236
+ opts.separator ' nodes=3 {"gpu_count>0"}/nodes=1 {"cluster=\'gros\'"}/nodes=1 {nova}/nodes=3 (see https://www.grid5000.fr/w/OAR_Syntax_simplification)'
237
+ opts.separator ""
238
+ opts.separator "Examples for -r / --reservation:"
239
+ opts.separator ' "2022-03-30 19:30:05" "2022-03-30 19" "19" "2022-03-30 19,2022-04-02 04" "13,15"'
240
+ end.parse!
241
+
242
+ cute_init
243
+
244
+ if not o[:site]
245
+ if `hostname --fqdn`.chomp =~ /.*\.([^.]+)\.grid5000\.fr$/
246
+ o[:site] = $1
247
+ else
248
+ $logger.error("Running outside Grid'5000: the target site must be specified using '-s SITE'.")
249
+ exit(1)
250
+ end
251
+ end
252
+ site = resolve_site(o[:site])
253
+ jo = {}
254
+ jo[:site] = site
255
+ jo[:resources] = o[:resources] || 'nodes=1'
256
+ jo[:walltime] = o[:walltime] if o[:walltime] # else use OAR's default
257
+ jo[:reservation] = o[:reservation] if o[:reservation]
258
+ jo[:queue] = o[:queue] if o[:queue] # else use OAR's default
259
+ jo[:project] = o[:project] if o[:project] # else use OAR's default
260
+ o[:name] ||= 'grd'
261
+ jo[:name] = o[:name]
262
+ jo[:wait] = false
263
+ if o[:env] and not o[:type].include?('deploy')
264
+ o[:type] << 'deploy'
265
+ end
266
+ jo[:type] = o[:type]
267
+
268
+ if o[:armor]
269
+ if o[:script]
270
+ $logger.error("--armor and --script are incompatible. Exiting.")
271
+ exit(1)
272
+ end
273
+ if o[:env] and o[:env] != 'debian11-x64-big'
274
+ $logger.error("--armor and --environment are incompatible. Exiting.")
275
+ exit(1)
276
+ end
277
+ o[:env] = 'debian11-x64-big'
278
+ o[:type] << 'deploy' if not o[:type].include?('deploy')
279
+ tmp = `mktemp /tmp/armor.XXXXXX`.chomp
280
+ File::open(tmp, "w") do |fd|
281
+ fd.puts <<-EOF
282
+ #!/bin/bash -xe
283
+ wget https://gitlab.inria.fr/grid5000/g5k-armor/-/raw/master/g5k-armor-node.py
284
+ chmod a+rx g5k-armor-node.py
285
+ ./g5k-armor-node.py
286
+ EOF
287
+ end
288
+ o[:script] = tmp
289
+ end
290
+
291
+ if o[:detach]
292
+ if o[:connect]
293
+ $logger.error("--connect and --detach (or --reservation) are incompatible. Exiting.")
294
+ exit(1)
295
+ end
296
+ jo[:cmd] = 'grd inner'
297
+ if o[:env]
298
+ jo[:cmd] += " -e '#{o[:env]}'"
299
+ end
300
+ if o[:script]
301
+ # upload script to frontend
302
+ ssh = nil
303
+ gateway = nil
304
+ fnode = "#{site}.grid5000.fr"
305
+ if g5k_internal?
306
+ ssh = Net::SSH.start(fnode, $login)
307
+ else
308
+ gateway = Net::SSH::Gateway.new('access.grid5000.fr', $login)
309
+ ssh = gateway.ssh(fnode, $login)
310
+ end
311
+ tmpfile = ssh.exec3!("mkdir -p .cache/grd ; mktemp .cache/grd/script.XXXXXX", { :no_log => true, :no_output => true })[:stdout].chomp
312
+ ssh.scp.upload!(o[:script], tmpfile)
313
+ ssh.exec3!("chmod u+x #{tmpfile}", { :no_log => true, :no_output => true })
314
+ ssh.close
315
+ ssh.shutdown!
316
+ if not g5k_internal?
317
+ gateway.shutdown!
318
+ end
319
+ jo[:cmd] += " -f '#{tmpfile}'"
320
+ (o[:script_arg] || []).each do |a|
321
+ jo[:cmd] += " -a '#{a}'"
322
+ end
323
+ if o[:terminate_after_script]
324
+ jo[:cmd] += " --terminate-after-script"
325
+ end
326
+ end
327
+ if jo[:cmd] == 'grd inner'
328
+ # since we have nothing specific to do, we just sleep instead of calling 'grd inner'.
329
+ jo[:cmd] = "sleep infinity"
330
+ end
331
+ end
332
+
333
+ begin
334
+ job = $g5k.reserve(jo)
335
+ rescue Cute::G5K::BadRequest => e
336
+ $logger.error("Reservation failed with error 400 (Bad Request)")
337
+ if e.inner_message
338
+ $logger.error("Error message from reservation system:")
339
+ e.inner_message.each_line { |l| puts " " + l }
340
+ end
341
+ exit(1)
342
+ end
343
+
344
+ $logger.info("OAR job (reservation) ID: #{job['uid']}")
345
+
346
+ begin
347
+ t = nil
348
+ ts = Time::now
349
+ loop do
350
+ tries = 0
351
+ begin
352
+ job = $g5k.get_job(site, job['uid'])
353
+ rescue Cute::G5K::RequestFailed, Cute::G5K::BadRequest => e
354
+ $logger.info "Fetching reservation status failed due to API error: #{e.message}"
355
+ tries += 1
356
+ if tries < 5
357
+ $logger.info "Retrying.."
358
+ sleep 5
359
+ retry
360
+ else
361
+ $logger.info "Too many attempts, exiting. The job might still be running."
362
+ exit(1)
363
+ end
364
+ end
365
+ break if job['state'] == 'running'
366
+ t = job['scheduled_at']
367
+ if !t.nil?
368
+ t = Time.at(t)
369
+ secs = [ t - Time.now, 0 ].max.to_i
370
+ $logger.info "Reservation #{job['uid']} should be available at #{t} (in #{secs} s)"
371
+ break if o[:detach]
372
+ end
373
+ Kernel.sleep(2)
374
+ end
375
+
376
+ if o[:detach]
377
+ $logger.info "Your reservation will run in the background. Follow its status:"
378
+ $logger.info " Using the API: https://api.grid5000.fr/3.0/sites/#{site}/jobs/#{job['uid']}?pretty"
379
+ $logger.info " Using the Gantt: https://intranet.grid5000.fr/oar/#{site}/drawgantt-svg/"
380
+ $logger.info " When it will be running, using its output files:"
381
+ $logger.info " #{site}:/home/#{$login}/OAR.#{o[:name]}.#{job['uid']}.stdout"
382
+ $logger.info " #{site}:/home/#{$login}/OAR.#{o[:name]}.#{job['uid']}.stderr"
383
+ $logger.info "Exiting."
384
+ exit(0)
385
+ end
386
+
387
+ $logger.info "Resources are available after #{(Time::now - ts).to_i}s"
388
+ show_nodes(job)
389
+ # deployment
390
+ do_provisioning(site, job, o)
391
+
392
+ rescue Interrupt
393
+ $logger.info "Interrupted. Releasing resources."
394
+ $g5k.release(job)
395
+ exit(1)
396
+ end
397
+
398
+ # execute script
399
+ tlogin = o[:env] ? 'root' : $login
400
+ fnode = job['assigned_nodes'].first
401
+ # if in VLAN, adjust node name
402
+ if get_job_vlan(job)
403
+ fnode = nodename_in_vlan(fnode, get_job_vlan(job))
404
+ end
405
+ if o[:script]
406
+ ssh = nil
407
+ gateway = nil
408
+ if g5k_internal?
409
+ ssh = Net::SSH.start(fnode, tlogin)
410
+ else
411
+ gateway = Net::SSH::Gateway.new('access.grid5000.fr', $login)
412
+ ssh = gateway.ssh(fnode, tlogin)
413
+ end
414
+ if tlogin == 'root'
415
+ # We use a file in /root to avoid issues when unmounting /tmp in the script
416
+ tmpfile = ssh.exec3!("mkdir -p /root/.cache/grd && mktemp /root/.cache/grd/grd.XXXXXX", { :no_log => true, :no_output => true })[:stdout].chomp
417
+ else
418
+ tmpfile = ssh.exec3!("mktemp /tmp/grd.XXXXXX", { :no_log => true, :no_output => true })[:stdout].chomp
419
+ end
420
+ ssh.scp.upload!(o[:script], tmpfile)
421
+ ssh.exec3!("chmod u+x #{tmpfile}", { :no_log => true, :no_output => true })
422
+ args = (o[:script_arg] || []).join(' ')
423
+ $logger.info "Running script on #{fnode} ..."
424
+ ssh.exec3!("#{tmpfile} #{args}", { :ignore_error => true })
425
+ $logger.info "Script finished."
426
+ ssh.close
427
+ ssh.shutdown!
428
+ if not g5k_internal?
429
+ gateway.shutdown!
430
+ end
431
+ end
432
+
433
+ if o[:terminate_after_script]
434
+ $logger.info("Releasing resources.")
435
+ $g5k.release(job)
436
+
437
+ else
438
+ jh = g5k_internal? ? "" : "-J #{$login}@access.grid5000.fr "
439
+ cmd = "ssh -o StrictHostKeyChecking=accept-new #{jh}#{tlogin}@#{fnode}"
440
+ $logger.info "Connect to first node using:"
441
+ $logger.info " #{cmd}"
442
+
443
# Open an interactive SSH session to the first node, then ask whether the
# reservation should be terminated.
if o[:connect]
  puts
  system(cmd)
  puts
  s = nil
  loop do
    print "Connection to node terminated. Terminate resources reservation? (Y/N) "
    # Read from the terminal explicitly: a bare `gets` reads ARGF, i.e. it
    # would try to open any leftover command-line word as a file.
    answer = $stdin.gets
    # EOF (stdin closed): keep the reservation instead of crashing on nil.
    s = answer.nil? ? 'N' : answer.chomp.upcase
    break if s == "Y" or s == "N"
  end
  if s == 'Y'
    $logger.info("Releasing resources.")
    $g5k.release(job)
  end
end
458
+ end
459
+
460
+ elsif ['inner'].include?(ARGV[0])
461
+ ARGV.shift
462
+ o = {}
463
+ o[:type] = []
464
+ OptionParser.new do |opts|
465
+ opts.banner = "usage: grd inner [options]"
466
+ opts.separator " Do the actions that need to be performed inside a reservation"
467
+ opts.on("-e", "--environment ENV", "Kadeploy environment to provision") do |d|
468
+ o[:env] = d
469
+ end
470
+ opts.on("-f", "--script FILE", "script") do |d|
471
+ o[:script] = d
472
+ end
473
+ opts.on("-a", "--script-arg ARG", "argument to pass to the script (can be specified multiple times)") do |d|
474
+ o[:script_arg] ||= []
475
+ o[:script_arg] << d
476
+ end
477
+ opts.on("-T", "--terminate-after-script", "Terminate the reservation after the script execution (default: keep resources until end of walltime)") do
478
+ o[:terminate_after_script] = true
479
+ end
480
+ end.parse!
481
+
482
+
483
# Set up the logger and API client first: the error path below reports
# through $logger, which does not exist until cute_init has run.
cute_init

# Derive the site from this host's FQDN (<host>.<site>.grid5000.fr).
if `hostname --fqdn`.chomp =~ /.*\.([^.]+)\.grid5000\.fr$/
  o[:site] = $1
  site = $1
else
  $logger.error("ERROR: could not determine site.")
  exit(1)
end
492
+
493
+ jobid = ENV['OAR_JOB_ID'].to_i
494
+
495
+ $logger.info "Arguments: #{ARGV.inspect}"
496
+ $logger.info "OAR job ID: #{jobid}"
497
+ # we use OAR_NODEFILE to avoid an API request that would not work from a node
498
+ nodes = IO::readlines(ENV['OAR_NODEFILE']).map { |l| l.chomp }.uniq
499
+ $logger.info "Nodes: #{nodes.join(' ')}"
500
+ if nodes.length > 1
501
+ $logger.info "Nodeset: #{nodeset(nodes)}"
502
+ end
503
+
504
+ if o[:env]
505
+ job = $g5k.get_job(site, jobid) # check if this works from node
506
+ do_provisioning(site, job, o)
507
+ end
508
+
509
# Run the user script: over SSH as root on the first node when an environment
# was provisioned, or directly on this machine otherwise.
if o[:script]
  args = (o[:script_arg] || []).join(' ')
  if o[:env]
    # we need to copy the script to the node using ssh
    fnode = nodes.first
    # if in VLAN, adjust node name
    if get_job_vlan(job)
      fnode = nodename_in_vlan(fnode, get_job_vlan(job))
    end
    tlogin = 'root'
    ssh = Net::SSH.start(fnode, tlogin)
    # We use a file in /root to avoid issues when unmounting /tmp in the script
    tmpfile = ssh.exec3!("mkdir -p /root/.cache/grd && mktemp /root/.cache/grd/grd.XXXXXX", { :no_log => true, :no_output => true })[:stdout].chomp
    ssh.scp.upload!(o[:script], tmpfile)
    ssh.exec3!("chmod u+x #{tmpfile}", { :no_log => true, :no_output => true })
    $logger.info "Running script on #{fnode} ..."
    ssh.exec3!("#{tmpfile} #{args}", { :ignore_error => true })
    $logger.info "Script finished."
    ssh.close
    ssh.shutdown!
  else
    # we are already on the node: there is no SSH session and no uploaded
    # copy in this branch, so the script is executed locally.
    # An absolute path keeps a bare file name from being looked up in PATH.
    script = File.expand_path(o[:script])
    system("chmod u+x #{script}") or raise "Could not make #{script} executable"
    $logger.info "Running script ..."
    # As in the SSH case, a failing script is reported but is not fatal.
    unless system("#{script} #{args}")
      $logger.warn "Script failed (#{$?})."
    end
    $logger.info "Script finished."
  end
end
537
+
538
+ if o[:terminate_after_script]
539
+ $logger.info("Terminating resources reservation.")
540
+ else
541
+ $logger.info("Waiting until end of reservation.")
542
+ sleep() # sleep until end of reservation
543
+ end
544
+
545
+ elsif ['list', 'l'].include?(ARGV[0])
546
+ cute_init
547
+
548
+ ARGV.shift
549
+ o = {}
550
+ OptionParser.new do |opts|
551
+ opts.banner = "usage: grd list [options]"
552
+ opts.separator " List reservations"
553
+ opts.on("-s", "--site SITE", "Only list reservations on the specified site") do |d|
554
+ o[:site] = resolve_site(d)
555
+ end
556
+ opts.on("-a", "--all", "List all reservations, not just the current user's") do
557
+ o[:all] = true
558
+ end
559
+ opts.on("-r", "--raw", "Raw output (suitable for scripts)") do
560
+ o[:raw] = true
561
+ end
562
+ end.parse!
563
+
564
+ if o[:site]
565
+ sites = [ o[:site] ]
566
+ else
567
+ sites = get_api_sites.sort
568
+ end
569
+ begin
570
+ require 'terminal-table'
571
+ rescue LoadError
572
+ STDERR.puts "'terminal-table' library not found, using raw mode"
573
+ o[:raw] = true
574
+ end
575
+ a = []
576
+ r = ['site', 'id']
577
+ if o[:all]
578
+ r += ['project', 'user']
579
+ end
580
+ r += ['queue', 'state', 'start', 'end', 'duration', 'name', 'types', 'nodes', 'count' ]
581
+ a << r
582
+
583
+ login = o[:all] ? nil : $login
584
+ m = Mutex::new
585
+ sites.peach do |cursite|
586
+ # FIXME implement pagination (see https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=13882 about missing doc)
587
+ jobs = $g5k.get_jobs(cursite, login, %w{waiting launching running hold error terminated} - %w{error terminated}, true, false)
588
+ jobs.each do |j|
589
+ sa = if j['started_at'] and j['started_at'] > 0
590
+ Time::at(j['started_at'])
591
+ elsif j['scheduled_at'] and j['scheduled_at'] > 0
592
+ Time::at(j['scheduled_at'])
593
+ else
594
+ nil
595
+ end
596
+ sa_s = sa.nil? ? '' : sa.strftime("%Y-%m-%d %H:%M:%S")
597
+ endt = if sa
598
+ sa + j['walltime']
599
+ else
600
+ nil
601
+ end
602
+ endt_s = endt.nil? ? '' : endt.strftime("%Y-%m-%d %H:%M:%S")
603
+
604
+ # convert walltime to d/h/m/s
605
+ t = j['walltime']
606
+ mm, ss = t.divmod(60) #=> [4515, 21]
607
+ hh, mm = mm.divmod(60) #=> [75, 15]
608
+ dd, hh = hh.divmod(24) #=> [3, 3]
609
+ walltime_s = "%dd%2dh%2dm%2ds" % [dd, hh, mm, ss]
610
+ nodes = nodeset(j['assigned_nodes'])
611
+ nodes_count = j['assigned_nodes'].uniq.length
612
+ # we hide the default monitor type to avoid useless noise
613
+ types = j['types'].reject { |type| type == 'monitor=prom_.*default_metrics' }.join(',')
614
+ m.synchronize do
615
+ r = [ cursite, j['uid'] ]
616
+ if o[:all]
617
+ r += [ j['project'], j['user'] ]
618
+ end
619
+ r += [ j['queue'], j['state'], sa_s, endt_s, walltime_s, j['name'], types, nodes, nodes_count ]
620
+ a << r
621
+ end
622
+ end
623
+ end
624
+ a = [a[0]] + a[1..-1].sort { |a1, a2| [a1[0],a1[1]] <=> [a2[0], a2[1]] }
625
+ if o[:raw]
626
+ a.each do |l|
627
+ puts l.join("\t")
628
+ end
629
+ else
630
+ table = Terminal::Table.new
631
+ table.headings = a[0]
632
+ table.rows = a[1..-1]
633
+ puts table
634
+ end
635
+
636
+ elsif ['terminate', 't'].include?(ARGV[0])
637
+ cute_init
638
+ ARGV.shift
639
+ o = {}
640
+ OptionParser.new do |opts|
641
+ opts.banner = "usage: grd terminate [options]"
642
+ opts.separator " Terminate reservations"
643
+ opts.on("-s", "--site SITE", "Only terminate reservations on the specified site") do |d|
644
+ o[:site] = resolve_site(d)
645
+ end
646
+ opts.on("-j", "--job JOB_ID", "Only terminate the specified job/reservation (implies -y)") do |j|
647
+ o[:job] = j
648
+ o[:yes] = true
649
+ end
650
+ opts.on("-y", "--yes", "Do not ask for confirmation") do
651
+ o[:yes] = true
652
+ end
653
+ end.parse!
654
+
655
+ if o[:site]
656
+ sites = [ o[:site] ]
657
+ else
658
+ sites = get_api_sites
659
+ end
660
# Walk through the user's non-finished jobs on every selected site and release
# them, asking for confirmation for each one unless o[:yes] is set.
sites.each do |current_site|
  jobs = $g5k.get_jobs(current_site, $login, %w{waiting launching running hold error terminated} - %w{error terminated})
  jobs.each do |j|
    next if o[:job] and j['uid'].to_i != o[:job].to_i
    if j['started_at'] and j['started_at'] > 0
      sa = Time::at(j['started_at'])
    else
      sa = ''
    end
    sig = "site=#{current_site} id=#{j['uid']} state=#{j['state']} started_at=#{sa} nodes=#{nodeset(j['assigned_nodes'])}"
    if o[:yes]
      puts "Terminating #{sig}"
      $g5k.release(j)
    else
      print "Terminate #{sig} ? (Y/N) "
      s = nil
      loop do
        # Read from the terminal explicitly: a bare `gets` reads ARGF, i.e. it
        # would try to open any leftover command-line word as a file.
        answer = $stdin.gets
        # EOF (stdin closed): treat as "no" instead of crashing on nil.
        s = answer.nil? ? 'N' : answer.chomp.upcase
        break if s == "Y" or s == "N"
        print "Terminate? (Y/N) "
      end
      if s == 'Y'
        $g5k.release(j)
      end
    end
  end
end
687
+
688
+ else
689
+ puts <<-EOF
690
+ usage: grd <command> [options]
691
+
692
+ Available commands:
693
+ bootstrap Reserve, provision, configure and connect to resources (alias: bs)
694
+ list List resources reservations (alias: l)
695
+ terminate Terminate resources reservation(s) (alias: t)
696
+
697
+ Use 'grd <command> --help' for details.
698
+ EOF
699
+ exit(1)
700
+ end
701
+
@@ -0,0 +1,3 @@
1
+ .gitattributes export-ignore
2
+ gbp.conf export-ignore
3
+ salsa-ci.yml export-ignore