arvados-cli 0.1.20160608142315 → 0.1.20160913014253
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/crunch-job +33 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a3dae7abb2565c96c930378ff0804dee060904f
|
4
|
+
data.tar.gz: ae346cc798ae73cc9e97299c245af5133f53cf29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9185b99de69e9576411307ce24383734a9f03875a9f9723b238c4cb6d5049341132215360956d9060f3b91732378158bce64149281d3ffb2d7b55cc9238cc259
|
7
|
+
data.tar.gz: 4fd3ab654fc817827d1cfb79c03519112c7397b5f9c66f33e6d49d0f9f0ad9b4bac7a03a94d52c36442f5f5d0898e7f09004e74860d5c894276fd8f1acea7e3b
|
data/bin/crunch-job
CHANGED
@@ -355,6 +355,7 @@ my @jobstep_done = ();
|
|
355
355
|
my @jobstep_tomerge = ();
|
356
356
|
my $jobstep_tomerge_level = 0;
|
357
357
|
my $squeue_checked = 0;
|
358
|
+
my $sinfo_checked = 0;
|
358
359
|
my $latest_refresh = scalar time;
|
359
360
|
|
360
361
|
|
@@ -1401,6 +1402,37 @@ sub check_squeue
|
|
1401
1402
|
}
|
1402
1403
|
}
|
1403
1404
|
|
1405
|
+
sub check_sinfo
{
  # If a node fails in a multi-node "srun" call during job setup, the call
  # may hang instead of exiting with a nonzero code. This function checks
  # "sinfo" for the health of the nodes that were allocated and ensures that
  # they are all still in the "alloc" state. If a node that is allocated to
  # this job is not in "alloc" state, then set please_freeze.
  #
  # This is only called from srun_sync() for node configuration. If a
  # node fails doing actual work, there are other recovery mechanisms.
  #
  # Takes no arguments. Its only effects are updating $sinfo_checked,
  # possibly logging a warning, and possibly setting $main::please_freeze.

  # Without a node list there is nothing meaningful to ask sinfo about.
  my $nodelist = $ENV{SLURM_NODELIST};
  return if !defined $nodelist || $nodelist eq '';

  # Do not call `sinfo` more than once every 15 seconds.
  return if $sinfo_checked > time - 15;
  $sinfo_checked = time;

  # The output format "%t" means output node states.
  my @sinfo = `sinfo --nodes=\Q$nodelist\E --noheader -o "%t"`;
  if ($? != 0)
  {
    Log(undef, "warning: sinfo exit status $? ($!)");
    return;
  }
  # Use chomp rather than chop so that only a trailing newline is removed
  # and a state name is never truncated.
  chomp @sinfo;

  foreach my $state (@sinfo)
  {
    # Node states are strings, so they must be compared with "ne".
    # The numeric "!=" operator would convert both sides to 0 and the
    # test would never succeed.
    if ($state ne "alloc" && $state ne "alloc*") {
      $main::please_freeze = 1;
      # One unhealthy node is enough; no need to look at the rest.
      last;
    }
  }
}
|
1404
1436
|
|
1405
1437
|
sub release_allocation
|
1406
1438
|
{
|
@@ -1906,7 +1938,6 @@ sub freezeunquote
|
|
1906
1938
|
return $s;
|
1907
1939
|
}
|
1908
1940
|
|
1909
|
-
|
1910
1941
|
sub srun_sync
|
1911
1942
|
{
|
1912
1943
|
my $srunargs = shift;
|
@@ -1961,6 +1992,7 @@ sub srun_sync
|
|
1961
1992
|
if (!$busy || ($latest_refresh + 2 < scalar time)) {
|
1962
1993
|
check_refresh_wanted();
|
1963
1994
|
check_squeue();
|
1995
|
+
check_sinfo();
|
1964
1996
|
}
|
1965
1997
|
if (!$busy) {
|
1966
1998
|
select(undef, undef, undef, 0.1);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20160608142315
|
4
|
+
version: 0.1.20160913014253
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -164,7 +164,7 @@ dependencies:
|
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0.8'
|
167
|
-
description: Arvados command line tools, git commit
|
167
|
+
description: Arvados command line tools, git commit b54478ea1b7c8aaeaf565d591f32769bcdc09b8f
|
168
168
|
email: gem-dev@curoverse.com
|
169
169
|
executables:
|
170
170
|
- arv
|