arvados-cli 0.1.20160608142315 → 0.1.20160913014253
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crunch-job +33 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a3dae7abb2565c96c930378ff0804dee060904f
|
4
|
+
data.tar.gz: ae346cc798ae73cc9e97299c245af5133f53cf29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9185b99de69e9576411307ce24383734a9f03875a9f9723b238c4cb6d5049341132215360956d9060f3b91732378158bce64149281d3ffb2d7b55cc9238cc259
|
7
|
+
data.tar.gz: 4fd3ab654fc817827d1cfb79c03519112c7397b5f9c66f33e6d49d0f9f0ad9b4bac7a03a94d52c36442f5f5d0898e7f09004e74860d5c894276fd8f1acea7e3b
|
data/bin/crunch-job
CHANGED
@@ -355,6 +355,7 @@ my @jobstep_done = ();
|
|
355
355
|
my @jobstep_tomerge = ();
|
356
356
|
my $jobstep_tomerge_level = 0;
|
357
357
|
my $squeue_checked = 0;
|
358
|
+
my $sinfo_checked = 0;
|
358
359
|
my $latest_refresh = scalar time;
|
359
360
|
|
360
361
|
|
@@ -1401,6 +1402,37 @@ sub check_squeue
|
|
1401
1402
|
}
|
1402
1403
|
}
|
1403
1404
|
|
1405
|
+
sub check_sinfo
{
  # If a node fails in a multi-node "srun" call during job setup, the call
  # may hang instead of exiting with a nonzero code. This function checks
  # "sinfo" for the health of the nodes that were allocated and ensures that
  # they are all still in the "alloc" state. If a node that is allocated to
  # this job is not in "alloc" state, then set please_freeze.
  #
  # This is only called from srun_sync() for node configuration. If a
  # node fails doing actual work, there are other recovery mechanisms.
  #
  # Takes no arguments and returns nothing useful; its effects are updating
  # $sinfo_checked and (possibly) setting $main::please_freeze.

  # Do not call `sinfo` more than once every 15 seconds.
  return if $sinfo_checked > time - 15;
  $sinfo_checked = time;

  # Without a node list there is nothing to restrict the query to, and the
  # states of unrelated nodes must not be allowed to freeze this job.
  my $nodelist = $ENV{SLURM_NODELIST};
  return if !defined $nodelist || $nodelist eq '';

  # The output format "%t" means output node states.
  my @node_states = `sinfo --nodes=\Q$nodelist\E --noheader -o "%t"`;
  if ($? != 0)
  {
    Log(undef, "warning: sinfo exit status $? ($!)");
    return;
  }

  # chomp (not chop): strip only a trailing newline, so a final line that
  # lacks one does not lose the last character of its state name.
  chomp @node_states;

  foreach my $state (@node_states)
  {
    # String comparison is required here. With "!=" both operands numify
    # to 0, the test is never true, and please_freeze is never set.
    if ($state ne "alloc" && $state ne "alloc*") {
      $main::please_freeze = 1;
      last;
    }
  }
}
|
1404
1436
|
|
1405
1437
|
sub release_allocation
|
1406
1438
|
{
|
@@ -1906,7 +1938,6 @@ sub freezeunquote
|
|
1906
1938
|
return $s;
|
1907
1939
|
}
|
1908
1940
|
|
1909
|
-
|
1910
1941
|
sub srun_sync
|
1911
1942
|
{
|
1912
1943
|
my $srunargs = shift;
|
@@ -1961,6 +1992,7 @@ sub srun_sync
|
|
1961
1992
|
if (!$busy || ($latest_refresh + 2 < scalar time)) {
|
1962
1993
|
check_refresh_wanted();
|
1963
1994
|
check_squeue();
|
1995
|
+
check_sinfo();
|
1964
1996
|
}
|
1965
1997
|
if (!$busy) {
|
1966
1998
|
select(undef, undef, undef, 0.1);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20160608142315
|
4
|
+
version: 0.1.20160913014253
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-08 00:00:00.000000000 Z
|
11
|
+
date: 2016-09-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -164,7 +164,7 @@ dependencies:
|
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0.8'
|
167
|
-
description: Arvados command line tools, git commit
|
167
|
+
description: Arvados command line tools, git commit b54478ea1b7c8aaeaf565d591f32769bcdc09b8f
|
168
168
|
email: gem-dev@curoverse.com
|
169
169
|
executables:
|
170
170
|
- arv
|