arvados-cli 0.1.20160608142315 → 0.1.20160913014253

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +33 -1
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a5fc889cbdad1ec1ac89ed8674cf1499e90c20d4
4
- data.tar.gz: aed390277dc320517d93fd8ab7b14da48b3b0a2c
3
+ metadata.gz: 2a3dae7abb2565c96c930378ff0804dee060904f
4
+ data.tar.gz: ae346cc798ae73cc9e97299c245af5133f53cf29
5
5
  SHA512:
6
- metadata.gz: 92372e41f978b51561327fba6bf9107e1291aac4d8dd36de024e8714399a1a9cd904f4713a28bdf663a34049de94b82c4c67e718b27c277b686a4ed0e28f601e
7
- data.tar.gz: 11bef7ffe6f6f12c0c10e3739727d7376457069b547ce1d80953d16c7079b00fa4518ff68cf668eb09719d6df404fc6e7a8142feecb390f9f4ed244bf6d5958c
6
+ metadata.gz: 9185b99de69e9576411307ce24383734a9f03875a9f9723b238c4cb6d5049341132215360956d9060f3b91732378158bce64149281d3ffb2d7b55cc9238cc259
7
+ data.tar.gz: 4fd3ab654fc817827d1cfb79c03519112c7397b5f9c66f33e6d49d0f9f0ad9b4bac7a03a94d52c36442f5f5d0898e7f09004e74860d5c894276fd8f1acea7e3b
data/bin/crunch-job CHANGED
@@ -355,6 +355,7 @@ my @jobstep_done = ();
355
355
  my @jobstep_tomerge = ();
356
356
  my $jobstep_tomerge_level = 0;
357
357
  my $squeue_checked = 0;
358
+ my $sinfo_checked = 0;
358
359
  my $latest_refresh = scalar time;
359
360
 
360
361
 
@@ -1401,6 +1402,37 @@ sub check_squeue
1401
1402
  }
1402
1403
  }
1403
1404
 
1405
+ sub check_sinfo
1406
+ {
1407
+ # If a node fails in a multi-node "srun" call during job setup, the call
1408
+ # may hang instead of exiting with a nonzero code. This function checks
1409
+ # "sinfo" for the health of the nodes that were allocated and ensures that
1410
+ # they are all still in the "alloc" state. If a node that is allocated to
1411
+ # this job is not in "alloc" state, then set please_freeze.
1412
+ #
1413
+ # This is only called from srun_sync() for node configuration. If a
1414
+ # node fails doing actual work, there are other recovery mechanisms.
1415
+
1416
+ # Do not call `sinfo` more than once every 15 seconds.
1417
+ return if $sinfo_checked > time - 15;
1418
+ $sinfo_checked = time;
1419
+
1420
+ # The output format "%t" means output node states.
1421
+ my @sinfo = `sinfo --nodes=\Q$ENV{SLURM_NODELIST}\E --noheader -o "%t"`;
1422
+ if ($? != 0)
1423
+ {
1424
+ Log(undef, "warning: sinfo exit status $? ($!)");
1425
+ return;
1426
+ }
1427
+ chomp @sinfo; # strip only the trailing newline; chop would eat a real character if it were missing
1428
+
1429
+ foreach (@sinfo)
1430
+ {
1431
+ if ($_ ne "alloc" && $_ ne "alloc*") { # string comparison: != numifies both sides to 0 and never matches
1432
+ $main::please_freeze = 1;
1433
+ }
1434
+ }
1435
+ }
1404
1436
 
1405
1437
  sub release_allocation
1406
1438
  {
@@ -1906,7 +1938,6 @@ sub freezeunquote
1906
1938
  return $s;
1907
1939
  }
1908
1940
 
1909
-
1910
1941
  sub srun_sync
1911
1942
  {
1912
1943
  my $srunargs = shift;
@@ -1961,6 +1992,7 @@ sub srun_sync
1961
1992
  if (!$busy || ($latest_refresh + 2 < scalar time)) {
1962
1993
  check_refresh_wanted();
1963
1994
  check_squeue();
1995
+ check_sinfo();
1964
1996
  }
1965
1997
  if (!$busy) {
1966
1998
  select(undef, undef, undef, 0.1);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20160608142315
4
+ version: 0.1.20160913014253
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-08 00:00:00.000000000 Z
11
+ date: 2016-09-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -164,7 +164,7 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0.8'
167
- description: Arvados command line tools, git commit 548e0c54db524cb7317850d4dfd8f3ee0b93cdb0
167
+ description: Arvados command line tools, git commit b54478ea1b7c8aaeaf565d591f32769bcdc09b8f
168
168
  email: gem-dev@curoverse.com
169
169
  executables:
170
170
  - arv