arvados-cli 0.1.20170711213448 → 0.1.20170726144433

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +25 -12
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 499fd2dfb6a9e53d98f54a37c6092dc97012bd76
4
- data.tar.gz: c123cdb6169f8c2f94875931378647f3d6d72d5d
3
+ metadata.gz: 42613b210d35b7a656101e0b9a32ccf9fddaf985
4
+ data.tar.gz: a74110bd649f4a8c6b3d93131b016b22d9c1a8d4
5
5
  SHA512:
6
- metadata.gz: 277ba1ec29870bb2d56e322ca2f50f75c77d0476ddc82ee07a4330943f28a43839c8c584a4a0a56160d4df2072349ad5d5101d559c68c55cd3bc4e6a7949634f
7
- data.tar.gz: 0c439c970a6df4338c9eb766096caec719ce22d7ccfeac0a88bc9bba7a76bb26781ac3b7a0444e34891313f97234a7ebe47b8214f5e5ec2288618ab1f579e00d
6
+ metadata.gz: 2051c249708e46df2ddbdef8932220ac85271ba757a74398d54c2454de1469b62d82475619bce2ee009086ab61891d492a71f803e84dc0ee9ceb936c54c7bb20
7
+ data.tar.gz: 73409dda1ed16857f8db7fe4fd581f2aa9864eb6d7ae6c09ef99f436810d235f097053ae9a14e156f1ae3ff9e2cb145b8dd800c8c6925b5378c543e174176d7a
data/bin/crunch-job CHANGED
@@ -189,7 +189,7 @@ if (($Job || $local_job)->{docker_image_locator}) {
189
189
  $cmd = [$docker_bin, 'ps', '-q'];
190
190
  }
191
191
  Log(undef, "Sanity check is `@$cmd`");
192
- my ($exited, $stdout, $stderr) = srun_sync(
192
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
193
193
  ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
194
194
  $cmd,
195
195
  {label => "sanity check"});
@@ -397,7 +397,7 @@ if (!defined $no_clear_tmp) {
397
397
  # Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
398
398
  # up work directories crunch_tmp/work, crunch_tmp/opt,
399
399
  # crunch_tmp/src*.
400
- my ($exited, $stdout, $stderr) = srun_sync(
400
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
401
401
  ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
402
402
  ['bash', '-ec', q{
403
403
  arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
@@ -405,7 +405,7 @@ rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRU
405
405
  }],
406
406
  {label => "clean work dirs"});
407
407
  if ($exited != 0) {
408
- exit(EX_RETRY_UNLOCKED);
408
+ exit_retry_unlocked();
409
409
  }
410
410
  }
411
411
 
@@ -439,20 +439,23 @@ fi
439
439
  echo >&2 "image loaded successfully"
440
440
  };
441
441
 
442
- my ($exited, $stdout, $stderr) = srun_sync(
442
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
443
443
  ["srun", "--nodelist=" . join(',', @node)],
444
444
  ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
445
445
  {label => "load docker image"});
446
446
  if ($exited != 0)
447
447
  {
448
- exit(EX_RETRY_UNLOCKED);
448
+ exit_retry_unlocked();
449
449
  }
450
450
 
451
451
  # Determine whether this version of Docker supports memory+swap limits.
452
- ($exited, $stdout, $stderr) = srun_sync(
452
+ ($exited, $stdout, $stderr, $tempfail) = srun_sync(
453
453
  ["srun", "--nodes=1"],
454
454
  [$docker_bin, 'run', '--help'],
455
455
  {label => "check --memory-swap feature"});
456
+ if ($tempfail) {
457
+ exit_retry_unlocked();
458
+ }
456
459
  $docker_limitmem = ($stdout =~ /--memory-swap/);
457
460
 
458
461
  # Find a non-root Docker user to use.
@@ -472,7 +475,7 @@ echo >&2 "image loaded successfully"
472
475
  $label = "check whether user '$try_user' is UID 0";
473
476
  $try_user_arg = "--user=$try_user";
474
477
  }
475
- my ($exited, $stdout, $stderr) = srun_sync(
478
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
476
479
  ["srun", "--nodes=1"],
477
480
  ["/bin/sh", "-ec",
478
481
  "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
@@ -486,6 +489,8 @@ echo >&2 "image loaded successfully"
486
489
  Log(undef, "Container will run with $dockeruserarg");
487
490
  }
488
491
  last;
492
+ } elsif ($tempfail) {
493
+ exit_retry_unlocked();
489
494
  }
490
495
  }
491
496
 
@@ -678,11 +683,14 @@ else {
678
683
  "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
679
684
 
680
685
  $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
681
- my ($stdout, $stderr);
682
- ($exited, $stdout, $stderr) = srun_sync(
686
+ my ($stdout, $stderr, $tempfail);
687
+ ($exited, $stdout, $stderr, $tempfail) = srun_sync(
683
688
  \@srunargs, \@execargs,
684
689
  {label => "run install script on all workers"},
685
- $build_script . $git_archive);
690
+ $build_script . $git_archive);
691
+ if ($tempfail) {
692
+ exit_retry_unlocked();
693
+ }
686
694
 
687
695
  my $stderr_anything_from_script = 0;
688
696
  for my $line (split(/\n/, $stderr)) {
@@ -1117,7 +1125,7 @@ if (!defined $main::success)
1117
1125
  } elsif ($working_slot_count < 1) {
1118
1126
  save_output_collection();
1119
1127
  save_meta();
1120
- exit(EX_RETRY_UNLOCKED);
1128
+ exit_retry_unlocked();
1121
1129
  } elsif ($thisround_succeeded == 0 &&
1122
1130
  ($thisround_failed == 0 || $thisround_failed > 4)) {
1123
1131
  my $message = "stop because $thisround_failed tasks failed and none succeeded";
@@ -2044,7 +2052,7 @@ sub srun_sync
2044
2052
  if ($main::please_freeze || $j->{tempfail}) {
2045
2053
  $exited ||= 255;
2046
2054
  }
2047
- return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
2055
+ return ($exited, $j->{stdout_captured}, $j->{stderr_captured}, $j->{tempfail});
2048
2056
  }
2049
2057
 
2050
2058
 
@@ -2132,6 +2140,11 @@ sub find_docker_image {
2132
2140
  }
2133
2141
  }
2134
2142
 
2143
+ sub exit_retry_unlocked {
2144
+ Log(undef, "Transient failure with lock acquired; asking for re-dispatch by exiting ".EX_RETRY_UNLOCKED);
2145
+ exit(EX_RETRY_UNLOCKED);
2146
+ }
2147
+
2135
2148
  sub retry_count {
2136
2149
  # Calculate the number of times an operation should be retried,
2137
2150
  # assuming exponential backoff, and that we're willing to retry as
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20170711213448
4
+ version: 0.1.20170726144433
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-11 00:00:00.000000000 Z
11
+ date: 2017-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -164,7 +164,7 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0.8'
167
- description: Arvados command line tools, git commit c4670f707f305b675669f0d4fb085568bc373b60
167
+ description: Arvados command line tools, git commit 8cbabbbe014628574a10a48148d179c14137d61f
168
168
  email: gem-dev@curoverse.com
169
169
  executables:
170
170
  - arv