arvados-cli 0.1.20170711213448 → 0.1.20170726144433

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +25 -12
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 499fd2dfb6a9e53d98f54a37c6092dc97012bd76
4
- data.tar.gz: c123cdb6169f8c2f94875931378647f3d6d72d5d
3
+ metadata.gz: 42613b210d35b7a656101e0b9a32ccf9fddaf985
4
+ data.tar.gz: a74110bd649f4a8c6b3d93131b016b22d9c1a8d4
5
5
  SHA512:
6
- metadata.gz: 277ba1ec29870bb2d56e322ca2f50f75c77d0476ddc82ee07a4330943f28a43839c8c584a4a0a56160d4df2072349ad5d5101d559c68c55cd3bc4e6a7949634f
7
- data.tar.gz: 0c439c970a6df4338c9eb766096caec719ce22d7ccfeac0a88bc9bba7a76bb26781ac3b7a0444e34891313f97234a7ebe47b8214f5e5ec2288618ab1f579e00d
6
+ metadata.gz: 2051c249708e46df2ddbdef8932220ac85271ba757a74398d54c2454de1469b62d82475619bce2ee009086ab61891d492a71f803e84dc0ee9ceb936c54c7bb20
7
+ data.tar.gz: 73409dda1ed16857f8db7fe4fd581f2aa9864eb6d7ae6c09ef99f436810d235f097053ae9a14e156f1ae3ff9e2cb145b8dd800c8c6925b5378c543e174176d7a
data/bin/crunch-job CHANGED
@@ -189,7 +189,7 @@ if (($Job || $local_job)->{docker_image_locator}) {
189
189
  $cmd = [$docker_bin, 'ps', '-q'];
190
190
  }
191
191
  Log(undef, "Sanity check is `@$cmd`");
192
- my ($exited, $stdout, $stderr) = srun_sync(
192
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
193
193
  ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
194
194
  $cmd,
195
195
  {label => "sanity check"});
@@ -397,7 +397,7 @@ if (!defined $no_clear_tmp) {
397
397
  # Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
398
398
  # up work directories crunch_tmp/work, crunch_tmp/opt,
399
399
  # crunch_tmp/src*.
400
- my ($exited, $stdout, $stderr) = srun_sync(
400
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
401
401
  ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
402
402
  ['bash', '-ec', q{
403
403
  arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
@@ -405,7 +405,7 @@ rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRU
405
405
  }],
406
406
  {label => "clean work dirs"});
407
407
  if ($exited != 0) {
408
- exit(EX_RETRY_UNLOCKED);
408
+ exit_retry_unlocked();
409
409
  }
410
410
  }
411
411
 
@@ -439,20 +439,23 @@ fi
439
439
  echo >&2 "image loaded successfully"
440
440
  };
441
441
 
442
- my ($exited, $stdout, $stderr) = srun_sync(
442
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
443
443
  ["srun", "--nodelist=" . join(',', @node)],
444
444
  ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
445
445
  {label => "load docker image"});
446
446
  if ($exited != 0)
447
447
  {
448
- exit(EX_RETRY_UNLOCKED);
448
+ exit_retry_unlocked();
449
449
  }
450
450
 
451
451
  # Determine whether this version of Docker supports memory+swap limits.
452
- ($exited, $stdout, $stderr) = srun_sync(
452
+ ($exited, $stdout, $stderr, $tempfail) = srun_sync(
453
453
  ["srun", "--nodes=1"],
454
454
  [$docker_bin, 'run', '--help'],
455
455
  {label => "check --memory-swap feature"});
456
+ if ($tempfail) {
457
+ exit_retry_unlocked();
458
+ }
456
459
  $docker_limitmem = ($stdout =~ /--memory-swap/);
457
460
 
458
461
  # Find a non-root Docker user to use.
@@ -472,7 +475,7 @@ echo >&2 "image loaded successfully"
472
475
  $label = "check whether user '$try_user' is UID 0";
473
476
  $try_user_arg = "--user=$try_user";
474
477
  }
475
- my ($exited, $stdout, $stderr) = srun_sync(
478
+ my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
476
479
  ["srun", "--nodes=1"],
477
480
  ["/bin/sh", "-ec",
478
481
  "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
@@ -486,6 +489,8 @@ echo >&2 "image loaded successfully"
486
489
  Log(undef, "Container will run with $dockeruserarg");
487
490
  }
488
491
  last;
492
+ } elsif ($tempfail) {
493
+ exit_retry_unlocked();
489
494
  }
490
495
  }
491
496
 
@@ -678,11 +683,14 @@ else {
678
683
  "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
679
684
 
680
685
  $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
681
- my ($stdout, $stderr);
682
- ($exited, $stdout, $stderr) = srun_sync(
686
+ my ($stdout, $stderr, $tempfail);
687
+ ($exited, $stdout, $stderr, $tempfail) = srun_sync(
683
688
  \@srunargs, \@execargs,
684
689
  {label => "run install script on all workers"},
685
- $build_script . $git_archive);
690
+ $build_script . $git_archive);
691
+ if ($tempfail) {
692
+ exit_retry_unlocked();
693
+ }
686
694
 
687
695
  my $stderr_anything_from_script = 0;
688
696
  for my $line (split(/\n/, $stderr)) {
@@ -1117,7 +1125,7 @@ if (!defined $main::success)
1117
1125
  } elsif ($working_slot_count < 1) {
1118
1126
  save_output_collection();
1119
1127
  save_meta();
1120
- exit(EX_RETRY_UNLOCKED);
1128
+ exit_retry_unlocked();
1121
1129
  } elsif ($thisround_succeeded == 0 &&
1122
1130
  ($thisround_failed == 0 || $thisround_failed > 4)) {
1123
1131
  my $message = "stop because $thisround_failed tasks failed and none succeeded";
@@ -2044,7 +2052,7 @@ sub srun_sync
2044
2052
  if ($main::please_freeze || $j->{tempfail}) {
2045
2053
  $exited ||= 255;
2046
2054
  }
2047
- return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
2055
+ return ($exited, $j->{stdout_captured}, $j->{stderr_captured}, $j->{tempfail});
2048
2056
  }
2049
2057
 
2050
2058
 
@@ -2132,6 +2140,11 @@ sub find_docker_image {
2132
2140
  }
2133
2141
  }
2134
2142
 
2143
+ sub exit_retry_unlocked {
2144
+ Log(undef, "Transient failure with lock acquired; asking for re-dispatch by exiting ".EX_RETRY_UNLOCKED);
2145
+ exit(EX_RETRY_UNLOCKED);
2146
+ }
2147
+
2135
2148
  sub retry_count {
2136
2149
  # Calculate the number of times an operation should be retried,
2137
2150
  # assuming exponential backoff, and that we're willing to retry as
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20170711213448
4
+ version: 0.1.20170726144433
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-11 00:00:00.000000000 Z
11
+ date: 2017-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -164,7 +164,7 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0.8'
167
- description: Arvados command line tools, git commit c4670f707f305b675669f0d4fb085568bc373b60
167
+ description: Arvados command line tools, git commit 8cbabbbe014628574a10a48148d179c14137d61f
168
168
  email: gem-dev@curoverse.com
169
169
  executables:
170
170
  - arv