arvados-cli 0.1.20170711213448 → 0.1.20170726144433
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crunch-job +25 -12
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 42613b210d35b7a656101e0b9a32ccf9fddaf985
|
4
|
+
data.tar.gz: a74110bd649f4a8c6b3d93131b016b22d9c1a8d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2051c249708e46df2ddbdef8932220ac85271ba757a74398d54c2454de1469b62d82475619bce2ee009086ab61891d492a71f803e84dc0ee9ceb936c54c7bb20
|
7
|
+
data.tar.gz: 73409dda1ed16857f8db7fe4fd581f2aa9864eb6d7ae6c09ef99f436810d235f097053ae9a14e156f1ae3ff9e2cb145b8dd800c8c6925b5378c543e174176d7a
|
data/bin/crunch-job
CHANGED
@@ -189,7 +189,7 @@ if (($Job || $local_job)->{docker_image_locator}) {
|
|
189
189
|
$cmd = [$docker_bin, 'ps', '-q'];
|
190
190
|
}
|
191
191
|
Log(undef, "Sanity check is `@$cmd`");
|
192
|
-
my ($exited, $stdout, $stderr) = srun_sync(
|
192
|
+
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
193
193
|
["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
|
194
194
|
$cmd,
|
195
195
|
{label => "sanity check"});
|
@@ -397,7 +397,7 @@ if (!defined $no_clear_tmp) {
|
|
397
397
|
# Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
|
398
398
|
# up work directories crunch_tmp/work, crunch_tmp/opt,
|
399
399
|
# crunch_tmp/src*.
|
400
|
-
my ($exited, $stdout, $stderr) = srun_sync(
|
400
|
+
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
401
401
|
["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
|
402
402
|
['bash', '-ec', q{
|
403
403
|
arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
|
@@ -405,7 +405,7 @@ rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRU
|
|
405
405
|
}],
|
406
406
|
{label => "clean work dirs"});
|
407
407
|
if ($exited != 0) {
|
408
|
-
|
408
|
+
exit_retry_unlocked();
|
409
409
|
}
|
410
410
|
}
|
411
411
|
|
@@ -439,20 +439,23 @@ fi
|
|
439
439
|
echo >&2 "image loaded successfully"
|
440
440
|
};
|
441
441
|
|
442
|
-
my ($exited, $stdout, $stderr) = srun_sync(
|
442
|
+
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
443
443
|
["srun", "--nodelist=" . join(',', @node)],
|
444
444
|
["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
|
445
445
|
{label => "load docker image"});
|
446
446
|
if ($exited != 0)
|
447
447
|
{
|
448
|
-
|
448
|
+
exit_retry_unlocked();
|
449
449
|
}
|
450
450
|
|
451
451
|
# Determine whether this version of Docker supports memory+swap limits.
|
452
|
-
($exited, $stdout, $stderr) = srun_sync(
|
452
|
+
($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
453
453
|
["srun", "--nodes=1"],
|
454
454
|
[$docker_bin, 'run', '--help'],
|
455
455
|
{label => "check --memory-swap feature"});
|
456
|
+
if ($tempfail) {
|
457
|
+
exit_retry_unlocked();
|
458
|
+
}
|
456
459
|
$docker_limitmem = ($stdout =~ /--memory-swap/);
|
457
460
|
|
458
461
|
# Find a non-root Docker user to use.
|
@@ -472,7 +475,7 @@ echo >&2 "image loaded successfully"
|
|
472
475
|
$label = "check whether user '$try_user' is UID 0";
|
473
476
|
$try_user_arg = "--user=$try_user";
|
474
477
|
}
|
475
|
-
my ($exited, $stdout, $stderr) = srun_sync(
|
478
|
+
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
476
479
|
["srun", "--nodes=1"],
|
477
480
|
["/bin/sh", "-ec",
|
478
481
|
"$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
|
@@ -486,6 +489,8 @@ echo >&2 "image loaded successfully"
|
|
486
489
|
Log(undef, "Container will run with $dockeruserarg");
|
487
490
|
}
|
488
491
|
last;
|
492
|
+
} elsif ($tempfail) {
|
493
|
+
exit_retry_unlocked();
|
489
494
|
}
|
490
495
|
}
|
491
496
|
|
@@ -678,11 +683,14 @@ else {
|
|
678
683
|
"mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
|
679
684
|
|
680
685
|
$ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
|
681
|
-
my ($stdout, $stderr);
|
682
|
-
($exited, $stdout, $stderr) = srun_sync(
|
686
|
+
my ($stdout, $stderr, $tempfail);
|
687
|
+
($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
683
688
|
\@srunargs, \@execargs,
|
684
689
|
{label => "run install script on all workers"},
|
685
|
-
|
690
|
+
$build_script . $git_archive);
|
691
|
+
if ($tempfail) {
|
692
|
+
exit_retry_unlocked();
|
693
|
+
}
|
686
694
|
|
687
695
|
my $stderr_anything_from_script = 0;
|
688
696
|
for my $line (split(/\n/, $stderr)) {
|
@@ -1117,7 +1125,7 @@ if (!defined $main::success)
|
|
1117
1125
|
} elsif ($working_slot_count < 1) {
|
1118
1126
|
save_output_collection();
|
1119
1127
|
save_meta();
|
1120
|
-
|
1128
|
+
exit_retry_unlocked();
|
1121
1129
|
} elsif ($thisround_succeeded == 0 &&
|
1122
1130
|
($thisround_failed == 0 || $thisround_failed > 4)) {
|
1123
1131
|
my $message = "stop because $thisround_failed tasks failed and none succeeded";
|
@@ -2044,7 +2052,7 @@ sub srun_sync
|
|
2044
2052
|
if ($main::please_freeze || $j->{tempfail}) {
|
2045
2053
|
$exited ||= 255;
|
2046
2054
|
}
|
2047
|
-
return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
|
2055
|
+
return ($exited, $j->{stdout_captured}, $j->{stderr_captured}, $j->{tempfail});
|
2048
2056
|
}
|
2049
2057
|
|
2050
2058
|
|
@@ -2132,6 +2140,11 @@ sub find_docker_image {
|
|
2132
2140
|
}
|
2133
2141
|
}
|
2134
2142
|
|
2143
|
+
sub exit_retry_unlocked {
|
2144
|
+
Log(undef, "Transient failure with lock acquired; asking for re-dispatch by exiting ".EX_RETRY_UNLOCKED);
|
2145
|
+
exit(EX_RETRY_UNLOCKED);
|
2146
|
+
}
|
2147
|
+
|
2135
2148
|
sub retry_count {
|
2136
2149
|
# Calculate the number of times an operation should be retried,
|
2137
2150
|
# assuming exponential backoff, and that we're willing to retry as
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20170711213448
|
4
|
+
version: 0.1.20170726144433
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -164,7 +164,7 @@ dependencies:
|
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0.8'
|
167
|
-
description: Arvados command line tools, git commit
|
167
|
+
description: Arvados command line tools, git commit 8cbabbbe014628574a10a48148d179c14137d61f
|
168
168
|
email: gem-dev@curoverse.com
|
169
169
|
executables:
|
170
170
|
- arv
|