arvados-cli 0.1.20160301220801 → 0.1.20160302171627
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crunch-job +336 -260
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d7cf3d250304a66887286e13ecdfc1da39a9624
|
4
|
+
data.tar.gz: f17e37107acba14052fd85dc525bd64998706c49
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c190742af3c5f4253c8f05f0e57e1f5d566bdb0db6aadd24f91e6c2c5ee8445f967e621cbac0e7124874a5533efa3052d171b425e418b173b9760072d7884ba3
|
7
|
+
data.tar.gz: c25440d9c5ff3e18807be0a8399494f4da816f2cb790c60866ce919baa02de653ad881baf7214ff193600693dc7ca09cbd24d02dda75ad07116247eae9fe2e69
|
data/bin/crunch-job
CHANGED
@@ -126,6 +126,7 @@ my $jobspec;
|
|
126
126
|
my $job_api_token;
|
127
127
|
my $no_clear_tmp;
|
128
128
|
my $resume_stash;
|
129
|
+
my $cgroup_root = "/sys/fs/cgroup";
|
129
130
|
my $docker_bin = "docker.io";
|
130
131
|
my $docker_run_args = "";
|
131
132
|
GetOptions('force-unlock' => \$force_unlock,
|
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
|
|
134
135
|
'job-api-token=s' => \$job_api_token,
|
135
136
|
'no-clear-tmp' => \$no_clear_tmp,
|
136
137
|
'resume-stash=s' => \$resume_stash,
|
138
|
+
'cgroup-root=s' => \$cgroup_root,
|
137
139
|
'docker-bin=s' => \$docker_bin,
|
138
140
|
'docker-run-args=s' => \$docker_run_args,
|
139
141
|
);
|
@@ -183,11 +185,12 @@ if (($Job || $local_job)->{docker_image_locator}) {
|
|
183
185
|
$cmd = [$docker_bin, 'ps', '-q'];
|
184
186
|
}
|
185
187
|
Log(undef, "Sanity check is `@$cmd`");
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
188
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
189
|
+
["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
|
190
|
+
$cmd,
|
191
|
+
{label => "sanity check"});
|
192
|
+
if ($exited != 0) {
|
193
|
+
Log(undef, "Sanity check failed: ".exit_status_s($exited));
|
191
194
|
exit EX_TEMPFAIL;
|
192
195
|
}
|
193
196
|
Log(undef, "Sanity check OK");
|
@@ -386,28 +389,17 @@ my $nodelist = join(",", @node);
|
|
386
389
|
my $git_tar_count = 0;
|
387
390
|
|
388
391
|
if (!defined $no_clear_tmp) {
|
389
|
-
#
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
|
401
|
-
exit (1);
|
402
|
-
}
|
403
|
-
while (1)
|
404
|
-
{
|
405
|
-
last if $cleanpid == waitpid (-1, WNOHANG);
|
406
|
-
freeze_if_want_freeze ($cleanpid);
|
407
|
-
select (undef, undef, undef, 0.1);
|
408
|
-
}
|
409
|
-
if ($?) {
|
410
|
-
Log(undef, "Clean work dirs: exit ".exit_status_s($?));
|
392
|
+
# Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
|
393
|
+
# up work directories crunch_tmp/work, crunch_tmp/opt,
|
394
|
+
# crunch_tmp/src*.
|
395
|
+
#
|
396
|
+
# TODO: When #5036 is done and widely deployed, we can limit mount's
|
397
|
+
# -t option to simply fuse.keep.
|
398
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
399
|
+
["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
|
400
|
+
['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid'],
|
401
|
+
{label => "clean work dirs"});
|
402
|
+
if ($exited != 0) {
|
411
403
|
exit(EX_RETRY_UNLOCKED);
|
412
404
|
}
|
413
405
|
}
|
@@ -428,30 +420,22 @@ if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
|
|
428
420
|
arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
|
429
421
|
fi
|
430
422
|
};
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
}
|
438
|
-
while (1)
|
439
|
-
{
|
440
|
-
last if $docker_pid == waitpid (-1, WNOHANG);
|
441
|
-
freeze_if_want_freeze ($docker_pid);
|
442
|
-
select (undef, undef, undef, 0.1);
|
443
|
-
}
|
444
|
-
if ($? != 0)
|
423
|
+
|
424
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
425
|
+
["srun", "--nodelist=" . join(',', @node)],
|
426
|
+
["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
|
427
|
+
{label => "load docker image"});
|
428
|
+
if ($exited != 0)
|
445
429
|
{
|
446
|
-
Log(undef, "Installing Docker image from $docker_locator exited " . exit_status_s($?));
|
447
430
|
exit(EX_RETRY_UNLOCKED);
|
448
431
|
}
|
449
432
|
|
450
433
|
# Determine whether this version of Docker supports memory+swap limits.
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
434
|
+
($exited, $stdout, $stderr) = srun_sync(
|
435
|
+
["srun", "--nodelist=" . $node[0]],
|
436
|
+
[$docker_bin, 'run', '--help'],
|
437
|
+
{label => "check --memory-swap feature"});
|
438
|
+
$docker_limitmem = ($stdout =~ /--memory-swap/);
|
455
439
|
|
456
440
|
# Find a non-root Docker user to use.
|
457
441
|
# Tries the default user for the container, then 'crunch', then 'nobody',
|
@@ -461,20 +445,22 @@ fi
|
|
461
445
|
# Docker containers.
|
462
446
|
my @tryusers = ("", "crunch", "nobody");
|
463
447
|
foreach my $try_user (@tryusers) {
|
448
|
+
my $label;
|
464
449
|
my $try_user_arg;
|
465
450
|
if ($try_user eq "") {
|
466
|
-
|
451
|
+
$label = "check whether default user is UID 0";
|
467
452
|
$try_user_arg = "";
|
468
453
|
} else {
|
469
|
-
|
454
|
+
$label = "check whether user '$try_user' is UID 0";
|
470
455
|
$try_user_arg = "--user=$try_user";
|
471
456
|
}
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
457
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
458
|
+
["srun", "--nodelist=" . $node[0]],
|
459
|
+
["/bin/sh", "-ec",
|
460
|
+
"$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
|
461
|
+
{label => $label});
|
462
|
+
chomp($stdout);
|
463
|
+
if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
|
478
464
|
$dockeruserarg = $try_user_arg;
|
479
465
|
if ($try_user eq "") {
|
480
466
|
Log(undef, "Container will run with default user");
|
@@ -664,11 +650,9 @@ if (!defined $git_archive) {
|
|
664
650
|
}
|
665
651
|
}
|
666
652
|
else {
|
667
|
-
my $
|
653
|
+
my $exited;
|
668
654
|
my $install_script_tries_left = 3;
|
669
655
|
for (my $attempts = 0; $attempts < 3; $attempts++) {
|
670
|
-
Log(undef, "Run install script on all workers");
|
671
|
-
|
672
656
|
my @srunargs = ("srun",
|
673
657
|
"--nodelist=$nodelist",
|
674
658
|
"-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
|
@@ -676,59 +660,21 @@ else {
|
|
676
660
|
"mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
|
677
661
|
|
678
662
|
$ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
|
679
|
-
my ($
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
close($install_stderr_r);
|
686
|
-
fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
|
687
|
-
open(STDOUT, ">&", $install_stderr_w);
|
688
|
-
open(STDERR, ">&", $install_stderr_w);
|
689
|
-
srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
|
690
|
-
exit (1);
|
691
|
-
}
|
692
|
-
close($install_stderr_w);
|
693
|
-
# Tell freeze_if_want_freeze how to kill the child, otherwise the
|
694
|
-
# "waitpid(installpid)" loop won't get interrupted by a freeze:
|
695
|
-
$proc{$installpid} = {};
|
696
|
-
my $stderr_buf = '';
|
697
|
-
# Track whether anything appears on stderr other than slurm errors
|
698
|
-
# ("srun: ...") and the "starting: ..." message printed by the
|
699
|
-
# srun subroutine itself:
|
663
|
+
my ($stdout, $stderr);
|
664
|
+
($exited, $stdout, $stderr) = srun_sync(
|
665
|
+
\@srunargs, \@execargs,
|
666
|
+
{label => "run install script on all workers"},
|
667
|
+
$build_script . $git_archive);
|
668
|
+
|
700
669
|
my $stderr_anything_from_script = 0;
|
701
|
-
my $
|
702
|
-
|
703
|
-
freeze_if_want_freeze ($installpid);
|
704
|
-
# Wait up to 0.1 seconds for something to appear on stderr, then
|
705
|
-
# do a non-blocking read.
|
706
|
-
my $bits = fhbits($install_stderr_r);
|
707
|
-
select ($bits, undef, $bits, 0.1);
|
708
|
-
if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
|
709
|
-
{
|
710
|
-
while ($stderr_buf =~ /^(.*?)\n/) {
|
711
|
-
my $line = $1;
|
712
|
-
substr $stderr_buf, 0, 1+length($line), "";
|
713
|
-
Log(undef, "stderr $line");
|
714
|
-
if ($line !~ /$match_our_own_errors/) {
|
715
|
-
$stderr_anything_from_script = 1;
|
716
|
-
}
|
717
|
-
}
|
718
|
-
}
|
719
|
-
}
|
720
|
-
delete $proc{$installpid};
|
721
|
-
$install_exited = $?;
|
722
|
-
close($install_stderr_r);
|
723
|
-
if (length($stderr_buf) > 0) {
|
724
|
-
if ($stderr_buf !~ /$match_our_own_errors/) {
|
670
|
+
for my $line (split(/\n/, $stderr)) {
|
671
|
+
if ($line !~ /^(srun: error: |starting: \[)/) {
|
725
672
|
$stderr_anything_from_script = 1;
|
726
673
|
}
|
727
|
-
Log(undef, "stderr $stderr_buf")
|
728
674
|
}
|
729
675
|
|
730
|
-
|
731
|
-
|
676
|
+
last if $exited == 0 || $main::please_freeze;
|
677
|
+
|
732
678
|
# If the install script fails but doesn't print an error message,
|
733
679
|
# the next thing anyone is likely to do is just run it again in
|
734
680
|
# case it was a transient problem like "slurm communication fails
|
@@ -744,7 +690,7 @@ else {
|
|
744
690
|
unlink($tar_filename);
|
745
691
|
}
|
746
692
|
|
747
|
-
if ($
|
693
|
+
if ($exited != 0) {
|
748
694
|
croak("Giving up");
|
749
695
|
}
|
750
696
|
}
|
@@ -803,6 +749,7 @@ if ($initial_tasks_this_level < @node) {
|
|
803
749
|
@freeslot = (0..$#slot);
|
804
750
|
}
|
805
751
|
my $round_num_freeslots = scalar(@freeslot);
|
752
|
+
print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
|
806
753
|
|
807
754
|
my %round_max_slots = ();
|
808
755
|
for (my $ii = $#freeslot; $ii >= 0; $ii--) {
|
@@ -915,7 +862,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
915
862
|
{
|
916
863
|
my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
|
917
864
|
my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
|
918
|
-
$command .= "crunchstat -cgroup-root
|
865
|
+
$command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
|
919
866
|
$command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
|
920
867
|
# We only set memory limits if Docker lets us limit both memory and swap.
|
921
868
|
# Memory limits alone have been supported longer, but subprocesses tend
|
@@ -995,7 +942,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
995
942
|
}
|
996
943
|
} else {
|
997
944
|
# Non-docker run
|
998
|
-
$command .= "crunchstat -cgroup-root
|
945
|
+
$command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
|
999
946
|
$command .= $stdbuf;
|
1000
947
|
$command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
|
1001
948
|
}
|
@@ -1013,11 +960,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
1013
960
|
next;
|
1014
961
|
}
|
1015
962
|
shift @freeslot;
|
1016
|
-
$proc{$childpid} = {
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
963
|
+
$proc{$childpid} = {
|
964
|
+
jobstepidx => $id,
|
965
|
+
time => time,
|
966
|
+
slot => $childslot,
|
967
|
+
jobstepname => "$job_id.$id.$childpid",
|
968
|
+
};
|
1021
969
|
croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
|
1022
970
|
$slot[$childslot]->{pid} = $childpid;
|
1023
971
|
|
@@ -1185,128 +1133,142 @@ sub update_progress_stats
|
|
1185
1133
|
|
1186
1134
|
sub reapchildren
|
1187
1135
|
{
|
1188
|
-
my $
|
1189
|
-
|
1190
|
-
|
1191
|
-
my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
|
1192
|
-
. "."
|
1193
|
-
. $slot[$proc{$pid}->{slot}]->{cpu});
|
1194
|
-
my $jobstepid = $proc{$pid}->{jobstep};
|
1195
|
-
my $elapsed = time - $proc{$pid}->{time};
|
1196
|
-
my $Jobstep = $jobstep[$jobstepid];
|
1197
|
-
|
1198
|
-
my $childstatus = $?;
|
1199
|
-
my $exitvalue = $childstatus >> 8;
|
1200
|
-
my $exitinfo = "exit ".exit_status_s($childstatus);
|
1201
|
-
$Jobstep->{'arvados_task'}->reload;
|
1202
|
-
my $task_success = $Jobstep->{'arvados_task'}->{success};
|
1203
|
-
|
1204
|
-
Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
|
1205
|
-
|
1206
|
-
if (!defined $task_success) {
|
1207
|
-
# task did not indicate one way or the other --> fail
|
1208
|
-
Log($jobstepid, sprintf(
|
1209
|
-
"ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
|
1210
|
-
exit_status_s($childstatus)));
|
1211
|
-
$Jobstep->{'arvados_task'}->{success} = 0;
|
1212
|
-
$Jobstep->{'arvados_task'}->save;
|
1213
|
-
$task_success = 0;
|
1214
|
-
}
|
1215
|
-
|
1216
|
-
if (!$task_success)
|
1136
|
+
my $children_reaped = 0;
|
1137
|
+
while ((my $pid = waitpid (-1, WNOHANG)) > 0)
|
1217
1138
|
{
|
1218
|
-
my $
|
1219
|
-
|
1220
|
-
$
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
|
1231
|
-
$elapsed < 5) {
|
1232
|
-
Log ($jobstepid, "blaming failure on suspect node " .
|
1233
|
-
$slot[$proc{$pid}->{slot}]->{node}->{name});
|
1234
|
-
$temporary_fail ||= 1;
|
1235
|
-
}
|
1236
|
-
ban_node_by_slot($proc{$pid}->{slot});
|
1139
|
+
my $childstatus = $?;
|
1140
|
+
|
1141
|
+
my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
|
1142
|
+
. "."
|
1143
|
+
. $slot[$proc{$pid}->{slot}]->{cpu});
|
1144
|
+
my $jobstepidx = $proc{$pid}->{jobstepidx};
|
1145
|
+
|
1146
|
+
if (!WIFEXITED($childstatus))
|
1147
|
+
{
|
1148
|
+
# child did not exit (may be temporarily stopped)
|
1149
|
+
Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now.");
|
1150
|
+
next;
|
1237
1151
|
}
|
1238
1152
|
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1153
|
+
$children_reaped++;
|
1154
|
+
my $elapsed = time - $proc{$pid}->{time};
|
1155
|
+
my $Jobstep = $jobstep[$jobstepidx];
|
1156
|
+
|
1157
|
+
my $exitvalue = $childstatus >> 8;
|
1158
|
+
my $exitinfo = "exit ".exit_status_s($childstatus);
|
1159
|
+
$Jobstep->{'arvados_task'}->reload;
|
1160
|
+
my $task_success = $Jobstep->{'arvados_task'}->{success};
|
1161
|
+
|
1162
|
+
Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
|
1163
|
+
|
1164
|
+
if (!defined $task_success) {
|
1165
|
+
# task did not indicate one way or the other --> fail
|
1166
|
+
Log($jobstepidx, sprintf(
|
1167
|
+
"ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
|
1168
|
+
exit_status_s($childstatus)));
|
1169
|
+
$Jobstep->{'arvados_task'}->{success} = 0;
|
1170
|
+
$Jobstep->{'arvados_task'}->save;
|
1171
|
+
$task_success = 0;
|
1172
|
+
}
|
1243
1173
|
|
1244
|
-
if (!$
|
1245
|
-
|
1246
|
-
$
|
1174
|
+
if (!$task_success)
|
1175
|
+
{
|
1176
|
+
my $temporary_fail;
|
1177
|
+
$temporary_fail ||= $Jobstep->{tempfail};
|
1178
|
+
$temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
|
1179
|
+
|
1180
|
+
++$thisround_failed;
|
1181
|
+
++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
|
1182
|
+
|
1183
|
+
# Check for signs of a failed or misconfigured node
|
1184
|
+
if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
|
1185
|
+
2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
|
1186
|
+
# Don't count this against jobstep failure thresholds if this
|
1187
|
+
# node is already suspected faulty and srun exited quickly
|
1188
|
+
if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
|
1189
|
+
$elapsed < 5) {
|
1190
|
+
Log ($jobstepidx, "blaming failure on suspect node " .
|
1191
|
+
$slot[$proc{$pid}->{slot}]->{node}->{name});
|
1192
|
+
$temporary_fail ||= 1;
|
1193
|
+
}
|
1194
|
+
ban_node_by_slot($proc{$pid}->{slot});
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
|
1198
|
+
++$Jobstep->{'failures'},
|
1199
|
+
$temporary_fail ? 'temporary' : 'permanent',
|
1200
|
+
$elapsed));
|
1201
|
+
|
1202
|
+
if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
|
1203
|
+
# Give up on this task, and the whole job
|
1204
|
+
$main::success = 0;
|
1205
|
+
}
|
1206
|
+
# Put this task back on the todo queue
|
1207
|
+
push @jobstep_todo, $jobstepidx;
|
1208
|
+
$Job->{'tasks_summary'}->{'failed'}++;
|
1247
1209
|
}
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
$slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
|
1257
|
-
$slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
|
1258
|
-
push @jobstep_done, $jobstepid;
|
1259
|
-
Log ($jobstepid, "success in $elapsed seconds");
|
1260
|
-
}
|
1261
|
-
$Jobstep->{exitcode} = $childstatus;
|
1262
|
-
$Jobstep->{finishtime} = time;
|
1263
|
-
$Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
|
1264
|
-
$Jobstep->{'arvados_task'}->save;
|
1265
|
-
process_stderr ($jobstepid, $task_success);
|
1266
|
-
Log ($jobstepid, sprintf("task output (%d bytes): %s",
|
1267
|
-
length($Jobstep->{'arvados_task'}->{output}),
|
1268
|
-
$Jobstep->{'arvados_task'}->{output}));
|
1269
|
-
|
1270
|
-
close $reader{$jobstepid};
|
1271
|
-
delete $reader{$jobstepid};
|
1272
|
-
delete $slot[$proc{$pid}->{slot}]->{pid};
|
1273
|
-
push @freeslot, $proc{$pid}->{slot};
|
1274
|
-
delete $proc{$pid};
|
1275
|
-
|
1276
|
-
if ($task_success) {
|
1277
|
-
# Load new tasks
|
1278
|
-
my $newtask_list = [];
|
1279
|
-
my $newtask_results;
|
1280
|
-
do {
|
1281
|
-
$newtask_results = api_call(
|
1282
|
-
"job_tasks/list",
|
1283
|
-
'where' => {
|
1284
|
-
'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
|
1285
|
-
},
|
1286
|
-
'order' => 'qsequence',
|
1287
|
-
'offset' => scalar(@$newtask_list),
|
1288
|
-
);
|
1289
|
-
push(@$newtask_list, @{$newtask_results->{items}});
|
1290
|
-
} while (@{$newtask_results->{items}});
|
1291
|
-
foreach my $arvados_task (@$newtask_list) {
|
1292
|
-
my $jobstep = {
|
1293
|
-
'level' => $arvados_task->{'sequence'},
|
1294
|
-
'failures' => 0,
|
1295
|
-
'arvados_task' => $arvados_task
|
1296
|
-
};
|
1297
|
-
push @jobstep, $jobstep;
|
1298
|
-
push @jobstep_todo, $#jobstep;
|
1210
|
+
else
|
1211
|
+
{
|
1212
|
+
++$thisround_succeeded;
|
1213
|
+
$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
|
1214
|
+
$slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
|
1215
|
+
$slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
|
1216
|
+
push @jobstep_done, $jobstepidx;
|
1217
|
+
Log ($jobstepidx, "success in $elapsed seconds");
|
1299
1218
|
}
|
1219
|
+
$Jobstep->{exitcode} = $childstatus;
|
1220
|
+
$Jobstep->{finishtime} = time;
|
1221
|
+
$Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
|
1222
|
+
$Jobstep->{'arvados_task'}->save;
|
1223
|
+
process_stderr_final ($jobstepidx);
|
1224
|
+
Log ($jobstepidx, sprintf("task output (%d bytes): %s",
|
1225
|
+
length($Jobstep->{'arvados_task'}->{output}),
|
1226
|
+
$Jobstep->{'arvados_task'}->{output}));
|
1227
|
+
|
1228
|
+
close $reader{$jobstepidx};
|
1229
|
+
delete $reader{$jobstepidx};
|
1230
|
+
delete $slot[$proc{$pid}->{slot}]->{pid};
|
1231
|
+
push @freeslot, $proc{$pid}->{slot};
|
1232
|
+
delete $proc{$pid};
|
1233
|
+
|
1234
|
+
if ($task_success) {
|
1235
|
+
# Load new tasks
|
1236
|
+
my $newtask_list = [];
|
1237
|
+
my $newtask_results;
|
1238
|
+
do {
|
1239
|
+
$newtask_results = api_call(
|
1240
|
+
"job_tasks/list",
|
1241
|
+
'where' => {
|
1242
|
+
'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
|
1243
|
+
},
|
1244
|
+
'order' => 'qsequence',
|
1245
|
+
'offset' => scalar(@$newtask_list),
|
1246
|
+
);
|
1247
|
+
push(@$newtask_list, @{$newtask_results->{items}});
|
1248
|
+
} while (@{$newtask_results->{items}});
|
1249
|
+
foreach my $arvados_task (@$newtask_list) {
|
1250
|
+
my $jobstep = {
|
1251
|
+
'level' => $arvados_task->{'sequence'},
|
1252
|
+
'failures' => 0,
|
1253
|
+
'arvados_task' => $arvados_task
|
1254
|
+
};
|
1255
|
+
push @jobstep, $jobstep;
|
1256
|
+
push @jobstep_todo, $#jobstep;
|
1257
|
+
}
|
1258
|
+
}
|
1259
|
+
$progress_is_dirty = 1;
|
1300
1260
|
}
|
1301
1261
|
|
1302
|
-
$
|
1303
|
-
1;
|
1262
|
+
return $children_reaped;
|
1304
1263
|
}
|
1305
1264
|
|
1306
1265
|
sub check_refresh_wanted
|
1307
1266
|
{
|
1308
1267
|
my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
|
1309
|
-
if (@stat &&
|
1268
|
+
if (@stat &&
|
1269
|
+
$stat[9] > $latest_refresh &&
|
1270
|
+
# ...and we have actually locked the job record...
|
1271
|
+
$job_id eq $Job->{'uuid'}) {
|
1310
1272
|
$latest_refresh = scalar time;
|
1311
1273
|
my $Job2 = api_call("jobs/get", uuid => $jobspec);
|
1312
1274
|
for my $attr ('cancelled_at',
|
@@ -1344,10 +1306,13 @@ sub check_squeue
|
|
1344
1306
|
# squeue check interval (15s) this should make the squeue check an
|
1345
1307
|
# infrequent event.
|
1346
1308
|
my $silent_procs = 0;
|
1347
|
-
for my $
|
1309
|
+
for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
|
1348
1310
|
{
|
1349
|
-
|
1350
|
-
|
1311
|
+
if (!exists($js->{stderr_at}))
|
1312
|
+
{
|
1313
|
+
$js->{stderr_at} = 0;
|
1314
|
+
}
|
1315
|
+
if ($js->{stderr_at} < $last_squeue_check)
|
1351
1316
|
{
|
1352
1317
|
$silent_procs++;
|
1353
1318
|
}
|
@@ -1357,16 +1322,16 @@ sub check_squeue
|
|
1357
1322
|
# use killem() on procs whose killtime is reached
|
1358
1323
|
while (my ($pid, $procinfo) = each %proc)
|
1359
1324
|
{
|
1360
|
-
my $
|
1325
|
+
my $js = $jobstep[$procinfo->{jobstepidx}];
|
1361
1326
|
if (exists $procinfo->{killtime}
|
1362
1327
|
&& $procinfo->{killtime} <= time
|
1363
|
-
&& $
|
1328
|
+
&& $js->{stderr_at} < $last_squeue_check)
|
1364
1329
|
{
|
1365
1330
|
my $sincewhen = "";
|
1366
|
-
if ($
|
1367
|
-
$sincewhen = " in last " . (time - $
|
1331
|
+
if ($js->{stderr_at}) {
|
1332
|
+
$sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
|
1368
1333
|
}
|
1369
|
-
Log($procinfo->{
|
1334
|
+
Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
|
1370
1335
|
killem ($pid);
|
1371
1336
|
}
|
1372
1337
|
}
|
@@ -1416,7 +1381,7 @@ sub check_squeue
|
|
1416
1381
|
# error/delay has caused the task to die without notifying srun,
|
1417
1382
|
# and we'll kill srun ourselves.
|
1418
1383
|
$procinfo->{killtime} = time + 30;
|
1419
|
-
Log($procinfo->{
|
1384
|
+
Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
|
1420
1385
|
}
|
1421
1386
|
}
|
1422
1387
|
}
|
@@ -1435,70 +1400,99 @@ sub release_allocation
|
|
1435
1400
|
sub readfrompipes
|
1436
1401
|
{
|
1437
1402
|
my $gotsome = 0;
|
1438
|
-
|
1403
|
+
my %fd_job;
|
1404
|
+
my $sel = IO::Select->new();
|
1405
|
+
foreach my $jobstepidx (keys %reader)
|
1406
|
+
{
|
1407
|
+
my $fd = $reader{$jobstepidx};
|
1408
|
+
$sel->add($fd);
|
1409
|
+
$fd_job{$fd} = $jobstepidx;
|
1410
|
+
|
1411
|
+
if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
|
1412
|
+
$sel->add($stdout_fd);
|
1413
|
+
$fd_job{$stdout_fd} = $jobstepidx;
|
1414
|
+
}
|
1415
|
+
}
|
1416
|
+
# select on all reader fds with 0.1s timeout
|
1417
|
+
my @ready_fds = $sel->can_read(0.1);
|
1418
|
+
foreach my $fd (@ready_fds)
|
1439
1419
|
{
|
1440
1420
|
my $buf;
|
1441
|
-
if (0 < sysread ($
|
1421
|
+
if (0 < sysread ($fd, $buf, 65536))
|
1442
1422
|
{
|
1423
|
+
$gotsome = 1;
|
1443
1424
|
print STDERR $buf if $ENV{CRUNCH_DEBUG};
|
1444
|
-
|
1445
|
-
$
|
1425
|
+
|
1426
|
+
my $jobstepidx = $fd_job{$fd};
|
1427
|
+
if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
|
1428
|
+
$jobstep[$jobstepidx]->{stdout_captured} .= $buf;
|
1429
|
+
next;
|
1430
|
+
}
|
1431
|
+
|
1432
|
+
$jobstep[$jobstepidx]->{stderr_at} = time;
|
1433
|
+
$jobstep[$jobstepidx]->{stderr} .= $buf;
|
1446
1434
|
|
1447
1435
|
# Consume everything up to the last \n
|
1448
|
-
preprocess_stderr ($
|
1436
|
+
preprocess_stderr ($jobstepidx);
|
1449
1437
|
|
1450
|
-
if (length ($jobstep[$
|
1438
|
+
if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
|
1451
1439
|
{
|
1452
1440
|
# If we get a lot of stderr without a newline, chop off the
|
1453
1441
|
# front to avoid letting our buffer grow indefinitely.
|
1454
|
-
substr ($jobstep[$
|
1455
|
-
0, length($jobstep[$
|
1442
|
+
substr ($jobstep[$jobstepidx]->{stderr},
|
1443
|
+
0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
|
1456
1444
|
}
|
1457
|
-
$gotsome = 1;
|
1458
1445
|
}
|
1459
1446
|
}
|
1460
1447
|
return $gotsome;
|
1461
1448
|
}
|
1462
1449
|
|
1463
1450
|
|
1451
|
+
# Consume all full lines of stderr for a jobstep. Everything after the
|
1452
|
+
# last newline will remain in $jobstep[$jobstepidx]->{stderr} after
|
1453
|
+
# returning.
|
1464
1454
|
sub preprocess_stderr
|
1465
1455
|
{
|
1466
|
-
my $
|
1456
|
+
my $jobstepidx = shift;
|
1467
1457
|
|
1468
|
-
while ($jobstep[$
|
1458
|
+
while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
|
1469
1459
|
my $line = $1;
|
1470
|
-
substr $jobstep[$
|
1471
|
-
Log ($
|
1460
|
+
substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
|
1461
|
+
Log ($jobstepidx, "stderr $line");
|
1472
1462
|
if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
|
1473
1463
|
# whoa.
|
1474
1464
|
$main::please_freeze = 1;
|
1475
1465
|
}
|
1466
|
+
elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
|
1467
|
+
# Skip the following tempfail checks if this srun proc isn't
|
1468
|
+
# attached to a particular worker slot.
|
1469
|
+
}
|
1476
1470
|
elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
|
1477
|
-
my $job_slot_index = $jobstep[$
|
1471
|
+
my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
|
1478
1472
|
$slot[$job_slot_index]->{node}->{fail_count}++;
|
1479
|
-
$jobstep[$
|
1473
|
+
$jobstep[$jobstepidx]->{tempfail} = 1;
|
1480
1474
|
ban_node_by_slot($job_slot_index);
|
1481
1475
|
}
|
1482
1476
|
elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
|
1483
|
-
$jobstep[$
|
1484
|
-
ban_node_by_slot($jobstep[$
|
1477
|
+
$jobstep[$jobstepidx]->{tempfail} = 1;
|
1478
|
+
ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
|
1485
1479
|
}
|
1486
1480
|
elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
|
1487
|
-
$jobstep[$
|
1481
|
+
$jobstep[$jobstepidx]->{tempfail} = 1;
|
1488
1482
|
}
|
1489
1483
|
}
|
1490
1484
|
}
|
1491
1485
|
|
1492
1486
|
|
1493
|
-
sub
|
1487
|
+
sub process_stderr_final
|
1494
1488
|
{
|
1495
|
-
my $
|
1496
|
-
|
1497
|
-
preprocess_stderr ($job);
|
1489
|
+
my $jobstepidx = shift;
|
1490
|
+
preprocess_stderr ($jobstepidx);
|
1498
1491
|
|
1499
1492
|
map {
|
1500
|
-
Log ($
|
1501
|
-
} split ("\n", $jobstep[$
|
1493
|
+
Log ($jobstepidx, "stderr $_");
|
1494
|
+
} split ("\n", $jobstep[$jobstepidx]->{stderr});
|
1495
|
+
$jobstep[$jobstepidx]->{stderr} = '';
|
1502
1496
|
}
|
1503
1497
|
|
1504
1498
|
sub fetch_block
|
@@ -1636,7 +1630,7 @@ sub killem
|
|
1636
1630
|
}
|
1637
1631
|
if (!exists $proc{$_}->{"sent_$sig"})
|
1638
1632
|
{
|
1639
|
-
Log ($proc{$_}->{
|
1633
|
+
Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
|
1640
1634
|
kill $sig, $_;
|
1641
1635
|
select (undef, undef, undef, 0.1);
|
1642
1636
|
if ($sig == 2)
|
@@ -1760,16 +1754,21 @@ sub log_writer_is_active() {
|
|
1760
1754
|
return $log_pipe_pid;
|
1761
1755
|
}
|
1762
1756
|
|
1763
|
-
sub Log # ($
|
1757
|
+
sub Log # ($jobstepidx, $logmessage)
|
1764
1758
|
{
|
1765
|
-
|
1759
|
+
my ($jobstepidx, $logmessage) = @_;
|
1760
|
+
if ($logmessage =~ /\n/) {
|
1766
1761
|
for my $line (split (/\n/, $_[1])) {
|
1767
|
-
Log ($
|
1762
|
+
Log ($jobstepidx, $line);
|
1768
1763
|
}
|
1769
1764
|
return;
|
1770
1765
|
}
|
1771
1766
|
my $fh = select STDERR; $|=1; select $fh;
|
1772
|
-
my $
|
1767
|
+
my $task_qseq = '';
|
1768
|
+
if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
|
1769
|
+
$task_qseq = $jobstepidx;
|
1770
|
+
}
|
1771
|
+
my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
|
1773
1772
|
$message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
|
1774
1773
|
$message .= "\n";
|
1775
1774
|
my $datetime;
|
@@ -1893,6 +1892,83 @@ sub freezeunquote
|
|
1893
1892
|
}
|
1894
1893
|
|
1895
1894
|
|
1895
|
+
sub srun_sync
|
1896
|
+
{
|
1897
|
+
my $srunargs = shift;
|
1898
|
+
my $execargs = shift;
|
1899
|
+
my $opts = shift || {};
|
1900
|
+
my $stdin = shift;
|
1901
|
+
|
1902
|
+
my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
|
1903
|
+
Log (undef, "$label: start");
|
1904
|
+
|
1905
|
+
my ($stderr_r, $stderr_w);
|
1906
|
+
pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
|
1907
|
+
|
1908
|
+
my ($stdout_r, $stdout_w);
|
1909
|
+
pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
|
1910
|
+
|
1911
|
+
my $srunpid = fork();
|
1912
|
+
if ($srunpid == 0)
|
1913
|
+
{
|
1914
|
+
close($stderr_r);
|
1915
|
+
close($stdout_r);
|
1916
|
+
fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
|
1917
|
+
fcntl($stdout_w, F_SETFL, 0) or croak($!);
|
1918
|
+
open(STDERR, ">&", $stderr_w);
|
1919
|
+
open(STDOUT, ">&", $stdout_w);
|
1920
|
+
srun ($srunargs, $execargs, $opts, $stdin);
|
1921
|
+
exit (1);
|
1922
|
+
}
|
1923
|
+
close($stderr_w);
|
1924
|
+
close($stdout_w);
|
1925
|
+
|
1926
|
+
set_nonblocking($stderr_r);
|
1927
|
+
set_nonblocking($stdout_r);
|
1928
|
+
|
1929
|
+
# Add entries to @jobstep and %proc so check_squeue() and
|
1930
|
+
# freeze_if_want_freeze() can treat it like a job task process.
|
1931
|
+
push @jobstep, {
|
1932
|
+
stderr => '',
|
1933
|
+
stderr_at => 0,
|
1934
|
+
stderr_captured => '',
|
1935
|
+
stdout_r => $stdout_r,
|
1936
|
+
stdout_captured => '',
|
1937
|
+
};
|
1938
|
+
my $jobstepidx = $#jobstep;
|
1939
|
+
$proc{$srunpid} = {
|
1940
|
+
jobstepidx => $jobstepidx,
|
1941
|
+
};
|
1942
|
+
$reader{$jobstepidx} = $stderr_r;
|
1943
|
+
|
1944
|
+
while ($srunpid != waitpid ($srunpid, WNOHANG)) {
|
1945
|
+
my $busy = readfrompipes();
|
1946
|
+
if (!$busy || ($latest_refresh + 2 < scalar time)) {
|
1947
|
+
check_refresh_wanted();
|
1948
|
+
check_squeue();
|
1949
|
+
}
|
1950
|
+
if (!$busy) {
|
1951
|
+
select(undef, undef, undef, 0.1);
|
1952
|
+
}
|
1953
|
+
killem(keys %proc) if $main::please_freeze;
|
1954
|
+
}
|
1955
|
+
my $exited = $?;
|
1956
|
+
|
1957
|
+
1 while readfrompipes();
|
1958
|
+
process_stderr_final ($jobstepidx);
|
1959
|
+
|
1960
|
+
Log (undef, "$label: exit ".exit_status_s($exited));
|
1961
|
+
|
1962
|
+
close($stdout_r);
|
1963
|
+
close($stderr_r);
|
1964
|
+
delete $proc{$srunpid};
|
1965
|
+
delete $reader{$jobstepidx};
|
1966
|
+
|
1967
|
+
my $j = pop @jobstep;
|
1968
|
+
return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
|
1969
|
+
}
|
1970
|
+
|
1971
|
+
|
1896
1972
|
sub srun
|
1897
1973
|
{
|
1898
1974
|
my $srunargs = shift;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.20160302171627
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit 9ddb8e3b5106f885401927a513ac0d94a59133ed
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|