arvados-cli 0.1.20160301220801 → 0.1.20160302171627
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/crunch-job +336 -260
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d7cf3d250304a66887286e13ecdfc1da39a9624
|
4
|
+
data.tar.gz: f17e37107acba14052fd85dc525bd64998706c49
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c190742af3c5f4253c8f05f0e57e1f5d566bdb0db6aadd24f91e6c2c5ee8445f967e621cbac0e7124874a5533efa3052d171b425e418b173b9760072d7884ba3
|
7
|
+
data.tar.gz: c25440d9c5ff3e18807be0a8399494f4da816f2cb790c60866ce919baa02de653ad881baf7214ff193600693dc7ca09cbd24d02dda75ad07116247eae9fe2e69
|
data/bin/crunch-job
CHANGED
@@ -126,6 +126,7 @@ my $jobspec;
|
|
126
126
|
my $job_api_token;
|
127
127
|
my $no_clear_tmp;
|
128
128
|
my $resume_stash;
|
129
|
+
my $cgroup_root = "/sys/fs/cgroup";
|
129
130
|
my $docker_bin = "docker.io";
|
130
131
|
my $docker_run_args = "";
|
131
132
|
GetOptions('force-unlock' => \$force_unlock,
|
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
|
|
134
135
|
'job-api-token=s' => \$job_api_token,
|
135
136
|
'no-clear-tmp' => \$no_clear_tmp,
|
136
137
|
'resume-stash=s' => \$resume_stash,
|
138
|
+
'cgroup-root=s' => \$cgroup_root,
|
137
139
|
'docker-bin=s' => \$docker_bin,
|
138
140
|
'docker-run-args=s' => \$docker_run_args,
|
139
141
|
);
|
@@ -183,11 +185,12 @@ if (($Job || $local_job)->{docker_image_locator}) {
|
|
183
185
|
$cmd = [$docker_bin, 'ps', '-q'];
|
184
186
|
}
|
185
187
|
Log(undef, "Sanity check is `@$cmd`");
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
188
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
189
|
+
["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
|
190
|
+
$cmd,
|
191
|
+
{label => "sanity check"});
|
192
|
+
if ($exited != 0) {
|
193
|
+
Log(undef, "Sanity check failed: ".exit_status_s($exited));
|
191
194
|
exit EX_TEMPFAIL;
|
192
195
|
}
|
193
196
|
Log(undef, "Sanity check OK");
|
@@ -386,28 +389,17 @@ my $nodelist = join(",", @node);
|
|
386
389
|
my $git_tar_count = 0;
|
387
390
|
|
388
391
|
if (!defined $no_clear_tmp) {
|
389
|
-
#
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
|
401
|
-
exit (1);
|
402
|
-
}
|
403
|
-
while (1)
|
404
|
-
{
|
405
|
-
last if $cleanpid == waitpid (-1, WNOHANG);
|
406
|
-
freeze_if_want_freeze ($cleanpid);
|
407
|
-
select (undef, undef, undef, 0.1);
|
408
|
-
}
|
409
|
-
if ($?) {
|
410
|
-
Log(undef, "Clean work dirs: exit ".exit_status_s($?));
|
392
|
+
# Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
|
393
|
+
# up work directories crunch_tmp/work, crunch_tmp/opt,
|
394
|
+
# crunch_tmp/src*.
|
395
|
+
#
|
396
|
+
# TODO: When #5036 is done and widely deployed, we can limit mount's
|
397
|
+
# -t option to simply fuse.keep.
|
398
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
399
|
+
["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
|
400
|
+
['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid'],
|
401
|
+
{label => "clean work dirs"});
|
402
|
+
if ($exited != 0) {
|
411
403
|
exit(EX_RETRY_UNLOCKED);
|
412
404
|
}
|
413
405
|
}
|
@@ -428,30 +420,22 @@ if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
|
|
428
420
|
arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
|
429
421
|
fi
|
430
422
|
};
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
}
|
438
|
-
while (1)
|
439
|
-
{
|
440
|
-
last if $docker_pid == waitpid (-1, WNOHANG);
|
441
|
-
freeze_if_want_freeze ($docker_pid);
|
442
|
-
select (undef, undef, undef, 0.1);
|
443
|
-
}
|
444
|
-
if ($? != 0)
|
423
|
+
|
424
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
425
|
+
["srun", "--nodelist=" . join(',', @node)],
|
426
|
+
["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
|
427
|
+
{label => "load docker image"});
|
428
|
+
if ($exited != 0)
|
445
429
|
{
|
446
|
-
Log(undef, "Installing Docker image from $docker_locator exited " . exit_status_s($?));
|
447
430
|
exit(EX_RETRY_UNLOCKED);
|
448
431
|
}
|
449
432
|
|
450
433
|
# Determine whether this version of Docker supports memory+swap limits.
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
434
|
+
($exited, $stdout, $stderr) = srun_sync(
|
435
|
+
["srun", "--nodelist=" . $node[0]],
|
436
|
+
[$docker_bin, 'run', '--help'],
|
437
|
+
{label => "check --memory-swap feature"});
|
438
|
+
$docker_limitmem = ($stdout =~ /--memory-swap/);
|
455
439
|
|
456
440
|
# Find a non-root Docker user to use.
|
457
441
|
# Tries the default user for the container, then 'crunch', then 'nobody',
|
@@ -461,20 +445,22 @@ fi
|
|
461
445
|
# Docker containers.
|
462
446
|
my @tryusers = ("", "crunch", "nobody");
|
463
447
|
foreach my $try_user (@tryusers) {
|
448
|
+
my $label;
|
464
449
|
my $try_user_arg;
|
465
450
|
if ($try_user eq "") {
|
466
|
-
|
451
|
+
$label = "check whether default user is UID 0";
|
467
452
|
$try_user_arg = "";
|
468
453
|
} else {
|
469
|
-
|
454
|
+
$label = "check whether user '$try_user' is UID 0";
|
470
455
|
$try_user_arg = "--user=$try_user";
|
471
456
|
}
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
457
|
+
my ($exited, $stdout, $stderr) = srun_sync(
|
458
|
+
["srun", "--nodelist=" . $node[0]],
|
459
|
+
["/bin/sh", "-ec",
|
460
|
+
"$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
|
461
|
+
{label => $label});
|
462
|
+
chomp($stdout);
|
463
|
+
if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
|
478
464
|
$dockeruserarg = $try_user_arg;
|
479
465
|
if ($try_user eq "") {
|
480
466
|
Log(undef, "Container will run with default user");
|
@@ -664,11 +650,9 @@ if (!defined $git_archive) {
|
|
664
650
|
}
|
665
651
|
}
|
666
652
|
else {
|
667
|
-
my $
|
653
|
+
my $exited;
|
668
654
|
my $install_script_tries_left = 3;
|
669
655
|
for (my $attempts = 0; $attempts < 3; $attempts++) {
|
670
|
-
Log(undef, "Run install script on all workers");
|
671
|
-
|
672
656
|
my @srunargs = ("srun",
|
673
657
|
"--nodelist=$nodelist",
|
674
658
|
"-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
|
@@ -676,59 +660,21 @@ else {
|
|
676
660
|
"mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
|
677
661
|
|
678
662
|
$ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
|
679
|
-
my ($
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
close($install_stderr_r);
|
686
|
-
fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
|
687
|
-
open(STDOUT, ">&", $install_stderr_w);
|
688
|
-
open(STDERR, ">&", $install_stderr_w);
|
689
|
-
srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
|
690
|
-
exit (1);
|
691
|
-
}
|
692
|
-
close($install_stderr_w);
|
693
|
-
# Tell freeze_if_want_freeze how to kill the child, otherwise the
|
694
|
-
# "waitpid(installpid)" loop won't get interrupted by a freeze:
|
695
|
-
$proc{$installpid} = {};
|
696
|
-
my $stderr_buf = '';
|
697
|
-
# Track whether anything appears on stderr other than slurm errors
|
698
|
-
# ("srun: ...") and the "starting: ..." message printed by the
|
699
|
-
# srun subroutine itself:
|
663
|
+
my ($stdout, $stderr);
|
664
|
+
($exited, $stdout, $stderr) = srun_sync(
|
665
|
+
\@srunargs, \@execargs,
|
666
|
+
{label => "run install script on all workers"},
|
667
|
+
$build_script . $git_archive);
|
668
|
+
|
700
669
|
my $stderr_anything_from_script = 0;
|
701
|
-
my $
|
702
|
-
|
703
|
-
freeze_if_want_freeze ($installpid);
|
704
|
-
# Wait up to 0.1 seconds for something to appear on stderr, then
|
705
|
-
# do a non-blocking read.
|
706
|
-
my $bits = fhbits($install_stderr_r);
|
707
|
-
select ($bits, undef, $bits, 0.1);
|
708
|
-
if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
|
709
|
-
{
|
710
|
-
while ($stderr_buf =~ /^(.*?)\n/) {
|
711
|
-
my $line = $1;
|
712
|
-
substr $stderr_buf, 0, 1+length($line), "";
|
713
|
-
Log(undef, "stderr $line");
|
714
|
-
if ($line !~ /$match_our_own_errors/) {
|
715
|
-
$stderr_anything_from_script = 1;
|
716
|
-
}
|
717
|
-
}
|
718
|
-
}
|
719
|
-
}
|
720
|
-
delete $proc{$installpid};
|
721
|
-
$install_exited = $?;
|
722
|
-
close($install_stderr_r);
|
723
|
-
if (length($stderr_buf) > 0) {
|
724
|
-
if ($stderr_buf !~ /$match_our_own_errors/) {
|
670
|
+
for my $line (split(/\n/, $stderr)) {
|
671
|
+
if ($line !~ /^(srun: error: |starting: \[)/) {
|
725
672
|
$stderr_anything_from_script = 1;
|
726
673
|
}
|
727
|
-
Log(undef, "stderr $stderr_buf")
|
728
674
|
}
|
729
675
|
|
730
|
-
|
731
|
-
|
676
|
+
last if $exited == 0 || $main::please_freeze;
|
677
|
+
|
732
678
|
# If the install script fails but doesn't print an error message,
|
733
679
|
# the next thing anyone is likely to do is just run it again in
|
734
680
|
# case it was a transient problem like "slurm communication fails
|
@@ -744,7 +690,7 @@ else {
|
|
744
690
|
unlink($tar_filename);
|
745
691
|
}
|
746
692
|
|
747
|
-
if ($
|
693
|
+
if ($exited != 0) {
|
748
694
|
croak("Giving up");
|
749
695
|
}
|
750
696
|
}
|
@@ -803,6 +749,7 @@ if ($initial_tasks_this_level < @node) {
|
|
803
749
|
@freeslot = (0..$#slot);
|
804
750
|
}
|
805
751
|
my $round_num_freeslots = scalar(@freeslot);
|
752
|
+
print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
|
806
753
|
|
807
754
|
my %round_max_slots = ();
|
808
755
|
for (my $ii = $#freeslot; $ii >= 0; $ii--) {
|
@@ -915,7 +862,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
915
862
|
{
|
916
863
|
my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
|
917
864
|
my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
|
918
|
-
$command .= "crunchstat -cgroup-root
|
865
|
+
$command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
|
919
866
|
$command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
|
920
867
|
# We only set memory limits if Docker lets us limit both memory and swap.
|
921
868
|
# Memory limits alone have been supported longer, but subprocesses tend
|
@@ -995,7 +942,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
995
942
|
}
|
996
943
|
} else {
|
997
944
|
# Non-docker run
|
998
|
-
$command .= "crunchstat -cgroup-root
|
945
|
+
$command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
|
999
946
|
$command .= $stdbuf;
|
1000
947
|
$command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
|
1001
948
|
}
|
@@ -1013,11 +960,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
1013
960
|
next;
|
1014
961
|
}
|
1015
962
|
shift @freeslot;
|
1016
|
-
$proc{$childpid} = {
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
963
|
+
$proc{$childpid} = {
|
964
|
+
jobstepidx => $id,
|
965
|
+
time => time,
|
966
|
+
slot => $childslot,
|
967
|
+
jobstepname => "$job_id.$id.$childpid",
|
968
|
+
};
|
1021
969
|
croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
|
1022
970
|
$slot[$childslot]->{pid} = $childpid;
|
1023
971
|
|
@@ -1185,128 +1133,142 @@ sub update_progress_stats
|
|
1185
1133
|
|
1186
1134
|
sub reapchildren
|
1187
1135
|
{
|
1188
|
-
my $
|
1189
|
-
|
1190
|
-
|
1191
|
-
my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
|
1192
|
-
. "."
|
1193
|
-
. $slot[$proc{$pid}->{slot}]->{cpu});
|
1194
|
-
my $jobstepid = $proc{$pid}->{jobstep};
|
1195
|
-
my $elapsed = time - $proc{$pid}->{time};
|
1196
|
-
my $Jobstep = $jobstep[$jobstepid];
|
1197
|
-
|
1198
|
-
my $childstatus = $?;
|
1199
|
-
my $exitvalue = $childstatus >> 8;
|
1200
|
-
my $exitinfo = "exit ".exit_status_s($childstatus);
|
1201
|
-
$Jobstep->{'arvados_task'}->reload;
|
1202
|
-
my $task_success = $Jobstep->{'arvados_task'}->{success};
|
1203
|
-
|
1204
|
-
Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
|
1205
|
-
|
1206
|
-
if (!defined $task_success) {
|
1207
|
-
# task did not indicate one way or the other --> fail
|
1208
|
-
Log($jobstepid, sprintf(
|
1209
|
-
"ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
|
1210
|
-
exit_status_s($childstatus)));
|
1211
|
-
$Jobstep->{'arvados_task'}->{success} = 0;
|
1212
|
-
$Jobstep->{'arvados_task'}->save;
|
1213
|
-
$task_success = 0;
|
1214
|
-
}
|
1215
|
-
|
1216
|
-
if (!$task_success)
|
1136
|
+
my $children_reaped = 0;
|
1137
|
+
while ((my $pid = waitpid (-1, WNOHANG)) > 0)
|
1217
1138
|
{
|
1218
|
-
my $
|
1219
|
-
|
1220
|
-
$
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
|
1231
|
-
$elapsed < 5) {
|
1232
|
-
Log ($jobstepid, "blaming failure on suspect node " .
|
1233
|
-
$slot[$proc{$pid}->{slot}]->{node}->{name});
|
1234
|
-
$temporary_fail ||= 1;
|
1235
|
-
}
|
1236
|
-
ban_node_by_slot($proc{$pid}->{slot});
|
1139
|
+
my $childstatus = $?;
|
1140
|
+
|
1141
|
+
my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
|
1142
|
+
. "."
|
1143
|
+
. $slot[$proc{$pid}->{slot}]->{cpu});
|
1144
|
+
my $jobstepidx = $proc{$pid}->{jobstepidx};
|
1145
|
+
|
1146
|
+
if (!WIFEXITED($childstatus))
|
1147
|
+
{
|
1148
|
+
# child did not exit (may be temporarily stopped)
|
1149
|
+
Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now.");
|
1150
|
+
next;
|
1237
1151
|
}
|
1238
1152
|
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1153
|
+
$children_reaped++;
|
1154
|
+
my $elapsed = time - $proc{$pid}->{time};
|
1155
|
+
my $Jobstep = $jobstep[$jobstepidx];
|
1156
|
+
|
1157
|
+
my $exitvalue = $childstatus >> 8;
|
1158
|
+
my $exitinfo = "exit ".exit_status_s($childstatus);
|
1159
|
+
$Jobstep->{'arvados_task'}->reload;
|
1160
|
+
my $task_success = $Jobstep->{'arvados_task'}->{success};
|
1161
|
+
|
1162
|
+
Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
|
1163
|
+
|
1164
|
+
if (!defined $task_success) {
|
1165
|
+
# task did not indicate one way or the other --> fail
|
1166
|
+
Log($jobstepidx, sprintf(
|
1167
|
+
"ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
|
1168
|
+
exit_status_s($childstatus)));
|
1169
|
+
$Jobstep->{'arvados_task'}->{success} = 0;
|
1170
|
+
$Jobstep->{'arvados_task'}->save;
|
1171
|
+
$task_success = 0;
|
1172
|
+
}
|
1243
1173
|
|
1244
|
-
if (!$
|
1245
|
-
|
1246
|
-
$
|
1174
|
+
if (!$task_success)
|
1175
|
+
{
|
1176
|
+
my $temporary_fail;
|
1177
|
+
$temporary_fail ||= $Jobstep->{tempfail};
|
1178
|
+
$temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
|
1179
|
+
|
1180
|
+
++$thisround_failed;
|
1181
|
+
++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
|
1182
|
+
|
1183
|
+
# Check for signs of a failed or misconfigured node
|
1184
|
+
if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
|
1185
|
+
2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
|
1186
|
+
# Don't count this against jobstep failure thresholds if this
|
1187
|
+
# node is already suspected faulty and srun exited quickly
|
1188
|
+
if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
|
1189
|
+
$elapsed < 5) {
|
1190
|
+
Log ($jobstepidx, "blaming failure on suspect node " .
|
1191
|
+
$slot[$proc{$pid}->{slot}]->{node}->{name});
|
1192
|
+
$temporary_fail ||= 1;
|
1193
|
+
}
|
1194
|
+
ban_node_by_slot($proc{$pid}->{slot});
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
|
1198
|
+
++$Jobstep->{'failures'},
|
1199
|
+
$temporary_fail ? 'temporary' : 'permanent',
|
1200
|
+
$elapsed));
|
1201
|
+
|
1202
|
+
if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
|
1203
|
+
# Give up on this task, and the whole job
|
1204
|
+
$main::success = 0;
|
1205
|
+
}
|
1206
|
+
# Put this task back on the todo queue
|
1207
|
+
push @jobstep_todo, $jobstepidx;
|
1208
|
+
$Job->{'tasks_summary'}->{'failed'}++;
|
1247
1209
|
}
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
$slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
|
1257
|
-
$slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
|
1258
|
-
push @jobstep_done, $jobstepid;
|
1259
|
-
Log ($jobstepid, "success in $elapsed seconds");
|
1260
|
-
}
|
1261
|
-
$Jobstep->{exitcode} = $childstatus;
|
1262
|
-
$Jobstep->{finishtime} = time;
|
1263
|
-
$Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
|
1264
|
-
$Jobstep->{'arvados_task'}->save;
|
1265
|
-
process_stderr ($jobstepid, $task_success);
|
1266
|
-
Log ($jobstepid, sprintf("task output (%d bytes): %s",
|
1267
|
-
length($Jobstep->{'arvados_task'}->{output}),
|
1268
|
-
$Jobstep->{'arvados_task'}->{output}));
|
1269
|
-
|
1270
|
-
close $reader{$jobstepid};
|
1271
|
-
delete $reader{$jobstepid};
|
1272
|
-
delete $slot[$proc{$pid}->{slot}]->{pid};
|
1273
|
-
push @freeslot, $proc{$pid}->{slot};
|
1274
|
-
delete $proc{$pid};
|
1275
|
-
|
1276
|
-
if ($task_success) {
|
1277
|
-
# Load new tasks
|
1278
|
-
my $newtask_list = [];
|
1279
|
-
my $newtask_results;
|
1280
|
-
do {
|
1281
|
-
$newtask_results = api_call(
|
1282
|
-
"job_tasks/list",
|
1283
|
-
'where' => {
|
1284
|
-
'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
|
1285
|
-
},
|
1286
|
-
'order' => 'qsequence',
|
1287
|
-
'offset' => scalar(@$newtask_list),
|
1288
|
-
);
|
1289
|
-
push(@$newtask_list, @{$newtask_results->{items}});
|
1290
|
-
} while (@{$newtask_results->{items}});
|
1291
|
-
foreach my $arvados_task (@$newtask_list) {
|
1292
|
-
my $jobstep = {
|
1293
|
-
'level' => $arvados_task->{'sequence'},
|
1294
|
-
'failures' => 0,
|
1295
|
-
'arvados_task' => $arvados_task
|
1296
|
-
};
|
1297
|
-
push @jobstep, $jobstep;
|
1298
|
-
push @jobstep_todo, $#jobstep;
|
1210
|
+
else
|
1211
|
+
{
|
1212
|
+
++$thisround_succeeded;
|
1213
|
+
$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
|
1214
|
+
$slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
|
1215
|
+
$slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
|
1216
|
+
push @jobstep_done, $jobstepidx;
|
1217
|
+
Log ($jobstepidx, "success in $elapsed seconds");
|
1299
1218
|
}
|
1219
|
+
$Jobstep->{exitcode} = $childstatus;
|
1220
|
+
$Jobstep->{finishtime} = time;
|
1221
|
+
$Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
|
1222
|
+
$Jobstep->{'arvados_task'}->save;
|
1223
|
+
process_stderr_final ($jobstepidx);
|
1224
|
+
Log ($jobstepidx, sprintf("task output (%d bytes): %s",
|
1225
|
+
length($Jobstep->{'arvados_task'}->{output}),
|
1226
|
+
$Jobstep->{'arvados_task'}->{output}));
|
1227
|
+
|
1228
|
+
close $reader{$jobstepidx};
|
1229
|
+
delete $reader{$jobstepidx};
|
1230
|
+
delete $slot[$proc{$pid}->{slot}]->{pid};
|
1231
|
+
push @freeslot, $proc{$pid}->{slot};
|
1232
|
+
delete $proc{$pid};
|
1233
|
+
|
1234
|
+
if ($task_success) {
|
1235
|
+
# Load new tasks
|
1236
|
+
my $newtask_list = [];
|
1237
|
+
my $newtask_results;
|
1238
|
+
do {
|
1239
|
+
$newtask_results = api_call(
|
1240
|
+
"job_tasks/list",
|
1241
|
+
'where' => {
|
1242
|
+
'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
|
1243
|
+
},
|
1244
|
+
'order' => 'qsequence',
|
1245
|
+
'offset' => scalar(@$newtask_list),
|
1246
|
+
);
|
1247
|
+
push(@$newtask_list, @{$newtask_results->{items}});
|
1248
|
+
} while (@{$newtask_results->{items}});
|
1249
|
+
foreach my $arvados_task (@$newtask_list) {
|
1250
|
+
my $jobstep = {
|
1251
|
+
'level' => $arvados_task->{'sequence'},
|
1252
|
+
'failures' => 0,
|
1253
|
+
'arvados_task' => $arvados_task
|
1254
|
+
};
|
1255
|
+
push @jobstep, $jobstep;
|
1256
|
+
push @jobstep_todo, $#jobstep;
|
1257
|
+
}
|
1258
|
+
}
|
1259
|
+
$progress_is_dirty = 1;
|
1300
1260
|
}
|
1301
1261
|
|
1302
|
-
$
|
1303
|
-
1;
|
1262
|
+
return $children_reaped;
|
1304
1263
|
}
|
1305
1264
|
|
1306
1265
|
sub check_refresh_wanted
|
1307
1266
|
{
|
1308
1267
|
my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
|
1309
|
-
if (@stat &&
|
1268
|
+
if (@stat &&
|
1269
|
+
$stat[9] > $latest_refresh &&
|
1270
|
+
# ...and we have actually locked the job record...
|
1271
|
+
$job_id eq $Job->{'uuid'}) {
|
1310
1272
|
$latest_refresh = scalar time;
|
1311
1273
|
my $Job2 = api_call("jobs/get", uuid => $jobspec);
|
1312
1274
|
for my $attr ('cancelled_at',
|
@@ -1344,10 +1306,13 @@ sub check_squeue
|
|
1344
1306
|
# squeue check interval (15s) this should make the squeue check an
|
1345
1307
|
# infrequent event.
|
1346
1308
|
my $silent_procs = 0;
|
1347
|
-
for my $
|
1309
|
+
for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
|
1348
1310
|
{
|
1349
|
-
|
1350
|
-
|
1311
|
+
if (!exists($js->{stderr_at}))
|
1312
|
+
{
|
1313
|
+
$js->{stderr_at} = 0;
|
1314
|
+
}
|
1315
|
+
if ($js->{stderr_at} < $last_squeue_check)
|
1351
1316
|
{
|
1352
1317
|
$silent_procs++;
|
1353
1318
|
}
|
@@ -1357,16 +1322,16 @@ sub check_squeue
|
|
1357
1322
|
# use killem() on procs whose killtime is reached
|
1358
1323
|
while (my ($pid, $procinfo) = each %proc)
|
1359
1324
|
{
|
1360
|
-
my $
|
1325
|
+
my $js = $jobstep[$procinfo->{jobstepidx}];
|
1361
1326
|
if (exists $procinfo->{killtime}
|
1362
1327
|
&& $procinfo->{killtime} <= time
|
1363
|
-
&& $
|
1328
|
+
&& $js->{stderr_at} < $last_squeue_check)
|
1364
1329
|
{
|
1365
1330
|
my $sincewhen = "";
|
1366
|
-
if ($
|
1367
|
-
$sincewhen = " in last " . (time - $
|
1331
|
+
if ($js->{stderr_at}) {
|
1332
|
+
$sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
|
1368
1333
|
}
|
1369
|
-
Log($procinfo->{
|
1334
|
+
Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
|
1370
1335
|
killem ($pid);
|
1371
1336
|
}
|
1372
1337
|
}
|
@@ -1416,7 +1381,7 @@ sub check_squeue
|
|
1416
1381
|
# error/delay has caused the task to die without notifying srun,
|
1417
1382
|
# and we'll kill srun ourselves.
|
1418
1383
|
$procinfo->{killtime} = time + 30;
|
1419
|
-
Log($procinfo->{
|
1384
|
+
Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
|
1420
1385
|
}
|
1421
1386
|
}
|
1422
1387
|
}
|
@@ -1435,70 +1400,99 @@ sub release_allocation
|
|
1435
1400
|
sub readfrompipes
|
1436
1401
|
{
|
1437
1402
|
my $gotsome = 0;
|
1438
|
-
|
1403
|
+
my %fd_job;
|
1404
|
+
my $sel = IO::Select->new();
|
1405
|
+
foreach my $jobstepidx (keys %reader)
|
1406
|
+
{
|
1407
|
+
my $fd = $reader{$jobstepidx};
|
1408
|
+
$sel->add($fd);
|
1409
|
+
$fd_job{$fd} = $jobstepidx;
|
1410
|
+
|
1411
|
+
if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
|
1412
|
+
$sel->add($stdout_fd);
|
1413
|
+
$fd_job{$stdout_fd} = $jobstepidx;
|
1414
|
+
}
|
1415
|
+
}
|
1416
|
+
# select on all reader fds with 0.1s timeout
|
1417
|
+
my @ready_fds = $sel->can_read(0.1);
|
1418
|
+
foreach my $fd (@ready_fds)
|
1439
1419
|
{
|
1440
1420
|
my $buf;
|
1441
|
-
if (0 < sysread ($
|
1421
|
+
if (0 < sysread ($fd, $buf, 65536))
|
1442
1422
|
{
|
1423
|
+
$gotsome = 1;
|
1443
1424
|
print STDERR $buf if $ENV{CRUNCH_DEBUG};
|
1444
|
-
|
1445
|
-
$
|
1425
|
+
|
1426
|
+
my $jobstepidx = $fd_job{$fd};
|
1427
|
+
if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
|
1428
|
+
$jobstep[$jobstepidx]->{stdout_captured} .= $buf;
|
1429
|
+
next;
|
1430
|
+
}
|
1431
|
+
|
1432
|
+
$jobstep[$jobstepidx]->{stderr_at} = time;
|
1433
|
+
$jobstep[$jobstepidx]->{stderr} .= $buf;
|
1446
1434
|
|
1447
1435
|
# Consume everything up to the last \n
|
1448
|
-
preprocess_stderr ($
|
1436
|
+
preprocess_stderr ($jobstepidx);
|
1449
1437
|
|
1450
|
-
if (length ($jobstep[$
|
1438
|
+
if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
|
1451
1439
|
{
|
1452
1440
|
# If we get a lot of stderr without a newline, chop off the
|
1453
1441
|
# front to avoid letting our buffer grow indefinitely.
|
1454
|
-
substr ($jobstep[$
|
1455
|
-
0, length($jobstep[$
|
1442
|
+
substr ($jobstep[$jobstepidx]->{stderr},
|
1443
|
+
0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
|
1456
1444
|
}
|
1457
|
-
$gotsome = 1;
|
1458
1445
|
}
|
1459
1446
|
}
|
1460
1447
|
return $gotsome;
|
1461
1448
|
}
|
1462
1449
|
|
1463
1450
|
|
1451
|
+
# Consume all full lines of stderr for a jobstep. Everything after the
|
1452
|
+
# last newline will remain in $jobstep[$jobstepidx]->{stderr} after
|
1453
|
+
# returning.
|
1464
1454
|
sub preprocess_stderr
|
1465
1455
|
{
|
1466
|
-
my $
|
1456
|
+
my $jobstepidx = shift;
|
1467
1457
|
|
1468
|
-
while ($jobstep[$
|
1458
|
+
while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
|
1469
1459
|
my $line = $1;
|
1470
|
-
substr $jobstep[$
|
1471
|
-
Log ($
|
1460
|
+
substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
|
1461
|
+
Log ($jobstepidx, "stderr $line");
|
1472
1462
|
if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
|
1473
1463
|
# whoa.
|
1474
1464
|
$main::please_freeze = 1;
|
1475
1465
|
}
|
1466
|
+
elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
|
1467
|
+
# Skip the following tempfail checks if this srun proc isn't
|
1468
|
+
# attached to a particular worker slot.
|
1469
|
+
}
|
1476
1470
|
elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
|
1477
|
-
my $job_slot_index = $jobstep[$
|
1471
|
+
my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
|
1478
1472
|
$slot[$job_slot_index]->{node}->{fail_count}++;
|
1479
|
-
$jobstep[$
|
1473
|
+
$jobstep[$jobstepidx]->{tempfail} = 1;
|
1480
1474
|
ban_node_by_slot($job_slot_index);
|
1481
1475
|
}
|
1482
1476
|
elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
|
1483
|
-
$jobstep[$
|
1484
|
-
ban_node_by_slot($jobstep[$
|
1477
|
+
$jobstep[$jobstepidx]->{tempfail} = 1;
|
1478
|
+
ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
|
1485
1479
|
}
|
1486
1480
|
elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
|
1487
|
-
$jobstep[$
|
1481
|
+
$jobstep[$jobstepidx]->{tempfail} = 1;
|
1488
1482
|
}
|
1489
1483
|
}
|
1490
1484
|
}
|
1491
1485
|
|
1492
1486
|
|
1493
|
-
sub
|
1487
|
+
sub process_stderr_final
|
1494
1488
|
{
|
1495
|
-
my $
|
1496
|
-
|
1497
|
-
preprocess_stderr ($job);
|
1489
|
+
my $jobstepidx = shift;
|
1490
|
+
preprocess_stderr ($jobstepidx);
|
1498
1491
|
|
1499
1492
|
map {
|
1500
|
-
Log ($
|
1501
|
-
} split ("\n", $jobstep[$
|
1493
|
+
Log ($jobstepidx, "stderr $_");
|
1494
|
+
} split ("\n", $jobstep[$jobstepidx]->{stderr});
|
1495
|
+
$jobstep[$jobstepidx]->{stderr} = '';
|
1502
1496
|
}
|
1503
1497
|
|
1504
1498
|
sub fetch_block
|
@@ -1636,7 +1630,7 @@ sub killem
|
|
1636
1630
|
}
|
1637
1631
|
if (!exists $proc{$_}->{"sent_$sig"})
|
1638
1632
|
{
|
1639
|
-
Log ($proc{$_}->{
|
1633
|
+
Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
|
1640
1634
|
kill $sig, $_;
|
1641
1635
|
select (undef, undef, undef, 0.1);
|
1642
1636
|
if ($sig == 2)
|
@@ -1760,16 +1754,21 @@ sub log_writer_is_active() {
|
|
1760
1754
|
return $log_pipe_pid;
|
1761
1755
|
}
|
1762
1756
|
|
1763
|
-
sub Log # ($
|
1757
|
+
sub Log # ($jobstepidx, $logmessage)
|
1764
1758
|
{
|
1765
|
-
|
1759
|
+
my ($jobstepidx, $logmessage) = @_;
|
1760
|
+
if ($logmessage =~ /\n/) {
|
1766
1761
|
for my $line (split (/\n/, $_[1])) {
|
1767
|
-
Log ($
|
1762
|
+
Log ($jobstepidx, $line);
|
1768
1763
|
}
|
1769
1764
|
return;
|
1770
1765
|
}
|
1771
1766
|
my $fh = select STDERR; $|=1; select $fh;
|
1772
|
-
my $
|
1767
|
+
my $task_qseq = '';
|
1768
|
+
if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
|
1769
|
+
$task_qseq = $jobstepidx;
|
1770
|
+
}
|
1771
|
+
my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
|
1773
1772
|
$message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
|
1774
1773
|
$message .= "\n";
|
1775
1774
|
my $datetime;
|
@@ -1893,6 +1892,83 @@ sub freezeunquote
|
|
1893
1892
|
}
|
1894
1893
|
|
1895
1894
|
|
1895
|
+
sub srun_sync
|
1896
|
+
{
|
1897
|
+
my $srunargs = shift;
|
1898
|
+
my $execargs = shift;
|
1899
|
+
my $opts = shift || {};
|
1900
|
+
my $stdin = shift;
|
1901
|
+
|
1902
|
+
my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
|
1903
|
+
Log (undef, "$label: start");
|
1904
|
+
|
1905
|
+
my ($stderr_r, $stderr_w);
|
1906
|
+
pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
|
1907
|
+
|
1908
|
+
my ($stdout_r, $stdout_w);
|
1909
|
+
pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
|
1910
|
+
|
1911
|
+
my $srunpid = fork();
|
1912
|
+
if ($srunpid == 0)
|
1913
|
+
{
|
1914
|
+
close($stderr_r);
|
1915
|
+
close($stdout_r);
|
1916
|
+
fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
|
1917
|
+
fcntl($stdout_w, F_SETFL, 0) or croak($!);
|
1918
|
+
open(STDERR, ">&", $stderr_w);
|
1919
|
+
open(STDOUT, ">&", $stdout_w);
|
1920
|
+
srun ($srunargs, $execargs, $opts, $stdin);
|
1921
|
+
exit (1);
|
1922
|
+
}
|
1923
|
+
close($stderr_w);
|
1924
|
+
close($stdout_w);
|
1925
|
+
|
1926
|
+
set_nonblocking($stderr_r);
|
1927
|
+
set_nonblocking($stdout_r);
|
1928
|
+
|
1929
|
+
# Add entries to @jobstep and %proc so check_squeue() and
|
1930
|
+
# freeze_if_want_freeze() can treat it like a job task process.
|
1931
|
+
push @jobstep, {
|
1932
|
+
stderr => '',
|
1933
|
+
stderr_at => 0,
|
1934
|
+
stderr_captured => '',
|
1935
|
+
stdout_r => $stdout_r,
|
1936
|
+
stdout_captured => '',
|
1937
|
+
};
|
1938
|
+
my $jobstepidx = $#jobstep;
|
1939
|
+
$proc{$srunpid} = {
|
1940
|
+
jobstepidx => $jobstepidx,
|
1941
|
+
};
|
1942
|
+
$reader{$jobstepidx} = $stderr_r;
|
1943
|
+
|
1944
|
+
while ($srunpid != waitpid ($srunpid, WNOHANG)) {
|
1945
|
+
my $busy = readfrompipes();
|
1946
|
+
if (!$busy || ($latest_refresh + 2 < scalar time)) {
|
1947
|
+
check_refresh_wanted();
|
1948
|
+
check_squeue();
|
1949
|
+
}
|
1950
|
+
if (!$busy) {
|
1951
|
+
select(undef, undef, undef, 0.1);
|
1952
|
+
}
|
1953
|
+
killem(keys %proc) if $main::please_freeze;
|
1954
|
+
}
|
1955
|
+
my $exited = $?;
|
1956
|
+
|
1957
|
+
1 while readfrompipes();
|
1958
|
+
process_stderr_final ($jobstepidx);
|
1959
|
+
|
1960
|
+
Log (undef, "$label: exit ".exit_status_s($exited));
|
1961
|
+
|
1962
|
+
close($stdout_r);
|
1963
|
+
close($stderr_r);
|
1964
|
+
delete $proc{$srunpid};
|
1965
|
+
delete $reader{$jobstepidx};
|
1966
|
+
|
1967
|
+
my $j = pop @jobstep;
|
1968
|
+
return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
|
1969
|
+
}
|
1970
|
+
|
1971
|
+
|
1896
1972
|
sub srun
|
1897
1973
|
{
|
1898
1974
|
my $srunargs = shift;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.20160302171627
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit 9ddb8e3b5106f885401927a513ac0d94a59133ed
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|