arvados-cli 0.1.20160301220801 → 0.1.20160302171627

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +336 -260
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eef6bb9ccab7e81066458f6cf7ef694b49f5aafc
4
- data.tar.gz: 676b854565294414d05a46bf29c7061cdd27d0f9
3
+ metadata.gz: 5d7cf3d250304a66887286e13ecdfc1da39a9624
4
+ data.tar.gz: f17e37107acba14052fd85dc525bd64998706c49
5
5
  SHA512:
6
- metadata.gz: 8d5b06f57dca7fb720205cc941921ec5cb303a699adb3f2cc7c6127e64be617f9a8b87bc6b101d93ad40d5f7fbcdb61bfe1b812e802fa22c64d454eeb1dc7e28
7
- data.tar.gz: 8d9056bc655740a754f9b6f015b1b55887d77ab9fa8c7d2bdb67afd9fddb3c9c893da691488fb66d67d70e89d44ba1f7cec53400c7172d2c95a4724923a4d3c6
6
+ metadata.gz: c190742af3c5f4253c8f05f0e57e1f5d566bdb0db6aadd24f91e6c2c5ee8445f967e621cbac0e7124874a5533efa3052d171b425e418b173b9760072d7884ba3
7
+ data.tar.gz: c25440d9c5ff3e18807be0a8399494f4da816f2cb790c60866ce919baa02de653ad881baf7214ff193600693dc7ca09cbd24d02dda75ad07116247eae9fe2e69
data/bin/crunch-job CHANGED
@@ -126,6 +126,7 @@ my $jobspec;
126
126
  my $job_api_token;
127
127
  my $no_clear_tmp;
128
128
  my $resume_stash;
129
+ my $cgroup_root = "/sys/fs/cgroup";
129
130
  my $docker_bin = "docker.io";
130
131
  my $docker_run_args = "";
131
132
  GetOptions('force-unlock' => \$force_unlock,
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
134
135
  'job-api-token=s' => \$job_api_token,
135
136
  'no-clear-tmp' => \$no_clear_tmp,
136
137
  'resume-stash=s' => \$resume_stash,
138
+ 'cgroup-root=s' => \$cgroup_root,
137
139
  'docker-bin=s' => \$docker_bin,
138
140
  'docker-run-args=s' => \$docker_run_args,
139
141
  );
@@ -183,11 +185,12 @@ if (($Job || $local_job)->{docker_image_locator}) {
183
185
  $cmd = [$docker_bin, 'ps', '-q'];
184
186
  }
185
187
  Log(undef, "Sanity check is `@$cmd`");
186
- srun(["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
187
- $cmd,
188
- {fork => 1});
189
- if ($? != 0) {
190
- Log(undef, "Sanity check failed: ".exit_status_s($?));
188
+ my ($exited, $stdout, $stderr) = srun_sync(
189
+ ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
190
+ $cmd,
191
+ {label => "sanity check"});
192
+ if ($exited != 0) {
193
+ Log(undef, "Sanity check failed: ".exit_status_s($exited));
191
194
  exit EX_TEMPFAIL;
192
195
  }
193
196
  Log(undef, "Sanity check OK");
@@ -386,28 +389,17 @@ my $nodelist = join(",", @node);
386
389
  my $git_tar_count = 0;
387
390
 
388
391
  if (!defined $no_clear_tmp) {
389
- # Clean out crunch_tmp/work, crunch_tmp/opt, crunch_tmp/src*
390
- Log (undef, "Clean work dirs");
391
-
392
- my $cleanpid = fork();
393
- if ($cleanpid == 0)
394
- {
395
- # Find FUSE mounts under $CRUNCH_TMP and unmount them.
396
- # Then clean up work directories.
397
- # TODO: When #5036 is done and widely deployed, we can limit mount's
398
- # -t option to simply fuse.keep.
399
- srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
400
- ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
401
- exit (1);
402
- }
403
- while (1)
404
- {
405
- last if $cleanpid == waitpid (-1, WNOHANG);
406
- freeze_if_want_freeze ($cleanpid);
407
- select (undef, undef, undef, 0.1);
408
- }
409
- if ($?) {
410
- Log(undef, "Clean work dirs: exit ".exit_status_s($?));
392
+ # Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
393
+ # up work directories crunch_tmp/work, crunch_tmp/opt,
394
+ # crunch_tmp/src*.
395
+ #
396
+ # TODO: When #5036 is done and widely deployed, we can limit mount's
397
+ # -t option to simply fuse.keep.
398
+ my ($exited, $stdout, $stderr) = srun_sync(
399
+ ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
400
+ ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid'],
401
+ {label => "clean work dirs"});
402
+ if ($exited != 0) {
411
403
  exit(EX_RETRY_UNLOCKED);
412
404
  }
413
405
  }
@@ -428,30 +420,22 @@ if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
428
420
  arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
429
421
  fi
430
422
  };
431
- my $docker_pid = fork();
432
- if ($docker_pid == 0)
433
- {
434
- srun (["srun", "--nodelist=" . join(',', @node)],
435
- ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script]);
436
- exit ($?);
437
- }
438
- while (1)
439
- {
440
- last if $docker_pid == waitpid (-1, WNOHANG);
441
- freeze_if_want_freeze ($docker_pid);
442
- select (undef, undef, undef, 0.1);
443
- }
444
- if ($? != 0)
423
+
424
+ my ($exited, $stdout, $stderr) = srun_sync(
425
+ ["srun", "--nodelist=" . join(',', @node)],
426
+ ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
427
+ {label => "load docker image"});
428
+ if ($exited != 0)
445
429
  {
446
- Log(undef, "Installing Docker image from $docker_locator exited " . exit_status_s($?));
447
430
  exit(EX_RETRY_UNLOCKED);
448
431
  }
449
432
 
450
433
  # Determine whether this version of Docker supports memory+swap limits.
451
- srun(["srun", "--nodelist=" . $node[0]],
452
- ["/bin/sh", "-ec", "$docker_bin run --help | grep -qe --memory-swap="],
453
- {fork => 1});
454
- $docker_limitmem = ($? == 0);
434
+ ($exited, $stdout, $stderr) = srun_sync(
435
+ ["srun", "--nodelist=" . $node[0]],
436
+ [$docker_bin, 'run', '--help'],
437
+ {label => "check --memory-swap feature"});
438
+ $docker_limitmem = ($stdout =~ /--memory-swap/);
455
439
 
456
440
  # Find a non-root Docker user to use.
457
441
  # Tries the default user for the container, then 'crunch', then 'nobody',
@@ -461,20 +445,22 @@ fi
461
445
  # Docker containers.
462
446
  my @tryusers = ("", "crunch", "nobody");
463
447
  foreach my $try_user (@tryusers) {
448
+ my $label;
464
449
  my $try_user_arg;
465
450
  if ($try_user eq "") {
466
- Log(undef, "Checking if container default user is not UID 0");
451
+ $label = "check whether default user is UID 0";
467
452
  $try_user_arg = "";
468
453
  } else {
469
- Log(undef, "Checking if user '$try_user' is not UID 0");
454
+ $label = "check whether user '$try_user' is UID 0";
470
455
  $try_user_arg = "--user=$try_user";
471
456
  }
472
- srun(["srun", "--nodelist=" . $node[0]],
473
- ["/bin/sh", "-ec",
474
- "a=`$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user` && " .
475
- " test \$a -ne 0"],
476
- {fork => 1});
477
- if ($? == 0) {
457
+ my ($exited, $stdout, $stderr) = srun_sync(
458
+ ["srun", "--nodelist=" . $node[0]],
459
+ ["/bin/sh", "-ec",
460
+ "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
461
+ {label => $label});
462
+ chomp($stdout);
463
+ if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
478
464
  $dockeruserarg = $try_user_arg;
479
465
  if ($try_user eq "") {
480
466
  Log(undef, "Container will run with default user");
@@ -664,11 +650,9 @@ if (!defined $git_archive) {
664
650
  }
665
651
  }
666
652
  else {
667
- my $install_exited;
653
+ my $exited;
668
654
  my $install_script_tries_left = 3;
669
655
  for (my $attempts = 0; $attempts < 3; $attempts++) {
670
- Log(undef, "Run install script on all workers");
671
-
672
656
  my @srunargs = ("srun",
673
657
  "--nodelist=$nodelist",
674
658
  "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
@@ -676,59 +660,21 @@ else {
676
660
  "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
677
661
 
678
662
  $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
679
- my ($install_stderr_r, $install_stderr_w);
680
- pipe $install_stderr_r, $install_stderr_w or croak("pipe() failed: $!");
681
- set_nonblocking($install_stderr_r);
682
- my $installpid = fork();
683
- if ($installpid == 0)
684
- {
685
- close($install_stderr_r);
686
- fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
687
- open(STDOUT, ">&", $install_stderr_w);
688
- open(STDERR, ">&", $install_stderr_w);
689
- srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
690
- exit (1);
691
- }
692
- close($install_stderr_w);
693
- # Tell freeze_if_want_freeze how to kill the child, otherwise the
694
- # "waitpid(installpid)" loop won't get interrupted by a freeze:
695
- $proc{$installpid} = {};
696
- my $stderr_buf = '';
697
- # Track whether anything appears on stderr other than slurm errors
698
- # ("srun: ...") and the "starting: ..." message printed by the
699
- # srun subroutine itself:
663
+ my ($stdout, $stderr);
664
+ ($exited, $stdout, $stderr) = srun_sync(
665
+ \@srunargs, \@execargs,
666
+ {label => "run install script on all workers"},
667
+ $build_script . $git_archive);
668
+
700
669
  my $stderr_anything_from_script = 0;
701
- my $match_our_own_errors = '^(srun: error: |starting: \[)';
702
- while ($installpid != waitpid(-1, WNOHANG)) {
703
- freeze_if_want_freeze ($installpid);
704
- # Wait up to 0.1 seconds for something to appear on stderr, then
705
- # do a non-blocking read.
706
- my $bits = fhbits($install_stderr_r);
707
- select ($bits, undef, $bits, 0.1);
708
- if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
709
- {
710
- while ($stderr_buf =~ /^(.*?)\n/) {
711
- my $line = $1;
712
- substr $stderr_buf, 0, 1+length($line), "";
713
- Log(undef, "stderr $line");
714
- if ($line !~ /$match_our_own_errors/) {
715
- $stderr_anything_from_script = 1;
716
- }
717
- }
718
- }
719
- }
720
- delete $proc{$installpid};
721
- $install_exited = $?;
722
- close($install_stderr_r);
723
- if (length($stderr_buf) > 0) {
724
- if ($stderr_buf !~ /$match_our_own_errors/) {
670
+ for my $line (split(/\n/, $stderr)) {
671
+ if ($line !~ /^(srun: error: |starting: \[)/) {
725
672
  $stderr_anything_from_script = 1;
726
673
  }
727
- Log(undef, "stderr $stderr_buf")
728
674
  }
729
675
 
730
- Log (undef, "Install script exited ".exit_status_s($install_exited));
731
- last if $install_exited == 0 || $main::please_freeze;
676
+ last if $exited == 0 || $main::please_freeze;
677
+
732
678
  # If the install script fails but doesn't print an error message,
733
679
  # the next thing anyone is likely to do is just run it again in
734
680
  # case it was a transient problem like "slurm communication fails
@@ -744,7 +690,7 @@ else {
744
690
  unlink($tar_filename);
745
691
  }
746
692
 
747
- if ($install_exited != 0) {
693
+ if ($exited != 0) {
748
694
  croak("Giving up");
749
695
  }
750
696
  }
@@ -803,6 +749,7 @@ if ($initial_tasks_this_level < @node) {
803
749
  @freeslot = (0..$#slot);
804
750
  }
805
751
  my $round_num_freeslots = scalar(@freeslot);
752
+ print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
806
753
 
807
754
  my %round_max_slots = ();
808
755
  for (my $ii = $#freeslot; $ii >= 0; $ii--) {
@@ -915,7 +862,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
915
862
  {
916
863
  my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
917
864
  my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
918
- $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
865
+ $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
919
866
  $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
920
867
  # We only set memory limits if Docker lets us limit both memory and swap.
921
868
  # Memory limits alone have been supported longer, but subprocesses tend
@@ -995,7 +942,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
995
942
  }
996
943
  } else {
997
944
  # Non-docker run
998
- $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
945
+ $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
999
946
  $command .= $stdbuf;
1000
947
  $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
1001
948
  }
@@ -1013,11 +960,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
1013
960
  next;
1014
961
  }
1015
962
  shift @freeslot;
1016
- $proc{$childpid} = { jobstep => $id,
1017
- time => time,
1018
- slot => $childslot,
1019
- jobstepname => "$job_id.$id.$childpid",
1020
- };
963
+ $proc{$childpid} = {
964
+ jobstepidx => $id,
965
+ time => time,
966
+ slot => $childslot,
967
+ jobstepname => "$job_id.$id.$childpid",
968
+ };
1021
969
  croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
1022
970
  $slot[$childslot]->{pid} = $childpid;
1023
971
 
@@ -1185,128 +1133,142 @@ sub update_progress_stats
1185
1133
 
1186
1134
  sub reapchildren
1187
1135
  {
1188
- my $pid = waitpid (-1, WNOHANG);
1189
- return 0 if $pid <= 0;
1190
-
1191
- my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
1192
- . "."
1193
- . $slot[$proc{$pid}->{slot}]->{cpu});
1194
- my $jobstepid = $proc{$pid}->{jobstep};
1195
- my $elapsed = time - $proc{$pid}->{time};
1196
- my $Jobstep = $jobstep[$jobstepid];
1197
-
1198
- my $childstatus = $?;
1199
- my $exitvalue = $childstatus >> 8;
1200
- my $exitinfo = "exit ".exit_status_s($childstatus);
1201
- $Jobstep->{'arvados_task'}->reload;
1202
- my $task_success = $Jobstep->{'arvados_task'}->{success};
1203
-
1204
- Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
1205
-
1206
- if (!defined $task_success) {
1207
- # task did not indicate one way or the other --> fail
1208
- Log($jobstepid, sprintf(
1209
- "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
1210
- exit_status_s($childstatus)));
1211
- $Jobstep->{'arvados_task'}->{success} = 0;
1212
- $Jobstep->{'arvados_task'}->save;
1213
- $task_success = 0;
1214
- }
1215
-
1216
- if (!$task_success)
1136
+ my $children_reaped = 0;
1137
+ while ((my $pid = waitpid (-1, WNOHANG)) > 0)
1217
1138
  {
1218
- my $temporary_fail;
1219
- $temporary_fail ||= $Jobstep->{tempfail};
1220
- $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
1221
-
1222
- ++$thisround_failed;
1223
- ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
1224
-
1225
- # Check for signs of a failed or misconfigured node
1226
- if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
1227
- 2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
1228
- # Don't count this against jobstep failure thresholds if this
1229
- # node is already suspected faulty and srun exited quickly
1230
- if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
1231
- $elapsed < 5) {
1232
- Log ($jobstepid, "blaming failure on suspect node " .
1233
- $slot[$proc{$pid}->{slot}]->{node}->{name});
1234
- $temporary_fail ||= 1;
1235
- }
1236
- ban_node_by_slot($proc{$pid}->{slot});
1139
+ my $childstatus = $?;
1140
+
1141
+ my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
1142
+ . "."
1143
+ . $slot[$proc{$pid}->{slot}]->{cpu});
1144
+ my $jobstepidx = $proc{$pid}->{jobstepidx};
1145
+
1146
+ if (!WIFEXITED($childstatus))
1147
+ {
1148
+ # child did not exit (may be temporarily stopped)
1149
+ Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now.");
1150
+ next;
1237
1151
  }
1238
1152
 
1239
- Log ($jobstepid, sprintf('failure (#%d, %s) after %d seconds',
1240
- ++$Jobstep->{'failures'},
1241
- $temporary_fail ? 'temporary' : 'permanent',
1242
- $elapsed));
1153
+ $children_reaped++;
1154
+ my $elapsed = time - $proc{$pid}->{time};
1155
+ my $Jobstep = $jobstep[$jobstepidx];
1156
+
1157
+ my $exitvalue = $childstatus >> 8;
1158
+ my $exitinfo = "exit ".exit_status_s($childstatus);
1159
+ $Jobstep->{'arvados_task'}->reload;
1160
+ my $task_success = $Jobstep->{'arvados_task'}->{success};
1161
+
1162
+ Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
1163
+
1164
+ if (!defined $task_success) {
1165
+ # task did not indicate one way or the other --> fail
1166
+ Log($jobstepidx, sprintf(
1167
+ "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
1168
+ exit_status_s($childstatus)));
1169
+ $Jobstep->{'arvados_task'}->{success} = 0;
1170
+ $Jobstep->{'arvados_task'}->save;
1171
+ $task_success = 0;
1172
+ }
1243
1173
 
1244
- if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
1245
- # Give up on this task, and the whole job
1246
- $main::success = 0;
1174
+ if (!$task_success)
1175
+ {
1176
+ my $temporary_fail;
1177
+ $temporary_fail ||= $Jobstep->{tempfail};
1178
+ $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
1179
+
1180
+ ++$thisround_failed;
1181
+ ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
1182
+
1183
+ # Check for signs of a failed or misconfigured node
1184
+ if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
1185
+ 2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
1186
+ # Don't count this against jobstep failure thresholds if this
1187
+ # node is already suspected faulty and srun exited quickly
1188
+ if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
1189
+ $elapsed < 5) {
1190
+ Log ($jobstepidx, "blaming failure on suspect node " .
1191
+ $slot[$proc{$pid}->{slot}]->{node}->{name});
1192
+ $temporary_fail ||= 1;
1193
+ }
1194
+ ban_node_by_slot($proc{$pid}->{slot});
1195
+ }
1196
+
1197
+ Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
1198
+ ++$Jobstep->{'failures'},
1199
+ $temporary_fail ? 'temporary' : 'permanent',
1200
+ $elapsed));
1201
+
1202
+ if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
1203
+ # Give up on this task, and the whole job
1204
+ $main::success = 0;
1205
+ }
1206
+ # Put this task back on the todo queue
1207
+ push @jobstep_todo, $jobstepidx;
1208
+ $Job->{'tasks_summary'}->{'failed'}++;
1247
1209
  }
1248
- # Put this task back on the todo queue
1249
- push @jobstep_todo, $jobstepid;
1250
- $Job->{'tasks_summary'}->{'failed'}++;
1251
- }
1252
- else
1253
- {
1254
- ++$thisround_succeeded;
1255
- $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
1256
- $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
1257
- $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
1258
- push @jobstep_done, $jobstepid;
1259
- Log ($jobstepid, "success in $elapsed seconds");
1260
- }
1261
- $Jobstep->{exitcode} = $childstatus;
1262
- $Jobstep->{finishtime} = time;
1263
- $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
1264
- $Jobstep->{'arvados_task'}->save;
1265
- process_stderr ($jobstepid, $task_success);
1266
- Log ($jobstepid, sprintf("task output (%d bytes): %s",
1267
- length($Jobstep->{'arvados_task'}->{output}),
1268
- $Jobstep->{'arvados_task'}->{output}));
1269
-
1270
- close $reader{$jobstepid};
1271
- delete $reader{$jobstepid};
1272
- delete $slot[$proc{$pid}->{slot}]->{pid};
1273
- push @freeslot, $proc{$pid}->{slot};
1274
- delete $proc{$pid};
1275
-
1276
- if ($task_success) {
1277
- # Load new tasks
1278
- my $newtask_list = [];
1279
- my $newtask_results;
1280
- do {
1281
- $newtask_results = api_call(
1282
- "job_tasks/list",
1283
- 'where' => {
1284
- 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
1285
- },
1286
- 'order' => 'qsequence',
1287
- 'offset' => scalar(@$newtask_list),
1288
- );
1289
- push(@$newtask_list, @{$newtask_results->{items}});
1290
- } while (@{$newtask_results->{items}});
1291
- foreach my $arvados_task (@$newtask_list) {
1292
- my $jobstep = {
1293
- 'level' => $arvados_task->{'sequence'},
1294
- 'failures' => 0,
1295
- 'arvados_task' => $arvados_task
1296
- };
1297
- push @jobstep, $jobstep;
1298
- push @jobstep_todo, $#jobstep;
1210
+ else
1211
+ {
1212
+ ++$thisround_succeeded;
1213
+ $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
1214
+ $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
1215
+ $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
1216
+ push @jobstep_done, $jobstepidx;
1217
+ Log ($jobstepidx, "success in $elapsed seconds");
1299
1218
  }
1219
+ $Jobstep->{exitcode} = $childstatus;
1220
+ $Jobstep->{finishtime} = time;
1221
+ $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
1222
+ $Jobstep->{'arvados_task'}->save;
1223
+ process_stderr_final ($jobstepidx);
1224
+ Log ($jobstepidx, sprintf("task output (%d bytes): %s",
1225
+ length($Jobstep->{'arvados_task'}->{output}),
1226
+ $Jobstep->{'arvados_task'}->{output}));
1227
+
1228
+ close $reader{$jobstepidx};
1229
+ delete $reader{$jobstepidx};
1230
+ delete $slot[$proc{$pid}->{slot}]->{pid};
1231
+ push @freeslot, $proc{$pid}->{slot};
1232
+ delete $proc{$pid};
1233
+
1234
+ if ($task_success) {
1235
+ # Load new tasks
1236
+ my $newtask_list = [];
1237
+ my $newtask_results;
1238
+ do {
1239
+ $newtask_results = api_call(
1240
+ "job_tasks/list",
1241
+ 'where' => {
1242
+ 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
1243
+ },
1244
+ 'order' => 'qsequence',
1245
+ 'offset' => scalar(@$newtask_list),
1246
+ );
1247
+ push(@$newtask_list, @{$newtask_results->{items}});
1248
+ } while (@{$newtask_results->{items}});
1249
+ foreach my $arvados_task (@$newtask_list) {
1250
+ my $jobstep = {
1251
+ 'level' => $arvados_task->{'sequence'},
1252
+ 'failures' => 0,
1253
+ 'arvados_task' => $arvados_task
1254
+ };
1255
+ push @jobstep, $jobstep;
1256
+ push @jobstep_todo, $#jobstep;
1257
+ }
1258
+ }
1259
+ $progress_is_dirty = 1;
1300
1260
  }
1301
1261
 
1302
- $progress_is_dirty = 1;
1303
- 1;
1262
+ return $children_reaped;
1304
1263
  }
1305
1264
 
1306
1265
  sub check_refresh_wanted
1307
1266
  {
1308
1267
  my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
1309
- if (@stat && $stat[9] > $latest_refresh) {
1268
+ if (@stat &&
1269
+ $stat[9] > $latest_refresh &&
1270
+ # ...and we have actually locked the job record...
1271
+ $job_id eq $Job->{'uuid'}) {
1310
1272
  $latest_refresh = scalar time;
1311
1273
  my $Job2 = api_call("jobs/get", uuid => $jobspec);
1312
1274
  for my $attr ('cancelled_at',
@@ -1344,10 +1306,13 @@ sub check_squeue
1344
1306
  # squeue check interval (15s) this should make the squeue check an
1345
1307
  # infrequent event.
1346
1308
  my $silent_procs = 0;
1347
- for my $procinfo (values %proc)
1309
+ for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
1348
1310
  {
1349
- my $jobstep = $jobstep[$procinfo->{jobstep}];
1350
- if ($jobstep->{stderr_at} < $last_squeue_check)
1311
+ if (!exists($js->{stderr_at}))
1312
+ {
1313
+ $js->{stderr_at} = 0;
1314
+ }
1315
+ if ($js->{stderr_at} < $last_squeue_check)
1351
1316
  {
1352
1317
  $silent_procs++;
1353
1318
  }
@@ -1357,16 +1322,16 @@ sub check_squeue
1357
1322
  # use killem() on procs whose killtime is reached
1358
1323
  while (my ($pid, $procinfo) = each %proc)
1359
1324
  {
1360
- my $jobstep = $jobstep[$procinfo->{jobstep}];
1325
+ my $js = $jobstep[$procinfo->{jobstepidx}];
1361
1326
  if (exists $procinfo->{killtime}
1362
1327
  && $procinfo->{killtime} <= time
1363
- && $jobstep->{stderr_at} < $last_squeue_check)
1328
+ && $js->{stderr_at} < $last_squeue_check)
1364
1329
  {
1365
1330
  my $sincewhen = "";
1366
- if ($jobstep->{stderr_at}) {
1367
- $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
1331
+ if ($js->{stderr_at}) {
1332
+ $sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
1368
1333
  }
1369
- Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1334
+ Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1370
1335
  killem ($pid);
1371
1336
  }
1372
1337
  }
@@ -1416,7 +1381,7 @@ sub check_squeue
1416
1381
  # error/delay has caused the task to die without notifying srun,
1417
1382
  # and we'll kill srun ourselves.
1418
1383
  $procinfo->{killtime} = time + 30;
1419
- Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
1384
+ Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
1420
1385
  }
1421
1386
  }
1422
1387
  }
@@ -1435,70 +1400,99 @@ sub release_allocation
1435
1400
  sub readfrompipes
1436
1401
  {
1437
1402
  my $gotsome = 0;
1438
- foreach my $job (keys %reader)
1403
+ my %fd_job;
1404
+ my $sel = IO::Select->new();
1405
+ foreach my $jobstepidx (keys %reader)
1406
+ {
1407
+ my $fd = $reader{$jobstepidx};
1408
+ $sel->add($fd);
1409
+ $fd_job{$fd} = $jobstepidx;
1410
+
1411
+ if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
1412
+ $sel->add($stdout_fd);
1413
+ $fd_job{$stdout_fd} = $jobstepidx;
1414
+ }
1415
+ }
1416
+ # select on all reader fds with 0.1s timeout
1417
+ my @ready_fds = $sel->can_read(0.1);
1418
+ foreach my $fd (@ready_fds)
1439
1419
  {
1440
1420
  my $buf;
1441
- if (0 < sysread ($reader{$job}, $buf, 65536))
1421
+ if (0 < sysread ($fd, $buf, 65536))
1442
1422
  {
1423
+ $gotsome = 1;
1443
1424
  print STDERR $buf if $ENV{CRUNCH_DEBUG};
1444
- $jobstep[$job]->{stderr_at} = time;
1445
- $jobstep[$job]->{stderr} .= $buf;
1425
+
1426
+ my $jobstepidx = $fd_job{$fd};
1427
+ if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
1428
+ $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
1429
+ next;
1430
+ }
1431
+
1432
+ $jobstep[$jobstepidx]->{stderr_at} = time;
1433
+ $jobstep[$jobstepidx]->{stderr} .= $buf;
1446
1434
 
1447
1435
  # Consume everything up to the last \n
1448
- preprocess_stderr ($job);
1436
+ preprocess_stderr ($jobstepidx);
1449
1437
 
1450
- if (length ($jobstep[$job]->{stderr}) > 16384)
1438
+ if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
1451
1439
  {
1452
1440
  # If we get a lot of stderr without a newline, chop off the
1453
1441
  # front to avoid letting our buffer grow indefinitely.
1454
- substr ($jobstep[$job]->{stderr},
1455
- 0, length($jobstep[$job]->{stderr}) - 8192) = "";
1442
+ substr ($jobstep[$jobstepidx]->{stderr},
1443
+ 0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
1456
1444
  }
1457
- $gotsome = 1;
1458
1445
  }
1459
1446
  }
1460
1447
  return $gotsome;
1461
1448
  }
1462
1449
 
1463
1450
 
1451
+ # Consume all full lines of stderr for a jobstep. Everything after the
1452
+ # last newline will remain in $jobstep[$jobstepidx]->{stderr} after
1453
+ # returning.
1464
1454
  sub preprocess_stderr
1465
1455
  {
1466
- my $job = shift;
1456
+ my $jobstepidx = shift;
1467
1457
 
1468
- while ($jobstep[$job]->{stderr} =~ /^(.*?)\n/) {
1458
+ while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
1469
1459
  my $line = $1;
1470
- substr $jobstep[$job]->{stderr}, 0, 1+length($line), "";
1471
- Log ($job, "stderr $line");
1460
+ substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
1461
+ Log ($jobstepidx, "stderr $line");
1472
1462
  if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
1473
1463
  # whoa.
1474
1464
  $main::please_freeze = 1;
1475
1465
  }
1466
+ elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
1467
+ # Skip the following tempfail checks if this srun proc isn't
1468
+ # attached to a particular worker slot.
1469
+ }
1476
1470
  elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
1477
- my $job_slot_index = $jobstep[$job]->{slotindex};
1471
+ my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
1478
1472
  $slot[$job_slot_index]->{node}->{fail_count}++;
1479
- $jobstep[$job]->{tempfail} = 1;
1473
+ $jobstep[$jobstepidx]->{tempfail} = 1;
1480
1474
  ban_node_by_slot($job_slot_index);
1481
1475
  }
1482
1476
  elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
1483
- $jobstep[$job]->{tempfail} = 1;
1484
- ban_node_by_slot($jobstep[$job]->{slotindex});
1477
+ $jobstep[$jobstepidx]->{tempfail} = 1;
1478
+ ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
1485
1479
  }
1486
1480
  elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
1487
- $jobstep[$job]->{tempfail} = 1;
1481
+ $jobstep[$jobstepidx]->{tempfail} = 1;
1488
1482
  }
1489
1483
  }
1490
1484
  }
1491
1485
 
1492
1486
 
1493
- sub process_stderr
1487
+ sub process_stderr_final
1494
1488
  {
1495
- my $job = shift;
1496
- my $task_success = shift;
1497
- preprocess_stderr ($job);
1489
+ my $jobstepidx = shift;
1490
+ preprocess_stderr ($jobstepidx);
1498
1491
 
1499
1492
  map {
1500
- Log ($job, "stderr $_");
1501
- } split ("\n", $jobstep[$job]->{stderr});
1493
+ Log ($jobstepidx, "stderr $_");
1494
+ } split ("\n", $jobstep[$jobstepidx]->{stderr});
1495
+ $jobstep[$jobstepidx]->{stderr} = '';
1502
1496
  }
1503
1497
 
1504
1498
  sub fetch_block
@@ -1636,7 +1630,7 @@ sub killem
1636
1630
  }
1637
1631
  if (!exists $proc{$_}->{"sent_$sig"})
1638
1632
  {
1639
- Log ($proc{$_}->{jobstep}, "sending 2x signal $sig to pid $_");
1633
+ Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
1640
1634
  kill $sig, $_;
1641
1635
  select (undef, undef, undef, 0.1);
1642
1636
  if ($sig == 2)
@@ -1760,16 +1754,21 @@ sub log_writer_is_active() {
1760
1754
  return $log_pipe_pid;
1761
1755
  }
1762
1756
 
1763
- sub Log # ($jobstep_id, $logmessage)
1757
+ sub Log # ($jobstepidx, $logmessage)
1764
1758
  {
1765
- if ($_[1] =~ /\n/) {
1759
+ my ($jobstepidx, $logmessage) = @_;
1760
+ if ($logmessage =~ /\n/) {
1766
1761
  for my $line (split (/\n/, $_[1])) {
1767
- Log ($_[0], $line);
1762
+ Log ($jobstepidx, $line);
1768
1763
  }
1769
1764
  return;
1770
1765
  }
1771
1766
  my $fh = select STDERR; $|=1; select $fh;
1772
- my $message = sprintf ("%s %d %s %s", $job_id, $$, @_);
1767
+ my $task_qseq = '';
1768
+ if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
1769
+ $task_qseq = $jobstepidx;
1770
+ }
1771
+ my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
1773
1772
  $message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
1774
1773
  $message .= "\n";
1775
1774
  my $datetime;
@@ -1893,6 +1892,83 @@ sub freezeunquote
1893
1892
  }
1894
1893
 
1895
1894
 
1895
+ sub srun_sync
1896
+ {
1897
+ my $srunargs = shift;
1898
+ my $execargs = shift;
1899
+ my $opts = shift || {};
1900
+ my $stdin = shift;
1901
+
1902
+ my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
1903
+ Log (undef, "$label: start");
1904
+
1905
+ my ($stderr_r, $stderr_w);
1906
+ pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
1907
+
1908
+ my ($stdout_r, $stdout_w);
1909
+ pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
1910
+
1911
+ my $srunpid = fork();
1912
+ if ($srunpid == 0)
1913
+ {
1914
+ close($stderr_r);
1915
+ close($stdout_r);
1916
+ fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
1917
+ fcntl($stdout_w, F_SETFL, 0) or croak($!);
1918
+ open(STDERR, ">&", $stderr_w);
1919
+ open(STDOUT, ">&", $stdout_w);
1920
+ srun ($srunargs, $execargs, $opts, $stdin);
1921
+ exit (1);
1922
+ }
1923
+ close($stderr_w);
1924
+ close($stdout_w);
1925
+
1926
+ set_nonblocking($stderr_r);
1927
+ set_nonblocking($stdout_r);
1928
+
1929
+ # Add entries to @jobstep and %proc so check_squeue() and
1930
+ # freeze_if_want_freeze() can treat it like a job task process.
1931
+ push @jobstep, {
1932
+ stderr => '',
1933
+ stderr_at => 0,
1934
+ stderr_captured => '',
1935
+ stdout_r => $stdout_r,
1936
+ stdout_captured => '',
1937
+ };
1938
+ my $jobstepidx = $#jobstep;
1939
+ $proc{$srunpid} = {
1940
+ jobstepidx => $jobstepidx,
1941
+ };
1942
+ $reader{$jobstepidx} = $stderr_r;
1943
+
1944
+ while ($srunpid != waitpid ($srunpid, WNOHANG)) {
1945
+ my $busy = readfrompipes();
1946
+ if (!$busy || ($latest_refresh + 2 < scalar time)) {
1947
+ check_refresh_wanted();
1948
+ check_squeue();
1949
+ }
1950
+ if (!$busy) {
1951
+ select(undef, undef, undef, 0.1);
1952
+ }
1953
+ killem(keys %proc) if $main::please_freeze;
1954
+ }
1955
+ my $exited = $?;
1956
+
1957
+ 1 while readfrompipes();
1958
+ process_stderr_final ($jobstepidx);
1959
+
1960
+ Log (undef, "$label: exit ".exit_status_s($exited));
1961
+
1962
+ close($stdout_r);
1963
+ close($stderr_r);
1964
+ delete $proc{$srunpid};
1965
+ delete $reader{$jobstepidx};
1966
+
1967
+ my $j = pop @jobstep;
1968
+ return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
1969
+ }
1970
+
1971
+
1896
1972
  sub srun
1897
1973
  {
1898
1974
  my $srunargs = shift;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20160301220801
4
+ version: 0.1.20160302171627
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-01 00:00:00.000000000 Z
11
+ date: 2016-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 7a57d59c01f746599400bdf83823283321753c3c
181
+ description: Arvados command line tools, git commit 9ddb8e3b5106f885401927a513ac0d94a59133ed
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv