arvados-cli 0.1.20160301220801 → 0.1.20160302171627

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +336 -260
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eef6bb9ccab7e81066458f6cf7ef694b49f5aafc
4
- data.tar.gz: 676b854565294414d05a46bf29c7061cdd27d0f9
3
+ metadata.gz: 5d7cf3d250304a66887286e13ecdfc1da39a9624
4
+ data.tar.gz: f17e37107acba14052fd85dc525bd64998706c49
5
5
  SHA512:
6
- metadata.gz: 8d5b06f57dca7fb720205cc941921ec5cb303a699adb3f2cc7c6127e64be617f9a8b87bc6b101d93ad40d5f7fbcdb61bfe1b812e802fa22c64d454eeb1dc7e28
7
- data.tar.gz: 8d9056bc655740a754f9b6f015b1b55887d77ab9fa8c7d2bdb67afd9fddb3c9c893da691488fb66d67d70e89d44ba1f7cec53400c7172d2c95a4724923a4d3c6
6
+ metadata.gz: c190742af3c5f4253c8f05f0e57e1f5d566bdb0db6aadd24f91e6c2c5ee8445f967e621cbac0e7124874a5533efa3052d171b425e418b173b9760072d7884ba3
7
+ data.tar.gz: c25440d9c5ff3e18807be0a8399494f4da816f2cb790c60866ce919baa02de653ad881baf7214ff193600693dc7ca09cbd24d02dda75ad07116247eae9fe2e69
data/bin/crunch-job CHANGED
@@ -126,6 +126,7 @@ my $jobspec;
126
126
  my $job_api_token;
127
127
  my $no_clear_tmp;
128
128
  my $resume_stash;
129
+ my $cgroup_root = "/sys/fs/cgroup";
129
130
  my $docker_bin = "docker.io";
130
131
  my $docker_run_args = "";
131
132
  GetOptions('force-unlock' => \$force_unlock,
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
134
135
  'job-api-token=s' => \$job_api_token,
135
136
  'no-clear-tmp' => \$no_clear_tmp,
136
137
  'resume-stash=s' => \$resume_stash,
138
+ 'cgroup-root=s' => \$cgroup_root,
137
139
  'docker-bin=s' => \$docker_bin,
138
140
  'docker-run-args=s' => \$docker_run_args,
139
141
  );
@@ -183,11 +185,12 @@ if (($Job || $local_job)->{docker_image_locator}) {
183
185
  $cmd = [$docker_bin, 'ps', '-q'];
184
186
  }
185
187
  Log(undef, "Sanity check is `@$cmd`");
186
- srun(["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
187
- $cmd,
188
- {fork => 1});
189
- if ($? != 0) {
190
- Log(undef, "Sanity check failed: ".exit_status_s($?));
188
+ my ($exited, $stdout, $stderr) = srun_sync(
189
+ ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
190
+ $cmd,
191
+ {label => "sanity check"});
192
+ if ($exited != 0) {
193
+ Log(undef, "Sanity check failed: ".exit_status_s($exited));
191
194
  exit EX_TEMPFAIL;
192
195
  }
193
196
  Log(undef, "Sanity check OK");
@@ -386,28 +389,17 @@ my $nodelist = join(",", @node);
386
389
  my $git_tar_count = 0;
387
390
 
388
391
  if (!defined $no_clear_tmp) {
389
- # Clean out crunch_tmp/work, crunch_tmp/opt, crunch_tmp/src*
390
- Log (undef, "Clean work dirs");
391
-
392
- my $cleanpid = fork();
393
- if ($cleanpid == 0)
394
- {
395
- # Find FUSE mounts under $CRUNCH_TMP and unmount them.
396
- # Then clean up work directories.
397
- # TODO: When #5036 is done and widely deployed, we can limit mount's
398
- # -t option to simply fuse.keep.
399
- srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
400
- ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
401
- exit (1);
402
- }
403
- while (1)
404
- {
405
- last if $cleanpid == waitpid (-1, WNOHANG);
406
- freeze_if_want_freeze ($cleanpid);
407
- select (undef, undef, undef, 0.1);
408
- }
409
- if ($?) {
410
- Log(undef, "Clean work dirs: exit ".exit_status_s($?));
392
+ # Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
393
+ # up work directories crunch_tmp/work, crunch_tmp/opt,
394
+ # crunch_tmp/src*.
395
+ #
396
+ # TODO: When #5036 is done and widely deployed, we can limit mount's
397
+ # -t option to simply fuse.keep.
398
+ my ($exited, $stdout, $stderr) = srun_sync(
399
+ ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
400
+ ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid'],
401
+ {label => "clean work dirs"});
402
+ if ($exited != 0) {
411
403
  exit(EX_RETRY_UNLOCKED);
412
404
  }
413
405
  }
@@ -428,30 +420,22 @@ if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
428
420
  arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
429
421
  fi
430
422
  };
431
- my $docker_pid = fork();
432
- if ($docker_pid == 0)
433
- {
434
- srun (["srun", "--nodelist=" . join(',', @node)],
435
- ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script]);
436
- exit ($?);
437
- }
438
- while (1)
439
- {
440
- last if $docker_pid == waitpid (-1, WNOHANG);
441
- freeze_if_want_freeze ($docker_pid);
442
- select (undef, undef, undef, 0.1);
443
- }
444
- if ($? != 0)
423
+
424
+ my ($exited, $stdout, $stderr) = srun_sync(
425
+ ["srun", "--nodelist=" . join(',', @node)],
426
+ ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
427
+ {label => "load docker image"});
428
+ if ($exited != 0)
445
429
  {
446
- Log(undef, "Installing Docker image from $docker_locator exited " . exit_status_s($?));
447
430
  exit(EX_RETRY_UNLOCKED);
448
431
  }
449
432
 
450
433
  # Determine whether this version of Docker supports memory+swap limits.
451
- srun(["srun", "--nodelist=" . $node[0]],
452
- ["/bin/sh", "-ec", "$docker_bin run --help | grep -qe --memory-swap="],
453
- {fork => 1});
454
- $docker_limitmem = ($? == 0);
434
+ ($exited, $stdout, $stderr) = srun_sync(
435
+ ["srun", "--nodelist=" . $node[0]],
436
+ [$docker_bin, 'run', '--help'],
437
+ {label => "check --memory-swap feature"});
438
+ $docker_limitmem = ($stdout =~ /--memory-swap/);
455
439
 
456
440
  # Find a non-root Docker user to use.
457
441
  # Tries the default user for the container, then 'crunch', then 'nobody',
@@ -461,20 +445,22 @@ fi
461
445
  # Docker containers.
462
446
  my @tryusers = ("", "crunch", "nobody");
463
447
  foreach my $try_user (@tryusers) {
448
+ my $label;
464
449
  my $try_user_arg;
465
450
  if ($try_user eq "") {
466
- Log(undef, "Checking if container default user is not UID 0");
451
+ $label = "check whether default user is UID 0";
467
452
  $try_user_arg = "";
468
453
  } else {
469
- Log(undef, "Checking if user '$try_user' is not UID 0");
454
+ $label = "check whether user '$try_user' is UID 0";
470
455
  $try_user_arg = "--user=$try_user";
471
456
  }
472
- srun(["srun", "--nodelist=" . $node[0]],
473
- ["/bin/sh", "-ec",
474
- "a=`$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user` && " .
475
- " test \$a -ne 0"],
476
- {fork => 1});
477
- if ($? == 0) {
457
+ my ($exited, $stdout, $stderr) = srun_sync(
458
+ ["srun", "--nodelist=" . $node[0]],
459
+ ["/bin/sh", "-ec",
460
+ "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
461
+ {label => $label});
462
+ chomp($stdout);
463
+ if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
478
464
  $dockeruserarg = $try_user_arg;
479
465
  if ($try_user eq "") {
480
466
  Log(undef, "Container will run with default user");
@@ -664,11 +650,9 @@ if (!defined $git_archive) {
664
650
  }
665
651
  }
666
652
  else {
667
- my $install_exited;
653
+ my $exited;
668
654
  my $install_script_tries_left = 3;
669
655
  for (my $attempts = 0; $attempts < 3; $attempts++) {
670
- Log(undef, "Run install script on all workers");
671
-
672
656
  my @srunargs = ("srun",
673
657
  "--nodelist=$nodelist",
674
658
  "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
@@ -676,59 +660,21 @@ else {
676
660
  "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
677
661
 
678
662
  $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
679
- my ($install_stderr_r, $install_stderr_w);
680
- pipe $install_stderr_r, $install_stderr_w or croak("pipe() failed: $!");
681
- set_nonblocking($install_stderr_r);
682
- my $installpid = fork();
683
- if ($installpid == 0)
684
- {
685
- close($install_stderr_r);
686
- fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
687
- open(STDOUT, ">&", $install_stderr_w);
688
- open(STDERR, ">&", $install_stderr_w);
689
- srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
690
- exit (1);
691
- }
692
- close($install_stderr_w);
693
- # Tell freeze_if_want_freeze how to kill the child, otherwise the
694
- # "waitpid(installpid)" loop won't get interrupted by a freeze:
695
- $proc{$installpid} = {};
696
- my $stderr_buf = '';
697
- # Track whether anything appears on stderr other than slurm errors
698
- # ("srun: ...") and the "starting: ..." message printed by the
699
- # srun subroutine itself:
663
+ my ($stdout, $stderr);
664
+ ($exited, $stdout, $stderr) = srun_sync(
665
+ \@srunargs, \@execargs,
666
+ {label => "run install script on all workers"},
667
+ $build_script . $git_archive);
668
+
700
669
  my $stderr_anything_from_script = 0;
701
- my $match_our_own_errors = '^(srun: error: |starting: \[)';
702
- while ($installpid != waitpid(-1, WNOHANG)) {
703
- freeze_if_want_freeze ($installpid);
704
- # Wait up to 0.1 seconds for something to appear on stderr, then
705
- # do a non-blocking read.
706
- my $bits = fhbits($install_stderr_r);
707
- select ($bits, undef, $bits, 0.1);
708
- if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
709
- {
710
- while ($stderr_buf =~ /^(.*?)\n/) {
711
- my $line = $1;
712
- substr $stderr_buf, 0, 1+length($line), "";
713
- Log(undef, "stderr $line");
714
- if ($line !~ /$match_our_own_errors/) {
715
- $stderr_anything_from_script = 1;
716
- }
717
- }
718
- }
719
- }
720
- delete $proc{$installpid};
721
- $install_exited = $?;
722
- close($install_stderr_r);
723
- if (length($stderr_buf) > 0) {
724
- if ($stderr_buf !~ /$match_our_own_errors/) {
670
+ for my $line (split(/\n/, $stderr)) {
671
+ if ($line !~ /^(srun: error: |starting: \[)/) {
725
672
  $stderr_anything_from_script = 1;
726
673
  }
727
- Log(undef, "stderr $stderr_buf")
728
674
  }
729
675
 
730
- Log (undef, "Install script exited ".exit_status_s($install_exited));
731
- last if $install_exited == 0 || $main::please_freeze;
676
+ last if $exited == 0 || $main::please_freeze;
677
+
732
678
  # If the install script fails but doesn't print an error message,
733
679
  # the next thing anyone is likely to do is just run it again in
734
680
  # case it was a transient problem like "slurm communication fails
@@ -744,7 +690,7 @@ else {
744
690
  unlink($tar_filename);
745
691
  }
746
692
 
747
- if ($install_exited != 0) {
693
+ if ($exited != 0) {
748
694
  croak("Giving up");
749
695
  }
750
696
  }
@@ -803,6 +749,7 @@ if ($initial_tasks_this_level < @node) {
803
749
  @freeslot = (0..$#slot);
804
750
  }
805
751
  my $round_num_freeslots = scalar(@freeslot);
752
+ print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
806
753
 
807
754
  my %round_max_slots = ();
808
755
  for (my $ii = $#freeslot; $ii >= 0; $ii--) {
@@ -915,7 +862,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
915
862
  {
916
863
  my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
917
864
  my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
918
- $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
865
+ $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
919
866
  $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
920
867
  # We only set memory limits if Docker lets us limit both memory and swap.
921
868
  # Memory limits alone have been supported longer, but subprocesses tend
@@ -995,7 +942,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
995
942
  }
996
943
  } else {
997
944
  # Non-docker run
998
- $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
945
+ $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
999
946
  $command .= $stdbuf;
1000
947
  $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
1001
948
  }
@@ -1013,11 +960,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
1013
960
  next;
1014
961
  }
1015
962
  shift @freeslot;
1016
- $proc{$childpid} = { jobstep => $id,
1017
- time => time,
1018
- slot => $childslot,
1019
- jobstepname => "$job_id.$id.$childpid",
1020
- };
963
+ $proc{$childpid} = {
964
+ jobstepidx => $id,
965
+ time => time,
966
+ slot => $childslot,
967
+ jobstepname => "$job_id.$id.$childpid",
968
+ };
1021
969
  croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
1022
970
  $slot[$childslot]->{pid} = $childpid;
1023
971
 
@@ -1185,128 +1133,142 @@ sub update_progress_stats
1185
1133
 
1186
1134
  sub reapchildren
1187
1135
  {
1188
- my $pid = waitpid (-1, WNOHANG);
1189
- return 0 if $pid <= 0;
1190
-
1191
- my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
1192
- . "."
1193
- . $slot[$proc{$pid}->{slot}]->{cpu});
1194
- my $jobstepid = $proc{$pid}->{jobstep};
1195
- my $elapsed = time - $proc{$pid}->{time};
1196
- my $Jobstep = $jobstep[$jobstepid];
1197
-
1198
- my $childstatus = $?;
1199
- my $exitvalue = $childstatus >> 8;
1200
- my $exitinfo = "exit ".exit_status_s($childstatus);
1201
- $Jobstep->{'arvados_task'}->reload;
1202
- my $task_success = $Jobstep->{'arvados_task'}->{success};
1203
-
1204
- Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
1205
-
1206
- if (!defined $task_success) {
1207
- # task did not indicate one way or the other --> fail
1208
- Log($jobstepid, sprintf(
1209
- "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
1210
- exit_status_s($childstatus)));
1211
- $Jobstep->{'arvados_task'}->{success} = 0;
1212
- $Jobstep->{'arvados_task'}->save;
1213
- $task_success = 0;
1214
- }
1215
-
1216
- if (!$task_success)
1136
+ my $children_reaped = 0;
1137
+ while ((my $pid = waitpid (-1, WNOHANG)) > 0)
1217
1138
  {
1218
- my $temporary_fail;
1219
- $temporary_fail ||= $Jobstep->{tempfail};
1220
- $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
1221
-
1222
- ++$thisround_failed;
1223
- ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
1224
-
1225
- # Check for signs of a failed or misconfigured node
1226
- if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
1227
- 2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
1228
- # Don't count this against jobstep failure thresholds if this
1229
- # node is already suspected faulty and srun exited quickly
1230
- if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
1231
- $elapsed < 5) {
1232
- Log ($jobstepid, "blaming failure on suspect node " .
1233
- $slot[$proc{$pid}->{slot}]->{node}->{name});
1234
- $temporary_fail ||= 1;
1235
- }
1236
- ban_node_by_slot($proc{$pid}->{slot});
1139
+ my $childstatus = $?;
1140
+
1141
+ my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
1142
+ . "."
1143
+ . $slot[$proc{$pid}->{slot}]->{cpu});
1144
+ my $jobstepidx = $proc{$pid}->{jobstepidx};
1145
+
1146
+ if (!WIFEXITED($childstatus))
1147
+ {
1148
+ # child did not exit (may be temporarily stopped)
1149
+ Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now.");
1150
+ next;
1237
1151
  }
1238
1152
 
1239
- Log ($jobstepid, sprintf('failure (#%d, %s) after %d seconds',
1240
- ++$Jobstep->{'failures'},
1241
- $temporary_fail ? 'temporary' : 'permanent',
1242
- $elapsed));
1153
+ $children_reaped++;
1154
+ my $elapsed = time - $proc{$pid}->{time};
1155
+ my $Jobstep = $jobstep[$jobstepidx];
1156
+
1157
+ my $exitvalue = $childstatus >> 8;
1158
+ my $exitinfo = "exit ".exit_status_s($childstatus);
1159
+ $Jobstep->{'arvados_task'}->reload;
1160
+ my $task_success = $Jobstep->{'arvados_task'}->{success};
1161
+
1162
+ Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
1163
+
1164
+ if (!defined $task_success) {
1165
+ # task did not indicate one way or the other --> fail
1166
+ Log($jobstepidx, sprintf(
1167
+ "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
1168
+ exit_status_s($childstatus)));
1169
+ $Jobstep->{'arvados_task'}->{success} = 0;
1170
+ $Jobstep->{'arvados_task'}->save;
1171
+ $task_success = 0;
1172
+ }
1243
1173
 
1244
- if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
1245
- # Give up on this task, and the whole job
1246
- $main::success = 0;
1174
+ if (!$task_success)
1175
+ {
1176
+ my $temporary_fail;
1177
+ $temporary_fail ||= $Jobstep->{tempfail};
1178
+ $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
1179
+
1180
+ ++$thisround_failed;
1181
+ ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
1182
+
1183
+ # Check for signs of a failed or misconfigured node
1184
+ if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
1185
+ 2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
1186
+ # Don't count this against jobstep failure thresholds if this
1187
+ # node is already suspected faulty and srun exited quickly
1188
+ if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
1189
+ $elapsed < 5) {
1190
+ Log ($jobstepidx, "blaming failure on suspect node " .
1191
+ $slot[$proc{$pid}->{slot}]->{node}->{name});
1192
+ $temporary_fail ||= 1;
1193
+ }
1194
+ ban_node_by_slot($proc{$pid}->{slot});
1195
+ }
1196
+
1197
+ Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
1198
+ ++$Jobstep->{'failures'},
1199
+ $temporary_fail ? 'temporary' : 'permanent',
1200
+ $elapsed));
1201
+
1202
+ if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
1203
+ # Give up on this task, and the whole job
1204
+ $main::success = 0;
1205
+ }
1206
+ # Put this task back on the todo queue
1207
+ push @jobstep_todo, $jobstepidx;
1208
+ $Job->{'tasks_summary'}->{'failed'}++;
1247
1209
  }
1248
- # Put this task back on the todo queue
1249
- push @jobstep_todo, $jobstepid;
1250
- $Job->{'tasks_summary'}->{'failed'}++;
1251
- }
1252
- else
1253
- {
1254
- ++$thisround_succeeded;
1255
- $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
1256
- $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
1257
- $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
1258
- push @jobstep_done, $jobstepid;
1259
- Log ($jobstepid, "success in $elapsed seconds");
1260
- }
1261
- $Jobstep->{exitcode} = $childstatus;
1262
- $Jobstep->{finishtime} = time;
1263
- $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
1264
- $Jobstep->{'arvados_task'}->save;
1265
- process_stderr ($jobstepid, $task_success);
1266
- Log ($jobstepid, sprintf("task output (%d bytes): %s",
1267
- length($Jobstep->{'arvados_task'}->{output}),
1268
- $Jobstep->{'arvados_task'}->{output}));
1269
-
1270
- close $reader{$jobstepid};
1271
- delete $reader{$jobstepid};
1272
- delete $slot[$proc{$pid}->{slot}]->{pid};
1273
- push @freeslot, $proc{$pid}->{slot};
1274
- delete $proc{$pid};
1275
-
1276
- if ($task_success) {
1277
- # Load new tasks
1278
- my $newtask_list = [];
1279
- my $newtask_results;
1280
- do {
1281
- $newtask_results = api_call(
1282
- "job_tasks/list",
1283
- 'where' => {
1284
- 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
1285
- },
1286
- 'order' => 'qsequence',
1287
- 'offset' => scalar(@$newtask_list),
1288
- );
1289
- push(@$newtask_list, @{$newtask_results->{items}});
1290
- } while (@{$newtask_results->{items}});
1291
- foreach my $arvados_task (@$newtask_list) {
1292
- my $jobstep = {
1293
- 'level' => $arvados_task->{'sequence'},
1294
- 'failures' => 0,
1295
- 'arvados_task' => $arvados_task
1296
- };
1297
- push @jobstep, $jobstep;
1298
- push @jobstep_todo, $#jobstep;
1210
+ else
1211
+ {
1212
+ ++$thisround_succeeded;
1213
+ $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
1214
+ $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
1215
+ $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
1216
+ push @jobstep_done, $jobstepidx;
1217
+ Log ($jobstepidx, "success in $elapsed seconds");
1299
1218
  }
1219
+ $Jobstep->{exitcode} = $childstatus;
1220
+ $Jobstep->{finishtime} = time;
1221
+ $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
1222
+ $Jobstep->{'arvados_task'}->save;
1223
+ process_stderr_final ($jobstepidx);
1224
+ Log ($jobstepidx, sprintf("task output (%d bytes): %s",
1225
+ length($Jobstep->{'arvados_task'}->{output}),
1226
+ $Jobstep->{'arvados_task'}->{output}));
1227
+
1228
+ close $reader{$jobstepidx};
1229
+ delete $reader{$jobstepidx};
1230
+ delete $slot[$proc{$pid}->{slot}]->{pid};
1231
+ push @freeslot, $proc{$pid}->{slot};
1232
+ delete $proc{$pid};
1233
+
1234
+ if ($task_success) {
1235
+ # Load new tasks
1236
+ my $newtask_list = [];
1237
+ my $newtask_results;
1238
+ do {
1239
+ $newtask_results = api_call(
1240
+ "job_tasks/list",
1241
+ 'where' => {
1242
+ 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
1243
+ },
1244
+ 'order' => 'qsequence',
1245
+ 'offset' => scalar(@$newtask_list),
1246
+ );
1247
+ push(@$newtask_list, @{$newtask_results->{items}});
1248
+ } while (@{$newtask_results->{items}});
1249
+ foreach my $arvados_task (@$newtask_list) {
1250
+ my $jobstep = {
1251
+ 'level' => $arvados_task->{'sequence'},
1252
+ 'failures' => 0,
1253
+ 'arvados_task' => $arvados_task
1254
+ };
1255
+ push @jobstep, $jobstep;
1256
+ push @jobstep_todo, $#jobstep;
1257
+ }
1258
+ }
1259
+ $progress_is_dirty = 1;
1300
1260
  }
1301
1261
 
1302
- $progress_is_dirty = 1;
1303
- 1;
1262
+ return $children_reaped;
1304
1263
  }
1305
1264
 
1306
1265
  sub check_refresh_wanted
1307
1266
  {
1308
1267
  my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
1309
- if (@stat && $stat[9] > $latest_refresh) {
1268
+ if (@stat &&
1269
+ $stat[9] > $latest_refresh &&
1270
+ # ...and we have actually locked the job record...
1271
+ $job_id eq $Job->{'uuid'}) {
1310
1272
  $latest_refresh = scalar time;
1311
1273
  my $Job2 = api_call("jobs/get", uuid => $jobspec);
1312
1274
  for my $attr ('cancelled_at',
@@ -1344,10 +1306,13 @@ sub check_squeue
1344
1306
  # squeue check interval (15s) this should make the squeue check an
1345
1307
  # infrequent event.
1346
1308
  my $silent_procs = 0;
1347
- for my $procinfo (values %proc)
1309
+ for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
1348
1310
  {
1349
- my $jobstep = $jobstep[$procinfo->{jobstep}];
1350
- if ($jobstep->{stderr_at} < $last_squeue_check)
1311
+ if (!exists($js->{stderr_at}))
1312
+ {
1313
+ $js->{stderr_at} = 0;
1314
+ }
1315
+ if ($js->{stderr_at} < $last_squeue_check)
1351
1316
  {
1352
1317
  $silent_procs++;
1353
1318
  }
@@ -1357,16 +1322,16 @@ sub check_squeue
1357
1322
  # use killem() on procs whose killtime is reached
1358
1323
  while (my ($pid, $procinfo) = each %proc)
1359
1324
  {
1360
- my $jobstep = $jobstep[$procinfo->{jobstep}];
1325
+ my $js = $jobstep[$procinfo->{jobstepidx}];
1361
1326
  if (exists $procinfo->{killtime}
1362
1327
  && $procinfo->{killtime} <= time
1363
- && $jobstep->{stderr_at} < $last_squeue_check)
1328
+ && $js->{stderr_at} < $last_squeue_check)
1364
1329
  {
1365
1330
  my $sincewhen = "";
1366
- if ($jobstep->{stderr_at}) {
1367
- $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
1331
+ if ($js->{stderr_at}) {
1332
+ $sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
1368
1333
  }
1369
- Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1334
+ Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1370
1335
  killem ($pid);
1371
1336
  }
1372
1337
  }
@@ -1416,7 +1381,7 @@ sub check_squeue
1416
1381
  # error/delay has caused the task to die without notifying srun,
1417
1382
  # and we'll kill srun ourselves.
1418
1383
  $procinfo->{killtime} = time + 30;
1419
- Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
1384
+ Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
1420
1385
  }
1421
1386
  }
1422
1387
  }
@@ -1435,70 +1400,99 @@ sub release_allocation
1435
1400
  sub readfrompipes
1436
1401
  {
1437
1402
  my $gotsome = 0;
1438
- foreach my $job (keys %reader)
1403
+ my %fd_job;
1404
+ my $sel = IO::Select->new();
1405
+ foreach my $jobstepidx (keys %reader)
1406
+ {
1407
+ my $fd = $reader{$jobstepidx};
1408
+ $sel->add($fd);
1409
+ $fd_job{$fd} = $jobstepidx;
1410
+
1411
+ if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
1412
+ $sel->add($stdout_fd);
1413
+ $fd_job{$stdout_fd} = $jobstepidx;
1414
+ }
1415
+ }
1416
+ # select on all reader fds with 0.1s timeout
1417
+ my @ready_fds = $sel->can_read(0.1);
1418
+ foreach my $fd (@ready_fds)
1439
1419
  {
1440
1420
  my $buf;
1441
- if (0 < sysread ($reader{$job}, $buf, 65536))
1421
+ if (0 < sysread ($fd, $buf, 65536))
1442
1422
  {
1423
+ $gotsome = 1;
1443
1424
  print STDERR $buf if $ENV{CRUNCH_DEBUG};
1444
- $jobstep[$job]->{stderr_at} = time;
1445
- $jobstep[$job]->{stderr} .= $buf;
1425
+
1426
+ my $jobstepidx = $fd_job{$fd};
1427
+ if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
1428
+ $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
1429
+ next;
1430
+ }
1431
+
1432
+ $jobstep[$jobstepidx]->{stderr_at} = time;
1433
+ $jobstep[$jobstepidx]->{stderr} .= $buf;
1446
1434
 
1447
1435
  # Consume everything up to the last \n
1448
- preprocess_stderr ($job);
1436
+ preprocess_stderr ($jobstepidx);
1449
1437
 
1450
- if (length ($jobstep[$job]->{stderr}) > 16384)
1438
+ if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
1451
1439
  {
1452
1440
  # If we get a lot of stderr without a newline, chop off the
1453
1441
  # front to avoid letting our buffer grow indefinitely.
1454
- substr ($jobstep[$job]->{stderr},
1455
- 0, length($jobstep[$job]->{stderr}) - 8192) = "";
1442
+ substr ($jobstep[$jobstepidx]->{stderr},
1443
+ 0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
1456
1444
  }
1457
- $gotsome = 1;
1458
1445
  }
1459
1446
  }
1460
1447
  return $gotsome;
1461
1448
  }
1462
1449
 
1463
1450
 
1451
+ # Consume all full lines of stderr for a jobstep. Everything after the
1452
+ # last newline will remain in $jobstep[$jobstepidx]->{stderr} after
1453
+ # returning.
1464
1454
  sub preprocess_stderr
1465
1455
  {
1466
- my $job = shift;
1456
+ my $jobstepidx = shift;
1467
1457
 
1468
- while ($jobstep[$job]->{stderr} =~ /^(.*?)\n/) {
1458
+ while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
1469
1459
  my $line = $1;
1470
- substr $jobstep[$job]->{stderr}, 0, 1+length($line), "";
1471
- Log ($job, "stderr $line");
1460
+ substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
1461
+ Log ($jobstepidx, "stderr $line");
1472
1462
  if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
1473
1463
  # whoa.
1474
1464
  $main::please_freeze = 1;
1475
1465
  }
1466
+ elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
1467
+ # Skip the following tempfail checks if this srun proc isn't
1468
+ # attached to a particular worker slot.
1469
+ }
1476
1470
  elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
1477
- my $job_slot_index = $jobstep[$job]->{slotindex};
1471
+ my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
1478
1472
  $slot[$job_slot_index]->{node}->{fail_count}++;
1479
- $jobstep[$job]->{tempfail} = 1;
1473
+ $jobstep[$jobstepidx]->{tempfail} = 1;
1480
1474
  ban_node_by_slot($job_slot_index);
1481
1475
  }
1482
1476
  elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
1483
- $jobstep[$job]->{tempfail} = 1;
1484
- ban_node_by_slot($jobstep[$job]->{slotindex});
1477
+ $jobstep[$jobstepidx]->{tempfail} = 1;
1478
+ ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
1485
1479
  }
1486
1480
  elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
1487
- $jobstep[$job]->{tempfail} = 1;
1481
+ $jobstep[$jobstepidx]->{tempfail} = 1;
1488
1482
  }
1489
1483
  }
1490
1484
  }
1491
1485
 
1492
1486
 
1493
- sub process_stderr
1487
+ sub process_stderr_final
1494
1488
  {
1495
- my $job = shift;
1496
- my $task_success = shift;
1497
- preprocess_stderr ($job);
1489
+ my $jobstepidx = shift;
1490
+ preprocess_stderr ($jobstepidx);
1498
1491
 
1499
1492
  map {
1500
- Log ($job, "stderr $_");
1501
- } split ("\n", $jobstep[$job]->{stderr});
1493
+ Log ($jobstepidx, "stderr $_");
1494
+ } split ("\n", $jobstep[$jobstepidx]->{stderr});
1495
+ $jobstep[$jobstepidx]->{stderr} = '';
1502
1496
  }
1503
1497
 
1504
1498
  sub fetch_block
@@ -1636,7 +1630,7 @@ sub killem
1636
1630
  }
1637
1631
  if (!exists $proc{$_}->{"sent_$sig"})
1638
1632
  {
1639
- Log ($proc{$_}->{jobstep}, "sending 2x signal $sig to pid $_");
1633
+ Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
1640
1634
  kill $sig, $_;
1641
1635
  select (undef, undef, undef, 0.1);
1642
1636
  if ($sig == 2)
@@ -1760,16 +1754,21 @@ sub log_writer_is_active() {
1760
1754
  return $log_pipe_pid;
1761
1755
  }
1762
1756
 
1763
- sub Log # ($jobstep_id, $logmessage)
1757
+ sub Log # ($jobstepidx, $logmessage)
1764
1758
  {
1765
- if ($_[1] =~ /\n/) {
1759
+ my ($jobstepidx, $logmessage) = @_;
1760
+ if ($logmessage =~ /\n/) {
1766
1761
  for my $line (split (/\n/, $_[1])) {
1767
- Log ($_[0], $line);
1762
+ Log ($jobstepidx, $line);
1768
1763
  }
1769
1764
  return;
1770
1765
  }
1771
1766
  my $fh = select STDERR; $|=1; select $fh;
1772
- my $message = sprintf ("%s %d %s %s", $job_id, $$, @_);
1767
+ my $task_qseq = '';
1768
+ if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
1769
+ $task_qseq = $jobstepidx;
1770
+ }
1771
+ my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
1773
1772
  $message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
1774
1773
  $message .= "\n";
1775
1774
  my $datetime;
@@ -1893,6 +1892,83 @@ sub freezeunquote
1893
1892
  }
1894
1893
 
1895
1894
 
1895
+ sub srun_sync
1896
+ {
1897
+ my $srunargs = shift;
1898
+ my $execargs = shift;
1899
+ my $opts = shift || {};
1900
+ my $stdin = shift;
1901
+
1902
+ my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
1903
+ Log (undef, "$label: start");
1904
+
1905
+ my ($stderr_r, $stderr_w);
1906
+ pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
1907
+
1908
+ my ($stdout_r, $stdout_w);
1909
+ pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
1910
+
1911
+ my $srunpid = fork();
1912
+ if ($srunpid == 0)
1913
+ {
1914
+ close($stderr_r);
1915
+ close($stdout_r);
1916
+ fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
1917
+ fcntl($stdout_w, F_SETFL, 0) or croak($!);
1918
+ open(STDERR, ">&", $stderr_w);
1919
+ open(STDOUT, ">&", $stdout_w);
1920
+ srun ($srunargs, $execargs, $opts, $stdin);
1921
+ exit (1);
1922
+ }
1923
+ close($stderr_w);
1924
+ close($stdout_w);
1925
+
1926
+ set_nonblocking($stderr_r);
1927
+ set_nonblocking($stdout_r);
1928
+
1929
+ # Add entries to @jobstep and %proc so check_squeue() and
1930
+ # freeze_if_want_freeze() can treat it like a job task process.
1931
+ push @jobstep, {
1932
+ stderr => '',
1933
+ stderr_at => 0,
1934
+ stderr_captured => '',
1935
+ stdout_r => $stdout_r,
1936
+ stdout_captured => '',
1937
+ };
1938
+ my $jobstepidx = $#jobstep;
1939
+ $proc{$srunpid} = {
1940
+ jobstepidx => $jobstepidx,
1941
+ };
1942
+ $reader{$jobstepidx} = $stderr_r;
1943
+
1944
+ while ($srunpid != waitpid ($srunpid, WNOHANG)) {
1945
+ my $busy = readfrompipes();
1946
+ if (!$busy || ($latest_refresh + 2 < scalar time)) {
1947
+ check_refresh_wanted();
1948
+ check_squeue();
1949
+ }
1950
+ if (!$busy) {
1951
+ select(undef, undef, undef, 0.1);
1952
+ }
1953
+ killem(keys %proc) if $main::please_freeze;
1954
+ }
1955
+ my $exited = $?;
1956
+
1957
+ 1 while readfrompipes();
1958
+ process_stderr_final ($jobstepidx);
1959
+
1960
+ Log (undef, "$label: exit ".exit_status_s($exited));
1961
+
1962
+ close($stdout_r);
1963
+ close($stderr_r);
1964
+ delete $proc{$srunpid};
1965
+ delete $reader{$jobstepidx};
1966
+
1967
+ my $j = pop @jobstep;
1968
+ return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
1969
+ }
1970
+
1971
+
1896
1972
  sub srun
1897
1973
  {
1898
1974
  my $srunargs = shift;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20160301220801
4
+ version: 0.1.20160302171627
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-01 00:00:00.000000000 Z
11
+ date: 2016-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 7a57d59c01f746599400bdf83823283321753c3c
181
+ description: Arvados command line tools, git commit 9ddb8e3b5106f885401927a513ac0d94a59133ed
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv