arvados-cli 1.4.3.pre20200103204428 → 1.5.0.dev20200118215415
- checksums.yaml +4 -4
- data/bin/arv +0 -10
- metadata +3 -8
- data/bin/arv-crunch-job +0 -6
- data/bin/arv-run-pipeline-instance +0 -781
- data/bin/crunch-job +0 -2577
data/bin/crunch-job
DELETED
@@ -1,2577 +0,0 @@
|
|
1
|
-
#!/usr/bin/env perl
|
2
|
-
# -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*-
|
3
|
-
# Copyright (C) The Arvados Authors. All rights reserved.
|
4
|
-
#
|
5
|
-
# SPDX-License-Identifier: AGPL-3.0
|
6
|
-
|
7
|
-
=head1 NAME
|
8
|
-
|
9
|
-
crunch-job: Execute job steps, save snapshots as requested, collate output.
|
10
|
-
|
11
|
-
=head1 SYNOPSIS
|
12
|
-
|
13
|
-
Obtain job details from Arvados, run tasks on compute nodes (typically
|
14
|
-
invoked by scheduler on controller):
|
15
|
-
|
16
|
-
crunch-job --job x-y-z --git-dir /path/to/repo/.git
|
17
|
-
|
18
|
-
Obtain job details from command line, run tasks on local machine
|
19
|
-
(typically invoked by application or developer on VM):
|
20
|
-
|
21
|
-
crunch-job --job '{"script_version":"/path/to/working/tree","script":"scriptname",...}'
|
22
|
-
|
23
|
-
crunch-job --job '{"repository":"https://github.com/curoverse/arvados.git","script_version":"master","script":"scriptname",...}'
|
24
|
-
|
25
|
-
=head1 OPTIONS
|
26
|
-
|
27
|
-
=over
|
28
|
-
|
29
|
-
=item --force-unlock
|
30
|
-
|
31
|
-
If the job is already locked, steal the lock and run it anyway.
|
32
|
-
|
33
|
-
=item --git-dir
|
34
|
-
|
35
|
-
Path to a .git directory (or a git URL) where the commit given in the
|
36
|
-
job's C<script_version> attribute is to be found. If this is I<not>
|
37
|
-
given, the job's C<repository> attribute will be used.
|
38
|
-
|
39
|
-
=item --job-api-token
|
40
|
-
|
41
|
-
Arvados API authorization token to use during the course of the job.
|
42
|
-
|
43
|
-
=item --no-clear-tmp
|
44
|
-
|
45
|
-
Do not clear per-job/task temporary directories during initial job
|
46
|
-
setup. This can speed up development and debugging when running jobs
|
47
|
-
locally.
|
48
|
-
|
49
|
-
=item --job
|
50
|
-
|
51
|
-
UUID of the job to run, or a JSON-encoded job resource without a
|
52
|
-
UUID. If the latter is given, a new job object will be created.
|
53
|
-
|
54
|
-
=back
|
55
|
-
|
56
|
-
=head1 RUNNING JOBS LOCALLY
|
57
|
-
|
58
|
-
crunch-job's log messages appear on stderr along with the job tasks'
|
59
|
-
stderr streams. The log is saved in Keep at each checkpoint and when
|
60
|
-
the job finishes.
|
61
|
-
|
62
|
-
If the job succeeds, the job's output locator is printed on stdout.
|
63
|
-
|
64
|
-
While the job is running, the following signals are accepted:
|
65
|
-
|
66
|
-
=over
|
67
|
-
|
68
|
-
=item control-C, SIGINT, SIGQUIT
|
69
|
-
|
70
|
-
Save a checkpoint, terminate any job tasks that are running, and stop.
|
71
|
-
|
72
|
-
=item SIGALRM
|
73
|
-
|
74
|
-
Save a checkpoint and continue.
|
75
|
-
|
76
|
-
=item SIGHUP
|
77
|
-
|
78
|
-
Refresh node allocation (i.e., check whether any nodes have been added
|
79
|
-
or unallocated) and attributes of the Job record that should affect
|
80
|
-
behavior (e.g., cancel job if cancelled_at becomes non-nil).
|
81
|
-
|
82
|
-
=back
|
83
|
-
|
84
|
-
=cut
|
85
|
-
|
86
|
-
|
87
|
-
use strict;
|
88
|
-
use POSIX ':sys_wait_h';
|
89
|
-
use POSIX qw(strftime);
|
90
|
-
use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
|
91
|
-
use Arvados;
|
92
|
-
use Cwd qw(realpath);
|
93
|
-
use Data::Dumper;
|
94
|
-
use Digest::MD5 qw(md5_hex);
|
95
|
-
use Getopt::Long;
|
96
|
-
use IPC::Open2;
|
97
|
-
use IO::Select;
|
98
|
-
use File::Temp;
|
99
|
-
use Fcntl ':flock';
|
100
|
-
use File::Path qw( make_path remove_tree );
|
101
|
-
|
102
|
-
use constant TASK_TEMPFAIL => 111;
|
103
|
-
use constant EX_TEMPFAIL => 75;
|
104
|
-
use constant EX_RETRY_UNLOCKED => 93;
|
105
|
-
|
106
|
-
$ENV{"TMPDIR"} ||= "/tmp";
|
107
|
-
unless (defined $ENV{"CRUNCH_TMP"}) {
|
108
|
-
$ENV{"CRUNCH_TMP"} = $ENV{"TMPDIR"} . "/crunch-job";
|
109
|
-
if ($ENV{"USER"} ne "crunch" && $< != 0) {
|
110
|
-
# use a tmp dir unique for my uid
|
111
|
-
$ENV{"CRUNCH_TMP"} .= "-$<";
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
115
|
-
# Create the tmp directory if it does not exist
|
116
|
-
if ( ! -d $ENV{"CRUNCH_TMP"} ) {
|
117
|
-
make_path $ENV{"CRUNCH_TMP"} or die "Failed to create temporary working directory: " . $ENV{"CRUNCH_TMP"};
|
118
|
-
}
|
119
|
-
|
120
|
-
$ENV{"JOB_WORK"} = $ENV{"CRUNCH_TMP"} . "/work";
|
121
|
-
$ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
|
122
|
-
$ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
|
123
|
-
mkdir ($ENV{"JOB_WORK"});
|
124
|
-
|
125
|
-
my %proc;
|
126
|
-
my $force_unlock;
|
127
|
-
my $git_dir;
|
128
|
-
my $jobspec;
|
129
|
-
my $job_api_token;
|
130
|
-
my $no_clear_tmp;
|
131
|
-
my $resume_stash;
|
132
|
-
my $cgroup_root = "/sys/fs/cgroup";
|
133
|
-
my $docker_bin = "docker.io";
|
134
|
-
my $docker_run_args = "";
|
135
|
-
my $srun_sync_timeout = 15*60;
|
136
|
-
GetOptions('force-unlock' => \$force_unlock,
|
137
|
-
'git-dir=s' => \$git_dir,
|
138
|
-
'job=s' => \$jobspec,
|
139
|
-
'job-api-token=s' => \$job_api_token,
|
140
|
-
'no-clear-tmp' => \$no_clear_tmp,
|
141
|
-
'resume-stash=s' => \$resume_stash,
|
142
|
-
'cgroup-root=s' => \$cgroup_root,
|
143
|
-
'docker-bin=s' => \$docker_bin,
|
144
|
-
'docker-run-args=s' => \$docker_run_args,
|
145
|
-
'srun-sync-timeout=i' => \$srun_sync_timeout,
|
146
|
-
);
|
147
|
-
|
148
|
-
if (defined $job_api_token) {
|
149
|
-
$ENV{ARVADOS_API_TOKEN} = $job_api_token;
|
150
|
-
}
|
151
|
-
|
152
|
-
my $have_slurm = exists $ENV{SLURM_JOB_ID} && exists $ENV{SLURM_NODELIST};
|
153
|
-
|
154
|
-
|
155
|
-
$SIG{'USR1'} = sub
|
156
|
-
{
|
157
|
-
$main::ENV{CRUNCH_DEBUG} = 1;
|
158
|
-
};
|
159
|
-
$SIG{'USR2'} = sub
|
160
|
-
{
|
161
|
-
$main::ENV{CRUNCH_DEBUG} = 0;
|
162
|
-
};
|
163
|
-
|
164
|
-
my $arv = Arvados->new('apiVersion' => 'v1');
|
165
|
-
|
166
|
-
my $Job;
|
167
|
-
my $job_id;
|
168
|
-
my $dbh;
|
169
|
-
my $sth;
|
170
|
-
my @jobstep;
|
171
|
-
|
172
|
-
my $local_job;
|
173
|
-
if ($jobspec =~ /^[-a-z\d]+$/)
|
174
|
-
{
|
175
|
-
# $jobspec is an Arvados UUID, not a JSON job specification
|
176
|
-
$Job = api_call("jobs/get", uuid => $jobspec);
|
177
|
-
$local_job = 0;
|
178
|
-
}
|
179
|
-
else
|
180
|
-
{
|
181
|
-
$local_job = JSON::decode_json($jobspec);
|
182
|
-
}
|
183
|
-
|
184
|
-
|
185
|
-
# Make sure our workers (our slurm nodes, localhost, or whatever) are
|
186
|
-
# at least able to run basic commands: they aren't down or severely
|
187
|
-
# misconfigured.
|
188
|
-
my $cmd = ['true'];
|
189
|
-
if (($Job || $local_job)->{docker_image_locator}) {
|
190
|
-
$cmd = [$docker_bin, 'ps', '-q'];
|
191
|
-
}
|
192
|
-
Log(undef, "Sanity check is `@$cmd`");
|
193
|
-
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
194
|
-
["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
|
195
|
-
$cmd,
|
196
|
-
{label => "sanity check"});
|
197
|
-
if ($exited != 0) {
|
198
|
-
Log(undef, "Sanity check failed: ".exit_status_s($exited));
|
199
|
-
exit EX_TEMPFAIL;
|
200
|
-
}
|
201
|
-
Log(undef, "Sanity check OK");
|
202
|
-
|
203
|
-
|
204
|
-
my $User = api_call("users/current");
|
205
|
-
|
206
|
-
if (!$local_job) {
|
207
|
-
if (!$force_unlock) {
|
208
|
-
# Claim this job, and make sure nobody else does
|
209
|
-
eval { api_call("jobs/lock", uuid => $Job->{uuid}); };
|
210
|
-
if ($@) {
|
211
|
-
Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
|
212
|
-
exit EX_TEMPFAIL;
|
213
|
-
};
|
214
|
-
}
|
215
|
-
}
|
216
|
-
else
|
217
|
-
{
|
218
|
-
if (!$resume_stash)
|
219
|
-
{
|
220
|
-
map { croak ("No $_ specified") unless $local_job->{$_} }
|
221
|
-
qw(script script_version script_parameters);
|
222
|
-
}
|
223
|
-
|
224
|
-
$local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
|
225
|
-
$local_job->{'started_at'} = gmtime;
|
226
|
-
$local_job->{'state'} = 'Running';
|
227
|
-
|
228
|
-
$Job = api_call("jobs/create", job => $local_job);
|
229
|
-
}
|
230
|
-
$job_id = $Job->{'uuid'};
|
231
|
-
|
232
|
-
my $keep_logfile = $job_id . '.log.txt';
|
233
|
-
log_writer_start($keep_logfile);
|
234
|
-
|
235
|
-
$Job->{'runtime_constraints'} ||= {};
|
236
|
-
$Job->{'runtime_constraints'}->{'max_tasks_per_node'} ||= 0;
|
237
|
-
my $max_ncpus = $Job->{'runtime_constraints'}->{'max_tasks_per_node'};
|
238
|
-
|
239
|
-
my $gem_versions = `gem list --quiet arvados-cli 2>/dev/null`;
|
240
|
-
if ($? == 0) {
|
241
|
-
$gem_versions =~ s/^arvados-cli \(/ with arvados-cli Gem version(s) /;
|
242
|
-
chomp($gem_versions);
|
243
|
-
chop($gem_versions); # Closing parentheses
|
244
|
-
} else {
|
245
|
-
$gem_versions = "";
|
246
|
-
}
|
247
|
-
Log(undef,
|
248
|
-
"running from " . ((-e $0) ? realpath($0) : "stdin") . $gem_versions);
|
249
|
-
|
250
|
-
Log (undef, "check slurm allocation");
|
251
|
-
my @slot;
|
252
|
-
my @node;
|
253
|
-
# Should use $ENV{SLURM_TASKS_PER_NODE} instead of sinfo? (eg. "4(x3),2,4(x2)")
|
254
|
-
my @sinfo;
|
255
|
-
if (!$have_slurm)
|
256
|
-
{
|
257
|
-
my $localcpus = 0 + `grep -cw ^processor /proc/cpuinfo` || 1;
|
258
|
-
push @sinfo, "$localcpus localhost";
|
259
|
-
}
|
260
|
-
if (exists $ENV{SLURM_NODELIST})
|
261
|
-
{
|
262
|
-
push @sinfo, `sinfo -h --format='%c %N' --nodes=\Q$ENV{SLURM_NODELIST}\E`;
|
263
|
-
}
|
264
|
-
foreach (@sinfo)
|
265
|
-
{
|
266
|
-
my ($ncpus, $slurm_nodelist) = split;
|
267
|
-
$ncpus = $max_ncpus if $max_ncpus && $ncpus > $max_ncpus;
|
268
|
-
|
269
|
-
my @nodelist;
|
270
|
-
while ($slurm_nodelist =~ s/^([^\[,]+?(\[.*?\])?)(,|$)//)
|
271
|
-
{
|
272
|
-
my $nodelist = $1;
|
273
|
-
if ($nodelist =~ /\[((\d+)(-(\d+))?(,(\d+)(-(\d+))?)*)\]/)
|
274
|
-
{
|
275
|
-
my $ranges = $1;
|
276
|
-
foreach (split (",", $ranges))
|
277
|
-
{
|
278
|
-
my ($a, $b);
|
279
|
-
if (/(\d+)-(\d+)/)
|
280
|
-
{
|
281
|
-
$a = $1;
|
282
|
-
$b = $2;
|
283
|
-
}
|
284
|
-
else
|
285
|
-
{
|
286
|
-
$a = $_;
|
287
|
-
$b = $_;
|
288
|
-
}
|
289
|
-
push @nodelist, map {
|
290
|
-
my $n = $nodelist;
|
291
|
-
$n =~ s/\[[-,\d]+\]/$_/;
|
292
|
-
$n;
|
293
|
-
} ($a..$b);
|
294
|
-
}
|
295
|
-
}
|
296
|
-
else
|
297
|
-
{
|
298
|
-
push @nodelist, $nodelist;
|
299
|
-
}
|
300
|
-
}
|
301
|
-
foreach my $nodename (@nodelist)
|
302
|
-
{
|
303
|
-
Log (undef, "node $nodename - $ncpus slots");
|
304
|
-
my $node = { name => $nodename,
|
305
|
-
ncpus => $ncpus,
|
306
|
-
# The number of consecutive times a task has been dispatched
|
307
|
-
# to this node and failed.
|
308
|
-
losing_streak => 0,
|
309
|
-
# The number of consecutive times that SLURM has reported
|
310
|
-
# a node failure since the last successful task.
|
311
|
-
fail_count => 0,
|
312
|
-
# Don't dispatch work to this node until this time
|
313
|
-
# (in seconds since the epoch) has passed.
|
314
|
-
hold_until => 0 };
|
315
|
-
foreach my $cpu (1..$ncpus)
|
316
|
-
{
|
317
|
-
push @slot, { node => $node,
|
318
|
-
cpu => $cpu };
|
319
|
-
}
|
320
|
-
}
|
321
|
-
push @node, @nodelist;
|
322
|
-
}
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
# Ensure that we get one jobstep running on each allocated node before
|
327
|
-
# we start overloading nodes with concurrent steps
|
328
|
-
|
329
|
-
@slot = sort { $a->{cpu} <=> $b->{cpu} } @slot;
|
330
|
-
|
331
|
-
|
332
|
-
$Job->update_attributes(
|
333
|
-
'tasks_summary' => { 'failed' => 0,
|
334
|
-
'todo' => 1,
|
335
|
-
'running' => 0,
|
336
|
-
'done' => 0 });
|
337
|
-
|
338
|
-
Log (undef, "start");
|
339
|
-
$SIG{'INT'} = sub { $main::please_freeze = 1; };
|
340
|
-
$SIG{'QUIT'} = sub { $main::please_freeze = 1; };
|
341
|
-
$SIG{'TERM'} = \&croak;
|
342
|
-
$SIG{'TSTP'} = sub { $main::please_freeze = 1; };
|
343
|
-
$SIG{'ALRM'} = sub { $main::please_info = 1; };
|
344
|
-
$SIG{'CONT'} = sub { $main::please_continue = 1; };
|
345
|
-
$SIG{'HUP'} = sub { $main::please_refresh = 1; };
|
346
|
-
|
347
|
-
$main::please_freeze = 0;
|
348
|
-
$main::please_info = 0;
|
349
|
-
$main::please_continue = 0;
|
350
|
-
$main::please_refresh = 0;
|
351
|
-
my $jobsteps_must_output_keys = 0; # becomes 1 when any task outputs a key
|
352
|
-
|
353
|
-
grep { $ENV{$1} = $2 if /^(NOCACHE.*?)=(.*)/ } split ("\n", $$Job{knobs});
|
354
|
-
$ENV{"CRUNCH_JOB_UUID"} = $job_id;
|
355
|
-
$ENV{"JOB_UUID"} = $job_id;
|
356
|
-
|
357
|
-
|
358
|
-
my @jobstep_todo = ();
|
359
|
-
my @jobstep_done = ();
|
360
|
-
my @jobstep_tomerge = ();
|
361
|
-
my $jobstep_tomerge_level = 0;
|
362
|
-
my $squeue_checked = 0;
|
363
|
-
my $sinfo_checked = 0;
|
364
|
-
my $latest_refresh = scalar time;
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
if (defined $Job->{thawedfromkey})
|
369
|
-
{
|
370
|
-
thaw ($Job->{thawedfromkey});
|
371
|
-
}
|
372
|
-
else
|
373
|
-
{
|
374
|
-
my $first_task = api_call("job_tasks/create", job_task => {
|
375
|
-
'job_uuid' => $Job->{'uuid'},
|
376
|
-
'sequence' => 0,
|
377
|
-
'qsequence' => 0,
|
378
|
-
'parameters' => {},
|
379
|
-
});
|
380
|
-
push @jobstep, { 'level' => 0,
|
381
|
-
'failures' => 0,
|
382
|
-
'arvados_task' => $first_task,
|
383
|
-
};
|
384
|
-
push @jobstep_todo, 0;
|
385
|
-
}
|
386
|
-
|
387
|
-
|
388
|
-
if (!$have_slurm)
|
389
|
-
{
|
390
|
-
must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
|
391
|
-
}
|
392
|
-
|
393
|
-
my $build_script = handle_readall(\*DATA);
|
394
|
-
my $nodelist = join(",", @node);
|
395
|
-
my $git_tar_count = 0;
|
396
|
-
|
397
|
-
if (!defined $no_clear_tmp) {
|
398
|
-
# Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
|
399
|
-
# up work directories crunch_tmp/work, crunch_tmp/opt,
|
400
|
-
# crunch_tmp/src*.
|
401
|
-
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
402
|
-
["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
|
403
|
-
['bash', '-ec', q{
|
404
|
-
arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
|
405
|
-
rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRUNCH_TMP}/*.cid
|
406
|
-
}],
|
407
|
-
{label => "clean work dirs"});
|
408
|
-
if ($exited != 0) {
|
409
|
-
exit_retry_unlocked();
|
410
|
-
}
|
411
|
-
}
|
412
|
-
|
413
|
-
# If this job requires a Docker image, install that.
|
414
|
-
my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
|
415
|
-
if ($docker_locator = $Job->{docker_image_locator}) {
|
416
|
-
Log (undef, "Install docker image $docker_locator");
|
417
|
-
($docker_stream, $docker_hash) = find_docker_image($docker_locator);
|
418
|
-
if (!$docker_hash)
|
419
|
-
{
|
420
|
-
croak("No Docker image hash found from locator $docker_locator");
|
421
|
-
}
|
422
|
-
Log (undef, "docker image hash is $docker_hash");
|
423
|
-
$docker_stream =~ s/^\.//;
|
424
|
-
my $docker_install_script = qq{
|
425
|
-
loaded() {
|
426
|
-
id=\$($docker_bin inspect --format="{{.ID}}" \Q$docker_hash\E) || return 1
|
427
|
-
echo "image ID is \$id"
|
428
|
-
[[ \${id} = \Q$docker_hash\E ]]
|
429
|
-
}
|
430
|
-
if loaded >&2 2>/dev/null; then
|
431
|
-
echo >&2 "image is already present"
|
432
|
-
exit 0
|
433
|
-
fi
|
434
|
-
echo >&2 "docker image is not present; loading"
|
435
|
-
arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
|
436
|
-
if ! loaded >&2; then
|
437
|
-
echo >&2 "`docker load` exited 0, but image is not found (!)"
|
438
|
-
exit 1
|
439
|
-
fi
|
440
|
-
echo >&2 "image loaded successfully"
|
441
|
-
};
|
442
|
-
|
443
|
-
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
444
|
-
["srun", "--nodelist=" . join(',', @node)],
|
445
|
-
["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
|
446
|
-
{label => "load docker image"});
|
447
|
-
if ($exited != 0)
|
448
|
-
{
|
449
|
-
exit_retry_unlocked();
|
450
|
-
}
|
451
|
-
|
452
|
-
# Determine whether this version of Docker supports memory+swap limits.
|
453
|
-
($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
454
|
-
["srun", "--nodes=1"],
|
455
|
-
[$docker_bin, 'run', '--help'],
|
456
|
-
{label => "check --memory-swap feature"});
|
457
|
-
if ($tempfail) {
|
458
|
-
exit_retry_unlocked();
|
459
|
-
}
|
460
|
-
$docker_limitmem = ($stdout =~ /--memory-swap/);
|
461
|
-
|
462
|
-
# Find a non-root Docker user to use.
|
463
|
-
# Tries the default user for the container, then 'crunch', then 'nobody',
|
464
|
-
# testing for whether the actual user id is non-zero. This defends against
|
465
|
-
# mistakes but not malice, but we intend to harden the security in the future
|
466
|
-
# so we don't want anyone getting used to their jobs running as root in their
|
467
|
-
# Docker containers.
|
468
|
-
my @tryusers = ("", "crunch", "nobody");
|
469
|
-
foreach my $try_user (@tryusers) {
|
470
|
-
my $label;
|
471
|
-
my $try_user_arg;
|
472
|
-
if ($try_user eq "") {
|
473
|
-
$label = "check whether default user is UID 0";
|
474
|
-
$try_user_arg = "";
|
475
|
-
} else {
|
476
|
-
$label = "check whether user '$try_user' is UID 0";
|
477
|
-
$try_user_arg = "--user=$try_user";
|
478
|
-
}
|
479
|
-
my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
480
|
-
["srun", "--nodes=1"],
|
481
|
-
["/bin/sh", "-ec",
|
482
|
-
"$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
|
483
|
-
{label => $label});
|
484
|
-
chomp($stdout);
|
485
|
-
if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
|
486
|
-
$dockeruserarg = $try_user_arg;
|
487
|
-
if ($try_user eq "") {
|
488
|
-
Log(undef, "Container will run with default user");
|
489
|
-
} else {
|
490
|
-
Log(undef, "Container will run with $dockeruserarg");
|
491
|
-
}
|
492
|
-
last;
|
493
|
-
} elsif ($tempfail) {
|
494
|
-
exit_retry_unlocked();
|
495
|
-
}
|
496
|
-
}
|
497
|
-
|
498
|
-
if (!defined $dockeruserarg) {
|
499
|
-
croak("Could not find a user in container that is not UID 0 (tried default user, @tryusers) or there was a problem running 'id' in the container.");
|
500
|
-
}
|
501
|
-
|
502
|
-
if ($Job->{arvados_sdk_version}) {
|
503
|
-
# The job also specifies an Arvados SDK version. Add the SDKs to the
|
504
|
-
# tar file for the build script to install.
|
505
|
-
Log(undef, sprintf("Packing Arvados SDK version %s for installation",
|
506
|
-
$Job->{arvados_sdk_version}));
|
507
|
-
add_git_archive("git", "--git-dir=$git_dir", "archive",
|
508
|
-
"--prefix=.arvados.sdk/",
|
509
|
-
$Job->{arvados_sdk_version}, "sdk");
|
510
|
-
}
|
511
|
-
}
|
512
|
-
|
513
|
-
if (!defined $git_dir && $Job->{'script_version'} =~ m{^/}) {
|
514
|
-
# If script_version looks like an absolute path, *and* the --git-dir
|
515
|
-
# argument was not given -- which implies we were not invoked by
|
516
|
-
# crunch-dispatch -- we will use the given path as a working
|
517
|
-
# directory instead of resolving script_version to a git commit (or
|
518
|
-
# doing anything else with git).
|
519
|
-
$ENV{"CRUNCH_SRC_COMMIT"} = $Job->{'script_version'};
|
520
|
-
$ENV{"CRUNCH_SRC"} = $Job->{'script_version'};
|
521
|
-
}
|
522
|
-
else {
|
523
|
-
# Resolve the given script_version to a git commit sha1. Also, if
|
524
|
-
# the repository is remote, clone it into our local filesystem: this
|
525
|
-
# ensures "git archive" will work, and is necessary to reliably
|
526
|
-
# resolve a symbolic script_version like "master^".
|
527
|
-
$ENV{"CRUNCH_SRC"} = "$ENV{CRUNCH_TMP}/src";
|
528
|
-
|
529
|
-
Log (undef, "Looking for version ".$Job->{script_version}." from repository ".$Job->{repository});
|
530
|
-
|
531
|
-
$ENV{"CRUNCH_SRC_COMMIT"} = $Job->{script_version};
|
532
|
-
|
533
|
-
# If we're running under crunch-dispatch, it will have already
|
534
|
-
# pulled the appropriate source tree into its own repository, and
|
535
|
-
# given us that repo's path as $git_dir.
|
536
|
-
#
|
537
|
-
# If we're running a "local" job, we might have to fetch content
|
538
|
-
# from a remote repository.
|
539
|
-
#
|
540
|
-
# (Currently crunch-dispatch gives a local path with --git-dir, but
|
541
|
-
# we might as well accept URLs there too in case it changes its
|
542
|
-
# mind.)
|
543
|
-
my $repo = $git_dir || $Job->{'repository'};
|
544
|
-
|
545
|
-
# Repository can be remote or local. If remote, we'll need to fetch it
|
546
|
-
# to a local dir before doing `git log` et al.
|
547
|
-
my $repo_location;
|
548
|
-
|
549
|
-
if ($repo =~ m{://|^[^/]*:}) {
|
550
|
-
# $repo is a git url we can clone, like git:// or https:// or
|
551
|
-
# file:/// or [user@]host:repo.git. Note "user/name@host:foo" is
|
552
|
-
# not recognized here because distinguishing that from a local
|
553
|
-
# path is too fragile. If you really need something strange here,
|
554
|
-
# use the ssh:// form.
|
555
|
-
$repo_location = 'remote';
|
556
|
-
} elsif ($repo =~ m{^\.*/}) {
|
557
|
-
# $repo is a local path to a git index. We'll also resolve ../foo
|
558
|
-
# to ../foo/.git if the latter is a directory. To help
|
559
|
-
# disambiguate local paths from named hosted repositories, this
|
560
|
-
# form must be given as ./ or ../ if it's a relative path.
|
561
|
-
if (-d "$repo/.git") {
|
562
|
-
$repo = "$repo/.git";
|
563
|
-
}
|
564
|
-
$repo_location = 'local';
|
565
|
-
} else {
|
566
|
-
# $repo is none of the above. It must be the name of a hosted
|
567
|
-
# repository.
|
568
|
-
my $arv_repo_list = api_call("repositories/list",
|
569
|
-
'filters' => [['name','=',$repo]]);
|
570
|
-
my @repos_found = @{$arv_repo_list->{'items'}};
|
571
|
-
my $n_found = $arv_repo_list->{'serverResponse'}->{'items_available'};
|
572
|
-
if ($n_found > 0) {
|
573
|
-
Log(undef, "Repository '$repo' -> "
|
574
|
-
. join(", ", map { $_->{'uuid'} } @repos_found));
|
575
|
-
}
|
576
|
-
if ($n_found != 1) {
|
577
|
-
croak("Error: Found $n_found repositories with name '$repo'.");
|
578
|
-
}
|
579
|
-
$repo = $repos_found[0]->{'fetch_url'};
|
580
|
-
$repo_location = 'remote';
|
581
|
-
}
|
582
|
-
Log(undef, "Using $repo_location repository '$repo'");
|
583
|
-
$ENV{"CRUNCH_SRC_URL"} = $repo;
|
584
|
-
|
585
|
-
# Resolve given script_version (we'll call that $treeish here) to a
|
586
|
-
# commit sha1 ($commit).
|
587
|
-
my $treeish = $Job->{'script_version'};
|
588
|
-
my $commit;
|
589
|
-
if ($repo_location eq 'remote') {
|
590
|
-
# We minimize excess object-fetching by re-using the same bare
|
591
|
-
# repository in CRUNCH_TMP/.git for multiple crunch-jobs -- we
|
592
|
-
# just keep adding remotes to it as needed.
|
593
|
-
my $local_repo = $ENV{'CRUNCH_TMP'}."/.git";
|
594
|
-
my $gitcmd = "git --git-dir=\Q$local_repo\E";
|
595
|
-
|
596
|
-
# Set up our local repo for caching remote objects, making
|
597
|
-
# archives, etc.
|
598
|
-
if (!-d $local_repo) {
|
599
|
-
make_path($local_repo) or croak("Error: could not create $local_repo");
|
600
|
-
}
|
601
|
-
# This works (exits 0 and doesn't delete fetched objects) even
|
602
|
-
# if $local_repo is already initialized:
|
603
|
-
`$gitcmd init --bare`;
|
604
|
-
if ($?) {
|
605
|
-
croak("Error: $gitcmd init --bare exited ".exit_status_s($?));
|
606
|
-
}
|
607
|
-
|
608
|
-
# If $treeish looks like a hash (or abbrev hash) we look it up in
|
609
|
-
# our local cache first, since that's cheaper. (We don't want to
|
610
|
-
# do that with tags/branches though -- those change over time, so
|
611
|
-
# they should always be resolved by the remote repo.)
|
612
|
-
if ($treeish =~ /^[0-9a-f]{7,40}$/s) {
|
613
|
-
# Hide stderr because it's normal for this to fail:
|
614
|
-
my $sha1 = `$gitcmd rev-list -n1 ''\Q$treeish\E 2>/dev/null`;
|
615
|
-
if ($? == 0 &&
|
616
|
-
# Careful not to resolve a branch named abcdeff to commit 1234567:
|
617
|
-
$sha1 =~ /^$treeish/ &&
|
618
|
-
$sha1 =~ /^([0-9a-f]{40})$/s) {
|
619
|
-
$commit = $1;
|
620
|
-
Log(undef, "Commit $commit already present in $local_repo");
|
621
|
-
}
|
622
|
-
}
|
623
|
-
|
624
|
-
if (!defined $commit) {
|
625
|
-
# If $treeish isn't just a hash or abbrev hash, or isn't here
|
626
|
-
# yet, we need to fetch the remote to resolve it correctly.
|
627
|
-
|
628
|
-
# First, remove all local heads. This prevents a name that does
|
629
|
-
# not exist on the remote from resolving to (or colliding with)
|
630
|
-
# a previously fetched branch or tag (possibly from a different
|
631
|
-
# remote).
|
632
|
-
remove_tree("$local_repo/refs/heads", {keep_root => 1});
|
633
|
-
|
634
|
-
Log(undef, "Fetching objects from $repo to $local_repo");
|
635
|
-
`$gitcmd fetch --no-progress --tags ''\Q$repo\E \Q+refs/heads/*:refs/heads/*\E`;
|
636
|
-
if ($?) {
|
637
|
-
croak("Error: `$gitcmd fetch` exited ".exit_status_s($?));
|
638
|
-
}
|
639
|
-
}
|
640
|
-
|
641
|
-
# Now that the data is all here, we will use our local repo for
|
642
|
-
# the rest of our git activities.
|
643
|
-
$repo = $local_repo;
|
644
|
-
}
|
645
|
-
|
646
|
-
my $gitcmd = "git --git-dir=\Q$repo\E";
|
647
|
-
my $sha1 = `$gitcmd rev-list -n1 ''\Q$treeish\E`;
|
648
|
-
unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
|
649
|
-
croak("`$gitcmd rev-list` exited "
|
650
|
-
.exit_status_s($?)
|
651
|
-
.", '$treeish' not found, giving up");
|
652
|
-
}
|
653
|
-
$commit = $1;
|
654
|
-
Log(undef, "Version $treeish is commit $commit");
|
655
|
-
|
656
|
-
if ($commit ne $Job->{'script_version'}) {
|
657
|
-
# Record the real commit id in the database, frozentokey, logs,
|
658
|
-
# etc. -- instead of an abbreviation or a branch name which can
|
659
|
-
# become ambiguous or point to a different commit in the future.
|
660
|
-
if (!$Job->update_attributes('script_version' => $commit)) {
|
661
|
-
croak("Error: failed to update job's script_version attribute");
|
662
|
-
}
|
663
|
-
}
|
664
|
-
|
665
|
-
$ENV{"CRUNCH_SRC_COMMIT"} = $commit;
|
666
|
-
add_git_archive("$gitcmd archive ''\Q$commit\E");
|
667
|
-
}
|
668
|
-
|
669
|
-
my $git_archive = combined_git_archive();
|
670
|
-
if (!defined $git_archive) {
|
671
|
-
Log(undef, "Skip install phase (no git archive)");
|
672
|
-
if ($have_slurm) {
|
673
|
-
Log(undef, "Warning: This probably means workers have no source tree!");
|
674
|
-
}
|
675
|
-
}
|
676
|
-
else {
|
677
|
-
my $exited;
|
678
|
-
my $install_script_tries_left = 3;
|
679
|
-
for (my $attempts = 0; $attempts < 3; $attempts++) {
|
680
|
-
my @srunargs = ("srun",
|
681
|
-
"--nodelist=$nodelist",
|
682
|
-
"-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
|
683
|
-
my @execargs = ("sh", "-c",
|
684
|
-
"mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
|
685
|
-
|
686
|
-
$ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
|
687
|
-
my ($stdout, $stderr, $tempfail);
|
688
|
-
($exited, $stdout, $stderr, $tempfail) = srun_sync(
|
689
|
-
\@srunargs, \@execargs,
|
690
|
-
{label => "run install script on all workers"},
|
691
|
-
$build_script . $git_archive);
|
692
|
-
if ($tempfail) {
|
693
|
-
exit_retry_unlocked();
|
694
|
-
}
|
695
|
-
|
696
|
-
my $stderr_anything_from_script = 0;
|
697
|
-
for my $line (split(/\n/, $stderr)) {
|
698
|
-
if ($line !~ /^(srun: error: |starting: \[)/) {
|
699
|
-
$stderr_anything_from_script = 1;
|
700
|
-
}
|
701
|
-
}
|
702
|
-
|
703
|
-
last if $exited == 0 || $main::please_freeze;
|
704
|
-
|
705
|
-
# If the install script fails but doesn't print an error message,
|
706
|
-
# the next thing anyone is likely to do is just run it again in
|
707
|
-
# case it was a transient problem like "slurm communication fails
|
708
|
-
# because the network isn't reliable enough". So we'll just do
|
709
|
-
# that ourselves (up to 3 attempts in total). OTOH, if there is an
|
710
|
-
# error message, the problem is more likely to have a real fix and
|
711
|
-
# we should fail the job so the fixing process can start, instead
|
712
|
-
# of doing 2 more attempts.
|
713
|
-
last if $stderr_anything_from_script;
|
714
|
-
}
|
715
|
-
|
716
|
-
foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
|
717
|
-
unlink($tar_filename);
|
718
|
-
}
|
719
|
-
|
720
|
-
if ($exited != 0) {
|
721
|
-
croak("Giving up");
|
722
|
-
}
|
723
|
-
}
|
724
|
-
|
725
|
-
foreach (qw (script script_version script_parameters runtime_constraints))
|
726
|
-
{
|
727
|
-
Log (undef,
|
728
|
-
"$_ " .
|
729
|
-
(ref($Job->{$_}) ? JSON::encode_json($Job->{$_}) : $Job->{$_}));
|
730
|
-
}
|
731
|
-
foreach (split (/\n/, $Job->{knobs}))
|
732
|
-
{
|
733
|
-
Log (undef, "knob " . $_);
|
734
|
-
}
|
735
|
-
my $resp = api_call(
|
736
|
-
'nodes/list',
|
737
|
-
'filters' => [['hostname', 'in', \@node]],
|
738
|
-
'order' => 'hostname',
|
739
|
-
'limit' => scalar(@node),
|
740
|
-
);
|
741
|
-
for my $n (@{$resp->{items}}) {
|
742
|
-
Log(undef, "$n->{hostname} $n->{uuid} ".JSON::encode_json($n->{properties}));
|
743
|
-
}
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
$main::success = undef;
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
ONELEVEL:
|
752
|
-
|
753
|
-
my $thisround_succeeded = 0;
|
754
|
-
my $thisround_failed = 0;
|
755
|
-
my $thisround_failed_multiple = 0;
|
756
|
-
my $working_slot_count = scalar(@slot);
|
757
|
-
|
758
|
-
@jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level}
|
759
|
-
or $a <=> $b } @jobstep_todo;
|
760
|
-
my $level = $jobstep[$jobstep_todo[0]]->{level};
|
761
|
-
|
762
|
-
my $initial_tasks_this_level = 0;
|
763
|
-
foreach my $id (@jobstep_todo) {
|
764
|
-
$initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level);
|
765
|
-
}
|
766
|
-
|
767
|
-
# If the number of tasks scheduled at this level #T is smaller than the number
|
768
|
-
# of slots available #S, only use the first #T slots, or the first slot on
|
769
|
-
# each node, whichever number is greater.
|
770
|
-
#
|
771
|
-
# When we dispatch tasks later, we'll allocate whole-node resources like RAM
|
772
|
-
# based on these numbers. Using fewer slots makes more resources available
|
773
|
-
# to each individual task, which should normally be a better strategy when
|
774
|
-
# there are fewer of them running with less parallelism.
|
775
|
-
#
|
776
|
-
# Note that this calculation is not redone if the initial tasks at
|
777
|
-
# this level queue more tasks at the same level. This may harm
|
778
|
-
# overall task throughput for that level.
|
779
|
-
my @freeslot;
|
780
|
-
if ($initial_tasks_this_level < @node) {
|
781
|
-
@freeslot = (0..$#node);
|
782
|
-
} elsif ($initial_tasks_this_level < @slot) {
|
783
|
-
@freeslot = (0..$initial_tasks_this_level - 1);
|
784
|
-
} else {
|
785
|
-
@freeslot = (0..$#slot);
|
786
|
-
}
|
787
|
-
my $round_num_freeslots = scalar(@freeslot);
|
788
|
-
print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
|
789
|
-
|
790
|
-
my %round_max_slots = ();
|
791
|
-
for (my $ii = $#freeslot; $ii >= 0; $ii--) {
|
792
|
-
my $this_slot = $slot[$freeslot[$ii]];
|
793
|
-
my $node_name = $this_slot->{node}->{name};
|
794
|
-
$round_max_slots{$node_name} ||= $this_slot->{cpu};
|
795
|
-
last if (scalar(keys(%round_max_slots)) >= @node);
|
796
|
-
}
|
797
|
-
|
798
|
-
Log(undef, "start level $level with $round_num_freeslots slots");
|
799
|
-
my @holdslot;
|
800
|
-
my %reader;
|
801
|
-
my $progress_is_dirty = 1;
|
802
|
-
my $progress_stats_updated = 0;
|
803
|
-
|
804
|
-
update_progress_stats();
|
805
|
-
|
806
|
-
|
807
|
-
THISROUND:
|
808
|
-
for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
809
|
-
{
|
810
|
-
# Don't create new tasks if we already know the job's final result.
|
811
|
-
last if defined($main::success);
|
812
|
-
|
813
|
-
my $id = $jobstep_todo[$todo_ptr];
|
814
|
-
my $Jobstep = $jobstep[$id];
|
815
|
-
if ($Jobstep->{level} != $level)
|
816
|
-
{
|
817
|
-
next;
|
818
|
-
}
|
819
|
-
|
820
|
-
pipe $reader{$id}, "writer" or croak("pipe() failed: $!");
|
821
|
-
set_nonblocking($reader{$id});
|
822
|
-
|
823
|
-
my $childslot = $freeslot[0];
|
824
|
-
my $childnode = $slot[$childslot]->{node};
|
825
|
-
my $childslotname = join (".",
|
826
|
-
$slot[$childslot]->{node}->{name},
|
827
|
-
$slot[$childslot]->{cpu});
|
828
|
-
|
829
|
-
my $childpid = fork();
|
830
|
-
if ($childpid == 0)
|
831
|
-
{
|
832
|
-
$SIG{'INT'} = 'DEFAULT';
|
833
|
-
$SIG{'QUIT'} = 'DEFAULT';
|
834
|
-
$SIG{'TERM'} = 'DEFAULT';
|
835
|
-
|
836
|
-
foreach (values (%reader))
|
837
|
-
{
|
838
|
-
close($_);
|
839
|
-
}
|
840
|
-
fcntl ("writer", F_SETFL, 0) or croak ($!); # no close-on-exec
|
841
|
-
open(STDOUT,">&writer") or croak ($!);
|
842
|
-
open(STDERR,">&writer") or croak ($!);
|
843
|
-
|
844
|
-
undef $dbh;
|
845
|
-
undef $sth;
|
846
|
-
|
847
|
-
delete $ENV{"GNUPGHOME"};
|
848
|
-
$ENV{"TASK_UUID"} = $Jobstep->{'arvados_task'}->{'uuid'};
|
849
|
-
$ENV{"TASK_QSEQUENCE"} = $id;
|
850
|
-
$ENV{"TASK_SEQUENCE"} = $level;
|
851
|
-
$ENV{"JOB_SCRIPT"} = $Job->{script};
|
852
|
-
while (my ($param, $value) = each %{$Job->{script_parameters}}) {
|
853
|
-
$param =~ tr/a-z/A-Z/;
|
854
|
-
$ENV{"JOB_PARAMETER_$param"} = $value;
|
855
|
-
}
|
856
|
-
$ENV{"TASK_SLOT_NODE"} = $slot[$childslot]->{node}->{name};
|
857
|
-
$ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
|
858
|
-
$ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
|
859
|
-
$ENV{"HOME"} = $ENV{"TASK_WORK"};
|
860
|
-
$ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
|
861
|
-
$ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
|
862
|
-
$ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
|
863
|
-
|
864
|
-
my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
|
865
|
-
|
866
|
-
$ENV{"GZIP"} = "-n";
|
867
|
-
|
868
|
-
my @srunargs = (
|
869
|
-
"srun",
|
870
|
-
"--nodelist=".$childnode->{name},
|
871
|
-
qw(-n1 -c1 -N1 -D), $ENV{'TMPDIR'},
|
872
|
-
"--job-name=$job_id.$id.$$",
|
873
|
-
);
|
874
|
-
|
875
|
-
my $stdbuf = " stdbuf --output=0 --error=0 ";
|
876
|
-
|
877
|
-
my $arv_file_cache = "";
|
878
|
-
if (defined($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'})) {
|
879
|
-
$arv_file_cache = "--file-cache=" . ($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'} * 1024 * 1024);
|
880
|
-
}
|
881
|
-
|
882
|
-
my $command =
|
883
|
-
"if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
|
884
|
-
."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
|
885
|
-
."&& cd \Q$ENV{CRUNCH_TMP}\E "
|
886
|
-
# These environment variables get used explicitly later in
|
887
|
-
# $command. No tool is expected to read these values directly.
|
888
|
-
.q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
|
889
|
-
.q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
|
890
|
-
."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
|
891
|
-
."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
|
892
|
-
.q{&& declare -a VOLUMES=() }
|
893
|
-
.q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner:ro") ; fi }
|
894
|
-
.q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt:ro") ; }
|
895
|
-
.q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt:ro") ; fi };
|
896
|
-
|
897
|
-
$command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
|
898
|
-
$ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
|
899
|
-
$ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
|
900
|
-
|
901
|
-
if ($docker_hash)
|
902
|
-
{
|
903
|
-
my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
|
904
|
-
my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
|
905
|
-
$command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
|
906
|
-
$command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
|
907
|
-
# We only set memory limits if Docker lets us limit both memory and swap.
|
908
|
-
# Memory limits alone have been supported longer, but subprocesses tend
|
909
|
-
# to get SIGKILL if they exceed that without any swap limit set.
|
910
|
-
# See #5642 for additional background.
|
911
|
-
if ($docker_limitmem) {
|
912
|
-
$command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
|
913
|
-
}
|
914
|
-
|
915
|
-
# The source tree and $destdir directory (which we have
|
916
|
-
# installed on the worker host) are available in the container,
|
917
|
-
# under the same path.
|
918
|
-
$command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
|
919
|
-
$command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
|
920
|
-
|
921
|
-
# Currently, we make the "by_pdh" directory in arv-mount's mount
|
922
|
-
# point appear at /keep inside the container (instead of using
|
923
|
-
# the same path as the host like we do with CRUNCH_SRC and
|
924
|
-
# CRUNCH_INSTALL). However, crunch scripts and utilities must
|
925
|
-
# not rely on this. They must use $TASK_KEEPMOUNT.
|
926
|
-
$command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
|
927
|
-
$ENV{TASK_KEEPMOUNT} = "/keep";
|
928
|
-
|
929
|
-
# Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
|
930
|
-
$command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp\E ";
|
931
|
-
$ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
|
932
|
-
|
933
|
-
# TASK_WORK is almost exactly like a docker data volume: it
|
934
|
-
# starts out empty, is writable, and persists until no
|
935
|
-
# containers use it any more. We don't use --volumes-from to
|
936
|
-
# share it with other containers: it is only accessible to this
|
937
|
-
# task, and it goes away when this task stops.
|
938
|
-
#
|
939
|
-
# However, a docker data volume is writable only by root unless
|
940
|
-
# the mount point already happens to exist in the container with
|
941
|
-
# different permissions. Therefore, we [1] assume /tmp already
|
942
|
-
# exists in the image and is writable by the crunch user; [2]
|
943
|
-
# avoid putting TASK_WORK inside CRUNCH_TMP (which won't be
|
944
|
-
# writable if they are created by docker while setting up the
|
945
|
-
# other --volumes); and [3] create $TASK_WORK inside the
|
946
|
-
# container using $build_script.
|
947
|
-
$command .= "--volume=/tmp ";
|
948
|
-
$ENV{"TASK_WORK"} = "/tmp/crunch-job-task-work/$childslotname";
|
949
|
-
$ENV{"HOME"} = $ENV{"TASK_WORK"};
|
950
|
-
$ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
|
951
|
-
|
952
|
-
# TODO: Share a single JOB_WORK volume across all task
|
953
|
-
# containers on a given worker node, and delete it when the job
|
954
|
-
# ends (and, in case that doesn't work, when the next job
|
955
|
-
# starts).
|
956
|
-
#
|
957
|
-
# For now, use the same approach as TASK_WORK above.
|
958
|
-
$ENV{"JOB_WORK"} = "/tmp/crunch-job-work";
|
959
|
-
|
960
|
-
# Bind mount the crunchrunner binary and host TLS certificates file into
|
961
|
-
# the container.
|
962
|
-
$command .= '"${VOLUMES[@]}" ';
|
963
|
-
|
964
|
-
while (my ($env_key, $env_val) = each %ENV)
|
965
|
-
{
|
966
|
-
if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) {
|
967
|
-
$command .= "--env=\Q$env_key=$env_val\E ";
|
968
|
-
}
|
969
|
-
}
|
970
|
-
$command .= "--env=\QHOME=$ENV{HOME}\E ";
|
971
|
-
$command .= "\Q$docker_hash\E ";
|
972
|
-
|
973
|
-
if ($Job->{arvados_sdk_version}) {
|
974
|
-
$command .= $stdbuf;
|
975
|
-
$command .= "perl - \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E";
|
976
|
-
} else {
|
977
|
-
$command .= "/bin/sh -c \'python -c " .
|
978
|
-
'"from pkg_resources import get_distribution as get; print \"Using Arvados SDK version\", get(\"arvados-python-client\").version"' .
|
979
|
-
">&2 2>/dev/null; " .
|
980
|
-
"mkdir -p \"$ENV{JOB_WORK}\" \"$ENV{TASK_WORK}\" && " .
|
981
|
-
"if which stdbuf >/dev/null ; then " .
|
982
|
-
" exec $stdbuf \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
|
983
|
-
" else " .
|
984
|
-
" exec \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
|
985
|
-
" fi\'";
|
986
|
-
}
|
987
|
-
} else {
|
988
|
-
# Non-docker run
|
989
|
-
$command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
|
990
|
-
$command .= $stdbuf;
|
991
|
-
$command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
|
992
|
-
}
|
993
|
-
|
994
|
-
my @execargs = ('bash', '-c', $command);
|
995
|
-
srun (\@srunargs, \@execargs, undef, $build_script);
|
996
|
-
# exec() failed, we assume nothing happened.
|
997
|
-
die "srun() failed on build script\n";
|
998
|
-
}
|
999
|
-
close("writer");
|
1000
|
-
if (!defined $childpid)
|
1001
|
-
{
|
1002
|
-
close $reader{$id};
|
1003
|
-
delete $reader{$id};
|
1004
|
-
next;
|
1005
|
-
}
|
1006
|
-
shift @freeslot;
|
1007
|
-
$proc{$childpid} = {
|
1008
|
-
jobstepidx => $id,
|
1009
|
-
time => time,
|
1010
|
-
slot => $childslot,
|
1011
|
-
jobstepname => "$job_id.$id.$childpid",
|
1012
|
-
};
|
1013
|
-
croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
|
1014
|
-
$slot[$childslot]->{pid} = $childpid;
|
1015
|
-
|
1016
|
-
Log ($id, "job_task ".$Jobstep->{'arvados_task'}->{'uuid'});
|
1017
|
-
Log ($id, "child $childpid started on $childslotname");
|
1018
|
-
$Jobstep->{starttime} = time;
|
1019
|
-
$Jobstep->{node} = $childnode->{name};
|
1020
|
-
$Jobstep->{slotindex} = $childslot;
|
1021
|
-
delete $Jobstep->{stderr};
|
1022
|
-
delete $Jobstep->{finishtime};
|
1023
|
-
delete $Jobstep->{tempfail};
|
1024
|
-
|
1025
|
-
$Jobstep->{'arvados_task'}->{started_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{starttime});
|
1026
|
-
retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
|
1027
|
-
|
1028
|
-
splice @jobstep_todo, $todo_ptr, 1;
|
1029
|
-
--$todo_ptr;
|
1030
|
-
|
1031
|
-
$progress_is_dirty = 1;
|
1032
|
-
|
1033
|
-
while (!@freeslot
|
1034
|
-
||
|
1035
|
-
($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
|
1036
|
-
{
|
1037
|
-
last THISROUND if $main::please_freeze;
|
1038
|
-
if ($main::please_info)
|
1039
|
-
{
|
1040
|
-
$main::please_info = 0;
|
1041
|
-
freeze();
|
1042
|
-
create_output_collection();
|
1043
|
-
save_meta(1);
|
1044
|
-
update_progress_stats();
|
1045
|
-
}
|
1046
|
-
my $gotsome
|
1047
|
-
= readfrompipes ()
|
1048
|
-
+ reapchildren ();
|
1049
|
-
if (!$gotsome || ($latest_refresh + 2 < scalar time))
|
1050
|
-
{
|
1051
|
-
check_refresh_wanted();
|
1052
|
-
check_squeue();
|
1053
|
-
update_progress_stats();
|
1054
|
-
}
|
1055
|
-
elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
|
1056
|
-
{
|
1057
|
-
update_progress_stats();
|
1058
|
-
}
|
1059
|
-
if (!$gotsome) {
|
1060
|
-
select (undef, undef, undef, 0.1);
|
1061
|
-
}
|
1062
|
-
$working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
|
1063
|
-
$_->{node}->{hold_count} < 4 } @slot);
|
1064
|
-
if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
|
1065
|
-
($thisround_failed_multiple >= 16 && $thisround_failed_multiple > $thisround_succeeded))
|
1066
|
-
{
|
1067
|
-
my $message = "Repeated failure rate too high ($thisround_failed_multiple/"
|
1068
|
-
.($thisround_failed+$thisround_succeeded)
|
1069
|
-
.") -- giving up on this round";
|
1070
|
-
Log (undef, $message);
|
1071
|
-
last THISROUND;
|
1072
|
-
}
|
1073
|
-
|
1074
|
-
# move slots from freeslot to holdslot (or back to freeslot) if necessary
|
1075
|
-
for (my $i=$#freeslot; $i>=0; $i--) {
|
1076
|
-
if ($slot[$freeslot[$i]]->{node}->{hold_until} > scalar time) {
|
1077
|
-
push @holdslot, (splice @freeslot, $i, 1);
|
1078
|
-
}
|
1079
|
-
}
|
1080
|
-
for (my $i=$#holdslot; $i>=0; $i--) {
|
1081
|
-
if ($slot[$holdslot[$i]]->{node}->{hold_until} <= scalar time) {
|
1082
|
-
push @freeslot, (splice @holdslot, $i, 1);
|
1083
|
-
}
|
1084
|
-
}
|
1085
|
-
|
1086
|
-
# give up if no nodes are succeeding
|
1087
|
-
if ($working_slot_count < 1) {
|
1088
|
-
Log(undef, "Every node has failed -- giving up");
|
1089
|
-
last THISROUND;
|
1090
|
-
}
|
1091
|
-
}
|
1092
|
-
}
|
1093
|
-
|
1094
|
-
|
1095
|
-
push @freeslot, splice @holdslot;
|
1096
|
-
map { $slot[$freeslot[$_]]->{node}->{losing_streak} = 0 } (0..$#freeslot);
|
1097
|
-
|
1098
|
-
|
1099
|
-
Log (undef, "wait for last ".(scalar keys %proc)." children to finish");
|
1100
|
-
while (%proc)
|
1101
|
-
{
|
1102
|
-
if ($main::please_continue) {
|
1103
|
-
$main::please_continue = 0;
|
1104
|
-
goto THISROUND;
|
1105
|
-
}
|
1106
|
-
$main::please_info = 0, freeze(), create_output_collection(), save_meta(1) if $main::please_info;
|
1107
|
-
readfrompipes ();
|
1108
|
-
if (!reapchildren())
|
1109
|
-
{
|
1110
|
-
check_refresh_wanted();
|
1111
|
-
check_squeue();
|
1112
|
-
update_progress_stats();
|
1113
|
-
select (undef, undef, undef, 0.1);
|
1114
|
-
killem (keys %proc) if $main::please_freeze;
|
1115
|
-
}
|
1116
|
-
}
|
1117
|
-
|
1118
|
-
update_progress_stats();
|
1119
|
-
freeze_if_want_freeze();
|
1120
|
-
|
1121
|
-
|
1122
|
-
if (!defined $main::success)
|
1123
|
-
{
|
1124
|
-
if (!@jobstep_todo) {
|
1125
|
-
$main::success = 1;
|
1126
|
-
} elsif ($working_slot_count < 1) {
|
1127
|
-
save_output_collection();
|
1128
|
-
save_meta();
|
1129
|
-
exit_retry_unlocked();
|
1130
|
-
} elsif ($thisround_succeeded == 0 &&
|
1131
|
-
($thisround_failed == 0 || $thisround_failed > 4)) {
|
1132
|
-
my $message = "stop because $thisround_failed tasks failed and none succeeded";
|
1133
|
-
Log (undef, $message);
|
1134
|
-
$main::success = 0;
|
1135
|
-
}
|
1136
|
-
}
|
1137
|
-
|
1138
|
-
goto ONELEVEL if !defined $main::success;
|
1139
|
-
|
1140
|
-
|
1141
|
-
release_allocation();
|
1142
|
-
freeze();
|
1143
|
-
my $collated_output = save_output_collection();
|
1144
|
-
Log (undef, "finish");
|
1145
|
-
|
1146
|
-
my $final_log = save_meta();
|
1147
|
-
|
1148
|
-
my $final_state;
|
1149
|
-
if ($collated_output && $final_log && $main::success) {
|
1150
|
-
$final_state = 'Complete';
|
1151
|
-
} else {
|
1152
|
-
$final_state = 'Failed';
|
1153
|
-
}
|
1154
|
-
$Job->update_attributes('state' => $final_state);
|
1155
|
-
|
1156
|
-
exit (($final_state eq 'Complete') ? 0 : 1);
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
sub update_progress_stats
|
1161
|
-
{
|
1162
|
-
$progress_stats_updated = time;
|
1163
|
-
return if !$progress_is_dirty;
|
1164
|
-
my ($todo, $done, $running) = (scalar @jobstep_todo,
|
1165
|
-
scalar @jobstep_done,
|
1166
|
-
scalar keys(%proc));
|
1167
|
-
$Job->{'tasks_summary'} ||= {};
|
1168
|
-
$Job->{'tasks_summary'}->{'todo'} = $todo;
|
1169
|
-
$Job->{'tasks_summary'}->{'done'} = $done;
|
1170
|
-
$Job->{'tasks_summary'}->{'running'} = $running;
|
1171
|
-
$Job->update_attributes('tasks_summary' => $Job->{'tasks_summary'});
|
1172
|
-
Log (undef, "status: $done done, $running running, $todo todo");
|
1173
|
-
$progress_is_dirty = 0;
|
1174
|
-
}
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
sub reapchildren
|
1179
|
-
{
|
1180
|
-
my $children_reaped = 0;
|
1181
|
-
my @successful_task_uuids = ();
|
1182
|
-
|
1183
|
-
while((my $pid = waitpid (-1, WNOHANG)) > 0)
|
1184
|
-
{
|
1185
|
-
my $childstatus = $?;
|
1186
|
-
|
1187
|
-
my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
|
1188
|
-
. "."
|
1189
|
-
. $slot[$proc{$pid}->{slot}]->{cpu});
|
1190
|
-
my $jobstepidx = $proc{$pid}->{jobstepidx};
|
1191
|
-
|
1192
|
-
readfrompipes_after_exit ($jobstepidx);
|
1193
|
-
|
1194
|
-
$children_reaped++;
|
1195
|
-
my $elapsed = time - $proc{$pid}->{time};
|
1196
|
-
my $Jobstep = $jobstep[$jobstepidx];
|
1197
|
-
|
1198
|
-
my $exitvalue = $childstatus >> 8;
|
1199
|
-
my $exitinfo = "exit ".exit_status_s($childstatus);
|
1200
|
-
$Jobstep->{'arvados_task'}->reload;
|
1201
|
-
my $task_success = $Jobstep->{'arvados_task'}->{success};
|
1202
|
-
|
1203
|
-
Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
|
1204
|
-
|
1205
|
-
if (!defined $task_success) {
|
1206
|
-
# task did not indicate one way or the other --> fail
|
1207
|
-
Log($jobstepidx, sprintf(
|
1208
|
-
"ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
|
1209
|
-
exit_status_s($childstatus)));
|
1210
|
-
$Jobstep->{'arvados_task'}->{success} = 0;
|
1211
|
-
retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
|
1212
|
-
$task_success = 0;
|
1213
|
-
}
|
1214
|
-
|
1215
|
-
if (!$task_success)
|
1216
|
-
{
|
1217
|
-
my $temporary_fail;
|
1218
|
-
$temporary_fail ||= $Jobstep->{tempfail};
|
1219
|
-
$temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
|
1220
|
-
|
1221
|
-
++$thisround_failed;
|
1222
|
-
++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
|
1223
|
-
|
1224
|
-
# Check for signs of a failed or misconfigured node
|
1225
|
-
if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
|
1226
|
-
2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
|
1227
|
-
# Don't count this against jobstep failure thresholds if this
|
1228
|
-
# node is already suspected faulty and srun exited quickly
|
1229
|
-
if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
|
1230
|
-
$elapsed < 5) {
|
1231
|
-
Log ($jobstepidx, "blaming failure on suspect node " .
|
1232
|
-
$slot[$proc{$pid}->{slot}]->{node}->{name});
|
1233
|
-
$temporary_fail ||= 1;
|
1234
|
-
}
|
1235
|
-
ban_node_by_slot($proc{$pid}->{slot});
|
1236
|
-
}
|
1237
|
-
|
1238
|
-
Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
|
1239
|
-
++$Jobstep->{'failures'},
|
1240
|
-
$temporary_fail ? 'temporary' : 'permanent',
|
1241
|
-
$elapsed));
|
1242
|
-
|
1243
|
-
if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
|
1244
|
-
# Give up on this task, and the whole job
|
1245
|
-
$main::success = 0;
|
1246
|
-
}
|
1247
|
-
# Put this task back on the todo queue
|
1248
|
-
push @jobstep_todo, $jobstepidx;
|
1249
|
-
$Job->{'tasks_summary'}->{'failed'}++;
|
1250
|
-
}
|
1251
|
-
else # task_success
|
1252
|
-
{
|
1253
|
-
push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid};
|
1254
|
-
++$thisround_succeeded;
|
1255
|
-
$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
|
1256
|
-
$slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
|
1257
|
-
$slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
|
1258
|
-
push @jobstep_done, $jobstepidx;
|
1259
|
-
Log ($jobstepidx, "success in $elapsed seconds");
|
1260
|
-
}
|
1261
|
-
$Jobstep->{exitcode} = $childstatus;
|
1262
|
-
$Jobstep->{finishtime} = time;
|
1263
|
-
$Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
|
1264
|
-
retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
|
1265
|
-
Log ($jobstepidx, sprintf("task output (%d bytes): %s",
|
1266
|
-
length($Jobstep->{'arvados_task'}->{output}),
|
1267
|
-
$Jobstep->{'arvados_task'}->{output}));
|
1268
|
-
|
1269
|
-
close $reader{$jobstepidx};
|
1270
|
-
delete $reader{$jobstepidx};
|
1271
|
-
delete $slot[$proc{$pid}->{slot}]->{pid};
|
1272
|
-
push @freeslot, $proc{$pid}->{slot};
|
1273
|
-
delete $proc{$pid};
|
1274
|
-
|
1275
|
-
$progress_is_dirty = 1;
|
1276
|
-
}
|
1277
|
-
|
1278
|
-
if (scalar(@successful_task_uuids) > 0)
|
1279
|
-
{
|
1280
|
-
Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids)));
|
1281
|
-
# Load new tasks
|
1282
|
-
my $newtask_list = [];
|
1283
|
-
my $newtask_results;
|
1284
|
-
do {
|
1285
|
-
$newtask_results = api_call(
|
1286
|
-
"job_tasks/list",
|
1287
|
-
'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]],
|
1288
|
-
'order' => 'qsequence',
|
1289
|
-
'offset' => scalar(@$newtask_list),
|
1290
|
-
);
|
1291
|
-
push(@$newtask_list, @{$newtask_results->{items}});
|
1292
|
-
} while (@{$newtask_results->{items}});
|
1293
|
-
Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list)));
|
1294
|
-
foreach my $arvados_task (@$newtask_list) {
|
1295
|
-
my $jobstep = {
|
1296
|
-
'level' => $arvados_task->{'sequence'},
|
1297
|
-
'failures' => 0,
|
1298
|
-
'arvados_task' => $arvados_task
|
1299
|
-
};
|
1300
|
-
push @jobstep, $jobstep;
|
1301
|
-
push @jobstep_todo, $#jobstep;
|
1302
|
-
}
|
1303
|
-
}
|
1304
|
-
|
1305
|
-
return $children_reaped;
|
1306
|
-
}
|
1307
|
-
|
1308
|
-
sub check_refresh_wanted
|
1309
|
-
{
|
1310
|
-
my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
|
1311
|
-
if (@stat &&
|
1312
|
-
$stat[9] > $latest_refresh &&
|
1313
|
-
# ...and we have actually locked the job record...
|
1314
|
-
$job_id eq $Job->{'uuid'}) {
|
1315
|
-
$latest_refresh = scalar time;
|
1316
|
-
my $Job2 = api_call("jobs/get", uuid => $jobspec);
|
1317
|
-
for my $attr ('cancelled_at',
|
1318
|
-
'cancelled_by_user_uuid',
|
1319
|
-
'cancelled_by_client_uuid',
|
1320
|
-
'state') {
|
1321
|
-
$Job->{$attr} = $Job2->{$attr};
|
1322
|
-
}
|
1323
|
-
if ($Job->{'state'} ne "Running") {
|
1324
|
-
if ($Job->{'state'} eq "Cancelled") {
|
1325
|
-
Log (undef, "Job cancelled at " . $Job->{'cancelled_at'} . " by user " . $Job->{'cancelled_by_user_uuid'});
|
1326
|
-
} else {
|
1327
|
-
Log (undef, "Job state unexpectedly changed to " . $Job->{'state'});
|
1328
|
-
}
|
1329
|
-
$main::success = 0;
|
1330
|
-
      $main::please_freeze = 1;
    }
  }
}

sub check_squeue
{
  my $last_squeue_check = $squeue_checked;

  # Do not call `squeue` or check the kill list more than once every
  # 15 seconds.
  return if $last_squeue_check > time - 15;
  $squeue_checked = time;

  # Look for children from which we haven't received stderr data since
  # the last squeue check. If no such children exist, all procs are
  # alive and there's no need to even look at squeue.
  #
  # As long as the crunchstat poll interval (10s) is shorter than the
  # squeue check interval (15s) this should make the squeue check an
  # infrequent event.
  my $silent_procs = 0;
  for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
  {
    if (!exists($js->{stderr_at}))
    {
      $js->{stderr_at} = 0;
    }
    if ($js->{stderr_at} < $last_squeue_check)
    {
      $silent_procs++;
    }
  }
  return if $silent_procs == 0;

  # use killem() on procs whose killtime is reached
  while (my ($pid, $procinfo) = each %proc)
  {
    my $js = $jobstep[$procinfo->{jobstepidx}];
    if (exists $procinfo->{killtime}
        && $procinfo->{killtime} <= time
        && $js->{stderr_at} < $last_squeue_check)
    {
      my $sincewhen = "";
      if ($js->{stderr_at}) {
        $sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
      }
      Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
      killem ($pid);
    }
  }

  if (!$have_slurm)
  {
    # here is an opportunity to check for mysterious problems with local procs
    return;
  }

  # Get a list of steps still running.  Note: squeue(1) says --steps
  # selects a format (which we override anyway) and allows us to
  # specify which steps we're interested in (which we don't).
  # Importantly, it also changes the meaning of %j from "job name" to
  # "step name" and (although this isn't mentioned explicitly in the
  # docs) switches from "one line per job" mode to "one line per step"
  # mode.  Without it, we'd just get a list of one job, instead of a
  # list of N steps.
  my @squeue = `squeue --jobs=\Q$ENV{SLURM_JOB_ID}\E --steps --format='%j' --noheader`;
  if ($? != 0)
  {
    Log(undef, "warning: squeue exit status $? ($!)");
    return;
  }
  chop @squeue;

  # which of my jobsteps are running, according to squeue?
  my %ok;
  for my $jobstepname (@squeue)
  {
    $ok{$jobstepname} = 1;
  }

  # Check for child procs >60s old and not mentioned by squeue.
  while (my ($pid, $procinfo) = each %proc)
  {
    if ($procinfo->{time} < time - 60
        && $procinfo->{jobstepname}
        && !exists $ok{$procinfo->{jobstepname}}
        && !exists $procinfo->{killtime})
    {
      # According to slurm, this task has ended (successfully or not)
      # -- but our srun child hasn't exited.  First we must wait (30
      # seconds) in case this is just a race between communication
      # channels.  Then, if our srun child process still hasn't
      # terminated, we'll conclude some slurm communication
      # error/delay has caused the task to die without notifying srun,
      # and we'll kill srun ourselves.
      $procinfo->{killtime} = time + 30;
      Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
    }
  }
}
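
# Illustrative sketch of what check_squeue() runs when $have_slurm is
# true (the job ID 1234 is made up):
#
#   squeue --jobs=1234 --steps --format='%j' --noheader
#
# The expected output is one step name per line. Any srun child more
# than 60 seconds old whose jobstepname is missing from that list gets
# a killtime 30 seconds in the future, and killem() fires if it still
# hasn't exited by then.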

sub check_sinfo
{
  # If a node fails in a multi-node "srun" call during job setup, the call
  # may hang instead of exiting with a nonzero code.  This function checks
  # "sinfo" for the health of the nodes that were allocated and ensures that
  # they are all still in the "alloc" state.  If a node that is allocated to
  # this job is not in "alloc" state, then set please_freeze.
  #
  # This is only called from srun_sync() for node configuration.  If a
  # node fails doing actual work, there are other recovery mechanisms.

  # Do not call `sinfo` more than once every 15 seconds.
  return if $sinfo_checked > time - 15;
  $sinfo_checked = time;

  # The output format "%t" means output node states.
  my @sinfo = `sinfo --nodes=\Q$ENV{SLURM_NODELIST}\E --noheader -o "%t"`;
  if ($? != 0)
  {
    Log(undef, "warning: sinfo exit status $? ($!)");
    return;
  }
  chop @sinfo;

  foreach (@sinfo)
  {
    # Use string comparison: the numeric != would treat every state
    # name as 0 and never detect a non-"alloc" node.
    if ($_ ne "alloc" && $_ ne "alloc*") {
      $main::please_freeze = 1;
    }
  }
}

sub release_allocation
{
  if ($have_slurm)
  {
    Log (undef, "release job allocation");
    system "scancel $ENV{SLURM_JOB_ID}";
  }
}


sub readfrompipes
{
  my $gotsome = 0;
  my %fd_job;
  my $sel = IO::Select->new();
  foreach my $jobstepidx (keys %reader)
  {
    my $fd = $reader{$jobstepidx};
    $sel->add($fd);
    $fd_job{$fd} = $jobstepidx;

    if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
      $sel->add($stdout_fd);
      $fd_job{$stdout_fd} = $jobstepidx;
    }
  }
  # select on all reader fds with 0.1s timeout
  my @ready_fds = $sel->can_read(0.1);
  foreach my $fd (@ready_fds)
  {
    my $buf;
    if (0 < sysread ($fd, $buf, 65536))
    {
      $gotsome = 1;
      print STDERR $buf if $ENV{CRUNCH_DEBUG};

      my $jobstepidx = $fd_job{$fd};
      if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
        $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
        next;
      }

      $jobstep[$jobstepidx]->{stderr_at} = time;
      $jobstep[$jobstepidx]->{stderr} .= $buf;

      # Consume everything up to the last \n
      preprocess_stderr ($jobstepidx);

      if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
      {
        # If we get a lot of stderr without a newline, chop off the
        # front to avoid letting our buffer grow indefinitely.
        substr ($jobstep[$jobstepidx]->{stderr},
                0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
      }
    }
  }
  return $gotsome;
}


# Consume all full lines of stderr for a jobstep.  Everything after the
# last newline will remain in $jobstep[$jobstepidx]->{stderr} after
# returning.
sub preprocess_stderr
{
  my $jobstepidx = shift;
  # slotindex is only defined for children running Arvados job tasks.
  # Be prepared to handle the undef case (for setup srun calls, etc.).
  my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};

  while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
    my $line = $1;
    substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
    Log ($jobstepidx, "stderr $line");
    if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/i) {
      # If the allocation is revoked, we can't possibly continue, so mark all
      # nodes as failed.  This will cause the overall exit code to be
      # EX_RETRY_UNLOCKED instead of failure so that crunch_dispatch can re-run
      # this job.
      $main::please_freeze = 1;
      foreach my $st (@slot) {
        $st->{node}->{fail_count}++;
      }
    }
    elsif ($line =~ /srun: error: .*?\b(Node failure on|Aborting, .*?\bio error\b|cannot communicate with node .* aborting job)/i) {
      $jobstep[$jobstepidx]->{tempfail} = 1;
      if (defined($job_slot_index)) {
        $slot[$job_slot_index]->{node}->{fail_count}++;
        ban_node_by_slot($job_slot_index);
      }
    }
    elsif ($line =~ /srun: error: (Unable to create job step|.*?: Communication connection failure)/i) {
      $jobstep[$jobstepidx]->{tempfail} = 1;
      ban_node_by_slot($job_slot_index) if (defined($job_slot_index));
    }
    elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
      $jobstep[$jobstepidx]->{tempfail} = 1;
    }
  }
}
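
# Worked example of the contract above: if a jobstep's stderr buffer
# holds "line one\nline two\npartial", preprocess_stderr() logs
# "stderr line one" and "stderr line two" and leaves "partial" in
# $jobstep[$jobstepidx]->{stderr} until more data arrives (or until
# readfrompipes_after_exit() flushes it after the child exits).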


# Read whatever is still available on its stderr+stdout pipes after
# the given child process has exited.
sub readfrompipes_after_exit
{
  my $jobstepidx = shift;

  # The fact that the child has exited allows some convenient
  # simplifications: (1) all data must have already been written, so
  # there's no need to wait for more once sysread returns 0; (2) the
  # total amount of data available is bounded by the pipe buffer size,
  # so it's safe to read everything into one string.
  my $buf;
  while (0 < sysread ($reader{$jobstepidx}, $buf, 65536)) {
    $jobstep[$jobstepidx]->{stderr_at} = time;
    $jobstep[$jobstepidx]->{stderr} .= $buf;
  }
  if ($jobstep[$jobstepidx]->{stdout_r}) {
    while (0 < sysread ($jobstep[$jobstepidx]->{stdout_r}, $buf, 65536)) {
      $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
    }
  }
  preprocess_stderr ($jobstepidx);

  map {
    Log ($jobstepidx, "stderr $_");
  } split ("\n", $jobstep[$jobstepidx]->{stderr});
  $jobstep[$jobstepidx]->{stderr} = '';
}

sub fetch_block
{
  my $hash = shift;
  my $keep;
  if (!open($keep, "-|", "arv-get", "--retries", retry_count(), $hash)) {
    Log(undef, "fetch_block run error from arv-get $hash: $!");
    return undef;
  }
  my $output_block = "";
  while (1) {
    my $buf;
    my $bytes = sysread($keep, $buf, 1024 * 1024);
    if (!defined $bytes) {
      Log(undef, "fetch_block read error from arv-get: $!");
      $output_block = undef;
      last;
    } elsif ($bytes == 0) {
      # sysread returns 0 at the end of the pipe.
      last;
    } else {
      # some bytes were read into buf.
      $output_block .= $buf;
    }
  }
  close $keep;
  if ($?) {
    Log(undef, "fetch_block arv-get exited " . exit_status_s($?));
    $output_block = undef;
  }
  return $output_block;
}

# Create a collection by concatenating the output of all tasks (each
# task's output is either a manifest fragment, a locator for a
# manifest fragment stored in Keep, or nothing at all).  Return the
# portable_data_hash of the new collection.
sub create_output_collection
{
  Log (undef, "collate");

  my ($child_out, $child_in);
  # This depends on the python-arvados-python-client package, which needs to be installed
  # on the machine running crunch-dispatch (typically, the API server).
  my $pid = open2($child_out, $child_in, '/usr/share/python2.7/dist/python-arvados-python-client/bin/python', '-c', q{
import arvados
import sys
print (arvados.api("v1").collections().
       create(body={"manifest_text": sys.stdin.read(),
                    "owner_uuid": sys.argv[2]}).
       execute(num_retries=int(sys.argv[1]))["portable_data_hash"])
}, retry_count(), $Job->{owner_uuid});

  my $task_idx = -1;
  my $manifest_size = 0;
  for (@jobstep)
  {
    ++$task_idx;
    my $output = $_->{'arvados_task'}->{output};
    next if (!defined($output));
    my $next_write;
    if ($output =~ /^[0-9a-f]{32}(\+\S+)*$/) {
      $next_write = fetch_block($output);
    } else {
      $next_write = $output;
    }
    if (defined($next_write)) {
      if (!defined(syswrite($child_in, $next_write))) {
        # There's been an error writing.  Stop the loop.
        # We'll log details about the exit code later.
        last;
      } else {
        $manifest_size += length($next_write);
      }
    } else {
      my $uuid = $_->{'arvados_task'}->{'uuid'};
      Log (undef, "Error retrieving '$output' output by task $task_idx ($uuid)");
      $main::success = 0;
    }
  }
  close($child_in);
  Log(undef, "collated output manifest text to send to API server is $manifest_size bytes with access tokens");

  my $joboutput;
  my $s = IO::Select->new($child_out);
  if ($s->can_read(120)) {
    sysread($child_out, $joboutput, 1024 * 1024);
    waitpid($pid, 0);
    if ($?) {
      Log(undef, "output collection creation exited " . exit_status_s($?));
      $joboutput = undef;
    } else {
      chomp($joboutput);
    }
  } else {
    Log (undef, "timed out while creating output collection");
    foreach my $signal (2, 2, 2, 15, 15, 9) {
      kill($signal, $pid);
      last if waitpid($pid, WNOHANG) == -1;
      sleep(1);
    }
  }
  close($child_out);

  return $joboutput;
}
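
# Illustrative walk-through with made-up task outputs: an output of
# ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:out.txt\n" is already a
# manifest fragment and is piped to the helper as-is, while an output
# of "acbd18db4cc2f85cedef654fccc4a4d8+3" matches the bare-locator
# pattern above and is expanded with fetch_block() first.  The helper's
# stdout is the portable_data_hash of the concatenated collection.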

# Calls create_output_collection, logs the result, and returns it.
# If that was successful, save that as the output in the job record.
sub save_output_collection {
  my $collated_output = create_output_collection();

  if (!$collated_output) {
    Log(undef, "Failed to write output collection");
  }
  else {
    Log(undef, "job output $collated_output");
    $Job->update_attributes('output' => $collated_output);
  }
  return $collated_output;
}

sub killem
{
  foreach (@_)
  {
    my $sig = 2;                # SIGINT first
    if (exists $proc{$_}->{"sent_$sig"} &&
        time - $proc{$_}->{"sent_$sig"} > 4)
    {
      $sig = 15;                # SIGTERM if SIGINT doesn't work
    }
    if (exists $proc{$_}->{"sent_$sig"} &&
        time - $proc{$_}->{"sent_$sig"} > 4)
    {
      $sig = 9;                 # SIGKILL if SIGTERM doesn't work
    }
    if (!exists $proc{$_}->{"sent_$sig"})
    {
      Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
      kill $sig, $_;
      select (undef, undef, undef, 0.1);
      if ($sig == 2)
      {
        kill $sig, $_;          # srun wants two SIGINT to really interrupt
      }
      $proc{$_}->{"sent_$sig"} = time;
      $proc{$_}->{"killedafter"} = time - $proc{$_}->{"time"};
    }
  }
}
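
# Escalation sketch for a process that ignores signals: the first
# killem() call sends SIGINT (twice, since srun wants two to really
# interrupt); a call more than 4 seconds later escalates to SIGTERM,
# and another 4 seconds after that, to SIGKILL.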


sub fhbits
{
  my($bits);
  for (@_) {
    vec($bits,fileno($_),1) = 1;
  }
  $bits;
}


# Send log output to Keep via arv-put.
#
# $log_pipe_in and $log_pipe_out are the input and output filehandles to the arv-put pipe.
# $log_pipe_out_buf is a string containing all output read from arv-put so far.
# $log_pipe_out_select is an IO::Select object around $log_pipe_out.
# $log_pipe_pid is the pid of the arv-put subprocess.
#
# The only functions that should access these variables directly are:
#
# log_writer_start($logfilename)
#     Starts an arv-put pipe, reading data on stdin and writing it to
#     a $logfilename file in an output collection.
#
# log_writer_read_output([$timeout])
#     Read output from $log_pipe_out and append it to $log_pipe_out_buf.
#     Passes $timeout to the select() call, with a default of 0.01.
#     Returns the result of the last read() call on $log_pipe_out, or
#     -1 if read() wasn't called because select() timed out.
#     Only other log_writer_* functions should need to call this.
#
# log_writer_send($txt)
#     Writes $txt to the output log collection.
#
# log_writer_finish()
#     Closes the arv-put pipe and returns the output that it produces.
#
# log_writer_is_active()
#     Returns a true value if there is currently a live arv-put
#     process, false otherwise.
#
my ($log_pipe_in, $log_pipe_out, $log_pipe_out_buf, $log_pipe_out_select,
    $log_pipe_pid);

sub log_writer_start($)
{
  my $logfilename = shift;
  $log_pipe_pid = open2($log_pipe_out, $log_pipe_in,
                        'arv-put',
                        '--stream',
                        '--retries', '6',
                        '--filename', $logfilename,
                        '-');
  $log_pipe_out_buf = "";
  $log_pipe_out_select = IO::Select->new($log_pipe_out);
}

sub log_writer_read_output {
  my $timeout = shift || 0.01;
  my $read = -1;
  while ($read && $log_pipe_out_select->can_read($timeout)) {
    $read = read($log_pipe_out, $log_pipe_out_buf, 65536,
                 length($log_pipe_out_buf));
  }
  if (!defined($read)) {
    Log(undef, "error reading log manifest from arv-put: $!");
  }
  return $read;
}

sub log_writer_send($)
{
  my $txt = shift;
  print $log_pipe_in $txt;
  log_writer_read_output();
}

sub log_writer_finish()
{
  return unless $log_pipe_pid;

  close($log_pipe_in);

  my $logger_failed = 0;
  my $read_result = log_writer_read_output(600);
  if ($read_result == -1) {
    $logger_failed = -1;
    Log (undef, "timed out reading from 'arv-put'");
  } elsif ($read_result != 0) {
    $logger_failed = -2;
    Log(undef, "failed to read arv-put log manifest to EOF");
  }

  waitpid($log_pipe_pid, 0);
  if ($?) {
    $logger_failed ||= $?;
    Log(undef, "log_writer_finish: arv-put exited " . exit_status_s($?));
  }

  close($log_pipe_out);
  my $arv_put_output = $logger_failed ? undef : $log_pipe_out_buf;
  $log_pipe_pid = $log_pipe_in = $log_pipe_out = $log_pipe_out_buf =
      $log_pipe_out_select = undef;

  return $arv_put_output;
}

sub log_writer_is_active() {
  return $log_pipe_pid;
}
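
# A minimal usage sketch of the log writer API documented above (the
# file name is illustrative):
#
#   log_writer_start("example.txt");
#   log_writer_send("hello\n") if log_writer_is_active();
#   my $manifest = log_writer_finish();  # manifest text, or undef on failure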

sub Log                         # ($jobstepidx, $logmessage)
{
  my ($jobstepidx, $logmessage) = @_;
  if ($logmessage =~ /\n/) {
    for my $line (split (/\n/, $logmessage)) {
      Log ($jobstepidx, $line);
    }
    return;
  }
  my $fh = select STDERR; $|=1; select $fh;
  my $task_qseq = '';
  if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
    $task_qseq = $jobstepidx;
  }
  my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
  $message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
  $message .= "\n";
  my $datetime;
  if (log_writer_is_active() || -t STDERR) {
    my @gmtime = gmtime;
    $datetime = sprintf ("%04d-%02d-%02d_%02d:%02d:%02d",
                         $gmtime[5]+1900, $gmtime[4]+1, @gmtime[3,2,1,0]);
  }
  print STDERR ((-t STDERR) ? ($datetime." ".$message) : $message);

  if (log_writer_is_active()) {
    log_writer_send($datetime . " " . $message);
  }
}


sub croak
{
  my ($package, $file, $line) = caller;
  my $message = "@_ at $file line $line\n";
  Log (undef, $message);
  release_allocation();
  freeze() if @jobstep_todo;
  create_output_collection() if @jobstep_todo;
  cleanup();
  save_meta();
  die;
}


sub cleanup
{
  return unless $Job;
  if ($Job->{'state'} eq 'Cancelled') {
    $Job->update_attributes('finished_at' => scalar gmtime);
  } else {
    $Job->update_attributes('state' => 'Failed');
  }
}


sub save_meta
{
  my $justcheckpoint = shift;   # false if this will be the last meta saved
  return if $justcheckpoint;    # checkpointing is not relevant post-Warehouse.pm
  return unless log_writer_is_active();
  my $log_manifest = log_writer_finish();
  return unless defined($log_manifest);

  if ($Job->{log}) {
    my $prev_log_coll = api_call("collections/get", uuid => $Job->{log});
    $log_manifest = $prev_log_coll->{manifest_text} . $log_manifest;
  }

  my $log_coll = api_call(
    "collections/create", ensure_unique_name => 1, collection => {
      manifest_text => $log_manifest,
      owner_uuid => $Job->{owner_uuid},
      name => sprintf("Log from %s job %s", $Job->{script}, $Job->{uuid}),
    });
  Log(undef, "log collection is " . $log_coll->{portable_data_hash});
  $Job->update_attributes('log' => $log_coll->{portable_data_hash});

  return $log_coll->{portable_data_hash};
}


sub freeze_if_want_freeze
{
  if ($main::please_freeze)
  {
    release_allocation();
    if (@_)
    {
      # kill some srun procs before freeze+stop
      map { $proc{$_} = {} } @_;
      while (%proc)
      {
        killem (keys %proc);
        select (undef, undef, undef, 0.1);
        my $died;
        while (($died = waitpid (-1, WNOHANG)) > 0)
        {
          delete $proc{$died};
        }
      }
    }
    freeze();
    create_output_collection();
    cleanup();
    save_meta();
    exit 1;
  }
}


sub freeze
{
  Log (undef, "Freeze not implemented");
  return;
}


sub thaw
{
  croak ("Thaw not implemented");
}


sub freezequote
{
  my $s = shift;
  $s =~ s/\\/\\\\/g;
  $s =~ s/\n/\\n/g;
  return $s;
}


sub freezeunquote
{
  my $s = shift;
  $s =~ s{\\(.)}{$1 eq "n" ? "\n" : $1}ge;
  return $s;
}

sub srun_sync
{
  my $srunargs = shift;
  my $execargs = shift;
  my $opts = shift || {};
  my $stdin = shift;

  my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
  Log (undef, "$label: start");

  my ($stderr_r, $stderr_w);
  pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");

  my ($stdout_r, $stdout_w);
  pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");

  my $started_srun = scalar time;

  my $srunpid = fork();
  if ($srunpid == 0)
  {
    close($stderr_r);
    close($stdout_r);
    fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
    fcntl($stdout_w, F_SETFL, 0) or croak($!);
    open(STDERR, ">&", $stderr_w) or croak ($!);
    open(STDOUT, ">&", $stdout_w) or croak ($!);
    srun ($srunargs, $execargs, $opts, $stdin);
    exit (1);
  }
  close($stderr_w);
  close($stdout_w);

  set_nonblocking($stderr_r);
  set_nonblocking($stdout_r);

  # Add entries to @jobstep and %proc so check_squeue() and
  # freeze_if_want_freeze() can treat it like a job task process.
  push @jobstep, {
    stderr => '',
    stderr_at => 0,
    stderr_captured => '',
    stdout_r => $stdout_r,
    stdout_captured => '',
  };
  my $jobstepidx = $#jobstep;
  $proc{$srunpid} = {
    jobstepidx => $jobstepidx,
  };
  $reader{$jobstepidx} = $stderr_r;

  while ($srunpid != waitpid ($srunpid, WNOHANG)) {
    my $busy = readfrompipes();
    if (!$busy || ($latest_refresh + 2 < scalar time)) {
      check_refresh_wanted();
      check_squeue();
      check_sinfo();
    }
    if (!$busy) {
      select(undef, undef, undef, 0.1);
    }
    if (($started_srun + $srun_sync_timeout) < scalar time) {
      # Exceeded the general timeout for "srun_sync" operations, which
      # likely means something got stuck on the remote node.
      Log(undef, "srun_sync exceeded timeout, will fail.");
      $main::please_freeze = 1;
    }
    killem(keys %proc) if $main::please_freeze;
  }
  my $exited = $?;

  readfrompipes_after_exit ($jobstepidx);

  Log (undef, "$label: exit ".exit_status_s($exited));

  close($stdout_r);
  close($stderr_r);
  delete $proc{$srunpid};
  delete $reader{$jobstepidx};

  my $j = pop @jobstep;
  # If the srun showed signs of tempfail, ensure the caller treats that as a
  # failure case.
  if ($main::please_freeze || $j->{tempfail}) {
    $exited ||= 255;
  }
  return ($exited, $j->{stdout_captured}, $j->{stderr_captured}, $j->{tempfail});
}


sub srun
{
  my $srunargs = shift;
  my $execargs = shift;
  my $opts = shift || {};
  my $stdin = shift;
  my $args = $have_slurm ? [@$srunargs, @$execargs] : $execargs;

  $Data::Dumper::Terse = 1;
  $Data::Dumper::Indent = 0;
  my $show_cmd = Dumper($args);
  $show_cmd =~ s/(TOKEN\\*=)[^\s\']+/${1}[...]/g;
  $show_cmd =~ s/\n/ /g;
  if ($opts->{fork}) {
    Log(undef, "starting: $show_cmd");
  } else {
    # This is a child process: the parent is in charge of reading our
    # stderr and copying it to Log() if needed.
    warn "starting: $show_cmd\n";
  }

  if (defined $stdin) {
    my $child = open STDIN, "-|";
    defined $child or die "no fork: $!";
    if ($child == 0) {
      print $stdin or die $!;
      close STDOUT or die $!;
      exit 0;
    }
  }

  return system (@$args) if $opts->{fork};

  exec @$args;
  warn "ENV size is ".length(join(" ",%ENV));
  die "exec failed: $!: @$args";
}


sub ban_node_by_slot {
  # Don't start any new jobsteps on this node for 60 seconds
  my $slotid = shift;
  $slot[$slotid]->{node}->{hold_until} = 60 + scalar time;
  $slot[$slotid]->{node}->{hold_count}++;
  Log (undef, "backing off node " . $slot[$slotid]->{node}->{name} . " for 60 seconds");
}

sub must_lock_now
{
  my ($lockfile, $error_message) = @_;
  open L, ">", $lockfile or croak("$lockfile: $!");
  if (!flock L, LOCK_EX|LOCK_NB) {
    croak("Can't lock $lockfile: $error_message\n");
  }
}

sub find_docker_image {
  # Given a Keep locator, check to see if it contains a Docker image.
  # If so, return its stream name and Docker hash.
  # If not, return undef for both values.
  my $locator = shift;
  my ($streamname, $filename);
  my $image = api_call("collections/get", uuid => $locator);
  if ($image) {
    foreach my $line (split(/\n/, $image->{manifest_text})) {
      my @tokens = split(/\s+/, $line);
      next if (!@tokens);
      $streamname = shift(@tokens);
      foreach my $filedata (grep(/^\d+:\d+:/, @tokens)) {
        if (defined($filename)) {
          return (undef, undef);  # More than one file in the Collection.
        } else {
          $filename = (split(/:/, $filedata, 3))[2];
          $filename =~ s/\\([0-3][0-7][0-7])/chr(oct($1))/ge;
        }
      }
    }
  }
  if (defined($filename) and ($filename =~ /^((?:sha256:)?[0-9A-Fa-f]{64})\.tar$/)) {
    return ($streamname, $1);
  } else {
    return (undef, undef);
  }
}
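
# Example with a made-up manifest: for a collection whose manifest_text
# is the single line
#
#   . acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.tar
#
# find_docker_image() returns the stream name "." and the Docker hash
# "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".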

sub exit_retry_unlocked {
  Log(undef, "Transient failure with lock acquired; asking for re-dispatch by exiting ".EX_RETRY_UNLOCKED);
  exit(EX_RETRY_UNLOCKED);
}

sub retry_count {
  # Calculate the number of times an operation should be retried,
  # assuming exponential backoff, and that we're willing to retry as
  # long as tasks have been running.  Enforce a minimum of 3 retries.
  my ($starttime, $endtime, $timediff, $retries);
  if (@jobstep) {
    $starttime = $jobstep[0]->{starttime};
    $endtime = $jobstep[-1]->{finishtime};
  }
  if (!defined($starttime)) {
    $timediff = 0;
  } elsif (!defined($endtime)) {
    $timediff = time - $starttime;
  } else {
    $timediff = ($endtime - $starttime) - (time - $endtime);
  }
  if ($timediff > 0) {
    $retries = int(log($timediff) / log(2));
  } else {
    $retries = 1;  # Use the minimum.
  }
  return ($retries > 3) ? $retries : 3;
}
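
# Worked example with made-up timings: if the first task started 600s
# ago and the last one finished 100s ago, $timediff is 500 - 100 = 400,
# so retry_count() returns int(log(400)/log(2)) == 8.  With no task
# history at all, $timediff is 0 and the minimum of 3 applies.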

sub retry_op {
  # Pass in a function reference that performs the operation, plus a short
  # description of it for log messages; any remaining arguments are passed
  # through to the operation on each attempt.
  # If the operation dies, retry it with exponential backoff until it
  # succeeds, or until the current retry_count is exhausted.  After each
  # failure that can be retried, log the current try count (0-based),
  # next try time, and error message.
  my $operation = shift;
  my $op_text = shift;
  my $retries = retry_count();
  my $retry_callback = sub {
    my ($try_count, $next_try_at, $errmsg) = @_;
    $errmsg =~ s/\s*\bat \Q$0\E line \d+\.?\s*//;
    $errmsg =~ s/\s/ /g;
    $errmsg =~ s/\s+$//;
    my $retry_msg;
    if ($next_try_at < time) {
      $retry_msg = "Retrying.";
    } else {
      my $next_try_fmt = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($next_try_at);
      $retry_msg = "Retrying at $next_try_fmt.";
    }
    Log(undef, "$op_text failed: $errmsg. $retry_msg");
  };
  foreach my $try_count (0..$retries) {
    my $next_try = time + (2 ** $try_count);
    my $result = eval { $operation->(@_); };
    if (!$@) {
      return $result;
    } elsif ($try_count < $retries) {
      $retry_callback->($try_count, $next_try, $@);
      my $sleep_time = $next_try - time;
      sleep($sleep_time) if ($sleep_time > 0);
    }
  }
  # Ensure the error message ends in a newline, so Perl doesn't add
  # retry_op's line number to it.
  chomp($@);
  die($@ . "\n");
}

sub api_call {
  # Pass in a /-separated API method name, and arguments for it.
  # This function will call that method, retrying as needed until
  # the current retry_count is exhausted, and logging each failure
  # that will be retried.
  my $method_name = shift;
  my $method = $arv;
  foreach my $key (split(/\//, $method_name)) {
    $method = $method->{$key};
  }
  return retry_op(sub { $method->execute(@_); }, "API method $method_name", @_);
}
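
# Hypothetical call, mirroring the real uses elsewhere in this script:
#
#   my $coll = api_call("collections/get", uuid => $locator);
#
# This walks $arv->{collections}->{get} and executes it inside
# retry_op(), so transient API failures get exponential backoff.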

sub exit_status_s {
  # Given a $?, return a human-readable exit code string like "0" or
  # "1" or "0 with signal 1" or "1 with signal 11".
  my $exitcode = shift;
  my $s = $exitcode >> 8;
  if ($exitcode & 0x7f) {
    $s .= " with signal " . ($exitcode & 0x7f);
  }
  if ($exitcode & 0x80) {
    $s .= " with core dump";
  }
  return $s;
}
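
# Examples, assuming wait(2)-style status words in $?:
#
#   exit_status_s(0)    => "0"
#   exit_status_s(256)  => "1"                 (exited 1)
#   exit_status_s(11)   => "0 with signal 11"  (killed by SIGSEGV)
#   exit_status_s(139)  => "0 with signal 11 with core dump"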

sub handle_readall {
  # Pass in a glob reference to a file handle.
  # Read all its contents and return them as a string.
  my $fh_glob_ref = shift;
  local $/ = undef;
  return <$fh_glob_ref>;
}

sub tar_filename_n {
  my $n = shift;
  return sprintf("%s/git.%s.%d.tar", $ENV{CRUNCH_TMP}, $job_id, $n);
}

sub add_git_archive {
  # Pass in a git archive command as a string or list, a la system().
  # This method will save its output to be included in the archive sent to the
  # build script.
  my $git_input;
  $git_tar_count++;
  if (!open(GIT_ARCHIVE, ">", tar_filename_n($git_tar_count))) {
    croak("Failed to save git archive: $!");
  }
  my $git_pid = open2(">&GIT_ARCHIVE", $git_input, @_);
  close($git_input);
  waitpid($git_pid, 0);
  close(GIT_ARCHIVE);
  if ($?) {
    croak("Failed to save git archive: git exited " . exit_status_s($?));
  }
}

sub combined_git_archive {
  # Combine all saved tar archives into a single archive, then return its
  # contents in a string.  Return undef if no archives have been saved.
  if ($git_tar_count < 1) {
    return undef;
  }
  my $base_tar_name = tar_filename_n(1);
  foreach my $tar_to_append (map { tar_filename_n($_); } (2..$git_tar_count)) {
    my $tar_exit = system("tar", "-Af", $base_tar_name, $tar_to_append);
    if ($tar_exit != 0) {
      croak("Error preparing build archive: tar -A exited " .
            exit_status_s($tar_exit));
    }
  }
  if (!open(GIT_TAR, "<", $base_tar_name)) {
    croak("Could not open build archive: $!");
  }
  my $tar_contents = handle_readall(\*GIT_TAR);
  close(GIT_TAR);
  return $tar_contents;
}

sub set_nonblocking {
  my $fh = shift;
  my $flags = fcntl ($fh, F_GETFL, 0) or croak ($!);
  fcntl ($fh, F_SETFL, $flags | O_NONBLOCK) or croak ($!);
}

__DATA__
#!/usr/bin/env perl
#
# This is crunch-job's internal dispatch script.  crunch-job running on the API
# server invokes this script on individual compute nodes, or localhost if we're
# running a job locally.  It gets called in two modes:
#
# * No arguments: Installation mode.  Read a tar archive from the DATA
#   file handle; it includes the Crunch script's source code, and
#   maybe SDKs as well.  Those should be installed in the proper
#   locations.  This runs outside of any Docker container, so don't try to
#   introspect Crunch's runtime environment.
#
# * With arguments: Crunch script run mode.  This script should set up the
#   environment, then run the command specified in the arguments.  This runs
#   inside any Docker container.
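
# Invocation sketch for the two modes described above (the script name
# is illustrative; installation mode's tar archive arrives on this
# script's own DATA handle, i.e. appended after the __DATA__ marker):
#
#   perl dispatch-script                # no arguments: installation mode
#   perl dispatch-script cmd arg ...    # run mode: sets up env, then exec()s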

use Fcntl ':flock';
use File::Path qw( make_path remove_tree );
use POSIX qw(getcwd);

use constant TASK_TEMPFAIL => 111;

# Map SDK subdirectories to the path environments they belong to.
my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");

my $destdir = $ENV{"CRUNCH_SRC"};
my $archive_hash = $ENV{"CRUNCH_GIT_ARCHIVE_HASH"};
my $repo = $ENV{"CRUNCH_SRC_URL"};
my $install_dir = $ENV{"CRUNCH_INSTALL"} || (getcwd() . "/opt");
my $job_work = $ENV{"JOB_WORK"};
my $task_work = $ENV{"TASK_WORK"};

open(STDOUT_ORIG, ">&", STDOUT);
open(STDERR_ORIG, ">&", STDERR);

for my $dir ($destdir, $job_work, $task_work) {
  if ($dir) {
    make_path $dir;
    -e $dir or die "Failed to create temporary directory ($dir): $!";
  }
}

if ($task_work) {
  remove_tree($task_work, {keep_root => 1});
}

### Crunch script run mode
if (@ARGV) {
  # We want to do routine logging during task 0 only.  This gives the user
  # the information they need, but avoids repeating the information for every
  # task.
  my $Log;
  if ($ENV{TASK_SEQUENCE} eq "0") {
    $Log = sub {
      my $msg = shift;
      printf STDERR_ORIG "[Crunch] $msg\n", @_;
    };
  } else {
    $Log = sub { };
  }

  my $python_src = "$install_dir/python";
  my $venv_dir = "$job_work/.arvados.venv";
  my $venv_built = -e "$venv_dir/bin/activate";
  if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) {
    shell_or_die(undef, "virtualenv", "--quiet", "--system-site-packages",
                 "--python=python2.7", $venv_dir);
    shell_or_die(TASK_TEMPFAIL, "$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src);
    $venv_built = 1;
    $Log->("Built Python SDK virtualenv");
  }

  my @pysdk_version_cmd = ("python", "-c",
    "from pkg_resources import get_distribution as get; print get('arvados-python-client').version");
  if ($venv_built) {
    $Log->("Running in Python SDK virtualenv");
    @pysdk_version_cmd = ();
    my $orig_argv = join(" ", map { quotemeta($_); } @ARGV);
    @ARGV = ("/bin/sh", "-ec",
             ". \Q$venv_dir/bin/activate\E; exec $orig_argv");
  } elsif (-d $python_src) {
    $Log->("Warning: virtualenv not found inside Docker container default " .
           "\$PATH. Can't install Python SDK.");
  }

  if (@pysdk_version_cmd) {
    open(my $pysdk_version_pipe, "-|", @pysdk_version_cmd);
    my $pysdk_version = <$pysdk_version_pipe>;
    close($pysdk_version_pipe);
    if ($? == 0) {
      chomp($pysdk_version);
      $Log->("Using Arvados SDK version $pysdk_version");
    } else {
      # A lot could've gone wrong here, but pretty much all of it means that
      # Python won't be able to load the Arvados SDK.
      $Log->("Warning: Arvados SDK not found");
    }
  }

  while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
    my $sdk_path = "$install_dir/$sdk_dir";
    if (-d $sdk_path) {
      if ($ENV{$sdk_envkey}) {
        $ENV{$sdk_envkey} = "$sdk_path:" . $ENV{$sdk_envkey};
      } else {
        $ENV{$sdk_envkey} = $sdk_path;
      }
      $Log->("Arvados SDK added to %s", $sdk_envkey);
    }
  }

  exec(@ARGV);
  die "Cannot exec `@ARGV`: $!";
}

### Installation mode
open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
flock L, LOCK_EX;
if (readlink ("$destdir.archive_hash") eq $archive_hash && -d $destdir) {
  # This exact git archive (source + arvados sdk) is already installed
  # here, so there's no need to reinstall it.

  # We must consume our DATA section, though: otherwise the process
  # feeding it to us will get SIGPIPE.
  my $buf;
  while (read(DATA, $buf, 65536)) { }

  exit(0);
}

unlink "$destdir.archive_hash";
mkdir $destdir;

do {
  # Ignore SIGPIPE: we check retval of close() instead.  See perlipc(1).
  local $SIG{PIPE} = "IGNORE";
  warn "Extracting archive: $archive_hash\n";
  # --ignore-zeros is necessary sometimes: depending on how much NUL
  # padding tar -A put on our combined archive (which in turn depends
  # on the length of the component archives) tar without
  # --ignore-zeros will exit before consuming stdin and cause close()
  # to fail on the resulting SIGPIPE.
  if (!open(TARX, "|-", "tar", "--ignore-zeros", "-xC", $destdir)) {
    die "Error launching 'tar -xC $destdir': $!";
  }
  # If we send too much data to tar in one write (> 4-5 MiB), it stops, and we
  # get SIGPIPE.  We must feed it data incrementally.
  my $tar_input;
  while (read(DATA, $tar_input, 65536)) {
    print TARX $tar_input;
  }
  if (!close(TARX)) {
    die "'tar -xC $destdir' exited $?: $!";
  }
};

mkdir $install_dir;

my $sdk_root = "$destdir/.arvados.sdk/sdk";
if (-d $sdk_root) {
  foreach my $sdk_lang (("python",
                         map { (split /\//, $_, 2)[0]; } keys(%SDK_ENVVARS))) {
    if (-d "$sdk_root/$sdk_lang") {
      if (!rename("$sdk_root/$sdk_lang", "$install_dir/$sdk_lang")) {
        die "Failed to install $sdk_lang SDK: $!";
      }
    }
  }
}

my $python_dir = "$install_dir/python";
if ((-d $python_dir) and can_run("python2.7")) {
  open(my $egg_info_pipe, "-|",
       "python2.7 \Q$python_dir/setup.py\E egg_info 2>&1 >/dev/null");
  my @egg_info_errors = <$egg_info_pipe>;
  close($egg_info_pipe);

  if ($?) {
    if (@egg_info_errors and (($egg_info_errors[-1] =~ /\bgit\b/) or ($egg_info_errors[-1] =~ /\[Errno 2\]/))) {
      # egg_info apparently failed because it couldn't ask git for a build tag.
      # Specify no build tag.
      open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
      print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
      close($pysdk_cfg);
    } else {
      my $egg_info_exit = $? >> 8;
      foreach my $errline (@egg_info_errors) {
        warn $errline;
      }
      warn "python setup.py egg_info failed: exit $egg_info_exit";
      exit ($egg_info_exit || 1);
    }
  }
}

# Hide messages from the install script (unless it fails: shell_or_die
# will show $destdir.log in that case).
open(STDOUT, ">>", "$destdir.log") or die ($!);
open(STDERR, ">&", STDOUT) or die ($!);

if (-e "$destdir/crunch_scripts/install") {
  shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir);
} elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
  # Old version
  shell_or_die (undef, "./tests/autotests.sh", $install_dir);
} elsif (-e "./install.sh") {
  shell_or_die (undef, "./install.sh", $install_dir);
}

if ($archive_hash) {
  unlink "$destdir.archive_hash.new";
  symlink ($archive_hash, "$destdir.archive_hash.new") or die "$destdir.archive_hash.new: $!";
  rename ("$destdir.archive_hash.new", "$destdir.archive_hash") or die "$destdir.archive_hash: $!";
}

close L;

sub can_run {
  my $command_name = shift;
  open(my $which, "-|", "which", $command_name) or die ($!);
  while (<$which>) { }
  close($which);
  return ($? == 0);
}

sub shell_or_die
{
  my $exitcode = shift;

  if ($ENV{"DEBUG"}) {
    print STDERR "@_\n";
  }
  if (system (@_) != 0) {
    my $err = $!;
    my $code = $?;
    my $exitstatus = sprintf("exit %d signal %d", $code >> 8, $code & 0x7f);
    open STDERR, ">&STDERR_ORIG";
    system ("cat $destdir.log >&2");
    warn "@_ failed ($err): $exitstatus";
    if (defined($exitcode)) {
      exit $exitcode;
    }
    else {
      exit (($code >> 8) || 1);
    }
  }
}

__DATA__