arvados-cli 0.1.20160209221008 → 0.1.20160210155133
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/crunch-job +27 -15
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 659e88b672cd2316f53bb06df90fb1ef551bb05a
|
4
|
+
data.tar.gz: e0754ab14a47f327227c9f03f704aa4d611f7a00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e01f712acdaec33a6cd4f5f368ec0146299a5e436915d88ced3267c86b6743977e8c495513bb5ea781e74e711e92c9f6be96393bad73271684beec1daeceb6bd
|
7
|
+
data.tar.gz: 95f40cb2432c76eb5fd6c9bdaf989e88d6ee1dc525a71f05e09c031cc1d3935e375933eac0a4d200c6ab8be692aee38e2991482ef4d5a5f3760cf9fb1bb560ef
|
data/bin/crunch-job
CHANGED
@@ -415,11 +415,13 @@ if (!defined $no_clear_tmp) {
|
|
415
415
|
# If this job requires a Docker image, install that.
|
416
416
|
my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
|
417
417
|
if ($docker_locator = $Job->{docker_image_locator}) {
|
418
|
+
Log (undef, "Install docker image $docker_locator");
|
418
419
|
($docker_stream, $docker_hash) = find_docker_image($docker_locator);
|
419
420
|
if (!$docker_hash)
|
420
421
|
{
|
421
422
|
croak("No Docker image hash found from locator $docker_locator");
|
422
423
|
}
|
424
|
+
Log (undef, "docker image hash is $docker_hash");
|
423
425
|
$docker_stream =~ s/^\.//;
|
424
426
|
my $docker_install_script = qq{
|
425
427
|
if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
|
@@ -1057,12 +1059,14 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
1057
1059
|
check_refresh_wanted();
|
1058
1060
|
check_squeue();
|
1059
1061
|
update_progress_stats();
|
1060
|
-
select (undef, undef, undef, 0.1);
|
1061
1062
|
}
|
1062
1063
|
elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
|
1063
1064
|
{
|
1064
1065
|
update_progress_stats();
|
1065
1066
|
}
|
1067
|
+
if (!$gotsome) {
|
1068
|
+
select (undef, undef, undef, 0.1);
|
1069
|
+
}
|
1066
1070
|
$working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
|
1067
1071
|
$_->{node}->{hold_count} < 4 } @slot);
|
1068
1072
|
if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
|
@@ -1340,8 +1344,9 @@ sub check_squeue
|
|
1340
1344
|
# squeue check interval (15s) this should make the squeue check an
|
1341
1345
|
# infrequent event.
|
1342
1346
|
my $silent_procs = 0;
|
1343
|
-
for my $
|
1347
|
+
for my $procinfo (values %proc)
|
1344
1348
|
{
|
1349
|
+
my $jobstep = $jobstep[$procinfo->{jobstep}];
|
1345
1350
|
if ($jobstep->{stderr_at} < $last_squeue_check)
|
1346
1351
|
{
|
1347
1352
|
$silent_procs++;
|
@@ -1350,17 +1355,18 @@ sub check_squeue
|
|
1350
1355
|
return if $silent_procs == 0;
|
1351
1356
|
|
1352
1357
|
# use killem() on procs whose killtime is reached
|
1353
|
-
while (my ($pid, $
|
1358
|
+
while (my ($pid, $procinfo) = each %proc)
|
1354
1359
|
{
|
1355
|
-
|
1356
|
-
|
1360
|
+
my $jobstep = $jobstep[$procinfo->{jobstep}];
|
1361
|
+
if (exists $procinfo->{killtime}
|
1362
|
+
&& $procinfo->{killtime} <= time
|
1357
1363
|
&& $jobstep->{stderr_at} < $last_squeue_check)
|
1358
1364
|
{
|
1359
1365
|
my $sincewhen = "";
|
1360
1366
|
if ($jobstep->{stderr_at}) {
|
1361
1367
|
$sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
|
1362
1368
|
}
|
1363
|
-
Log($
|
1369
|
+
Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
|
1364
1370
|
killem ($pid);
|
1365
1371
|
}
|
1366
1372
|
}
|
@@ -1395,12 +1401,12 @@ sub check_squeue
|
|
1395
1401
|
}
|
1396
1402
|
|
1397
1403
|
# Check for child procs >60s old and not mentioned by squeue.
|
1398
|
-
while (my ($pid, $
|
1404
|
+
while (my ($pid, $procinfo) = each %proc)
|
1399
1405
|
{
|
1400
|
-
if ($
|
1401
|
-
&& $
|
1402
|
-
&& !exists $ok{$
|
1403
|
-
&& !exists $
|
1406
|
+
if ($procinfo->{time} < time - 60
|
1407
|
+
&& $procinfo->{jobstepname}
|
1408
|
+
&& !exists $ok{$procinfo->{jobstepname}}
|
1409
|
+
&& !exists $procinfo->{killtime})
|
1404
1410
|
{
|
1405
1411
|
# According to slurm, this task has ended (successfully or not)
|
1406
1412
|
# -- but our srun child hasn't exited. First we must wait (30
|
@@ -1409,8 +1415,8 @@ sub check_squeue
|
|
1409
1415
|
# terminated, we'll conclude some slurm communication
|
1410
1416
|
# error/delay has caused the task to die without notifying srun,
|
1411
1417
|
# and we'll kill srun ourselves.
|
1412
|
-
$
|
1413
|
-
Log($
|
1418
|
+
$procinfo->{killtime} = time + 30;
|
1419
|
+
Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
|
1414
1420
|
}
|
1415
1421
|
}
|
1416
1422
|
}
|
@@ -1432,15 +1438,21 @@ sub readfrompipes
|
|
1432
1438
|
foreach my $job (keys %reader)
|
1433
1439
|
{
|
1434
1440
|
my $buf;
|
1435
|
-
|
1441
|
+
if (0 < sysread ($reader{$job}, $buf, 65536))
|
1436
1442
|
{
|
1437
1443
|
print STDERR $buf if $ENV{CRUNCH_DEBUG};
|
1438
1444
|
$jobstep[$job]->{stderr_at} = time;
|
1439
1445
|
$jobstep[$job]->{stderr} .= $buf;
|
1446
|
+
|
1447
|
+
# Consume everything up to the last \n
|
1440
1448
|
preprocess_stderr ($job);
|
1449
|
+
|
1441
1450
|
if (length ($jobstep[$job]->{stderr}) > 16384)
|
1442
1451
|
{
|
1443
|
-
|
1452
|
+
# If we get a lot of stderr without a newline, chop off the
|
1453
|
+
# front to avoid letting our buffer grow indefinitely.
|
1454
|
+
substr ($jobstep[$job]->{stderr},
|
1455
|
+
0, length($jobstep[$job]->{stderr}) - 8192) = "";
|
1444
1456
|
}
|
1445
1457
|
$gotsome = 1;
|
1446
1458
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20160209221008
|
4
|
+
version: 0.1.20160210155133
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-09 00:00:00.000000000 Z
|
11
|
+
date: 2016-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit fdc9a9308c646d23ec50073833f141ceebf78613
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|