arvados-cli 0.1.20160209221008 → 0.1.20160210155133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crunch-job +27 -15
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 659e88b672cd2316f53bb06df90fb1ef551bb05a
|
4
|
+
data.tar.gz: e0754ab14a47f327227c9f03f704aa4d611f7a00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e01f712acdaec33a6cd4f5f368ec0146299a5e436915d88ced3267c86b6743977e8c495513bb5ea781e74e711e92c9f6be96393bad73271684beec1daeceb6bd
|
7
|
+
data.tar.gz: 95f40cb2432c76eb5fd6c9bdaf989e88d6ee1dc525a71f05e09c031cc1d3935e375933eac0a4d200c6ab8be692aee38e2991482ef4d5a5f3760cf9fb1bb560ef
|
data/bin/crunch-job
CHANGED
@@ -415,11 +415,13 @@ if (!defined $no_clear_tmp) {
|
|
415
415
|
# If this job requires a Docker image, install that.
|
416
416
|
my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
|
417
417
|
if ($docker_locator = $Job->{docker_image_locator}) {
|
418
|
+
Log (undef, "Install docker image $docker_locator");
|
418
419
|
($docker_stream, $docker_hash) = find_docker_image($docker_locator);
|
419
420
|
if (!$docker_hash)
|
420
421
|
{
|
421
422
|
croak("No Docker image hash found from locator $docker_locator");
|
422
423
|
}
|
424
|
+
Log (undef, "docker image hash is $docker_hash");
|
423
425
|
$docker_stream =~ s/^\.//;
|
424
426
|
my $docker_install_script = qq{
|
425
427
|
if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
|
@@ -1057,12 +1059,14 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
1057
1059
|
check_refresh_wanted();
|
1058
1060
|
check_squeue();
|
1059
1061
|
update_progress_stats();
|
1060
|
-
select (undef, undef, undef, 0.1);
|
1061
1062
|
}
|
1062
1063
|
elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
|
1063
1064
|
{
|
1064
1065
|
update_progress_stats();
|
1065
1066
|
}
|
1067
|
+
if (!$gotsome) {
|
1068
|
+
select (undef, undef, undef, 0.1);
|
1069
|
+
}
|
1066
1070
|
$working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
|
1067
1071
|
$_->{node}->{hold_count} < 4 } @slot);
|
1068
1072
|
if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
|
@@ -1340,8 +1344,9 @@ sub check_squeue
|
|
1340
1344
|
# squeue check interval (15s) this should make the squeue check an
|
1341
1345
|
# infrequent event.
|
1342
1346
|
my $silent_procs = 0;
|
1343
|
-
for my $
|
1347
|
+
for my $procinfo (values %proc)
|
1344
1348
|
{
|
1349
|
+
my $jobstep = $jobstep[$procinfo->{jobstep}];
|
1345
1350
|
if ($jobstep->{stderr_at} < $last_squeue_check)
|
1346
1351
|
{
|
1347
1352
|
$silent_procs++;
|
@@ -1350,17 +1355,18 @@ sub check_squeue
|
|
1350
1355
|
return if $silent_procs == 0;
|
1351
1356
|
|
1352
1357
|
# use killem() on procs whose killtime is reached
|
1353
|
-
while (my ($pid, $
|
1358
|
+
while (my ($pid, $procinfo) = each %proc)
|
1354
1359
|
{
|
1355
|
-
|
1356
|
-
|
1360
|
+
my $jobstep = $jobstep[$procinfo->{jobstep}];
|
1361
|
+
if (exists $procinfo->{killtime}
|
1362
|
+
&& $procinfo->{killtime} <= time
|
1357
1363
|
&& $jobstep->{stderr_at} < $last_squeue_check)
|
1358
1364
|
{
|
1359
1365
|
my $sincewhen = "";
|
1360
1366
|
if ($jobstep->{stderr_at}) {
|
1361
1367
|
$sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
|
1362
1368
|
}
|
1363
|
-
Log($
|
1369
|
+
Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
|
1364
1370
|
killem ($pid);
|
1365
1371
|
}
|
1366
1372
|
}
|
@@ -1395,12 +1401,12 @@ sub check_squeue
|
|
1395
1401
|
}
|
1396
1402
|
|
1397
1403
|
# Check for child procs >60s old and not mentioned by squeue.
|
1398
|
-
while (my ($pid, $
|
1404
|
+
while (my ($pid, $procinfo) = each %proc)
|
1399
1405
|
{
|
1400
|
-
if ($
|
1401
|
-
&& $
|
1402
|
-
&& !exists $ok{$
|
1403
|
-
&& !exists $
|
1406
|
+
if ($procinfo->{time} < time - 60
|
1407
|
+
&& $procinfo->{jobstepname}
|
1408
|
+
&& !exists $ok{$procinfo->{jobstepname}}
|
1409
|
+
&& !exists $procinfo->{killtime})
|
1404
1410
|
{
|
1405
1411
|
# According to slurm, this task has ended (successfully or not)
|
1406
1412
|
# -- but our srun child hasn't exited. First we must wait (30
|
@@ -1409,8 +1415,8 @@ sub check_squeue
|
|
1409
1415
|
# terminated, we'll conclude some slurm communication
|
1410
1416
|
# error/delay has caused the task to die without notifying srun,
|
1411
1417
|
# and we'll kill srun ourselves.
|
1412
|
-
$
|
1413
|
-
Log($
|
1418
|
+
$procinfo->{killtime} = time + 30;
|
1419
|
+
Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
|
1414
1420
|
}
|
1415
1421
|
}
|
1416
1422
|
}
|
@@ -1432,15 +1438,21 @@ sub readfrompipes
|
|
1432
1438
|
foreach my $job (keys %reader)
|
1433
1439
|
{
|
1434
1440
|
my $buf;
|
1435
|
-
|
1441
|
+
if (0 < sysread ($reader{$job}, $buf, 65536))
|
1436
1442
|
{
|
1437
1443
|
print STDERR $buf if $ENV{CRUNCH_DEBUG};
|
1438
1444
|
$jobstep[$job]->{stderr_at} = time;
|
1439
1445
|
$jobstep[$job]->{stderr} .= $buf;
|
1446
|
+
|
1447
|
+
# Consume everything up to the last \n
|
1440
1448
|
preprocess_stderr ($job);
|
1449
|
+
|
1441
1450
|
if (length ($jobstep[$job]->{stderr}) > 16384)
|
1442
1451
|
{
|
1443
|
-
|
1452
|
+
# If we get a lot of stderr without a newline, chop off the
|
1453
|
+
# front to avoid letting our buffer grow indefinitely.
|
1454
|
+
substr ($jobstep[$job]->{stderr},
|
1455
|
+
0, length($jobstep[$job]->{stderr}) - 8192) = "";
|
1444
1456
|
}
|
1445
1457
|
$gotsome = 1;
|
1446
1458
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20160209221008
|
4
|
+
version: 0.1.20160210155133
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit fdc9a9308c646d23ec50073833f141ceebf78613
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|