arvados-cli 0.1.20160209221008 → 0.1.20160210155133

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +27 -15
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b325e2e44f016973f3030414417cc1b94ba93f87
4
- data.tar.gz: 70eeaa65eb2b2b71129d9f603c41da4f2e66924f
3
+ metadata.gz: 659e88b672cd2316f53bb06df90fb1ef551bb05a
4
+ data.tar.gz: e0754ab14a47f327227c9f03f704aa4d611f7a00
5
5
  SHA512:
6
- metadata.gz: 8112dfe3eee6b53bf8658a5fbe473171471ffaec0bd46b233260bef6497ac4f82b6dc52b016591056df5b399cd6f25ebeb9713fabfc3c951830981e20d772f4c
7
- data.tar.gz: 35937645f003af1f93f6169a70021e350e4e29fc9feef61859ba4d9841e59847a91a30dd7a6cb82d0962dfd316a1b5e3dc8586c5908a8992b708280c46aa7338
6
+ metadata.gz: e01f712acdaec33a6cd4f5f368ec0146299a5e436915d88ced3267c86b6743977e8c495513bb5ea781e74e711e92c9f6be96393bad73271684beec1daeceb6bd
7
+ data.tar.gz: 95f40cb2432c76eb5fd6c9bdaf989e88d6ee1dc525a71f05e09c031cc1d3935e375933eac0a4d200c6ab8be692aee38e2991482ef4d5a5f3760cf9fb1bb560ef
data/bin/crunch-job CHANGED
@@ -415,11 +415,13 @@ if (!defined $no_clear_tmp) {
415
415
  # If this job requires a Docker image, install that.
416
416
  my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
417
417
  if ($docker_locator = $Job->{docker_image_locator}) {
418
+ Log (undef, "Install docker image $docker_locator");
418
419
  ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
419
420
  if (!$docker_hash)
420
421
  {
421
422
  croak("No Docker image hash found from locator $docker_locator");
422
423
  }
424
+ Log (undef, "docker image hash is $docker_hash");
423
425
  $docker_stream =~ s/^\.//;
424
426
  my $docker_install_script = qq{
425
427
  if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
@@ -1057,12 +1059,14 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
1057
1059
  check_refresh_wanted();
1058
1060
  check_squeue();
1059
1061
  update_progress_stats();
1060
- select (undef, undef, undef, 0.1);
1061
1062
  }
1062
1063
  elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
1063
1064
  {
1064
1065
  update_progress_stats();
1065
1066
  }
1067
+ if (!$gotsome) {
1068
+ select (undef, undef, undef, 0.1);
1069
+ }
1066
1070
  $working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
1067
1071
  $_->{node}->{hold_count} < 4 } @slot);
1068
1072
  if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
@@ -1340,8 +1344,9 @@ sub check_squeue
1340
1344
  # squeue check interval (15s) this should make the squeue check an
1341
1345
  # infrequent event.
1342
1346
  my $silent_procs = 0;
1343
- for my $jobstep (values %proc)
1347
+ for my $procinfo (values %proc)
1344
1348
  {
1349
+ my $jobstep = $jobstep[$procinfo->{jobstep}];
1345
1350
  if ($jobstep->{stderr_at} < $last_squeue_check)
1346
1351
  {
1347
1352
  $silent_procs++;
@@ -1350,17 +1355,18 @@ sub check_squeue
1350
1355
  return if $silent_procs == 0;
1351
1356
 
1352
1357
  # use killem() on procs whose killtime is reached
1353
- while (my ($pid, $jobstep) = each %proc)
1358
+ while (my ($pid, $procinfo) = each %proc)
1354
1359
  {
1355
- if (exists $jobstep->{killtime}
1356
- && $jobstep->{killtime} <= time
1360
+ my $jobstep = $jobstep[$procinfo->{jobstep}];
1361
+ if (exists $procinfo->{killtime}
1362
+ && $procinfo->{killtime} <= time
1357
1363
  && $jobstep->{stderr_at} < $last_squeue_check)
1358
1364
  {
1359
1365
  my $sincewhen = "";
1360
1366
  if ($jobstep->{stderr_at}) {
1361
1367
  $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
1362
1368
  }
1363
- Log($jobstep->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1369
+ Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1364
1370
  killem ($pid);
1365
1371
  }
1366
1372
  }
@@ -1395,12 +1401,12 @@ sub check_squeue
1395
1401
  }
1396
1402
 
1397
1403
  # Check for child procs >60s old and not mentioned by squeue.
1398
- while (my ($pid, $jobstep) = each %proc)
1404
+ while (my ($pid, $procinfo) = each %proc)
1399
1405
  {
1400
- if ($jobstep->{time} < time - 60
1401
- && $jobstep->{jobstepname}
1402
- && !exists $ok{$jobstep->{jobstepname}}
1403
- && !exists $jobstep->{killtime})
1406
+ if ($procinfo->{time} < time - 60
1407
+ && $procinfo->{jobstepname}
1408
+ && !exists $ok{$procinfo->{jobstepname}}
1409
+ && !exists $procinfo->{killtime})
1404
1410
  {
1405
1411
  # According to slurm, this task has ended (successfully or not)
1406
1412
  # -- but our srun child hasn't exited. First we must wait (30
@@ -1409,8 +1415,8 @@ sub check_squeue
1409
1415
  # terminated, we'll conclude some slurm communication
1410
1416
  # error/delay has caused the task to die without notifying srun,
1411
1417
  # and we'll kill srun ourselves.
1412
- $jobstep->{killtime} = time + 30;
1413
- Log($jobstep->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
1418
+ $procinfo->{killtime} = time + 30;
1419
+ Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
1414
1420
  }
1415
1421
  }
1416
1422
  }
@@ -1432,15 +1438,21 @@ sub readfrompipes
1432
1438
  foreach my $job (keys %reader)
1433
1439
  {
1434
1440
  my $buf;
1435
- while (0 < sysread ($reader{$job}, $buf, 8192))
1441
+ if (0 < sysread ($reader{$job}, $buf, 65536))
1436
1442
  {
1437
1443
  print STDERR $buf if $ENV{CRUNCH_DEBUG};
1438
1444
  $jobstep[$job]->{stderr_at} = time;
1439
1445
  $jobstep[$job]->{stderr} .= $buf;
1446
+
1447
+ # Consume everything up to the last \n
1440
1448
  preprocess_stderr ($job);
1449
+
1441
1450
  if (length ($jobstep[$job]->{stderr}) > 16384)
1442
1451
  {
1443
- substr ($jobstep[$job]->{stderr}, 0, 8192) = "";
1452
+ # If we get a lot of stderr without a newline, chop off the
1453
+ # front to avoid letting our buffer grow indefinitely.
1454
+ substr ($jobstep[$job]->{stderr},
1455
+ 0, length($jobstep[$job]->{stderr}) - 8192) = "";
1444
1456
  }
1445
1457
  $gotsome = 1;
1446
1458
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20160209221008
4
+ version: 0.1.20160210155133
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-09 00:00:00.000000000 Z
11
+ date: 2016-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit baeb7dbe5929012dea22985b11ae4c5584f76891
181
+ description: Arvados command line tools, git commit fdc9a9308c646d23ec50073833f141ceebf78613
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv