arvados-cli 0.1.20160209221008 → 0.1.20160210155133

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +27 -15
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b325e2e44f016973f3030414417cc1b94ba93f87
4
- data.tar.gz: 70eeaa65eb2b2b71129d9f603c41da4f2e66924f
3
+ metadata.gz: 659e88b672cd2316f53bb06df90fb1ef551bb05a
4
+ data.tar.gz: e0754ab14a47f327227c9f03f704aa4d611f7a00
5
5
  SHA512:
6
- metadata.gz: 8112dfe3eee6b53bf8658a5fbe473171471ffaec0bd46b233260bef6497ac4f82b6dc52b016591056df5b399cd6f25ebeb9713fabfc3c951830981e20d772f4c
7
- data.tar.gz: 35937645f003af1f93f6169a70021e350e4e29fc9feef61859ba4d9841e59847a91a30dd7a6cb82d0962dfd316a1b5e3dc8586c5908a8992b708280c46aa7338
6
+ metadata.gz: e01f712acdaec33a6cd4f5f368ec0146299a5e436915d88ced3267c86b6743977e8c495513bb5ea781e74e711e92c9f6be96393bad73271684beec1daeceb6bd
7
+ data.tar.gz: 95f40cb2432c76eb5fd6c9bdaf989e88d6ee1dc525a71f05e09c031cc1d3935e375933eac0a4d200c6ab8be692aee38e2991482ef4d5a5f3760cf9fb1bb560ef
data/bin/crunch-job CHANGED
@@ -415,11 +415,13 @@ if (!defined $no_clear_tmp) {
415
415
  # If this job requires a Docker image, install that.
416
416
  my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
417
417
  if ($docker_locator = $Job->{docker_image_locator}) {
418
+ Log (undef, "Install docker image $docker_locator");
418
419
  ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
419
420
  if (!$docker_hash)
420
421
  {
421
422
  croak("No Docker image hash found from locator $docker_locator");
422
423
  }
424
+ Log (undef, "docker image hash is $docker_hash");
423
425
  $docker_stream =~ s/^\.//;
424
426
  my $docker_install_script = qq{
425
427
  if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
@@ -1057,12 +1059,14 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
1057
1059
  check_refresh_wanted();
1058
1060
  check_squeue();
1059
1061
  update_progress_stats();
1060
- select (undef, undef, undef, 0.1);
1061
1062
  }
1062
1063
  elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
1063
1064
  {
1064
1065
  update_progress_stats();
1065
1066
  }
1067
+ if (!$gotsome) {
1068
+ select (undef, undef, undef, 0.1);
1069
+ }
1066
1070
  $working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
1067
1071
  $_->{node}->{hold_count} < 4 } @slot);
1068
1072
  if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
@@ -1340,8 +1344,9 @@ sub check_squeue
1340
1344
  # squeue check interval (15s) this should make the squeue check an
1341
1345
  # infrequent event.
1342
1346
  my $silent_procs = 0;
1343
- for my $jobstep (values %proc)
1347
+ for my $procinfo (values %proc)
1344
1348
  {
1349
+ my $jobstep = $jobstep[$procinfo->{jobstep}];
1345
1350
  if ($jobstep->{stderr_at} < $last_squeue_check)
1346
1351
  {
1347
1352
  $silent_procs++;
@@ -1350,17 +1355,18 @@ sub check_squeue
1350
1355
  return if $silent_procs == 0;
1351
1356
 
1352
1357
  # use killem() on procs whose killtime is reached
1353
- while (my ($pid, $jobstep) = each %proc)
1358
+ while (my ($pid, $procinfo) = each %proc)
1354
1359
  {
1355
- if (exists $jobstep->{killtime}
1356
- && $jobstep->{killtime} <= time
1360
+ my $jobstep = $jobstep[$procinfo->{jobstep}];
1361
+ if (exists $procinfo->{killtime}
1362
+ && $procinfo->{killtime} <= time
1357
1363
  && $jobstep->{stderr_at} < $last_squeue_check)
1358
1364
  {
1359
1365
  my $sincewhen = "";
1360
1366
  if ($jobstep->{stderr_at}) {
1361
1367
  $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
1362
1368
  }
1363
- Log($jobstep->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1369
+ Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
1364
1370
  killem ($pid);
1365
1371
  }
1366
1372
  }
@@ -1395,12 +1401,12 @@ sub check_squeue
1395
1401
  }
1396
1402
 
1397
1403
  # Check for child procs >60s old and not mentioned by squeue.
1398
- while (my ($pid, $jobstep) = each %proc)
1404
+ while (my ($pid, $procinfo) = each %proc)
1399
1405
  {
1400
- if ($jobstep->{time} < time - 60
1401
- && $jobstep->{jobstepname}
1402
- && !exists $ok{$jobstep->{jobstepname}}
1403
- && !exists $jobstep->{killtime})
1406
+ if ($procinfo->{time} < time - 60
1407
+ && $procinfo->{jobstepname}
1408
+ && !exists $ok{$procinfo->{jobstepname}}
1409
+ && !exists $procinfo->{killtime})
1404
1410
  {
1405
1411
  # According to slurm, this task has ended (successfully or not)
1406
1412
  # -- but our srun child hasn't exited. First we must wait (30
@@ -1409,8 +1415,8 @@ sub check_squeue
1409
1415
  # terminated, we'll conclude some slurm communication
1410
1416
  # error/delay has caused the task to die without notifying srun,
1411
1417
  # and we'll kill srun ourselves.
1412
- $jobstep->{killtime} = time + 30;
1413
- Log($jobstep->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
1418
+ $procinfo->{killtime} = time + 30;
1419
+ Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
1414
1420
  }
1415
1421
  }
1416
1422
  }
@@ -1432,15 +1438,21 @@ sub readfrompipes
1432
1438
  foreach my $job (keys %reader)
1433
1439
  {
1434
1440
  my $buf;
1435
- while (0 < sysread ($reader{$job}, $buf, 8192))
1441
+ if (0 < sysread ($reader{$job}, $buf, 65536))
1436
1442
  {
1437
1443
  print STDERR $buf if $ENV{CRUNCH_DEBUG};
1438
1444
  $jobstep[$job]->{stderr_at} = time;
1439
1445
  $jobstep[$job]->{stderr} .= $buf;
1446
+
1447
+ # Consume everything up to the last \n
1440
1448
  preprocess_stderr ($job);
1449
+
1441
1450
  if (length ($jobstep[$job]->{stderr}) > 16384)
1442
1451
  {
1443
- substr ($jobstep[$job]->{stderr}, 0, 8192) = "";
1452
+ # If we get a lot of stderr without a newline, chop off the
1453
+ # front to avoid letting our buffer grow indefinitely.
1454
+ substr ($jobstep[$job]->{stderr},
1455
+ 0, length($jobstep[$job]->{stderr}) - 8192) = "";
1444
1456
  }
1445
1457
  $gotsome = 1;
1446
1458
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20160209221008
4
+ version: 0.1.20160210155133
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-09 00:00:00.000000000 Z
11
+ date: 2016-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit baeb7dbe5929012dea22985b11ae4c5584f76891
181
+ description: Arvados command line tools, git commit fdc9a9308c646d23ec50073833f141ceebf78613
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv