arvados-cli 0.1.20141006212502 → 0.1.20141007134429

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +84 -38
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b84e06d6def466e49e5c53c836efe17babcc9267
4
- data.tar.gz: 957bec18268b60540dd207a9b53d3b1bb45164de
3
+ metadata.gz: ec72b4c6433914d92d8f712603c11bf014e0dcea
4
+ data.tar.gz: 5c38938f5a1123631f0b9da225b8e895756788df
5
5
  SHA512:
6
- metadata.gz: a6dd627d31352cdb3a0958516090466fc0bcf1e5b2d85f66189023642a7258f89d25dce7b90d4b53223b5bf3176e716bbe2082a7ecc6adafa9edfc4f6ce22bcd
7
- data.tar.gz: 9a2321cb216a5c8bd596137a78e4c846cee688eacde21322486a81986fd46c799c0f765816f8effbb2467296cfefd0d7b397a5e408ae6e035df379ea75fea54c
6
+ metadata.gz: a234f80b2daf38847e9a655a3ee040e0978e348dff65011c04e1765944376fdd7ad89c46bf7b905ebb07f9afaa85602e527e6d5d8657c79ba88a4e1586936aac
7
+ data.tar.gz: f0b12cbf890165ed7ef486d6af389f43fb3f55b6cf398b1b5d07abc6c65cd5719e0199780ed9299b53796a608d50f1d9a74d72f3aa4436a2d3bccb8ebd15e9fc
data/bin/crunch-job CHANGED
@@ -141,22 +141,26 @@ $SIG{'USR2'} = sub
141
141
 
142
142
  my $arv = Arvados->new('apiVersion' => 'v1');
143
143
 
144
- my $User = $arv->{'users'}->{'current'}->execute;
145
-
146
144
  my $Job;
147
145
  my $job_id;
148
146
  my $dbh;
149
147
  my $sth;
148
+ my @jobstep;
149
+
150
+ my $User = retry_op(sub { $arv->{'users'}->{'current'}->execute; });
151
+
150
152
  if ($jobspec =~ /^[-a-z\d]+$/)
151
153
  {
152
154
  # $jobspec is an Arvados UUID, not a JSON job specification
153
- $Job = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
155
+ $Job = retry_op(sub {
156
+ $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
157
+ });
154
158
  if (!$force_unlock) {
155
159
  # Claim this job, and make sure nobody else does
156
- eval {
160
+ eval { retry_op(sub {
157
161
  # lock() sets is_locked_by_uuid and changes state to Running.
158
162
  $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
159
- };
163
+ }); };
160
164
  if ($@) {
161
165
  Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
162
166
  exit EX_TEMPFAIL;
@@ -177,7 +181,7 @@ else
177
181
  $Job->{'started_at'} = gmtime;
178
182
  $Job->{'state'} = 'Running';
179
183
 
180
- $Job = $arv->{'jobs'}->{'create'}->execute('job' => $Job);
184
+ $Job = retry_op(sub { $arv->{'jobs'}->{'create'}->execute('job' => $Job); });
181
185
  }
182
186
  $job_id = $Job->{'uuid'};
183
187
 
@@ -290,7 +294,6 @@ $ENV{"CRUNCH_JOB_UUID"} = $job_id;
290
294
  $ENV{"JOB_UUID"} = $job_id;
291
295
 
292
296
 
293
- my @jobstep;
294
297
  my @jobstep_todo = ();
295
298
  my @jobstep_done = ();
296
299
  my @jobstep_tomerge = ();
@@ -308,12 +311,14 @@ if (defined $Job->{thawedfromkey})
308
311
  }
309
312
  else
310
313
  {
311
- my $first_task = $arv->{'job_tasks'}->{'create'}->execute('job_task' => {
312
- 'job_uuid' => $Job->{'uuid'},
313
- 'sequence' => 0,
314
- 'qsequence' => 0,
315
- 'parameters' => {},
316
- });
314
+ my $first_task = retry_op(sub {
315
+ $arv->{'job_tasks'}->{'create'}->execute('job_task' => {
316
+ 'job_uuid' => $Job->{'uuid'},
317
+ 'sequence' => 0,
318
+ 'qsequence' => 0,
319
+ 'parameters' => {},
320
+ });
321
+ });
317
322
  push @jobstep, { 'level' => 0,
318
323
  'failures' => 0,
319
324
  'arvados_task' => $first_task,
@@ -408,9 +413,10 @@ else {
408
413
  } else {
409
414
  # $repo is none of the above. It must be the name of a hosted
410
415
  # repository.
411
- my $arv_repo_list = $arv->{'repositories'}->{'list'}->execute(
412
- 'filters' => [['name','=',$repo]]
413
- )->{'items'};
416
+ my $arv_repo_list = retry_op(sub {
417
+ $arv->{'repositories'}->{'list'}->execute(
418
+ 'filters' => [['name','=',$repo]])->{'items'};
419
+ });
414
420
  my $n_found = scalar @{$arv_repo_list};
415
421
  if ($n_found > 0) {
416
422
  Log(undef, "Repository '$repo' -> "
@@ -898,8 +904,9 @@ else {
898
904
  while (my $manifest_line = <$orig_manifest>) {
899
905
  $orig_manifest_text .= $manifest_line;
900
906
  }
901
- my $output = $arv->{'collections'}->{'create'}->execute('collection' => {
902
- 'manifest_text' => $orig_manifest_text,
907
+ my $output = retry_op(sub {
908
+ $arv->{'collections'}->{'create'}->execute(
909
+ 'collection' => {'manifest_text' => $orig_manifest_text});
903
910
  });
904
911
  Log(undef, "output uuid " . $output->{uuid});
905
912
  Log(undef, "output hash " . $output->{portable_data_hash});
@@ -1034,13 +1041,15 @@ sub reapchildren
1034
1041
  my $newtask_list = [];
1035
1042
  my $newtask_results;
1036
1043
  do {
1037
- $newtask_results = $arv->{'job_tasks'}->{'list'}->execute(
1038
- 'where' => {
1039
- 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
1040
- },
1041
- 'order' => 'qsequence',
1042
- 'offset' => scalar(@$newtask_list),
1043
- );
1044
+ $newtask_results = retry_op(sub {
1045
+ $arv->{'job_tasks'}->{'list'}->execute(
1046
+ 'where' => {
1047
+ 'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
1048
+ },
1049
+ 'order' => 'qsequence',
1050
+ 'offset' => scalar(@$newtask_list),
1051
+ );
1052
+ });
1044
1053
  push(@$newtask_list, @{$newtask_results->{items}});
1045
1054
  } while (@{$newtask_results->{items}});
1046
1055
  foreach my $arvados_task (@$newtask_list) {
@@ -1063,7 +1072,9 @@ sub check_refresh_wanted
1063
1072
  my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
1064
1073
  if (@stat && $stat[9] > $latest_refresh) {
1065
1074
  $latest_refresh = scalar time;
1066
- my $Job2 = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
1075
+ my $Job2 = retry_op(sub {
1076
+ $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
1077
+ });
1067
1078
  for my $attr ('cancelled_at',
1068
1079
  'cancelled_by_user_uuid',
1069
1080
  'cancelled_by_client_uuid',
@@ -1244,7 +1255,7 @@ sub collate_output
1244
1255
 
1245
1256
  my ($child_out, $child_in);
1246
1257
  my $pid = open2($child_out, $child_in, 'arv-put', '--raw',
1247
- '--retries', put_retry_count());
1258
+ '--retries', retry_count());
1248
1259
  my $joboutput;
1249
1260
  for (@jobstep)
1250
1261
  {
@@ -1574,7 +1585,10 @@ sub find_docker_image {
1574
1585
  # If not, return undef for both values.
1575
1586
  my $locator = shift;
1576
1587
  my ($streamname, $filename);
1577
- if (my $image = $arv->{collections}->{get}->execute(uuid => $locator)) {
1588
+ my $image = retry_op(sub {
1589
+ $arv->{collections}->{get}->execute(uuid => $locator);
1590
+ });
1591
+ if ($image) {
1578
1592
  foreach my $line (split(/\n/, $image->{manifest_text})) {
1579
1593
  my @tokens = split(/\s+/, $line);
1580
1594
  next if (!@tokens);
@@ -1595,20 +1609,52 @@ sub find_docker_image {
1595
1609
  }
1596
1610
  }
1597
1611
 
1598
- sub put_retry_count {
1599
- # Calculate a --retries argument for arv-put that will have it try
1600
- # approximately as long as this Job has been running.
1601
- my $stoptime = shift || time;
1602
- my $starttime = $jobstep[0]->{starttime};
1603
- my $timediff = defined($starttime) ? ($stoptime - $starttime) : 1;
1604
- my $retries = 0;
1605
- while ($timediff >= 2) {
1606
- $retries++;
1607
- $timediff /= 2;
1612
+ sub retry_count {
1613
+ # Calculate the number of times an operation should be retried,
1614
+ # assuming exponential backoff, and that we're willing to retry as
1615
+ # long as tasks have been running. Enforce a minimum of 3 retries.
1616
+ my ($starttime, $endtime, $timediff, $retries);
1617
+ if (@jobstep) {
1618
+ $starttime = $jobstep[0]->{starttime};
1619
+ $endtime = $jobstep[-1]->{finishtime};
1620
+ }
1621
+ if (!defined($starttime)) {
1622
+ $timediff = 0;
1623
+ } elsif (!defined($endtime)) {
1624
+ $timediff = time - $starttime;
1625
+ } else {
1626
+ $timediff = ($endtime - $starttime) - (time - $endtime);
1627
+ }
1628
+ if ($timediff > 0) {
1629
+ $retries = int(log($timediff) / log(2));
1630
+ } else {
1631
+ $retries = 1; # Use the minimum.
1608
1632
  }
1609
1633
  return ($retries > 3) ? $retries : 3;
1610
1634
  }
1611
1635
 
1636
+ sub retry_op {
1637
+ # Given a function reference, call it with the remaining arguments. If
1638
+ # it dies, retry it with exponential backoff until it succeeds, or until
1639
+ # the current retry_count is exhausted.
1640
+ my $operation = shift;
1641
+ my $retries = retry_count();
1642
+ foreach my $try_count (0..$retries) {
1643
+ my $next_try = time + (2 ** $try_count);
1644
+ my $result = eval { $operation->(@_); };
1645
+ if (!$@) {
1646
+ return $result;
1647
+ } elsif ($try_count < $retries) {
1648
+ my $sleep_time = $next_try - time;
1649
+ sleep($sleep_time) if ($sleep_time > 0);
1650
+ }
1651
+ }
1652
+ # Ensure the error message ends in a newline, so Perl doesn't add
1653
+ # retry_op's line number to it.
1654
+ chomp($@);
1655
+ die($@ . "\n");
1656
+ }
1657
+
1612
1658
  sub exit_status_s {
1613
1659
  # Given a $?, return a human-readable exit code string like "0" or
1614
1660
  # "1" or "0 with signal 1" or "1 with signal 11".
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20141006212502
4
+ version: 0.1.20141007134429
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-06 00:00:00.000000000 Z
11
+ date: 2014-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 38cc5c0a51657c6b60f3d3f32c566845988dfb6b
181
+ description: Arvados command line tools, git commit 344c6dcdbae76310879c85a736e4e6cce05d5645
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv