arvados-cli 0.1.20141006212502 → 0.1.20141007134429
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crunch-job +84 -38
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec72b4c6433914d92d8f712603c11bf014e0dcea
|
4
|
+
data.tar.gz: 5c38938f5a1123631f0b9da225b8e895756788df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a234f80b2daf38847e9a655a3ee040e0978e348dff65011c04e1765944376fdd7ad89c46bf7b905ebb07f9afaa85602e527e6d5d8657c79ba88a4e1586936aac
|
7
|
+
data.tar.gz: f0b12cbf890165ed7ef486d6af389f43fb3f55b6cf398b1b5d07abc6c65cd5719e0199780ed9299b53796a608d50f1d9a74d72f3aa4436a2d3bccb8ebd15e9fc
|
data/bin/crunch-job
CHANGED
@@ -141,22 +141,26 @@ $SIG{'USR2'} = sub
|
|
141
141
|
|
142
142
|
my $arv = Arvados->new('apiVersion' => 'v1');
|
143
143
|
|
144
|
-
my $User = $arv->{'users'}->{'current'}->execute;
|
145
|
-
|
146
144
|
my $Job;
|
147
145
|
my $job_id;
|
148
146
|
my $dbh;
|
149
147
|
my $sth;
|
148
|
+
my @jobstep;
|
149
|
+
|
150
|
+
my $User = retry_op(sub { $arv->{'users'}->{'current'}->execute; });
|
151
|
+
|
150
152
|
if ($jobspec =~ /^[-a-z\d]+$/)
|
151
153
|
{
|
152
154
|
# $jobspec is an Arvados UUID, not a JSON job specification
|
153
|
-
$Job =
|
155
|
+
$Job = retry_op(sub {
|
156
|
+
$arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
|
157
|
+
});
|
154
158
|
if (!$force_unlock) {
|
155
159
|
# Claim this job, and make sure nobody else does
|
156
|
-
eval {
|
160
|
+
eval { retry_op(sub {
|
157
161
|
# lock() sets is_locked_by_uuid and changes state to Running.
|
158
162
|
$arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
|
159
|
-
};
|
163
|
+
}); };
|
160
164
|
if ($@) {
|
161
165
|
Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
|
162
166
|
exit EX_TEMPFAIL;
|
@@ -177,7 +181,7 @@ else
|
|
177
181
|
$Job->{'started_at'} = gmtime;
|
178
182
|
$Job->{'state'} = 'Running';
|
179
183
|
|
180
|
-
$Job = $arv->{'jobs'}->{'create'}->execute('job' => $Job);
|
184
|
+
$Job = retry_op(sub { $arv->{'jobs'}->{'create'}->execute('job' => $Job); });
|
181
185
|
}
|
182
186
|
$job_id = $Job->{'uuid'};
|
183
187
|
|
@@ -290,7 +294,6 @@ $ENV{"CRUNCH_JOB_UUID"} = $job_id;
|
|
290
294
|
$ENV{"JOB_UUID"} = $job_id;
|
291
295
|
|
292
296
|
|
293
|
-
my @jobstep;
|
294
297
|
my @jobstep_todo = ();
|
295
298
|
my @jobstep_done = ();
|
296
299
|
my @jobstep_tomerge = ();
|
@@ -308,12 +311,14 @@ if (defined $Job->{thawedfromkey})
|
|
308
311
|
}
|
309
312
|
else
|
310
313
|
{
|
311
|
-
my $first_task =
|
312
|
-
'
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
314
|
+
my $first_task = retry_op(sub {
|
315
|
+
$arv->{'job_tasks'}->{'create'}->execute('job_task' => {
|
316
|
+
'job_uuid' => $Job->{'uuid'},
|
317
|
+
'sequence' => 0,
|
318
|
+
'qsequence' => 0,
|
319
|
+
'parameters' => {},
|
320
|
+
});
|
321
|
+
});
|
317
322
|
push @jobstep, { 'level' => 0,
|
318
323
|
'failures' => 0,
|
319
324
|
'arvados_task' => $first_task,
|
@@ -408,9 +413,10 @@ else {
|
|
408
413
|
} else {
|
409
414
|
# $repo is none of the above. It must be the name of a hosted
|
410
415
|
# repository.
|
411
|
-
my $arv_repo_list =
|
412
|
-
'
|
413
|
-
)->{'items'};
|
416
|
+
my $arv_repo_list = retry_op(sub {
|
417
|
+
$arv->{'repositories'}->{'list'}->execute(
|
418
|
+
'filters' => [['name','=',$repo]])->{'items'};
|
419
|
+
});
|
414
420
|
my $n_found = scalar @{$arv_repo_list};
|
415
421
|
if ($n_found > 0) {
|
416
422
|
Log(undef, "Repository '$repo' -> "
|
@@ -898,8 +904,9 @@ else {
|
|
898
904
|
while (my $manifest_line = <$orig_manifest>) {
|
899
905
|
$orig_manifest_text .= $manifest_line;
|
900
906
|
}
|
901
|
-
my $output =
|
902
|
-
'
|
907
|
+
my $output = retry_op(sub {
|
908
|
+
$arv->{'collections'}->{'create'}->execute(
|
909
|
+
'collection' => {'manifest_text' => $orig_manifest_text});
|
903
910
|
});
|
904
911
|
Log(undef, "output uuid " . $output->{uuid});
|
905
912
|
Log(undef, "output hash " . $output->{portable_data_hash});
|
@@ -1034,13 +1041,15 @@ sub reapchildren
|
|
1034
1041
|
my $newtask_list = [];
|
1035
1042
|
my $newtask_results;
|
1036
1043
|
do {
|
1037
|
-
$newtask_results =
|
1038
|
-
'
|
1039
|
-
'
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
+
$newtask_results = retry_op(sub {
|
1045
|
+
$arv->{'job_tasks'}->{'list'}->execute(
|
1046
|
+
'where' => {
|
1047
|
+
'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
|
1048
|
+
},
|
1049
|
+
'order' => 'qsequence',
|
1050
|
+
'offset' => scalar(@$newtask_list),
|
1051
|
+
);
|
1052
|
+
});
|
1044
1053
|
push(@$newtask_list, @{$newtask_results->{items}});
|
1045
1054
|
} while (@{$newtask_results->{items}});
|
1046
1055
|
foreach my $arvados_task (@$newtask_list) {
|
@@ -1063,7 +1072,9 @@ sub check_refresh_wanted
|
|
1063
1072
|
my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
|
1064
1073
|
if (@stat && $stat[9] > $latest_refresh) {
|
1065
1074
|
$latest_refresh = scalar time;
|
1066
|
-
my $Job2 =
|
1075
|
+
my $Job2 = retry_op(sub {
|
1076
|
+
$arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
|
1077
|
+
});
|
1067
1078
|
for my $attr ('cancelled_at',
|
1068
1079
|
'cancelled_by_user_uuid',
|
1069
1080
|
'cancelled_by_client_uuid',
|
@@ -1244,7 +1255,7 @@ sub collate_output
|
|
1244
1255
|
|
1245
1256
|
my ($child_out, $child_in);
|
1246
1257
|
my $pid = open2($child_out, $child_in, 'arv-put', '--raw',
|
1247
|
-
'--retries',
|
1258
|
+
'--retries', retry_count());
|
1248
1259
|
my $joboutput;
|
1249
1260
|
for (@jobstep)
|
1250
1261
|
{
|
@@ -1574,7 +1585,10 @@ sub find_docker_image {
|
|
1574
1585
|
# If not, return undef for both values.
|
1575
1586
|
my $locator = shift;
|
1576
1587
|
my ($streamname, $filename);
|
1577
|
-
|
1588
|
+
my $image = retry_op(sub {
|
1589
|
+
$arv->{collections}->{get}->execute(uuid => $locator);
|
1590
|
+
});
|
1591
|
+
if ($image) {
|
1578
1592
|
foreach my $line (split(/\n/, $image->{manifest_text})) {
|
1579
1593
|
my @tokens = split(/\s+/, $line);
|
1580
1594
|
next if (!@tokens);
|
@@ -1595,20 +1609,52 @@ sub find_docker_image {
|
|
1595
1609
|
}
|
1596
1610
|
}
|
1597
1611
|
|
1598
|
-
sub
|
1599
|
-
# Calculate
|
1600
|
-
#
|
1601
|
-
|
1602
|
-
my $starttime
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1612
|
+
sub retry_count {
|
1613
|
+
# Calculate the number of times an operation should be retried,
|
1614
|
+
# assuming exponential backoff, and that we're willing to retry as
|
1615
|
+
# long as tasks have been running. Enforce a minimum of 3 retries.
|
1616
|
+
my ($starttime, $endtime, $timediff, $retries);
|
1617
|
+
if (@jobstep) {
|
1618
|
+
$starttime = $jobstep[0]->{starttime};
|
1619
|
+
$endtime = $jobstep[-1]->{finishtime};
|
1620
|
+
}
|
1621
|
+
if (!defined($starttime)) {
|
1622
|
+
$timediff = 0;
|
1623
|
+
} elsif (!defined($endtime)) {
|
1624
|
+
$timediff = time - $starttime;
|
1625
|
+
} else {
|
1626
|
+
$timediff = ($endtime - $starttime) - (time - $endtime);
|
1627
|
+
}
|
1628
|
+
if ($timediff > 0) {
|
1629
|
+
$retries = int(log($timediff) / log(2));
|
1630
|
+
} else {
|
1631
|
+
$retries = 1; # Use the minimum.
|
1608
1632
|
}
|
1609
1633
|
return ($retries > 3) ? $retries : 3;
|
1610
1634
|
}
|
1611
1635
|
|
1636
|
+
sub retry_op {
|
1637
|
+
# Given a function reference, call it with the remaining arguments. If
|
1638
|
+
# it dies, retry it with exponential backoff until it succeeds, or until
|
1639
|
+
# the current retry_count is exhausted.
|
1640
|
+
my $operation = shift;
|
1641
|
+
my $retries = retry_count();
|
1642
|
+
foreach my $try_count (0..$retries) {
|
1643
|
+
my $next_try = time + (2 ** $try_count);
|
1644
|
+
my $result = eval { $operation->(@_); };
|
1645
|
+
if (!$@) {
|
1646
|
+
return $result;
|
1647
|
+
} elsif ($try_count < $retries) {
|
1648
|
+
my $sleep_time = $next_try - time;
|
1649
|
+
sleep($sleep_time) if ($sleep_time > 0);
|
1650
|
+
}
|
1651
|
+
}
|
1652
|
+
# Ensure the error message ends in a newline, so Perl doesn't add
|
1653
|
+
# retry_op's line number to it.
|
1654
|
+
chomp($@);
|
1655
|
+
die($@ . "\n");
|
1656
|
+
}
|
1657
|
+
|
1612
1658
|
sub exit_status_s {
|
1613
1659
|
# Given a $?, return a human-readable exit code string like "0" or
|
1614
1660
|
# "1" or "0 with signal 1" or "1 with signal 11".
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.20141007134429
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit 344c6dcdbae76310879c85a736e4e6cce05d5645
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|