arvados-cli 0.1.20131115001557 → 0.1.20131210143944
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/arv +19 -6
- data/bin/arv-run-pipeline-instance +32 -13
- data/bin/crunch-job +104 -74
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 42b56676c867f59f143ef0e229779bd4c67258de
|
4
|
+
data.tar.gz: 6d52906f24e1289e8e74ff2df1ea5db0d2c995ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54b722d0fe53e42a15a62e6877e058f226398cb082810d35553573e44262bb98c2a45ade10399a5a4936853d98a8c77eb6601b793125e33d469ac2babcd0a6dd
|
7
|
+
data.tar.gz: e84c5816676a6f1db200b650b1e67e17af2dada90595c7b005e75136709142a656279bf575ee3515a82499dcebc93bff67902effe50f3fa8183467dc551bb07d
|
data/bin/arv
CHANGED
@@ -10,10 +10,15 @@ if RUBY_VERSION < '1.9.3' then
|
|
10
10
|
EOS
|
11
11
|
end
|
12
12
|
|
13
|
-
|
13
|
+
case ARGV[0]
|
14
|
+
when 'keep'
|
14
15
|
ARGV.shift
|
15
16
|
@sub = ARGV.shift
|
16
|
-
if ['
|
17
|
+
if ['get', 'put'].index @sub then
|
18
|
+
# Native Arvados
|
19
|
+
exec `which arv-#{@sub}`.strip, *ARGV
|
20
|
+
elsif ['ls', 'less', 'check'].index @sub then
|
21
|
+
# wh* shims
|
17
22
|
exec `which wh#{@sub}`.strip, *ARGV
|
18
23
|
else
|
19
24
|
puts "Usage: \n" +
|
@@ -24,9 +29,7 @@ if ARGV[0] == 'keep'
|
|
24
29
|
"#{$0} keep check\n"
|
25
30
|
end
|
26
31
|
abort
|
27
|
-
|
28
|
-
|
29
|
-
if ARGV[0] == 'pipeline'
|
32
|
+
when 'pipeline'
|
30
33
|
ARGV.shift
|
31
34
|
@sub = ARGV.shift
|
32
35
|
if ['run'].index @sub then
|
@@ -100,7 +103,17 @@ class Google::APIClient
|
|
100
103
|
end
|
101
104
|
end
|
102
105
|
|
103
|
-
|
106
|
+
class ArvadosClient < Google::APIClient
|
107
|
+
def execute(*args)
|
108
|
+
if args.last.is_a? Hash
|
109
|
+
args.last[:headers] ||= {}
|
110
|
+
args.last[:headers]['Accept'] ||= 'application/json'
|
111
|
+
end
|
112
|
+
super(*args)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
client = ArvadosClient.new(:host => ENV['ARVADOS_API_HOST'], :application_name => 'arvados-cli', :application_version => '1.0')
|
104
117
|
arvados = client.discovered_api('arvados', ENV['ARVADOS_API_VERSION'])
|
105
118
|
|
106
119
|
def to_boolean(s)
|
@@ -12,6 +12,9 @@
|
|
12
12
|
#
|
13
13
|
# [--template uuid] Use the specified pipeline template.
|
14
14
|
#
|
15
|
+
# [--template path] Load the pipeline template from the specified
|
16
|
+
# local file.
|
17
|
+
#
|
15
18
|
# [--instance uuid] Use the specified pipeline instance.
|
16
19
|
#
|
17
20
|
# [-n, --dry-run] Do not start any new jobs or wait for existing jobs
|
@@ -162,7 +165,7 @@ p = Trollop::Parser.new do
|
|
162
165
|
:short => :none,
|
163
166
|
:type => :integer)
|
164
167
|
opt(:template,
|
165
|
-
"UUID of pipeline template.",
|
168
|
+
"UUID of pipeline template, or path to local pipeline template file.",
|
166
169
|
:short => :none,
|
167
170
|
:type => :string)
|
168
171
|
opt(:instance,
|
@@ -314,16 +317,25 @@ class WhRunPipelineInstance
|
|
314
317
|
@options = _options
|
315
318
|
end
|
316
319
|
|
317
|
-
def fetch_template(
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
320
|
+
def fetch_template(template)
|
321
|
+
if template.match /[^-0-9a-z]/
|
322
|
+
# Doesn't look like a uuid -- use it as a filename.
|
323
|
+
@template = JSON.parse File.read(template), :symbolize_names => true
|
324
|
+
if !@template[:components]
|
325
|
+
abort ("#{$0}: Template loaded from #{template} " +
|
326
|
+
"does not have a \"components\" key")
|
327
|
+
end
|
328
|
+
else
|
329
|
+
result = $client.execute(:api_method => $arvados.pipeline_templates.get,
|
330
|
+
:parameters => {
|
331
|
+
:api_token => ENV['ARVADOS_API_TOKEN'],
|
332
|
+
:uuid => template
|
333
|
+
},
|
334
|
+
:authenticated => false)
|
335
|
+
@template = JSON.parse result.body, :symbolize_names => true
|
336
|
+
if !@template[:uuid]
|
337
|
+
abort "#{$0}: fatal: failed to retrieve pipeline template #{template} #{@template[:errors].inspect rescue nil}"
|
338
|
+
end
|
327
339
|
end
|
328
340
|
self
|
329
341
|
end
|
@@ -407,7 +419,7 @@ class WhRunPipelineInstance
|
|
407
419
|
(@options[:no_reuse] ? [] : JobCache.
|
408
420
|
where(script: c[:script],
|
409
421
|
script_parameters: c[:script_parameters],
|
410
|
-
script_version_descends_from: c[:
|
422
|
+
script_version_descends_from: c[:script_version])
|
411
423
|
).each do |candidate_job|
|
412
424
|
candidate_params_downcase = Hash[candidate_job[:script_parameters].
|
413
425
|
map { |k,v| [k.downcase,v] }]
|
@@ -420,6 +432,12 @@ class WhRunPipelineInstance
|
|
420
432
|
next
|
421
433
|
end
|
422
434
|
|
435
|
+
if c[:script_version] !=
|
436
|
+
candidate_job[:script_version][0,c[:script_version].length]
|
437
|
+
debuglog "component #{cname} would be satisfied by job #{candidate_job[:uuid]} if script_version matched.", 2
|
438
|
+
next
|
439
|
+
end
|
440
|
+
|
423
441
|
unless candidate_job[:success] || candidate_job[:running] ||
|
424
442
|
(!candidate_job[:started_at] && !candidate_job[:cancelled_at])
|
425
443
|
debuglog "component #{cname} would be satisfied by job #{candidate_job[:uuid]} if it were running or successful.", 2
|
@@ -462,7 +480,8 @@ class WhRunPipelineInstance
|
|
462
480
|
c[:wait] = true
|
463
481
|
end
|
464
482
|
if c[:job] and c[:job][:uuid]
|
465
|
-
if
|
483
|
+
if (c[:job][:running] or
|
484
|
+
not (c[:job][:finished_at] or c[:job][:cancelled_at]))
|
466
485
|
c[:job] = JobCache.get(c[:job][:uuid])
|
467
486
|
end
|
468
487
|
if c[:job][:success]
|
data/bin/crunch-job
CHANGED
@@ -58,7 +58,8 @@ Save a checkpoint and continue.
|
|
58
58
|
=item SIGHUP
|
59
59
|
|
60
60
|
Refresh node allocation (i.e., check whether any nodes have been added
|
61
|
-
or unallocated)
|
61
|
+
or unallocated) and attributes of the Job record that should affect
|
62
|
+
behavior (e.g., cancel job if cancelled_at becomes non-nil).
|
62
63
|
|
63
64
|
=back
|
64
65
|
|
@@ -107,10 +108,6 @@ my $job_has_uuid = $jobspec =~ /^[-a-z\d]+$/;
|
|
107
108
|
my $local_job = !$job_has_uuid;
|
108
109
|
|
109
110
|
|
110
|
-
$SIG{'HUP'} = sub
|
111
|
-
{
|
112
|
-
1;
|
113
|
-
};
|
114
111
|
$SIG{'USR1'} = sub
|
115
112
|
{
|
116
113
|
$main::ENV{CRUNCH_DEBUG} = 1;
|
@@ -257,20 +254,17 @@ my $jobmanager_id;
|
|
257
254
|
if ($job_has_uuid)
|
258
255
|
{
|
259
256
|
# Claim this job, and make sure nobody else does
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
$Job->{'running'} = 1;
|
264
|
-
$Job->{'success'} = undef;
|
265
|
-
$Job->{'tasks_summary'} = { 'failed' => 0,
|
266
|
-
'todo' => 1,
|
267
|
-
'running' => 0,
|
268
|
-
'done' => 0 };
|
269
|
-
if ($job_has_uuid) {
|
270
|
-
unless ($Job->save() && $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
|
271
|
-
croak("Error while updating / locking job");
|
272
|
-
}
|
257
|
+
unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
|
258
|
+
$Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
|
259
|
+
croak("Error while updating / locking job");
|
273
260
|
}
|
261
|
+
$Job->update_attributes('started_at' => scalar gmtime,
|
262
|
+
'running' => 1,
|
263
|
+
'success' => undef,
|
264
|
+
'tasks_summary' => { 'failed' => 0,
|
265
|
+
'todo' => 1,
|
266
|
+
'running' => 0,
|
267
|
+
'done' => 0 });
|
274
268
|
}
|
275
269
|
|
276
270
|
|
@@ -281,9 +275,12 @@ $SIG{'TERM'} = \&croak;
|
|
281
275
|
$SIG{'TSTP'} = sub { $main::please_freeze = 1; };
|
282
276
|
$SIG{'ALRM'} = sub { $main::please_info = 1; };
|
283
277
|
$SIG{'CONT'} = sub { $main::please_continue = 1; };
|
278
|
+
$SIG{'HUP'} = sub { $main::please_refresh = 1; };
|
279
|
+
|
284
280
|
$main::please_freeze = 0;
|
285
281
|
$main::please_info = 0;
|
286
282
|
$main::please_continue = 0;
|
283
|
+
$main::please_refresh = 0;
|
287
284
|
my $jobsteps_must_output_keys = 0; # becomes 1 when any task outputs a key
|
288
285
|
|
289
286
|
grep { $ENV{$1} = $2 if /^(NOCACHE.*?)=(.*)/ } split ("\n", $$Job{knobs});
|
@@ -299,6 +296,7 @@ my $jobstep_tomerge_level = 0;
|
|
299
296
|
my $squeue_checked;
|
300
297
|
my $squeue_kill_checked;
|
301
298
|
my $output_in_keep = 0;
|
299
|
+
my $latest_refresh = scalar time;
|
302
300
|
|
303
301
|
|
304
302
|
|
@@ -315,7 +313,7 @@ else
|
|
315
313
|
'parameters' => {},
|
316
314
|
});
|
317
315
|
push @jobstep, { 'level' => 0,
|
318
|
-
'
|
316
|
+
'failures' => 0,
|
319
317
|
'arvados_task' => $first_task,
|
320
318
|
};
|
321
319
|
push @jobstep_todo, 0;
|
@@ -421,7 +419,9 @@ else
|
|
421
419
|
Log (undef, "Using commit $commit for tree-ish $treeish");
|
422
420
|
if ($commit ne $treeish) {
|
423
421
|
$Job->{'script_version'} = $commit;
|
424
|
-
!$job_has_uuid or
|
422
|
+
!$job_has_uuid or
|
423
|
+
$Job->update_attributes('script_version' => $commit) or
|
424
|
+
croak("Error while updating job");
|
425
425
|
}
|
426
426
|
}
|
427
427
|
}
|
@@ -467,7 +467,7 @@ foreach (split (/\n/, $Job->{knobs}))
|
|
467
467
|
|
468
468
|
|
469
469
|
|
470
|
-
|
470
|
+
$main::success = undef;
|
471
471
|
|
472
472
|
|
473
473
|
|
@@ -504,12 +504,6 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
504
504
|
{
|
505
505
|
next;
|
506
506
|
}
|
507
|
-
if ($Jobstep->{attempts} > 9)
|
508
|
-
{
|
509
|
-
Log ($id, "jobstep $id failed $$Jobstep{attempts} times -- giving up");
|
510
|
-
$success = 0;
|
511
|
-
last THISROUND;
|
512
|
-
}
|
513
507
|
|
514
508
|
pipe $reader{$id}, "writer" or croak ($!);
|
515
509
|
my $flags = fcntl ($reader{$id}, F_GETFL, 0) or croak ($!);
|
@@ -564,7 +558,8 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
564
558
|
my @execargs = qw(sh);
|
565
559
|
my $build_script_to_send = "";
|
566
560
|
my $command =
|
567
|
-
"
|
561
|
+
"if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
|
562
|
+
."mkdir -p $ENV{JOB_WORK} $ENV{CRUNCH_TMP} $ENV{TASK_WORK} "
|
568
563
|
."&& cd $ENV{CRUNCH_TMP} ";
|
569
564
|
if ($build_script)
|
570
565
|
{
|
@@ -579,7 +574,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
579
574
|
"&& exec $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
|
580
575
|
my @execargs = ('bash', '-c', $command);
|
581
576
|
srun (\@srunargs, \@execargs, undef, $build_script_to_send);
|
582
|
-
exit (
|
577
|
+
exit (111);
|
583
578
|
}
|
584
579
|
close("writer");
|
585
580
|
if (!defined $childpid)
|
@@ -599,7 +594,6 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
599
594
|
|
600
595
|
Log ($id, "job_task ".$Jobstep->{'arvados_task'}->{'uuid'});
|
601
596
|
Log ($id, "child $childpid started on $childslotname");
|
602
|
-
$Jobstep->{attempts} ++;
|
603
597
|
$Jobstep->{starttime} = time;
|
604
598
|
$Jobstep->{node} = $childnode->{name};
|
605
599
|
$Jobstep->{slotindex} = $childslot;
|
@@ -629,6 +623,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
|
|
629
623
|
+ reapchildren ();
|
630
624
|
if (!$gotsome)
|
631
625
|
{
|
626
|
+
check_refresh_wanted();
|
632
627
|
check_squeue();
|
633
628
|
update_progress_stats();
|
634
629
|
select (undef, undef, undef, 0.1);
|
@@ -685,6 +680,7 @@ while (%proc)
|
|
685
680
|
readfrompipes ();
|
686
681
|
if (!reapchildren())
|
687
682
|
{
|
683
|
+
check_refresh_wanted();
|
688
684
|
check_squeue();
|
689
685
|
update_progress_stats();
|
690
686
|
select (undef, undef, undef, 0.1);
|
@@ -696,7 +692,7 @@ update_progress_stats();
|
|
696
692
|
freeze_if_want_freeze();
|
697
693
|
|
698
694
|
|
699
|
-
if (!defined $success)
|
695
|
+
if (!defined $main::success)
|
700
696
|
{
|
701
697
|
if (@jobstep_todo &&
|
702
698
|
$thisround_succeeded == 0 &&
|
@@ -704,25 +700,25 @@ if (!defined $success)
|
|
704
700
|
{
|
705
701
|
my $message = "stop because $thisround_failed tasks failed and none succeeded";
|
706
702
|
Log (undef, $message);
|
707
|
-
$success = 0;
|
703
|
+
$main::success = 0;
|
708
704
|
}
|
709
705
|
if (!@jobstep_todo)
|
710
706
|
{
|
711
|
-
$success = 1;
|
707
|
+
$main::success = 1;
|
712
708
|
}
|
713
709
|
}
|
714
710
|
|
715
|
-
goto ONELEVEL if !defined $success;
|
711
|
+
goto ONELEVEL if !defined $main::success;
|
716
712
|
|
717
713
|
|
718
714
|
release_allocation();
|
719
715
|
freeze();
|
720
|
-
$
|
721
|
-
$Job->
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
716
|
+
if ($job_has_uuid) {
|
717
|
+
$Job->update_attributes('output' => &collate_output(),
|
718
|
+
'running' => 0,
|
719
|
+
'success' => $Job->{'output'} && $main::success,
|
720
|
+
'finished_at' => scalar gmtime)
|
721
|
+
}
|
726
722
|
|
727
723
|
if ($Job->{'output'})
|
728
724
|
{
|
@@ -756,7 +752,9 @@ sub update_progress_stats
|
|
756
752
|
$Job->{'tasks_summary'}->{'todo'} = $todo;
|
757
753
|
$Job->{'tasks_summary'}->{'done'} = $done;
|
758
754
|
$Job->{'tasks_summary'}->{'running'} = $running;
|
759
|
-
|
755
|
+
if ($job_has_uuid) {
|
756
|
+
$Job->update_attributes('tasks_summary' => $Job->{'tasks_summary'});
|
757
|
+
}
|
760
758
|
Log (undef, "status: $done done, $running running, $todo todo");
|
761
759
|
$progress_is_dirty = 0;
|
762
760
|
}
|
@@ -775,27 +773,32 @@ sub reapchildren
|
|
775
773
|
my $elapsed = time - $proc{$pid}->{time};
|
776
774
|
my $Jobstep = $jobstep[$jobstepid];
|
777
775
|
|
778
|
-
my $
|
779
|
-
my $
|
776
|
+
my $childstatus = $?;
|
777
|
+
my $exitvalue = $childstatus >> 8;
|
778
|
+
my $exitinfo = sprintf("exit %d signal %d%s",
|
779
|
+
$exitvalue,
|
780
|
+
$childstatus & 127,
|
781
|
+
($childstatus & 128 ? ' core dump' : ''));
|
780
782
|
$Jobstep->{'arvados_task'}->reload;
|
781
|
-
my $
|
783
|
+
my $task_success = $Jobstep->{'arvados_task'}->{success};
|
782
784
|
|
783
|
-
Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$
|
785
|
+
Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
|
784
786
|
|
785
|
-
if (!defined $
|
787
|
+
if (!defined $task_success) {
|
786
788
|
# task did not indicate one way or the other --> fail
|
787
789
|
$Jobstep->{'arvados_task'}->{success} = 0;
|
788
790
|
$Jobstep->{'arvados_task'}->save;
|
789
|
-
$
|
791
|
+
$task_success = 0;
|
790
792
|
}
|
791
793
|
|
792
|
-
if (!$
|
794
|
+
if (!$task_success)
|
793
795
|
{
|
794
|
-
my $
|
795
|
-
$
|
796
|
+
my $temporary_fail;
|
797
|
+
$temporary_fail ||= $Jobstep->{node_fail};
|
798
|
+
$temporary_fail ||= ($exitvalue == 111);
|
796
799
|
|
797
800
|
++$thisround_failed;
|
798
|
-
++$thisround_failed_multiple if $Jobstep->{
|
801
|
+
++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
|
799
802
|
|
800
803
|
# Check for signs of a failed or misconfigured node
|
801
804
|
if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
|
@@ -803,19 +806,28 @@ sub reapchildren
|
|
803
806
|
# Don't count this against jobstep failure thresholds if this
|
804
807
|
# node is already suspected faulty and srun exited quickly
|
805
808
|
if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
|
806
|
-
$elapsed < 5
|
807
|
-
|
808
|
-
|
809
|
-
$
|
810
|
-
--$Jobstep->{attempts};
|
809
|
+
$elapsed < 5) {
|
810
|
+
Log ($jobstepid, "blaming failure on suspect node " .
|
811
|
+
$slot[$proc{$pid}->{slot}]->{node}->{name});
|
812
|
+
$temporary_fail ||= 1;
|
811
813
|
}
|
812
814
|
ban_node_by_slot($proc{$pid}->{slot});
|
813
815
|
}
|
814
816
|
|
815
|
-
|
816
|
-
|
817
|
+
Log ($jobstepid, sprintf('failure (#%d, %s) after %d seconds',
|
818
|
+
++$Jobstep->{'failures'},
|
819
|
+
$temporary_fail ? 'temporary ' : 'permanent',
|
820
|
+
$elapsed));
|
817
821
|
|
818
|
-
|
822
|
+
if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
|
823
|
+
# Give up on this task, and the whole job
|
824
|
+
$main::success = 0;
|
825
|
+
$main::please_freeze = 1;
|
826
|
+
}
|
827
|
+
else {
|
828
|
+
# Put this task back on the todo queue
|
829
|
+
push @jobstep_todo, $jobstepid;
|
830
|
+
}
|
819
831
|
$Job->{'tasks_summary'}->{'failed'}++;
|
820
832
|
}
|
821
833
|
else
|
@@ -826,9 +838,9 @@ sub reapchildren
|
|
826
838
|
push @jobstep_done, $jobstepid;
|
827
839
|
Log ($jobstepid, "success in $elapsed seconds");
|
828
840
|
}
|
829
|
-
$Jobstep->{exitcode} = $
|
841
|
+
$Jobstep->{exitcode} = $childstatus;
|
830
842
|
$Jobstep->{finishtime} = time;
|
831
|
-
process_stderr ($jobstepid, $
|
843
|
+
process_stderr ($jobstepid, $task_success);
|
832
844
|
Log ($jobstepid, "output " . $Jobstep->{'arvados_task'}->{output});
|
833
845
|
|
834
846
|
close $reader{$jobstepid};
|
@@ -847,7 +859,7 @@ sub reapchildren
|
|
847
859
|
foreach my $arvados_task (@{$newtask_list->{'items'}}) {
|
848
860
|
my $jobstep = {
|
849
861
|
'level' => $arvados_task->{'sequence'},
|
850
|
-
'
|
862
|
+
'failures' => 0,
|
851
863
|
'arvados_task' => $arvados_task
|
852
864
|
};
|
853
865
|
push @jobstep, $jobstep;
|
@@ -858,6 +870,27 @@ sub reapchildren
|
|
858
870
|
1;
|
859
871
|
}
|
860
872
|
|
873
|
+
sub check_refresh_wanted
|
874
|
+
{
|
875
|
+
my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
|
876
|
+
if (@stat && $stat[9] > $latest_refresh) {
|
877
|
+
$latest_refresh = scalar time;
|
878
|
+
if ($job_has_uuid) {
|
879
|
+
my $Job2 = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
|
880
|
+
for my $attr ('cancelled_at',
|
881
|
+
'cancelled_by_user_uuid',
|
882
|
+
'cancelled_by_client_uuid') {
|
883
|
+
$Job->{$attr} = $Job2->{$attr};
|
884
|
+
}
|
885
|
+
if ($Job->{'cancelled_at'}) {
|
886
|
+
Log (undef, "Job cancelled at " . $Job->{cancelled_at} .
|
887
|
+
" by user " . $Job->{cancelled_by_user_uuid});
|
888
|
+
$main::success = 0;
|
889
|
+
$main::please_freeze = 1;
|
890
|
+
}
|
891
|
+
}
|
892
|
+
}
|
893
|
+
}
|
861
894
|
|
862
895
|
sub check_squeue
|
863
896
|
{
|
@@ -967,7 +1000,7 @@ sub preprocess_stderr
|
|
967
1000
|
my $line = $1;
|
968
1001
|
substr $jobstep[$job]->{stderr}, 0, 1+length($line), "";
|
969
1002
|
Log ($job, "stderr $line");
|
970
|
-
if ($line =~ /srun: error: (SLURM job $ENV{
|
1003
|
+
if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
|
971
1004
|
# whoa.
|
972
1005
|
$main::please_freeze = 1;
|
973
1006
|
}
|
@@ -982,7 +1015,7 @@ sub preprocess_stderr
|
|
982
1015
|
sub process_stderr
|
983
1016
|
{
|
984
1017
|
my $job = shift;
|
985
|
-
my $
|
1018
|
+
my $task_success = shift;
|
986
1019
|
preprocess_stderr ($job);
|
987
1020
|
|
988
1021
|
map {
|
@@ -1022,15 +1055,14 @@ sub collate_output
|
|
1022
1055
|
{
|
1023
1056
|
my $errstr = $whc->errstr;
|
1024
1057
|
$whc->write_data ("XXX fetch_block($output) failed: $errstr XXX\n");
|
1025
|
-
$success = 0;
|
1058
|
+
$main::success = 0;
|
1026
1059
|
}
|
1027
1060
|
}
|
1028
1061
|
$joboutput = $whc->write_finish if !defined $joboutput;
|
1029
1062
|
if ($joboutput)
|
1030
1063
|
{
|
1031
1064
|
Log (undef, "output $joboutput");
|
1032
|
-
$Job->
|
1033
|
-
$Job->save if $job_has_uuid;
|
1065
|
+
$Job->update_attributes('output' => $joboutput) if $job_has_uuid;
|
1034
1066
|
}
|
1035
1067
|
else
|
1036
1068
|
{
|
@@ -1122,11 +1154,9 @@ sub croak
|
|
1122
1154
|
sub cleanup
|
1123
1155
|
{
|
1124
1156
|
return if !$job_has_uuid;
|
1125
|
-
$Job->
|
1126
|
-
|
1127
|
-
|
1128
|
-
$Job->{'finished_at'} = gmtime;
|
1129
|
-
$Job->save;
|
1157
|
+
$Job->update_attributes('running' => 0,
|
1158
|
+
'success' => 0,
|
1159
|
+
'finished_at' => scalar gmtime);
|
1130
1160
|
}
|
1131
1161
|
|
1132
1162
|
|
@@ -1140,7 +1170,7 @@ sub save_meta
|
|
1140
1170
|
undef $metastream if !$justcheckpoint; # otherwise Log() will try to use it
|
1141
1171
|
Log (undef, "meta key is $loglocator");
|
1142
1172
|
$Job->{'log'} = $loglocator;
|
1143
|
-
$Job->
|
1173
|
+
$Job->update_attributes('log', $loglocator) if $job_has_uuid;
|
1144
1174
|
}
|
1145
1175
|
|
1146
1176
|
|
@@ -1224,7 +1254,7 @@ sub thaw
|
|
1224
1254
|
my ($k, $v) = split ("=", $_, 2);
|
1225
1255
|
$Jobstep->{$k} = freezeunquote ($v) if $k;
|
1226
1256
|
}
|
1227
|
-
$Jobstep->{
|
1257
|
+
$Jobstep->{'failures'} = 0;
|
1228
1258
|
push @jobstep, $Jobstep;
|
1229
1259
|
|
1230
1260
|
if ($Jobstep->{exitcode} eq "0")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.20131210143944
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: google-api-client
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
- - '>='
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 2.0.3
|
97
|
-
description: This is the Arvados SDK CLI gem, git revision
|
97
|
+
description: This is the Arvados SDK CLI gem, git revision 82a471c92036198aaf02ca0467ea48d49dbe822d
|
98
98
|
email: gem-dev@clinicalfuture.com
|
99
99
|
executables:
|
100
100
|
- arv
|
@@ -127,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
127
127
|
version: '0'
|
128
128
|
requirements: []
|
129
129
|
rubyforge_project:
|
130
|
-
rubygems_version: 2.
|
130
|
+
rubygems_version: 2.1.11
|
131
131
|
signing_key:
|
132
132
|
specification_version: 4
|
133
133
|
summary: Arvados SDK CLI
|