arvados-cli 0.1.20131115001557 → 0.1.20131210143944
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/arv +19 -6
- data/bin/arv-run-pipeline-instance +32 -13
- data/bin/crunch-job +104 -74
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 42b56676c867f59f143ef0e229779bd4c67258de
+  data.tar.gz: 6d52906f24e1289e8e74ff2df1ea5db0d2c995ca
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 54b722d0fe53e42a15a62e6877e058f226398cb082810d35553573e44262bb98c2a45ade10399a5a4936853d98a8c77eb6601b793125e33d469ac2babcd0a6dd
+  data.tar.gz: e84c5816676a6f1db200b650b1e67e17af2dada90595c7b005e75136709142a656279bf575ee3515a82499dcebc93bff67902effe50f3fa8183467dc551bb07d
data/bin/arv
CHANGED
@@ -10,10 +10,15 @@ if RUBY_VERSION < '1.9.3' then
 EOS
 end
 
-
+case ARGV[0]
+when 'keep'
   ARGV.shift
   @sub = ARGV.shift
-  if ['
+  if ['get', 'put'].index @sub then
+    # Native Arvados
+    exec `which arv-#{@sub}`.strip, *ARGV
+  elsif ['ls', 'less', 'check'].index @sub then
+    # wh* shims
     exec `which wh#{@sub}`.strip, *ARGV
   else
     puts "Usage: \n" +
@@ -24,9 +29,7 @@ if ARGV[0] == 'keep'
         "#{$0} keep check\n"
   end
   abort
-
-
-if ARGV[0] == 'pipeline'
+when 'pipeline'
   ARGV.shift
   @sub = ARGV.shift
   if ['run'].index @sub then
@@ -100,7 +103,17 @@ class Google::APIClient
   end
 end
 
-
+class ArvadosClient < Google::APIClient
+  def execute(*args)
+    if args.last.is_a? Hash
+      args.last[:headers] ||= {}
+      args.last[:headers]['Accept'] ||= 'application/json'
+    end
+    super(*args)
+  end
+end
+
+client = ArvadosClient.new(:host => ENV['ARVADOS_API_HOST'], :application_name => 'arvados-cli', :application_version => '1.0')
 arvados = client.discovered_api('arvados', ENV['ARVADOS_API_VERSION'])
 
 def to_boolean(s)
data/bin/arv-run-pipeline-instance
CHANGED
@@ -12,6 +12,9 @@
 #
 # [--template uuid] Use the specified pipeline template.
 #
+# [--template path] Load the pipeline template from the specified
+# local file.
+#
 # [--instance uuid] Use the specified pipeline instance.
 #
 # [-n, --dry-run] Do not start any new jobs or wait for existing jobs
@@ -162,7 +165,7 @@ p = Trollop::Parser.new do
       :short => :none,
       :type => :integer)
   opt(:template,
-      "UUID of pipeline template.",
+      "UUID of pipeline template, or path to local pipeline template file.",
       :short => :none,
       :type => :string)
   opt(:instance,
@@ -314,16 +317,25 @@ class WhRunPipelineInstance
     @options = _options
   end
 
-  def fetch_template(
-
-
-
-
-
-
-
-
-
+  def fetch_template(template)
+    if template.match /[^-0-9a-z]/
+      # Doesn't look like a uuid -- use it as a filename.
+      @template = JSON.parse File.read(template), :symbolize_names => true
+      if !@template[:components]
+        abort ("#{$0}: Template loaded from #{template} " +
+               "does not have a \"components\" key")
+      end
+    else
+      result = $client.execute(:api_method => $arvados.pipeline_templates.get,
+                               :parameters => {
+                                 :api_token => ENV['ARVADOS_API_TOKEN'],
+                                 :uuid => template
+                               },
+                               :authenticated => false)
+      @template = JSON.parse result.body, :symbolize_names => true
+      if !@template[:uuid]
+        abort "#{$0}: fatal: failed to retrieve pipeline template #{template} #{@template[:errors].inspect rescue nil}"
+      end
     end
     self
   end
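With this change --template accepts either a pipeline template uuid or a path to a local JSON file; fetch_template decides which by looking for any character outside a uuid's alphabet. A rough standalone illustration of that heuristic (the inputs are made up):

    # Any character outside [-0-9a-z] means "treat it as a filename, not a uuid".
    def looks_like_uuid?(arg)
      arg.match(/[^-0-9a-z]/).nil?
    end

    p looks_like_uuid?("qr1hi-p5p6p-0123456789abcde")  # => true  (fetched via pipeline_templates.get)
    p looks_like_uuid?("./my_template.json")           # => false (read with File.read + JSON.parse)

A template loaded from a file must still contain a "components" key, otherwise the script aborts.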
@@ -407,7 +419,7 @@ class WhRunPipelineInstance
         (@options[:no_reuse] ? [] : JobCache.
          where(script: c[:script],
                script_parameters: c[:script_parameters],
-               script_version_descends_from: c[:
+               script_version_descends_from: c[:script_version])
         ).each do |candidate_job|
           candidate_params_downcase = Hash[candidate_job[:script_parameters].
                                            map { |k,v| [k.downcase,v] }]
@@ -420,6 +432,12 @@ class WhRunPipelineInstance
             next
           end
 
+          if c[:script_version] !=
+              candidate_job[:script_version][0,c[:script_version].length]
+            debuglog "component #{cname} would be satisfied by job #{candidate_job[:uuid]} if script_version matched.", 2
+            next
+          end
+
           unless candidate_job[:success] || candidate_job[:running] ||
               (!candidate_job[:started_at] && !candidate_job[:cancelled_at])
             debuglog "component #{cname} would be satisfied by job #{candidate_job[:uuid]} if it were running or successful.", 2
@@ -462,7 +480,8 @@ class WhRunPipelineInstance
           c[:wait] = true
         end
         if c[:job] and c[:job][:uuid]
-          if
+          if (c[:job][:running] or
+              not (c[:job][:finished_at] or c[:job][:cancelled_at]))
             c[:job] = JobCache.get(c[:job][:uuid])
           end
           if c[:job][:success]
data/bin/crunch-job
CHANGED
@@ -58,7 +58,8 @@ Save a checkpoint and continue.
 =item SIGHUP
 
 Refresh node allocation (i.e., check whether any nodes have been added
-or unallocated)
+or unallocated) and attributes of the Job record that should affect
+behavior (e.g., cancel job if cancelled_at becomes non-nil).
 
 =back
 
@@ -107,10 +108,6 @@ my $job_has_uuid = $jobspec =~ /^[-a-z\d]+$/;
 my $local_job = !$job_has_uuid;
 
 
-$SIG{'HUP'} = sub
-{
-  1;
-};
 $SIG{'USR1'} = sub
 {
   $main::ENV{CRUNCH_DEBUG} = 1;
@@ -257,20 +254,17 @@ my $jobmanager_id;
 if ($job_has_uuid)
 {
   # Claim this job, and make sure nobody else does
-
-
-
-  $Job->{'running'} = 1;
-  $Job->{'success'} = undef;
-  $Job->{'tasks_summary'} = { 'failed' => 0,
-                              'todo' => 1,
-                              'running' => 0,
-                              'done' => 0 };
-  if ($job_has_uuid) {
-    unless ($Job->save() && $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
-      croak("Error while updating / locking job");
-    }
+  unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
+          $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
+    croak("Error while updating / locking job");
   }
+  $Job->update_attributes('started_at' => scalar gmtime,
+                          'running' => 1,
+                          'success' => undef,
+                          'tasks_summary' => { 'failed' => 0,
+                                               'todo' => 1,
+                                               'running' => 0,
+                                               'done' => 0 });
 }
 
 
@@ -281,9 +275,12 @@ $SIG{'TERM'} = \&croak;
 $SIG{'TSTP'} = sub { $main::please_freeze = 1; };
 $SIG{'ALRM'} = sub { $main::please_info = 1; };
 $SIG{'CONT'} = sub { $main::please_continue = 1; };
+$SIG{'HUP'} = sub { $main::please_refresh = 1; };
+
 $main::please_freeze = 0;
 $main::please_info = 0;
 $main::please_continue = 0;
+$main::please_refresh = 0;
 my $jobsteps_must_output_keys = 0; # becomes 1 when any task outputs a key
 
 grep { $ENV{$1} = $2 if /^(NOCACHE.*?)=(.*)/ } split ("\n", $$Job{knobs});
@@ -299,6 +296,7 @@ my $jobstep_tomerge_level = 0;
 my $squeue_checked;
 my $squeue_kill_checked;
 my $output_in_keep = 0;
+my $latest_refresh = scalar time;
 
 
 
@@ -315,7 +313,7 @@ else
                                  'parameters' => {},
                                });
   push @jobstep, { 'level' => 0,
-                   '
+                   'failures' => 0,
                    'arvados_task' => $first_task,
                  };
   push @jobstep_todo, 0;
@@ -421,7 +419,9 @@ else
     Log (undef, "Using commit $commit for tree-ish $treeish");
     if ($commit ne $treeish) {
       $Job->{'script_version'} = $commit;
-      !$job_has_uuid or
+      !$job_has_uuid or
+        $Job->update_attributes('script_version' => $commit) or
+        croak("Error while updating job");
     }
   }
 }
@@ -467,7 +467,7 @@ foreach (split (/\n/, $Job->{knobs}))
 
 
 
-
+$main::success = undef;
 
 
 
@@ -504,12 +504,6 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     {
      next;
    }
-    if ($Jobstep->{attempts} > 9)
-    {
-      Log ($id, "jobstep $id failed $$Jobstep{attempts} times -- giving up");
-      $success = 0;
-      last THISROUND;
-    }
 
    pipe $reader{$id}, "writer" or croak ($!);
    my $flags = fcntl ($reader{$id}, F_GETFL, 0) or croak ($!);
@@ -564,7 +558,8 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     my @execargs = qw(sh);
     my $build_script_to_send = "";
     my $command =
-        "
+        "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
+        ."mkdir -p $ENV{JOB_WORK} $ENV{CRUNCH_TMP} $ENV{TASK_WORK} "
         ."&& cd $ENV{CRUNCH_TMP} ";
     if ($build_script)
     {
@@ -579,7 +574,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
          "&& exec $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
       my @execargs = ('bash', '-c', $command);
       srun (\@srunargs, \@execargs, undef, $build_script_to_send);
-      exit (
+      exit (111);
     }
     close("writer");
     if (!defined $childpid)
@@ -599,7 +594,6 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 
     Log ($id, "job_task ".$Jobstep->{'arvados_task'}->{'uuid'});
     Log ($id, "child $childpid started on $childslotname");
-    $Jobstep->{attempts} ++;
     $Jobstep->{starttime} = time;
     $Jobstep->{node} = $childnode->{name};
     $Jobstep->{slotindex} = $childslot;
@@ -629,6 +623,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         + reapchildren ();
       if (!$gotsome)
       {
+        check_refresh_wanted();
         check_squeue();
         update_progress_stats();
         select (undef, undef, undef, 0.1);
@@ -685,6 +680,7 @@ while (%proc)
   readfrompipes ();
   if (!reapchildren())
   {
+    check_refresh_wanted();
     check_squeue();
     update_progress_stats();
     select (undef, undef, undef, 0.1);
@@ -696,7 +692,7 @@ update_progress_stats();
 freeze_if_want_freeze();
 
 
-if (!defined $success)
+if (!defined $main::success)
 {
   if (@jobstep_todo &&
       $thisround_succeeded == 0 &&
@@ -704,25 +700,25 @@ if (!defined $success)
   {
     my $message = "stop because $thisround_failed tasks failed and none succeeded";
     Log (undef, $message);
-    $success = 0;
+    $main::success = 0;
   }
   if (!@jobstep_todo)
   {
-    $success = 1;
+    $main::success = 1;
   }
 }
 
-goto ONELEVEL if !defined $success;
+goto ONELEVEL if !defined $main::success;
 
 
 release_allocation();
 freeze();
-$
-$Job->
-
-
-
-
+if ($job_has_uuid) {
+  $Job->update_attributes('output' => &collate_output(),
+                          'running' => 0,
+                          'success' => $Job->{'output'} && $main::success,
+                          'finished_at' => scalar gmtime)
+}
 
 if ($Job->{'output'})
 {
@@ -756,7 +752,9 @@ sub update_progress_stats
   $Job->{'tasks_summary'}->{'todo'} = $todo;
   $Job->{'tasks_summary'}->{'done'} = $done;
   $Job->{'tasks_summary'}->{'running'} = $running;
-
+  if ($job_has_uuid) {
+    $Job->update_attributes('tasks_summary' => $Job->{'tasks_summary'});
+  }
   Log (undef, "status: $done done, $running running, $todo todo");
   $progress_is_dirty = 0;
 }
@@ -775,27 +773,32 @@ sub reapchildren
   my $elapsed = time - $proc{$pid}->{time};
   my $Jobstep = $jobstep[$jobstepid];
 
-  my $
-  my $
+  my $childstatus = $?;
+  my $exitvalue = $childstatus >> 8;
+  my $exitinfo = sprintf("exit %d signal %d%s",
+                         $exitvalue,
+                         $childstatus & 127,
+                         ($childstatus & 128 ? ' core dump' : ''));
   $Jobstep->{'arvados_task'}->reload;
-  my $
+  my $task_success = $Jobstep->{'arvados_task'}->{success};
 
-  Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$
+  Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
 
-  if (!defined $
+  if (!defined $task_success) {
    # task did not indicate one way or the other --> fail
    $Jobstep->{'arvados_task'}->{success} = 0;
    $Jobstep->{'arvados_task'}->save;
-    $
+    $task_success = 0;
   }
 
-  if (!$
+  if (!$task_success)
   {
-    my $
-    $
+    my $temporary_fail;
+    $temporary_fail ||= $Jobstep->{node_fail};
+    $temporary_fail ||= ($exitvalue == 111);
 
     ++$thisround_failed;
-    ++$thisround_failed_multiple if $Jobstep->{
+    ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
 
     # Check for signs of a failed or misconfigured node
     if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
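The rewritten reapchildren keeps the raw wait status in $childstatus and derives the exit value, signal, and core-dump flag from it; an exit value of 111 (matching the exit (111) added to the task setup path above) is treated as a temporary failure. A standalone sketch of that decoding, assuming ordinary wait(2) status semantics:

    # High byte = exit value, low 7 bits = terminating signal, bit 7 = core dump.
    def describe_status(childstatus)
      exitvalue = childstatus >> 8
      signal    = childstatus & 127
      core      = (childstatus & 128).zero? ? '' : ' core dump'
      format('exit %d signal %d%s', exitvalue, signal, core)
    end

    puts describe_status(111 << 8)  # "exit 111 signal 0" -- the "try again" convention
    puts describe_status(139)       # "exit 0 signal 11 core dump" -- killed by SIGSEGV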
@@ -803,19 +806,28 @@ sub reapchildren
      # Don't count this against jobstep failure thresholds if this
      # node is already suspected faulty and srun exited quickly
      if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
-          $elapsed < 5
-
-
-        $
-        --$Jobstep->{attempts};
+          $elapsed < 5) {
+        Log ($jobstepid, "blaming failure on suspect node " .
+             $slot[$proc{$pid}->{slot}]->{node}->{name});
+        $temporary_fail ||= 1;
      }
      ban_node_by_slot($proc{$pid}->{slot});
    }
 
-
-
+    Log ($jobstepid, sprintf('failure (#%d, %s) after %d seconds',
+                             ++$Jobstep->{'failures'},
+                             $temporary_fail ? 'temporary ' : 'permanent',
+                             $elapsed));
 
-
+    if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
+      # Give up on this task, and the whole job
+      $main::success = 0;
+      $main::please_freeze = 1;
+    }
+    else {
+      # Put this task back on the todo queue
+      push @jobstep_todo, $jobstepid;
+    }
    $Job->{'tasks_summary'}->{'failed'}++;
  }
  else
@@ -826,9 +838,9 @@ sub reapchildren
    push @jobstep_done, $jobstepid;
    Log ($jobstepid, "success in $elapsed seconds");
  }
-  $Jobstep->{exitcode} = $
+  $Jobstep->{exitcode} = $childstatus;
  $Jobstep->{finishtime} = time;
-  process_stderr ($jobstepid, $
+  process_stderr ($jobstepid, $task_success);
  Log ($jobstepid, "output " . $Jobstep->{'arvados_task'}->{output});
 
  close $reader{$jobstepid};
@@ -847,7 +859,7 @@ sub reapchildren
    foreach my $arvados_task (@{$newtask_list->{'items'}}) {
      my $jobstep = {
        'level' => $arvados_task->{'sequence'},
-        '
+        'failures' => 0,
        'arvados_task' => $arvados_task
      };
      push @jobstep, $jobstep;
@@ -858,6 +870,27 @@ sub reapchildren
  1;
 }
 
+sub check_refresh_wanted
+{
+  my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
+  if (@stat && $stat[9] > $latest_refresh) {
+    $latest_refresh = scalar time;
+    if ($job_has_uuid) {
+      my $Job2 = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
+      for my $attr ('cancelled_at',
+                    'cancelled_by_user_uuid',
+                    'cancelled_by_client_uuid') {
+        $Job->{$attr} = $Job2->{$attr};
+      }
+      if ($Job->{'cancelled_at'}) {
+        Log (undef, "Job cancelled at " . $Job->{cancelled_at} .
+             " by user " . $Job->{cancelled_by_user_uuid});
+        $main::success = 0;
+        $main::please_freeze = 1;
+      }
+    }
+  }
+}
 
 sub check_squeue
 {
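The new check_refresh_wanted subroutine compares the mtime of the file named by CRUNCH_REFRESH_TRIGGER with the time of the last refresh and, when the file has been touched, re-reads the job's cancellation attributes and freezes the job if cancelled_at is now set. A hedged sketch of how another process could nudge a running crunch-job to notice a cancellation (the fallback path below is hypothetical; crunch-job reads the real one from its own environment):

    require 'fileutils'

    trigger = ENV.fetch('CRUNCH_REFRESH_TRIGGER', '/tmp/crunch_refresh_trigger')

    # Bumping the file's mtime is enough: on its next idle loop iteration,
    # check_refresh_wanted sees mtime > $latest_refresh and re-fetches the job.
    FileUtils.touch(trigger)

The updated SIGHUP documentation at the top of this file describes the same refresh behavior for the signal path.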
@@ -967,7 +1000,7 @@ sub preprocess_stderr
    my $line = $1;
    substr $jobstep[$job]->{stderr}, 0, 1+length($line), "";
    Log ($job, "stderr $line");
-    if ($line =~ /srun: error: (SLURM job $ENV{
+    if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
      # whoa.
      $main::please_freeze = 1;
    }
@@ -982,7 +1015,7 @@ sub preprocess_stderr
 sub process_stderr
 {
  my $job = shift;
-  my $
+  my $task_success = shift;
  preprocess_stderr ($job);
 
  map {
@@ -1022,15 +1055,14 @@ sub collate_output
    {
      my $errstr = $whc->errstr;
      $whc->write_data ("XXX fetch_block($output) failed: $errstr XXX\n");
-      $success = 0;
+      $main::success = 0;
    }
  }
  $joboutput = $whc->write_finish if !defined $joboutput;
  if ($joboutput)
  {
    Log (undef, "output $joboutput");
-    $Job->
-    $Job->save if $job_has_uuid;
+    $Job->update_attributes('output' => $joboutput) if $job_has_uuid;
  }
  else
  {
@@ -1122,11 +1154,9 @@ sub croak
 sub cleanup
 {
  return if !$job_has_uuid;
-  $Job->
-
-
-  $Job->{'finished_at'} = gmtime;
-  $Job->save;
+  $Job->update_attributes('running' => 0,
+                          'success' => 0,
+                          'finished_at' => scalar gmtime);
 }
 
 
@@ -1140,7 +1170,7 @@ sub save_meta
  undef $metastream if !$justcheckpoint; # otherwise Log() will try to use it
  Log (undef, "meta key is $loglocator");
  $Job->{'log'} = $loglocator;
-  $Job->
+  $Job->update_attributes('log', $loglocator) if $job_has_uuid;
 }
 
 
@@ -1224,7 +1254,7 @@ sub thaw
    my ($k, $v) = split ("=", $_, 2);
    $Jobstep->{$k} = freezeunquote ($v) if $k;
  }
-  $Jobstep->{
+  $Jobstep->{'failures'} = 0;
  push @jobstep, $Jobstep;
 
  if ($Jobstep->{exitcode} eq "0")
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: arvados-cli
 version: !ruby/object:Gem::Version
-  version: 0.1.20131115001557
+  version: 0.1.20131210143944
 platform: ruby
 authors:
 - Arvados Authors
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-12-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -94,7 +94,7 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: 2.0.3
-description: This is the Arvados SDK CLI gem, git revision
+description: This is the Arvados SDK CLI gem, git revision 82a471c92036198aaf02ca0467ea48d49dbe822d
 email: gem-dev@clinicalfuture.com
 executables:
 - arv
@@ -127,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.1.11
 signing_key:
 specification_version: 4
 summary: Arvados SDK CLI