arvados-cli 0.1.20150526143156 → 0.1.20150527201024
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/crunch-job +76 -43
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bafef169f074ca61d533298a5e462905d9764846
|
4
|
+
data.tar.gz: bd2e304a1cef255c8362650d82ed490d88d7a56d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb00c43e5d7d57f3b17f82efaf494426766b12d5af483195ce89252ec11d402c9dcc5149c3d5dc340f393a125d37c53d93e6ef90d7a60808f2124aa44e7c6688
|
7
|
+
data.tar.gz: d7879e42888f1bb9a0b4305f4d1b1d2e24acb2b0bbf64fde6be45b33f560aee935c1ab936d62e21ddd330467b05cb234032214e47deee6dc76432b775ea3ad9a
|
data/bin/crunch-job
CHANGED
@@ -118,6 +118,7 @@ $ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
|
|
118
118
|
$ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
|
119
119
|
mkdir ($ENV{"JOB_WORK"});
|
120
120
|
|
121
|
+
my %proc;
|
121
122
|
my $force_unlock;
|
122
123
|
my $git_dir;
|
123
124
|
my $jobspec;
|
@@ -589,56 +590,89 @@ if (!defined $git_archive) {
|
|
589
590
|
}
|
590
591
|
}
|
591
592
|
else {
|
592
|
-
|
593
|
-
|
594
|
-
my
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
open(STDOUT, ">&", $install_stderr_w);
|
610
|
-
open(STDERR, ">&", $install_stderr_w);
|
611
|
-
srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
|
612
|
-
exit (1);
|
613
|
-
}
|
614
|
-
close($install_stderr_w);
|
615
|
-
my $stderr_buf = '';
|
616
|
-
while ($installpid != waitpid(-1, WNOHANG)) {
|
617
|
-
freeze_if_want_freeze ($installpid);
|
618
|
-
# Wait up to 0.1 seconds for something to appear on stderr, then
|
619
|
-
# do a non-blocking read.
|
620
|
-
my $bits = fhbits($install_stderr_r);
|
621
|
-
select ($bits, undef, $bits, 0.1);
|
622
|
-
if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
|
593
|
+
my $install_exited;
|
594
|
+
my $install_script_tries_left = 3;
|
595
|
+
for (my $attempts = 0; $attempts < 3; $attempts++) {
|
596
|
+
Log(undef, "Run install script on all workers");
|
597
|
+
|
598
|
+
my @srunargs = ("srun",
|
599
|
+
"--nodelist=$nodelist",
|
600
|
+
"-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
|
601
|
+
my @execargs = ("sh", "-c",
|
602
|
+
"mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
|
603
|
+
|
604
|
+
$ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
|
605
|
+
my ($install_stderr_r, $install_stderr_w);
|
606
|
+
pipe $install_stderr_r, $install_stderr_w or croak("pipe() failed: $!");
|
607
|
+
set_nonblocking($install_stderr_r);
|
608
|
+
my $installpid = fork();
|
609
|
+
if ($installpid == 0)
|
623
610
|
{
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
611
|
+
close($install_stderr_r);
|
612
|
+
fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
|
613
|
+
open(STDOUT, ">&", $install_stderr_w);
|
614
|
+
open(STDERR, ">&", $install_stderr_w);
|
615
|
+
srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
|
616
|
+
exit (1);
|
617
|
+
}
|
618
|
+
close($install_stderr_w);
|
619
|
+
# Tell freeze_if_want_freeze how to kill the child, otherwise the
|
620
|
+
# "waitpid(installpid)" loop won't get interrupted by a freeze:
|
621
|
+
$proc{$installpid} = {};
|
622
|
+
my $stderr_buf = '';
|
623
|
+
# Track whether anything appears on stderr other than slurm errors
|
624
|
+
# ("srun: ...") and the "starting: ..." message printed by the
|
625
|
+
# srun subroutine itself:
|
626
|
+
my $stderr_anything_from_script = 0;
|
627
|
+
my $match_our_own_errors = '^(srun: error: |starting: \[)';
|
628
|
+
while ($installpid != waitpid(-1, WNOHANG)) {
|
629
|
+
freeze_if_want_freeze ($installpid);
|
630
|
+
# Wait up to 0.1 seconds for something to appear on stderr, then
|
631
|
+
# do a non-blocking read.
|
632
|
+
my $bits = fhbits($install_stderr_r);
|
633
|
+
select ($bits, undef, $bits, 0.1);
|
634
|
+
if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
|
635
|
+
{
|
636
|
+
while ($stderr_buf =~ /^(.*?)\n/) {
|
637
|
+
my $line = $1;
|
638
|
+
substr $stderr_buf, 0, 1+length($line), "";
|
639
|
+
Log(undef, "stderr $line");
|
640
|
+
if ($line !~ /$match_our_own_errors/) {
|
641
|
+
$stderr_anything_from_script = 1;
|
642
|
+
}
|
643
|
+
}
|
628
644
|
}
|
629
645
|
}
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
646
|
+
delete $proc{$installpid};
|
647
|
+
$install_exited = $?;
|
648
|
+
close($install_stderr_r);
|
649
|
+
if (length($stderr_buf) > 0) {
|
650
|
+
if ($stderr_buf !~ /$match_our_own_errors/) {
|
651
|
+
$stderr_anything_from_script = 1;
|
652
|
+
}
|
653
|
+
Log(undef, "stderr $stderr_buf")
|
654
|
+
}
|
655
|
+
|
656
|
+
Log (undef, "Install script exited ".exit_status_s($install_exited));
|
657
|
+
last if $install_exited == 0 || $main::please_freeze;
|
658
|
+
# If the install script fails but doesn't print an error message,
|
659
|
+
# the next thing anyone is likely to do is just run it again in
|
660
|
+
# case it was a transient problem like "slurm communication fails
|
661
|
+
# because the network isn't reliable enough". So we'll just do
|
662
|
+
# that ourselves (up to 3 attempts in total). OTOH, if there is an
|
663
|
+
# error message, the problem is more likely to have a real fix and
|
664
|
+
# we should fail the job so the fixing process can start, instead
|
665
|
+
# of doing 2 more attempts.
|
666
|
+
last if $stderr_anything_from_script;
|
635
667
|
}
|
636
668
|
|
637
|
-
Log (undef, "Install script exited ".exit_status_s($install_exited));
|
638
669
|
foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
|
639
670
|
unlink($tar_filename);
|
640
671
|
}
|
641
|
-
|
672
|
+
|
673
|
+
if ($install_exited != 0) {
|
674
|
+
croak("Giving up");
|
675
|
+
}
|
642
676
|
}
|
643
677
|
|
644
678
|
foreach (qw (script script_version script_parameters runtime_constraints))
|
@@ -704,7 +738,6 @@ for (my $ii = $#freeslot; $ii >= 0; $ii--) {
|
|
704
738
|
}
|
705
739
|
|
706
740
|
Log(undef, "start level $level with $round_num_freeslots slots");
|
707
|
-
my %proc;
|
708
741
|
my @holdslot;
|
709
742
|
my %reader;
|
710
743
|
my $progress_is_dirty = 1;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.20150527201024
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit 7a53d874994a5a9af273cee1329d9635b7e03edb
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|