arvados-cli 0.1.20150526143156 → 0.1.20150527201024
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crunch-job +76 -43
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bafef169f074ca61d533298a5e462905d9764846
|
4
|
+
data.tar.gz: bd2e304a1cef255c8362650d82ed490d88d7a56d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb00c43e5d7d57f3b17f82efaf494426766b12d5af483195ce89252ec11d402c9dcc5149c3d5dc340f393a125d37c53d93e6ef90d7a60808f2124aa44e7c6688
|
7
|
+
data.tar.gz: d7879e42888f1bb9a0b4305f4d1b1d2e24acb2b0bbf64fde6be45b33f560aee935c1ab936d62e21ddd330467b05cb234032214e47deee6dc76432b775ea3ad9a
|
data/bin/crunch-job
CHANGED
@@ -118,6 +118,7 @@ $ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
|
|
118
118
|
$ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
|
119
119
|
mkdir ($ENV{"JOB_WORK"});
|
120
120
|
|
121
|
+
my %proc;
|
121
122
|
my $force_unlock;
|
122
123
|
my $git_dir;
|
123
124
|
my $jobspec;
|
@@ -589,56 +590,89 @@ if (!defined $git_archive) {
|
|
589
590
|
}
|
590
591
|
}
|
591
592
|
else {
|
592
|
-
|
593
|
-
|
594
|
-
my
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
open(STDOUT, ">&", $install_stderr_w);
|
610
|
-
open(STDERR, ">&", $install_stderr_w);
|
611
|
-
srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
|
612
|
-
exit (1);
|
613
|
-
}
|
614
|
-
close($install_stderr_w);
|
615
|
-
my $stderr_buf = '';
|
616
|
-
while ($installpid != waitpid(-1, WNOHANG)) {
|
617
|
-
freeze_if_want_freeze ($installpid);
|
618
|
-
# Wait up to 0.1 seconds for something to appear on stderr, then
|
619
|
-
# do a non-blocking read.
|
620
|
-
my $bits = fhbits($install_stderr_r);
|
621
|
-
select ($bits, undef, $bits, 0.1);
|
622
|
-
if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
|
593
|
+
my $install_exited;
|
594
|
+
my $install_script_tries_left = 3;
|
595
|
+
for (my $attempts = 0; $attempts < 3; $attempts++) {
|
596
|
+
Log(undef, "Run install script on all workers");
|
597
|
+
|
598
|
+
my @srunargs = ("srun",
|
599
|
+
"--nodelist=$nodelist",
|
600
|
+
"-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
|
601
|
+
my @execargs = ("sh", "-c",
|
602
|
+
"mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
|
603
|
+
|
604
|
+
$ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
|
605
|
+
my ($install_stderr_r, $install_stderr_w);
|
606
|
+
pipe $install_stderr_r, $install_stderr_w or croak("pipe() failed: $!");
|
607
|
+
set_nonblocking($install_stderr_r);
|
608
|
+
my $installpid = fork();
|
609
|
+
if ($installpid == 0)
|
623
610
|
{
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
611
|
+
close($install_stderr_r);
|
612
|
+
fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
|
613
|
+
open(STDOUT, ">&", $install_stderr_w);
|
614
|
+
open(STDERR, ">&", $install_stderr_w);
|
615
|
+
srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
|
616
|
+
exit (1);
|
617
|
+
}
|
618
|
+
close($install_stderr_w);
|
619
|
+
# Tell freeze_if_want_freeze how to kill the child, otherwise the
|
620
|
+
# "waitpid(installpid)" loop won't get interrupted by a freeze:
|
621
|
+
$proc{$installpid} = {};
|
622
|
+
my $stderr_buf = '';
|
623
|
+
# Track whether anything appears on stderr other than slurm errors
|
624
|
+
# ("srun: ...") and the "starting: ..." message printed by the
|
625
|
+
# srun subroutine itself:
|
626
|
+
my $stderr_anything_from_script = 0;
|
627
|
+
my $match_our_own_errors = '^(srun: error: |starting: \[)';
|
628
|
+
while ($installpid != waitpid(-1, WNOHANG)) {
|
629
|
+
freeze_if_want_freeze ($installpid);
|
630
|
+
# Wait up to 0.1 seconds for something to appear on stderr, then
|
631
|
+
# do a non-blocking read.
|
632
|
+
my $bits = fhbits($install_stderr_r);
|
633
|
+
select ($bits, undef, $bits, 0.1);
|
634
|
+
if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
|
635
|
+
{
|
636
|
+
while ($stderr_buf =~ /^(.*?)\n/) {
|
637
|
+
my $line = $1;
|
638
|
+
substr $stderr_buf, 0, 1+length($line), "";
|
639
|
+
Log(undef, "stderr $line");
|
640
|
+
if ($line !~ /$match_our_own_errors/) {
|
641
|
+
$stderr_anything_from_script = 1;
|
642
|
+
}
|
643
|
+
}
|
628
644
|
}
|
629
645
|
}
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
646
|
+
delete $proc{$installpid};
|
647
|
+
$install_exited = $?;
|
648
|
+
close($install_stderr_r);
|
649
|
+
if (length($stderr_buf) > 0) {
|
650
|
+
if ($stderr_buf !~ /$match_our_own_errors/) {
|
651
|
+
$stderr_anything_from_script = 1;
|
652
|
+
}
|
653
|
+
Log(undef, "stderr $stderr_buf")
|
654
|
+
}
|
655
|
+
|
656
|
+
Log (undef, "Install script exited ".exit_status_s($install_exited));
|
657
|
+
last if $install_exited == 0 || $main::please_freeze;
|
658
|
+
# If the install script fails but doesn't print an error message,
|
659
|
+
# the next thing anyone is likely to do is just run it again in
|
660
|
+
# case it was a transient problem like "slurm communication fails
|
661
|
+
# because the network isn't reliable enough". So we'll just do
|
662
|
+
# that ourselves (up to 3 attempts in total). OTOH, if there is an
|
663
|
+
# error message, the problem is more likely to have a real fix and
|
664
|
+
# we should fail the job so the fixing process can start, instead
|
665
|
+
# of doing 2 more attempts.
|
666
|
+
last if $stderr_anything_from_script;
|
635
667
|
}
|
636
668
|
|
637
|
-
Log (undef, "Install script exited ".exit_status_s($install_exited));
|
638
669
|
foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
|
639
670
|
unlink($tar_filename);
|
640
671
|
}
|
641
|
-
|
672
|
+
|
673
|
+
if ($install_exited != 0) {
|
674
|
+
croak("Giving up");
|
675
|
+
}
|
642
676
|
}
|
643
677
|
|
644
678
|
foreach (qw (script script_version script_parameters runtime_constraints))
|
@@ -704,7 +738,6 @@ for (my $ii = $#freeslot; $ii >= 0; $ii--) {
|
|
704
738
|
}
|
705
739
|
|
706
740
|
Log(undef, "start level $level with $round_num_freeslots slots");
|
707
|
-
my %proc;
|
708
741
|
my @holdslot;
|
709
742
|
my %reader;
|
710
743
|
my $progress_is_dirty = 1;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.20150526143156
|
4
|
+
version: 0.1.20150527201024
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-26 00:00:00.000000000 Z
|
11
|
+
date: 2015-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: arvados
|
@@ -178,7 +178,7 @@ dependencies:
|
|
178
178
|
- - "<"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 1.0.0
|
181
|
-
description: Arvados command line tools, git commit
|
181
|
+
description: Arvados command line tools, git commit 7a53d874994a5a9af273cee1329d9635b7e03edb
|
182
182
|
email: gem-dev@curoverse.com
|
183
183
|
executables:
|
184
184
|
- arv
|