arvados-cli 0.1.20141024172048 → 0.1.20141030211331

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +64 -35
  3. metadata +5 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d18539333d0ce807ee1f7372cea20b17fbbc66a
4
- data.tar.gz: 108a0dcf6d7b46ec24db186623997c159045877a
3
+ metadata.gz: a77bcc509dbbd5280e9bfed060d5382eb2c1a089
4
+ data.tar.gz: 8046b4b329c2c24e32bc342f0a13bff2e2b3ed83
5
5
  SHA512:
6
- metadata.gz: 09f8fe0b1ba5921de15877a781e2754c5564ad4fd2f617985e6b8d5be10b49be608d8438503e046944f7baabfa5958f975df9d78b98f2e66a96a7f9d995604c8
7
- data.tar.gz: e3ef670470825fd5171e0fa7b93c46b8a53f568044436486144678f03bb90cae2a63665f74c2a843ca70fc16768bbac59f7a03327af5eda3871fd7f1a4bf668d
6
+ metadata.gz: c47e7bf766914dcf7b9fc2dd668c7217f95656a85f4e1f4dad9b217534ad1449d19481c4661d240db2a15475c4a405904df6f47e73a6ca67a35a00525c8d61d2
7
+ data.tar.gz: 48122417757209a7594bd62d1aab6b5b86b8092b7db1554281e4e3c7dde7505c451b0ab1bf840212f29aac8290c5e4310b6a8eae804ae581aa396366604c6697
data/bin/crunch-job CHANGED
@@ -86,6 +86,7 @@ use POSIX ':sys_wait_h';
86
86
  use POSIX qw(strftime);
87
87
  use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
88
88
  use Arvados;
89
+ use Data::Dumper;
89
90
  use Digest::MD5 qw(md5_hex);
90
91
  use Getopt::Long;
91
92
  use IPC::Open2;
@@ -357,7 +358,7 @@ if (!defined $no_clear_tmp) {
357
358
  if ($cleanpid == 0)
358
359
  {
359
360
  srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
360
- ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_TMP/opt $CRUNCH_TMP/src*']);
361
+ ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep $CRUNCH_TMP/task/*.keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src*']);
361
362
  exit (1);
362
363
  }
363
364
  while (1)
@@ -547,8 +548,6 @@ else {
547
548
  my @execargs = ("sh", "-c",
548
549
  "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
549
550
 
550
- # Note: this section is almost certainly unnecessary if we're
551
- # running tasks in docker containers.
552
551
  my $installpid = fork();
553
552
  if ($installpid == 0)
554
553
  {
@@ -694,7 +693,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
694
693
  }
695
694
  $ENV{"TASK_SLOT_NODE"} = $slot[$childslot]->{node}->{name};
696
695
  $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
697
- $ENV{"TASK_WORK"} = $ENV{"JOB_WORK"}."/$id.$$";
696
+ $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
698
697
  $ENV{"HOME"} = $ENV{"TASK_WORK"};
699
698
  $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
700
699
  $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
@@ -723,36 +722,54 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
723
722
  $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
724
723
  if ($docker_hash)
725
724
  {
726
- $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=10000 ";
727
- $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --user=crunch --cidfile=$ENV{TASK_WORK}/docker.cid ";
725
+ my $cidfile = "$ENV{CRUNCH_TMP}/$ENV{TASK_UUID}.cid";
726
+ $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
727
+ $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy ";
728
+
728
729
  # Dynamically configure the container to use the host system as its
729
730
  # DNS server. Get the host's global addresses from the ip command,
730
731
  # and turn them into docker --dns options using gawk.
731
732
  $command .=
732
733
  q{$(ip -o address show scope global |
733
734
  gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
734
- $command .= "--volume=\Q$ENV{CRUNCH_SRC}:/tmp/crunch-src:ro\E ";
735
+
736
+ # The source tree and $destdir directory (which we have
737
+ # installed on the worker host) are available in the container,
738
+ # under the same path.
739
+ $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
740
+ $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
741
+
742
+ # Currently, we make arv-mount's mount point appear at /keep
743
+ # inside the container (instead of using the same path as the
744
+ # host like we do with CRUNCH_SRC and CRUNCH_INSTALL). However,
745
+ # crunch scripts and utilities must not rely on this. They must
746
+ # use $TASK_KEEPMOUNT.
735
747
  $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
736
- $command .= "--env=\QHOME=/home/crunch\E ";
748
+ $ENV{TASK_KEEPMOUNT} = "/keep";
749
+
750
+ # TASK_WORK is a plain docker data volume: it starts out empty,
751
+ # is writable, and persists until no containers use it any
752
+ # more. We don't use --volumes-from to share it with other
753
+ # containers: it is only accessible to this task, and it goes
754
+ # away when this task stops.
755
+ $command .= "--volume=\Q$ENV{TASK_WORK}\E ";
756
+
757
+ # JOB_WORK is also a plain docker data volume for now. TODO:
758
+ # Share a single JOB_WORK volume across all task containers on a
759
+ # given worker node, and delete it when the job ends (and, in
760
+ # case that doesn't work, when the next job starts).
761
+ $command .= "--volume=\Q$ENV{JOB_WORK}\E ";
762
+
737
763
  while (my ($env_key, $env_val) = each %ENV)
738
764
  {
739
- if ($env_key =~ /^(ARVADOS|JOB|TASK)_/) {
740
- if ($env_key eq "TASK_WORK") {
741
- $command .= "--env=\QTASK_WORK=/tmp/crunch-job\E ";
742
- }
743
- elsif ($env_key eq "TASK_KEEPMOUNT") {
744
- $command .= "--env=\QTASK_KEEPMOUNT=/keep\E ";
745
- }
746
- else {
747
- $command .= "--env=\Q$env_key=$env_val\E ";
748
- }
765
+ if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) {
766
+ $command .= "--env=\Q$env_key=$env_val\E ";
749
767
  }
750
768
  }
751
- $command .= "--env=\QCRUNCH_NODE_SLOTS=$ENV{CRUNCH_NODE_SLOTS}\E ";
752
- $command .= "--env=\QCRUNCH_SRC=/tmp/crunch-src\E ";
769
+ $command .= "--env=\QHOME=$ENV{HOME}\E ";
753
770
  $command .= "\Q$docker_hash\E ";
754
771
  $command .= "stdbuf --output=0 --error=0 ";
755
- $command .= "/tmp/crunch-src/crunch_scripts/" . $Job->{"script"};
772
+ $command .= "$ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
756
773
  } else {
757
774
  # Non-docker run
758
775
  $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
@@ -763,8 +780,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
763
780
  my @execargs = ('bash', '-c', $command);
764
781
  srun (\@srunargs, \@execargs, undef, $build_script_to_send);
765
782
  # exec() failed, we assume nothing happened.
766
- Log(undef, "srun() failed on build script");
767
- die;
783
+ die "srun() failed on build script\n";
768
784
  }
769
785
  close("writer");
770
786
  if (!defined $childpid)
@@ -1555,11 +1571,13 @@ sub srun
1555
1571
  my $opts = shift || {};
1556
1572
  my $stdin = shift;
1557
1573
  my $args = $have_slurm ? [@$srunargs, @$execargs] : $execargs;
1558
- print STDERR (join (" ",
1559
- map { / / ? "'$_'" : $_ }
1560
- (@$args)),
1561
- "\n")
1562
- if $ENV{CRUNCH_DEBUG};
1574
+
1575
+ $Data::Dumper::Terse = 1;
1576
+ $Data::Dumper::Indent = 0;
1577
+ my $show_cmd = Dumper($args);
1578
+ $show_cmd =~ s/(TOKEN\\*=)[^\s\']+/${1}[...]/g;
1579
+ $show_cmd =~ s/\n/ /g;
1580
+ warn "starting: $show_cmd\n";
1563
1581
 
1564
1582
  if (defined $stdin) {
1565
1583
  my $child = open STDIN, "-|";
@@ -1692,7 +1710,7 @@ __DATA__
1692
1710
  # checkout-and-build
1693
1711
 
1694
1712
  use Fcntl ':flock';
1695
- use File::Path qw( make_path );
1713
+ use File::Path qw( make_path remove_tree );
1696
1714
 
1697
1715
  my $destdir = $ENV{"CRUNCH_SRC"};
1698
1716
  my $commit = $ENV{"CRUNCH_SRC_COMMIT"};
@@ -1700,12 +1718,17 @@ my $repo = $ENV{"CRUNCH_SRC_URL"};
1700
1718
  my $task_work = $ENV{"TASK_WORK"};
1701
1719
 
1702
1720
  for my $dir ($destdir, $task_work) {
1703
- if ($dir) {
1704
- make_path $dir;
1705
- -e $dir or die "Failed to create temporary directory ($dir): $!";
1706
- }
1721
+ if ($dir) {
1722
+ make_path $dir;
1723
+ -e $dir or die "Failed to create temporary directory ($dir): $!";
1724
+ }
1707
1725
  }
1708
1726
 
1727
+ if ($task_work) {
1728
+ remove_tree($task_work, {keep_root => 1});
1729
+ }
1730
+
1731
+
1709
1732
  open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
1710
1733
  flock L, LOCK_EX;
1711
1734
  if (readlink ("$destdir.commit") eq $commit && -d $destdir) {
@@ -1718,6 +1741,7 @@ if (readlink ("$destdir.commit") eq $commit && -d $destdir) {
1718
1741
  }
1719
1742
 
1720
1743
  unlink "$destdir.commit";
1744
+ open STDERR_ORIG, ">&STDERR";
1721
1745
  open STDOUT, ">", "$destdir.log";
1722
1746
  open STDERR, ">&STDOUT";
1723
1747
 
@@ -1772,8 +1796,13 @@ sub shell_or_die
1772
1796
  if ($ENV{"DEBUG"}) {
1773
1797
  print STDERR "@_\n";
1774
1798
  }
1775
- system (@_) == 0
1776
- or die "@_ failed: $! exit 0x".sprintf("%x",$?);
1799
+ if (system (@_) != 0) {
1800
+ my $err = $!;
1801
+ my $exitstatus = sprintf("exit %d signal %d", $? >> 8, $? & 0x7f);
1802
+ open STDERR, ">&STDERR_ORIG";
1803
+ system ("cat $destdir.log >&2");
1804
+ die "@_ failed ($err): $exitstatus";
1805
+ }
1777
1806
  }
1778
1807
 
1779
1808
  __DATA__
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20141024172048
4
+ version: 0.1.20141030211331
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-24 00:00:00.000000000 Z
11
+ date: 2014-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -36,7 +36,7 @@ dependencies:
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '0.6'
39
+ version: 0.6.3
40
40
  - - ">="
41
41
  - !ruby/object:Gem::Version
42
42
  version: 0.6.3
@@ -46,7 +46,7 @@ dependencies:
46
46
  requirements:
47
47
  - - "~>"
48
48
  - !ruby/object:Gem::Version
49
- version: '0.6'
49
+ version: 0.6.3
50
50
  - - ">="
51
51
  - !ruby/object:Gem::Version
52
52
  version: 0.6.3
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 35ade8a042094a27e2ca5cfd5e9754aa3513410c
181
+ description: Arvados command line tools, git commit c6a03a7abff947dc8242e8be18b4b5e6920a3e4a
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv