arvados-cli 0.1.20141205182512 → 0.1.20141209151444

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +226 -83
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ddf3609b412326c94eaccbf7847f8c1deedbb833
4
- data.tar.gz: 44bc5dce941bc5f6beea57991a111053f1e581e4
3
+ metadata.gz: 6842c06e714d3300101ef5b3877fda5ecf4cbcfc
4
+ data.tar.gz: 70c9c5777fbbe9a624759d6cacf458ee43feafb3
5
5
  SHA512:
6
- metadata.gz: 4823db913e5feea59eca3ddb81829654fb0e7136db64f0bae3a65adf811a9f2d632ec18c18223c5fdf47f2202e7350ad4cdc1a02f93dbef76d953d649a984e30
7
- data.tar.gz: 3c160cc610dfe3b31bcbc0e897353575769332b1eb617d4c0ca238a3ae4d2dc846c07b453c9fb1290fa173cb5f8cee74aaa428332cd4f1cc3fdd3cf1632a78e7
6
+ metadata.gz: 3b43647b8e6f9047e011dafbcd09275f032d47c77a209e526e923e72b9a8793798270da661f34e2ce5ecbbd6a69ae897f77f156b86b73f705b3ed76f4d7f6e6b
7
+ data.tar.gz: 67316d1fe5ad62990445672e1f0a227d913e9a7a47c0daf23dd5af46b3cc40483600ebc4c48db051f84d9e916568d949c6fe252e4544ff5de569524caea5db76
data/bin/crunch-job CHANGED
@@ -86,6 +86,7 @@ use POSIX ':sys_wait_h';
86
86
  use POSIX qw(strftime);
87
87
  use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
88
88
  use Arvados;
89
+ use Cwd qw(realpath);
89
90
  use Data::Dumper;
90
91
  use Digest::MD5 qw(md5_hex);
91
92
  use Getopt::Long;
@@ -197,6 +198,16 @@ $Job->{'runtime_constraints'} ||= {};
197
198
  $Job->{'runtime_constraints'}->{'max_tasks_per_node'} ||= 0;
198
199
  my $max_ncpus = $Job->{'runtime_constraints'}->{'max_tasks_per_node'};
199
200
 
201
+ my $gem_versions = `gem list --quiet arvados-cli 2>/dev/null`;
202
+ if ($? == 0) {
203
+ $gem_versions =~ s/^arvados-cli \(/ with arvados-cli Gem version(s) /;
204
+ chomp($gem_versions); # Strip the trailing newline
205
+ chop($gem_versions); # Drop the closing parenthesis that followed the version list
206
+ } else {
207
+ $gem_versions = "";
208
+ }
209
+ Log(undef,
210
+ "running from " . ((-e $0) ? realpath($0) : "stdin") . $gem_versions);
200
211
 
201
212
  Log (undef, "check slurm allocation");
202
213
  my @slot;
@@ -334,13 +345,9 @@ if (!$have_slurm)
334
345
  must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
335
346
  }
336
347
 
337
-
338
- my $build_script;
339
- do {
340
- local $/ = undef;
341
- $build_script = <DATA>;
342
- };
348
+ my $build_script = handle_readall(\*DATA);
343
349
  my $nodelist = join(",", @node);
350
+ my $git_tar_count = 0;
344
351
 
345
352
  if (!defined $no_clear_tmp) {
346
353
  # Clean out crunch_tmp/work, crunch_tmp/opt, crunch_tmp/src*
@@ -362,8 +369,51 @@ if (!defined $no_clear_tmp) {
362
369
  Log (undef, "Cleanup command exited ".exit_status_s($?));
363
370
  }
364
371
 
372
+ # If this job requires a Docker image, install that.
373
+ my $docker_bin = "/usr/bin/docker.io";
374
+ my ($docker_locator, $docker_stream, $docker_hash);
375
+ if ($docker_locator = $Job->{docker_image_locator}) {
376
+ ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
377
+ if (!$docker_hash)
378
+ {
379
+ croak("No Docker image hash found from locator $docker_locator");
380
+ }
381
+ $docker_stream =~ s/^\.//;
382
+ my $docker_install_script = qq{
383
+ if ! $docker_bin images -q --no-trunc | grep -qxF \Q$docker_hash\E; then
384
+ arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
385
+ fi
386
+ };
387
+ my $docker_pid = fork();
388
+ if ($docker_pid == 0)
389
+ {
390
+ srun (["srun", "--nodelist=" . join(',', @node)],
391
+ ["/bin/sh", "-ec", $docker_install_script]);
392
+ exit ($?);
393
+ }
394
+ while (1)
395
+ {
396
+ last if $docker_pid == waitpid (-1, WNOHANG);
397
+ freeze_if_want_freeze ($docker_pid);
398
+ select (undef, undef, undef, 0.1);
399
+ }
400
+ if ($? != 0)
401
+ {
402
+ croak("Installing Docker image from $docker_locator exited "
403
+ .exit_status_s($?));
404
+ }
405
+
406
+ if ($Job->{arvados_sdk_version}) {
407
+ # The job also specifies an Arvados SDK version. Add the SDKs to the
408
+ # tar file for the build script to install.
409
+ Log(undef, sprintf("Packing Arvados SDK version %s for installation",
410
+ $Job->{arvados_sdk_version}));
411
+ add_git_archive("git", "--git-dir=$git_dir", "archive",
412
+ "--prefix=.arvados.sdk/",
413
+ $Job->{arvados_sdk_version}, "sdk");
414
+ }
415
+ }
365
416
 
366
- my $git_archive;
367
417
  if (!defined $git_dir && $Job->{'script_version'} =~ m{^/}) {
368
418
  # If script_version looks like an absolute path, *and* the --git-dir
369
419
  # argument was not given -- which implies we were not invoked by
@@ -517,12 +567,10 @@ else {
517
567
  }
518
568
 
519
569
  $ENV{"CRUNCH_SRC_COMMIT"} = $commit;
520
- $git_archive = `$gitcmd archive ''\Q$commit\E`;
521
- if ($?) {
522
- croak("Error: $gitcmd archive exited ".exit_status_s($?));
523
- }
570
+ add_git_archive("$gitcmd archive ''\Q$commit\E");
524
571
  }
525
572
 
573
+ my $git_archive = combined_git_archive();
526
574
  if (!defined $git_archive) {
527
575
  Log(undef, "Skip install phase (no git archive)");
528
576
  if ($have_slurm) {
@@ -552,48 +600,10 @@ else {
552
600
  }
553
601
  my $install_exited = $?;
554
602
  Log (undef, "Install script exited ".exit_status_s($install_exited));
555
- exit (1) if $install_exited != 0;
556
- }
557
-
558
- if (!$have_slurm)
559
- {
560
- # Grab our lock again (we might have deleted and re-created CRUNCH_TMP above)
561
- must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
562
- }
563
-
564
- # If this job requires a Docker image, install that.
565
- my $docker_bin = "/usr/bin/docker.io";
566
- my ($docker_locator, $docker_stream, $docker_hash);
567
- if ($docker_locator = $Job->{docker_image_locator}) {
568
- ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
569
- if (!$docker_hash)
570
- {
571
- croak("No Docker image hash found from locator $docker_locator");
572
- }
573
- $docker_stream =~ s/^\.//;
574
- my $docker_install_script = qq{
575
- if ! $docker_bin images -q --no-trunc | grep -qxF \Q$docker_hash\E; then
576
- arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
577
- fi
578
- };
579
- my $docker_pid = fork();
580
- if ($docker_pid == 0)
581
- {
582
- srun (["srun", "--nodelist=" . join(',', @node)],
583
- ["/bin/sh", "-ec", $docker_install_script]);
584
- exit ($?);
585
- }
586
- while (1)
587
- {
588
- last if $docker_pid == waitpid (-1, WNOHANG);
589
- freeze_if_want_freeze ($docker_pid);
590
- select (undef, undef, undef, 0.1);
591
- }
592
- if ($? != 0)
593
- {
594
- croak("Installing Docker image from $docker_locator exited "
595
- .exit_status_s($?));
603
+ foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
604
+ unlink($tar_filename);
596
605
  }
606
+ exit (1) if $install_exited != 0;
597
607
  }
598
608
 
599
609
  foreach (qw (script script_version script_parameters runtime_constraints))
@@ -1715,17 +1725,87 @@ sub exit_status_s {
1715
1725
  return $s;
1716
1726
  }
1717
1727
 
1728
+ sub handle_readall {
1729
+ # Pass in a glob reference to a file handle.
1730
+ # Read all its contents and return them as a string.
1731
+ my $fh_glob_ref = shift;
1732
+ local $/ = undef;
1733
+ return <$fh_glob_ref>;
1734
+ }
1735
+
1736
+ sub tar_filename_n {
1737
+ my $n = shift;
1738
+ return sprintf("%s/git.%s.%d.tar", $ENV{CRUNCH_TMP}, $job_id, $n);
1739
+ }
1740
+
1741
+ sub add_git_archive {
1742
+ # Pass in a git archive command as a string or list, a la system().
1743
+ # This method will save its output to be included in the archive sent to the
1744
+ # build script.
1745
+ my $git_input;
1746
+ $git_tar_count++;
1747
+ if (!open(GIT_ARCHIVE, ">", tar_filename_n($git_tar_count))) {
1748
+ croak("Failed to save git archive: $!");
1749
+ }
1750
+ my $git_pid = open2(">&GIT_ARCHIVE", $git_input, @_);
1751
+ close($git_input);
1752
+ waitpid($git_pid, 0);
1753
+ close(GIT_ARCHIVE);
1754
+ if ($?) {
1755
+ croak("Failed to save git archive: git exited " . exit_status_s($?));
1756
+ }
1757
+ }
1758
+
1759
+ sub combined_git_archive {
1760
+ # Combine all saved tar archives into a single archive, then return its
1761
+ # contents in a string. Return undef if no archives have been saved.
1762
+ if ($git_tar_count < 1) {
1763
+ return undef;
1764
+ }
1765
+ my $base_tar_name = tar_filename_n(1);
1766
+ foreach my $tar_to_append (map { tar_filename_n($_); } (2..$git_tar_count)) {
1767
+ my $tar_exit = system("tar", "-Af", $base_tar_name, $tar_to_append);
1768
+ if ($tar_exit != 0) {
1769
+ croak("Error preparing build archive: tar -A exited " .
1770
+ exit_status_s($tar_exit));
1771
+ }
1772
+ }
1773
+ if (!open(GIT_TAR, "<", $base_tar_name)) {
1774
+ croak("Could not open build archive: $!");
1775
+ }
1776
+ my $tar_contents = handle_readall(\*GIT_TAR);
1777
+ close(GIT_TAR);
1778
+ return $tar_contents;
1779
+ }
1780
+
1718
1781
  __DATA__
1719
1782
  #!/usr/bin/perl
1720
-
1721
- # checkout-and-build
1783
+ #
1784
+ # This is crunch-job's internal dispatch script. crunch-job running on the API
1785
+ # server invokes this script on individual compute nodes, or localhost if we're
1786
+ # running a job locally. It gets called in two modes:
1787
+ #
1788
+ # * No arguments: Installation mode. Read a tar archive from the DATA
1789
+ # file handle; it includes the Crunch script's source code, and
1790
+ # maybe SDKs as well. Those should be installed in the proper
1791
+ # locations. This runs outside of any Docker container, so don't try to
1792
+ # introspect Crunch's runtime environment.
1793
+ #
1794
+ # * With arguments: Crunch script run mode. This script should set up the
1795
+ # environment, then run the command specified in the arguments. This runs
1796
+ # inside any Docker container.
1722
1797
 
1723
1798
  use Fcntl ':flock';
1724
1799
  use File::Path qw( make_path remove_tree );
1800
+ use POSIX qw(getcwd);
1801
+
1802
+ # Map SDK subdirectories to the path environments they belong to.
1803
+ my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");
1725
1804
 
1726
1805
  my $destdir = $ENV{"CRUNCH_SRC"};
1727
1806
  my $commit = $ENV{"CRUNCH_SRC_COMMIT"};
1728
1807
  my $repo = $ENV{"CRUNCH_SRC_URL"};
1808
+ my $install_dir = $ENV{"CRUNCH_INSTALL"} || (getcwd() . "/opt");
1729
1809
  my $job_work = $ENV{"JOB_WORK"};
1730
1810
  my $task_work = $ENV{"TASK_WORK"};
1731
1811
 
@@ -1740,43 +1820,110 @@ if ($task_work) {
1740
1820
  remove_tree($task_work, {keep_root => 1});
1741
1821
  }
1742
1822
 
1743
- my @git_archive_data = <DATA>;
1744
- if (!@git_archive_data) {
1745
- # Nothing to extract -> nothing to install.
1746
- run_argv_and_exit();
1823
+ open(STDOUT_ORIG, ">&", STDOUT);
1824
+ open(STDERR_ORIG, ">&", STDERR);
1825
+ open(STDOUT, ">>", "$destdir.log");
1826
+ open(STDERR, ">&", STDOUT);
1827
+
1828
+ ### Crunch script run mode
1829
+ if (@ARGV) {
1830
+ # We want to do routine logging during task 0 only. This gives the user
1831
+ # the information they need, but avoids repeating the information for every
1832
+ # task.
1833
+ my $Log;
1834
+ if ($ENV{TASK_SEQUENCE} eq "0") {
1835
+ $Log = sub {
1836
+ my $msg = shift;
1837
+ printf STDERR_ORIG "[Crunch] $msg\n", @_;
1838
+ };
1839
+ } else {
1840
+ $Log = sub { };
1841
+ }
1842
+
1843
+ my $python_src = "$install_dir/python";
1844
+ my $venv_dir = "$job_work/.arvados.venv";
1845
+ my $venv_built = -e "$venv_dir/bin/activate";
1846
+ if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) {
1847
+ shell_or_die("virtualenv", "--quiet", "--system-site-packages",
1848
+ "--python=python2.7", $venv_dir);
1849
+ shell_or_die("$venv_dir/bin/pip", "--quiet", "install", $python_src);
1850
+ $venv_built = 1;
1851
+ $Log->("Built Python SDK virtualenv");
1852
+ }
1853
+
1854
+ if ($venv_built) {
1855
+ $Log->("Running in Python SDK virtualenv");
1856
+ my $orig_argv = join(" ", map { quotemeta($_); } @ARGV);
1857
+ @ARGV = ("/bin/sh", "-ec",
1858
+ ". \Q$venv_dir/bin/activate\E; exec $orig_argv");
1859
+ } elsif (-d $python_src) {
1860
+ $Log->("Warning: virtualenv not found inside Docker container default " +
1861
+ "\$PATH. Can't install Python SDK.");
1862
+ }
1863
+
1864
+ while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
1865
+ my $sdk_path = "$install_dir/$sdk_dir";
1866
+ if (-d $sdk_path) {
1867
+ if ($ENV{$sdk_envkey}) {
1868
+ $ENV{$sdk_envkey} = "$sdk_path:" . $ENV{$sdk_envkey};
1869
+ } else {
1870
+ $ENV{$sdk_envkey} = $sdk_path;
1871
+ }
1872
+ $Log->("Arvados SDK added to %s", $sdk_envkey);
1873
+ }
1874
+ }
1875
+
1876
+ close(STDOUT);
1877
+ close(STDERR);
1878
+ open(STDOUT, ">&", STDOUT_ORIG);
1879
+ open(STDERR, ">&", STDERR_ORIG);
1880
+ exec(@ARGV);
1881
+ die "Cannot exec `@ARGV`: $!";
1747
1882
  }
1748
1883
 
1884
+ ### Installation mode
1749
1885
  open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
1750
1886
  flock L, LOCK_EX;
1751
1887
  if (readlink ("$destdir.commit") eq $commit && -d $destdir) {
1752
1888
  # This version already installed -> nothing to do.
1753
- run_argv_and_exit();
1889
+ exit(0);
1754
1890
  }
1755
1891
 
1756
1892
  unlink "$destdir.commit";
1757
- open STDERR_ORIG, ">&STDERR";
1758
- open STDOUT, ">", "$destdir.log";
1759
- open STDERR, ">&STDOUT";
1760
-
1761
1893
  mkdir $destdir;
1762
- open TARX, "|-", "tar", "-C", $destdir, "-xf", "-";
1763
- print TARX @git_archive_data;
1894
+ open TARX, "|-", "tar", "-xC", $destdir;
1895
+ {
1896
+ local $/ = undef;
1897
+ print TARX <DATA>;
1898
+ }
1764
1899
  if(!close(TARX)) {
1765
- die "'tar -C $destdir -xf -' exited $?: $!";
1900
+ die "'tar -xC $destdir' exited $?: $!";
1766
1901
  }
1767
1902
 
1768
- my $pwd;
1769
- chomp ($pwd = `pwd`);
1770
- my $install_dir = $ENV{"CRUNCH_INSTALL"} || "$pwd/opt";
1771
1903
  mkdir $install_dir;
1772
1904
 
1773
- for my $src_path ("$destdir/arvados/sdk/python") {
1774
- if (-d $src_path) {
1775
- shell_or_die ("virtualenv", $install_dir);
1776
- shell_or_die ("cd $src_path && ./build.sh && $install_dir/bin/python setup.py install");
1905
+ my $sdk_root = "$destdir/.arvados.sdk/sdk";
1906
+ if (-d $sdk_root) {
1907
+ foreach my $sdk_lang (("python",
1908
+ map { (split /\//, $_, 2)[0]; } keys(%SDK_ENVVARS))) {
1909
+ if (-d "$sdk_root/$sdk_lang") {
1910
+ if (!rename("$sdk_root/$sdk_lang", "$install_dir/$sdk_lang")) {
1911
+ die "Failed to install $sdk_lang SDK: $!";
1912
+ }
1913
+ }
1777
1914
  }
1778
1915
  }
1779
1916
 
1917
+ my $python_dir = "$install_dir/python";
1918
+ if ((-d $python_dir) and can_run("python2.7") and
1919
+ (system("python2.7", "$python_dir/setup.py", "--quiet", "egg_info") != 0)) {
1920
+ # egg_info failed, probably when it asked git for a build tag.
1921
+ # Specify no build tag.
1922
+ open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
1923
+ print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
1924
+ close($pysdk_cfg);
1925
+ }
1926
+
1780
1927
  if (-e "$destdir/crunch_scripts/install") {
1781
1928
  shell_or_die ("$destdir/crunch_scripts/install", $install_dir);
1782
1929
  } elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
@@ -1794,16 +1941,12 @@ if ($commit) {
1794
1941
 
1795
1942
  close L;
1796
1943
 
1797
- run_argv_and_exit();
1798
-
1799
- sub run_argv_and_exit
1800
- {
1801
- if (@ARGV) {
1802
- exec(@ARGV);
1803
- die "Cannot exec `@ARGV`: $!";
1804
- } else {
1805
- exit 0;
1806
- }
1944
+ sub can_run { # Return true if which(1) exits 0 for $command_name, false otherwise.
1945
+ my $command_name = shift;
1946
+ open(my $which, "-|", "which", $command_name) or return 0; # which(1) itself could not be started; $? would be stale
1947
+ while (<$which>) { } # Discard the output; only the exit status matters.
1948
+ close($which); # Waits for the child and sets $? to its exit status.
1949
+ return ($? == 0);
1807
1950
  }
1808
1951
 
1809
1952
  sub shell_or_die
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20141205182512
4
+ version: 0.1.20141209151444
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-05 00:00:00.000000000 Z
11
+ date: 2014-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 5794ce4f00cd583eee74719118516c0c993fca1c
181
+ description: Arvados command line tools, git commit d980949ac4c092a44f3b64fb7cbd4a27a49256fb
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv