arvados-cli 0.1.20141205182512 → 0.1.20141209151444

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/crunch-job +226 -83
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ddf3609b412326c94eaccbf7847f8c1deedbb833
4
- data.tar.gz: 44bc5dce941bc5f6beea57991a111053f1e581e4
3
+ metadata.gz: 6842c06e714d3300101ef5b3877fda5ecf4cbcfc
4
+ data.tar.gz: 70c9c5777fbbe9a624759d6cacf458ee43feafb3
5
5
  SHA512:
6
- metadata.gz: 4823db913e5feea59eca3ddb81829654fb0e7136db64f0bae3a65adf811a9f2d632ec18c18223c5fdf47f2202e7350ad4cdc1a02f93dbef76d953d649a984e30
7
- data.tar.gz: 3c160cc610dfe3b31bcbc0e897353575769332b1eb617d4c0ca238a3ae4d2dc846c07b453c9fb1290fa173cb5f8cee74aaa428332cd4f1cc3fdd3cf1632a78e7
6
+ metadata.gz: 3b43647b8e6f9047e011dafbcd09275f032d47c77a209e526e923e72b9a8793798270da661f34e2ce5ecbbd6a69ae897f77f156b86b73f705b3ed76f4d7f6e6b
7
+ data.tar.gz: 67316d1fe5ad62990445672e1f0a227d913e9a7a47c0daf23dd5af46b3cc40483600ebc4c48db051f84d9e916568d949c6fe252e4544ff5de569524caea5db76
data/bin/crunch-job CHANGED
@@ -86,6 +86,7 @@ use POSIX ':sys_wait_h';
86
86
  use POSIX qw(strftime);
87
87
  use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
88
88
  use Arvados;
89
+ use Cwd qw(realpath);
89
90
  use Data::Dumper;
90
91
  use Digest::MD5 qw(md5_hex);
91
92
  use Getopt::Long;
@@ -197,6 +198,16 @@ $Job->{'runtime_constraints'} ||= {};
197
198
  $Job->{'runtime_constraints'}->{'max_tasks_per_node'} ||= 0;
198
199
  my $max_ncpus = $Job->{'runtime_constraints'}->{'max_tasks_per_node'};
199
200
 
201
+ my $gem_versions = `gem list --quiet arvados-cli 2>/dev/null`;
202
+ if ($? == 0 && $gem_versions =~ m{^arvados-cli \(([^)]*)\)}m) {
203
+ # Log only the version list captured from the exact "arvados-cli (...)"
204
+ # line; empty, multi-line or unrelated `gem list` output is ignored.
205
+ $gem_versions = " with arvados-cli Gem version(s) $1";
206
+ } else {
207
+ $gem_versions = "";
208
+ }
209
+ Log(undef,
210
+ "running from " . ((-e $0) ? realpath($0) : "stdin") . $gem_versions);
200
211
 
201
212
  Log (undef, "check slurm allocation");
202
213
  my @slot;
@@ -334,13 +345,9 @@ if (!$have_slurm)
334
345
  must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
335
346
  }
336
347
 
337
-
338
- my $build_script;
339
- do {
340
- local $/ = undef;
341
- $build_script = <DATA>;
342
- };
348
+ my $build_script = handle_readall(\*DATA);
343
349
  my $nodelist = join(",", @node);
350
+ my $git_tar_count = 0;
344
351
 
345
352
  if (!defined $no_clear_tmp) {
346
353
  # Clean out crunch_tmp/work, crunch_tmp/opt, crunch_tmp/src*
@@ -362,8 +369,51 @@ if (!defined $no_clear_tmp) {
362
369
  Log (undef, "Cleanup command exited ".exit_status_s($?));
363
370
  }
364
371
 
372
+ # If this job requires a Docker image, install that.
373
+ my $docker_bin = "/usr/bin/docker.io";
374
+ my ($docker_locator, $docker_stream, $docker_hash);
375
+ if ($docker_locator = $Job->{docker_image_locator}) {
376
+ ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
377
+ if (!$docker_hash)
378
+ {
379
+ croak("No Docker image hash found from locator $docker_locator");
380
+ }
381
+ $docker_stream =~ s/^\.//; # drop a leading "." from the stream name
382
+ my $docker_install_script = qq{
383
+ if ! $docker_bin images -q --no-trunc | grep -qxF \Q$docker_hash\E; then
384
+ arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
385
+ fi
386
+ };
387
+ defined(my $docker_pid = fork()) or croak("Failed to fork Docker install process: $!"); # undef == 0 would send the parent into the child branch
388
+ if ($docker_pid == 0)
389
+ {
390
+ srun (["srun", "--nodelist=" . join(',', @node)],
391
+ ["/bin/sh", "-ec", $docker_install_script]);
392
+ exit ($?);
393
+ }
394
+ while (1)
395
+ {
396
+ last if $docker_pid == waitpid (-1, WNOHANG); # non-blocking poll; stop once the install child is reaped
397
+ freeze_if_want_freeze ($docker_pid);
398
+ select (undef, undef, undef, 0.1); # sleep 0.1s between polls
399
+ }
400
+ if ($? != 0)
401
+ {
402
+ croak("Installing Docker image from $docker_locator exited "
403
+ .exit_status_s($?));
404
+ }
405
+
406
+ if ($Job->{arvados_sdk_version}) {
407
+ # The job also specifies an Arvados SDK version. Add the SDKs to the
408
+ # tar file for the build script to install.
409
+ Log(undef, sprintf("Packing Arvados SDK version %s for installation",
410
+ $Job->{arvados_sdk_version}));
411
+ add_git_archive("git", "--git-dir=$git_dir", "archive",
412
+ "--prefix=.arvados.sdk/",
413
+ $Job->{arvados_sdk_version}, "sdk");
414
+ }
415
+ }
365
416
 
366
- my $git_archive;
367
417
  if (!defined $git_dir && $Job->{'script_version'} =~ m{^/}) {
368
418
  # If script_version looks like an absolute path, *and* the --git-dir
369
419
  # argument was not given -- which implies we were not invoked by
@@ -517,12 +567,10 @@ else {
517
567
  }
518
568
 
519
569
  $ENV{"CRUNCH_SRC_COMMIT"} = $commit;
520
- $git_archive = `$gitcmd archive ''\Q$commit\E`;
521
- if ($?) {
522
- croak("Error: $gitcmd archive exited ".exit_status_s($?));
523
- }
570
+ add_git_archive("$gitcmd archive ''\Q$commit\E");
524
571
  }
525
572
 
573
+ my $git_archive = combined_git_archive();
526
574
  if (!defined $git_archive) {
527
575
  Log(undef, "Skip install phase (no git archive)");
528
576
  if ($have_slurm) {
@@ -552,48 +600,10 @@ else {
552
600
  }
553
601
  my $install_exited = $?;
554
602
  Log (undef, "Install script exited ".exit_status_s($install_exited));
555
- exit (1) if $install_exited != 0;
556
- }
557
-
558
- if (!$have_slurm)
559
- {
560
- # Grab our lock again (we might have deleted and re-created CRUNCH_TMP above)
561
- must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
562
- }
563
-
564
- # If this job requires a Docker image, install that.
565
- my $docker_bin = "/usr/bin/docker.io";
566
- my ($docker_locator, $docker_stream, $docker_hash);
567
- if ($docker_locator = $Job->{docker_image_locator}) {
568
- ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
569
- if (!$docker_hash)
570
- {
571
- croak("No Docker image hash found from locator $docker_locator");
572
- }
573
- $docker_stream =~ s/^\.//;
574
- my $docker_install_script = qq{
575
- if ! $docker_bin images -q --no-trunc | grep -qxF \Q$docker_hash\E; then
576
- arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
577
- fi
578
- };
579
- my $docker_pid = fork();
580
- if ($docker_pid == 0)
581
- {
582
- srun (["srun", "--nodelist=" . join(',', @node)],
583
- ["/bin/sh", "-ec", $docker_install_script]);
584
- exit ($?);
585
- }
586
- while (1)
587
- {
588
- last if $docker_pid == waitpid (-1, WNOHANG);
589
- freeze_if_want_freeze ($docker_pid);
590
- select (undef, undef, undef, 0.1);
591
- }
592
- if ($? != 0)
593
- {
594
- croak("Installing Docker image from $docker_locator exited "
595
- .exit_status_s($?));
603
+ foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
604
+ unlink($tar_filename);
596
605
  }
606
+ exit (1) if $install_exited != 0;
597
607
  }
598
608
 
599
609
  foreach (qw (script script_version script_parameters runtime_constraints))
@@ -1715,17 +1725,87 @@ sub exit_status_s {
1715
1725
  return $s;
1716
1726
  }
1717
1727
 
1728
+ sub handle_readall {
1729
+ # Slurp helper: takes a glob reference to an open file handle and returns
1730
+ # everything that remains to be read from it as one string.
1731
+ my ($handle) = @_;
1732
+ local $/;
1733
+ return readline($handle);
1734
+ }
1735
+
1736
+ sub tar_filename_n {
1737
+ my ($archive_number) = @_; # which saved git archive to name
1738
+ return sprintf("%s/git.%s.%d.tar", $ENV{CRUNCH_TMP}, $job_id, $archive_number);
1739
+ }
1740
+
1741
+ sub add_git_archive {
1742
+ # Pass in a git archive command as a string or list, a la system().
1743
+ # Its output is saved to the next numbered tar file (see tar_filename_n) so
1744
+ # it can be included in the archive sent to the build script. Croaks on failure.
1745
+ my $git_input; # handle connected to the child's stdin (presumably IPC::Open2 semantics)
1746
+ $git_tar_count++; # claim the next archive number before opening its file
1747
+ if (!open(GIT_ARCHIVE, ">", tar_filename_n($git_tar_count))) {
1748
+ croak("Failed to save git archive: $!");
1749
+ }
1750
+ my $git_pid = open2(">&GIT_ARCHIVE", $git_input, @_); # ">&" form: the child writes straight into the tar file
1751
+ close($git_input); # the command is given no input
1752
+ waitpid($git_pid, 0); # blocks until the command exits; sets $?
1753
+ close(GIT_ARCHIVE);
1754
+ if ($?) {
1755
+ croak("Failed to save git archive: git exited " . exit_status_s($?));
1756
+ }
1757
+ }
1758
+
1759
+ sub combined_git_archive {
1760
+ # Append every saved tar archive after the first onto the first one (using
1761
+ # "tar -A"), then slurp that merged file and return it as one string.
1762
+ # Gives back undef when no archive has been saved at all.
1763
+ return undef if ($git_tar_count < 1);
1764
+ my $merged_tar = tar_filename_n(1);
1765
+ my @extra_tars = map { tar_filename_n($_); } (2..$git_tar_count);
1766
+ for my $extra_tar (@extra_tars) {
1767
+ my $tar_status = system("tar", "-Af", $merged_tar, $extra_tar);
1768
+ next if ($tar_status == 0);
1769
+ # tar failed: report its wait status and croak.
1770
+ croak("Error preparing build archive: tar -A exited " .
1771
+ exit_status_s($tar_status));
1772
+ }
1773
+ # The bareword handle is handed to handle_readall by glob reference.
1774
+ open(GIT_TAR, "<", $merged_tar)
1775
+ or croak("Could not open build archive: $!");
1776
+ my $archive_bytes = handle_readall(\*GIT_TAR);
1777
+ close(GIT_TAR);
1778
+ return $archive_bytes;
1779
+ }
1780
+
1718
1781
  __DATA__
1719
1782
  #!/usr/bin/perl
1720
-
1721
- # checkout-and-build
1783
+ #
1784
+ # This is crunch-job's internal dispatch script. crunch-job running on the API
1785
+ # server invokes this script on individual compute nodes, or localhost if we're
1786
+ # running a job locally. It gets called in two modes:
1787
+ #
1788
+ # * No arguments: Installation mode. Read a tar archive from the DATA
1789
+ # file handle; it includes the Crunch script's source code, and
1790
+ # maybe SDKs as well. Those should be installed in the proper
1791
+ # locations. This runs outside of any Docker container, so don't try to
1792
+ # introspect Crunch's runtime environment.
1793
+ #
1794
+ # * With arguments: Crunch script run mode. This script should set up the
1795
+ # environment, then run the command specified in the arguments. This runs
1796
+ # inside any Docker container.
1722
1797
 
1723
1798
  use Fcntl ':flock';
1724
1799
  use File::Path qw( make_path remove_tree );
1800
+ use POSIX qw(getcwd);
1801
+
1802
+ # Map SDK subdirectories to the path environments they belong to.
1803
+ my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");
1725
1804
 
1726
1805
  my $destdir = $ENV{"CRUNCH_SRC"};
1727
1806
  my $commit = $ENV{"CRUNCH_SRC_COMMIT"};
1728
1807
  my $repo = $ENV{"CRUNCH_SRC_URL"};
1808
+ my $install_dir = $ENV{"CRUNCH_INSTALL"} || (getcwd() . "/opt");
1729
1809
  my $job_work = $ENV{"JOB_WORK"};
1730
1810
  my $task_work = $ENV{"TASK_WORK"};
1731
1811
 
@@ -1740,43 +1820,110 @@ if ($task_work) {
1740
1820
  remove_tree($task_work, {keep_root => 1});
1741
1821
  }
1742
1822
 
1743
- my @git_archive_data = <DATA>;
1744
- if (!@git_archive_data) {
1745
- # Nothing to extract -> nothing to install.
1746
- run_argv_and_exit();
1823
+ open(STDOUT_ORIG, ">&", STDOUT);
1824
+ open(STDERR_ORIG, ">&", STDERR);
1825
+ open(STDOUT, ">>", "$destdir.log");
1826
+ open(STDERR, ">&", STDOUT);
1827
+
1828
+ ### Crunch script run mode
1829
+ if (@ARGV) {
1830
+ # We want to do routine logging during task 0 only. This gives the user
1831
+ # the information they need, but avoids repeating the information for every
1832
+ # task.
1833
+ my $Log;
1834
+ if ($ENV{TASK_SEQUENCE} eq "0") {
1835
+ $Log = sub {
1836
+ my $msg = shift; # printf-style format; remaining args fill it in
1837
+ printf STDERR_ORIG "[Crunch] $msg\n", @_;
1838
+ };
1839
+ } else {
1840
+ $Log = sub { };
1841
+ }
1842
+
1843
+ my $python_src = "$install_dir/python";
1844
+ my $venv_dir = "$job_work/.arvados.venv";
1845
+ my $venv_built = -e "$venv_dir/bin/activate"; # true if a virtualenv is already in place
1846
+ if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) {
1847
+ shell_or_die("virtualenv", "--quiet", "--system-site-packages",
1848
+ "--python=python2.7", $venv_dir);
1849
+ shell_or_die("$venv_dir/bin/pip", "--quiet", "install", $python_src);
1850
+ $venv_built = 1;
1851
+ $Log->("Built Python SDK virtualenv");
1852
+ }
1853
+
1854
+ if ($venv_built) {
1855
+ $Log->("Running in Python SDK virtualenv");
1856
+ my $orig_argv = join(" ", map { quotemeta($_); } @ARGV); # backslash-escape each argument for the sh command line
1857
+ @ARGV = ("/bin/sh", "-ec",
1858
+ ". \Q$venv_dir/bin/activate\E; exec $orig_argv");
1859
+ } elsif (-d $python_src) {
1860
+ $Log->("Warning: virtualenv not found inside Docker container default " .
1861
+ "\$PATH. Can't install Python SDK."); # "." concatenates; "+" would add numerically and log "0"
1862
+ }
1863
+
1864
+ while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
1865
+ my $sdk_path = "$install_dir/$sdk_dir";
1866
+ if (-d $sdk_path) {
1867
+ if ($ENV{$sdk_envkey}) {
1868
+ $ENV{$sdk_envkey} = "$sdk_path:" . $ENV{$sdk_envkey}; # prepend so the SDK path is searched first
1869
+ } else {
1870
+ $ENV{$sdk_envkey} = $sdk_path;
1871
+ }
1872
+ $Log->("Arvados SDK added to %s", $sdk_envkey);
1873
+ }
1874
+ }
1875
+
1876
+ close(STDOUT);
1877
+ close(STDERR);
1878
+ open(STDOUT, ">&", STDOUT_ORIG); # restore the saved output handles before exec
1879
+ open(STDERR, ">&", STDERR_ORIG);
1880
+ exec(@ARGV);
1881
+ die "Cannot exec `@ARGV`: $!";
1747
1882
  }
1748
1883
 
1884
+ ### Installation mode
1749
1885
  open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
1750
1886
  flock L, LOCK_EX;
1751
1887
  if (readlink ("$destdir.commit") eq $commit && -d $destdir) {
1752
1888
  # This version already installed -> nothing to do.
1753
- run_argv_and_exit();
1889
+ exit(0);
1754
1890
  }
1755
1891
 
1756
1892
  unlink "$destdir.commit";
1757
- open STDERR_ORIG, ">&STDERR";
1758
- open STDOUT, ">", "$destdir.log";
1759
- open STDERR, ">&STDOUT";
1760
-
1761
1893
  mkdir $destdir;
1762
- open TARX, "|-", "tar", "-C", $destdir, "-xf", "-";
1763
- print TARX @git_archive_data;
1894
+ open TARX, "|-", "tar", "-xC", $destdir;
1895
+ {
1896
+ local $/ = undef;
1897
+ print TARX <DATA>;
1898
+ }
1764
1899
  if(!close(TARX)) {
1765
- die "'tar -C $destdir -xf -' exited $?: $!";
1900
+ die "'tar -xC $destdir' exited $?: $!";
1766
1901
  }
1767
1902
 
1768
- my $pwd;
1769
- chomp ($pwd = `pwd`);
1770
- my $install_dir = $ENV{"CRUNCH_INSTALL"} || "$pwd/opt";
1771
1903
  mkdir $install_dir;
1772
1904
 
1773
- for my $src_path ("$destdir/arvados/sdk/python") {
1774
- if (-d $src_path) {
1775
- shell_or_die ("virtualenv", $install_dir);
1776
- shell_or_die ("cd $src_path && ./build.sh && $install_dir/bin/python setup.py install");
1905
+ my $sdk_root = "$destdir/.arvados.sdk/sdk";
1906
+ if (-d $sdk_root) {
1907
+ foreach my $sdk_lang (("python",
1908
+ map { (split /\//, $_, 2)[0]; } keys(%SDK_ENVVARS))) {
1909
+ if (-d "$sdk_root/$sdk_lang") {
1910
+ if (!rename("$sdk_root/$sdk_lang", "$install_dir/$sdk_lang")) {
1911
+ die "Failed to install $sdk_lang SDK: $!";
1912
+ }
1913
+ }
1777
1914
  }
1778
1915
  }
1779
1916
 
1917
+ my $python_dir = "$install_dir/python";
1918
+ if ((-d $python_dir) and can_run("python2.7") and
1919
+ (system("python2.7", "$python_dir/setup.py", "--quiet", "egg_info") != 0)) {
1920
+ # egg_info failed, probably when it asked git for a build tag.
1921
+ # Specify no build tag.
1922
+ open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
1923
+ print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
1924
+ close($pysdk_cfg);
1925
+ }
1926
+
1780
1927
  if (-e "$destdir/crunch_scripts/install") {
1781
1928
  shell_or_die ("$destdir/crunch_scripts/install", $install_dir);
1782
1929
  } elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
@@ -1794,16 +1941,12 @@ if ($commit) {
1794
1941
 
1795
1942
  close L;
1796
1943
 
1797
- run_argv_and_exit();
1798
-
1799
- sub run_argv_and_exit
1800
- {
1801
- if (@ARGV) {
1802
- exec(@ARGV);
1803
- die "Cannot exec `@ARGV`: $!";
1804
- } else {
1805
- exit 0;
1806
- }
1944
+ sub can_run {
1945
+ my $command_name = shift; # program name to look up with `which`
1946
+ open(my $which, "-|", "which", $command_name) or return 0; # `which` could not be started: do not test a stale $?
1947
+ while (<$which>) { } # drain the output; only the exit status matters
1948
+ close($which); # closing the pipe waits for `which` and sets $?
1949
+ return ($? == 0);
1807
1950
  }
1808
1951
 
1809
1952
  sub shell_or_die
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20141205182512
4
+ version: 0.1.20141209151444
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-05 00:00:00.000000000 Z
11
+ date: 2014-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: arvados
@@ -178,7 +178,7 @@ dependencies:
178
178
  - - "<"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.0.0
181
- description: Arvados command line tools, git commit 5794ce4f00cd583eee74719118516c0c993fca1c
181
+ description: Arvados command line tools, git commit d980949ac4c092a44f3b64fb7cbd4a27a49256fb
182
182
  email: gem-dev@curoverse.com
183
183
  executables:
184
184
  - arv