neptune 0.1.1 → 0.1.2

Files changed (103)
  1. data/README +7 -4
  2. data/doc/AppControllerClient.html +12 -4
  3. data/doc/CommonFunctions.html +55 -42
  4. data/doc/Kernel.html +187 -0
  5. data/doc/LICENSE.html +2 -0
  6. data/doc/Object.html +488 -198
  7. data/doc/README.html +26 -5
  8. data/doc/bin/neptune.html +1 -1
  9. data/doc/created.rid +6 -6
  10. data/doc/index.html +20 -2
  11. data/doc/lib/app_controller_client_rb.html +2 -2
  12. data/doc/lib/common_functions_rb.html +2 -2
  13. data/doc/lib/neptune_rb.html +3 -1
  14. data/lib/app_controller_client.rb +2 -2
  15. data/lib/common_functions.rb +50 -24
  16. data/lib/neptune.rb +224 -159
  17. data/samples/appscale/add_appserver.rb +10 -0
  18. data/samples/appscale/add_database.rb +9 -0
  19. data/samples/appscale/add_loadbalancer.rb +9 -0
  20. data/samples/appscale/add_slave.rb +9 -0
  21. data/samples/c/compile_helloworld.rb +10 -0
  22. data/samples/c/helloworld/helloworld.c +6 -0
  23. data/samples/erlang/compile_erlang_ring.rb +10 -0
  24. data/samples/erlang/get_erlang_output.rb +8 -0
  25. data/samples/erlang/ring/Makefile +3 -0
  26. data/samples/erlang/ring/ring.erl +90 -0
  27. data/samples/erlang/run_erlang_ring.rb +6 -0
  28. data/samples/go/compile_hello.rb +10 -0
  29. data/samples/go/get_hello_output.rb +6 -0
  30. data/samples/go/hello/hello.go +8 -0
  31. data/samples/go/put_input.rb +8 -0
  32. data/samples/go/run_hello.rb +9 -0
  33. data/samples/mapreduce/expected-output.txt +7078 -0
  34. data/samples/mapreduce/get_mapreduce_output.rb +4 -0
  35. data/samples/mapreduce/hadoop-0.20.0-examples.jar +0 -0
  36. data/samples/mapreduce/input-10 +64 -0
  37. data/samples/mapreduce/input-30 +64 -0
  38. data/samples/mapreduce/input-7 +4 -0
  39. data/samples/mapreduce/map.rb +48 -0
  40. data/samples/mapreduce/reduce.rb +48 -0
  41. data/samples/mapreduce/run_java_mr.rb +14 -0
  42. data/samples/mapreduce/run_mapreduce.rb +13 -0
  43. data/samples/mapreduce/the-end-of-time.txt +11256 -0
  44. data/samples/mpi/Makefile +22 -0
  45. data/samples/mpi/MpiQueen +0 -0
  46. data/samples/mpi/compile_mpi_ring.rb +10 -0
  47. data/samples/mpi/compile_x10_nqueens.rb +8 -0
  48. data/samples/mpi/cpi +0 -0
  49. data/samples/mpi/get_mpi_output.rb +5 -0
  50. data/samples/mpi/get_ring_output.rb +5 -0
  51. data/samples/mpi/hw2.c +205 -0
  52. data/samples/mpi/hw2harness.c +84 -0
  53. data/samples/mpi/hw2harness.h +45 -0
  54. data/samples/mpi/powermethod +0 -0
  55. data/samples/mpi/ring/Makefile +2 -0
  56. data/samples/mpi/ring/Ring.c +76 -0
  57. data/samples/mpi/run_mpi_cpi.rb +10 -0
  58. data/samples/mpi/run_mpi_nqueens.np +6 -0
  59. data/samples/mpi/run_mpi_powermethod.rb +8 -0
  60. data/samples/mpi/run_mpi_ring.rb +12 -0
  61. data/samples/r/compile_hello.rb +10 -0
  62. data/samples/r/get_hello_output.rb +6 -0
  63. data/samples/r/hello/hello.r +1 -0
  64. data/samples/r/put_input.rb +8 -0
  65. data/samples/r/run_hello.rb +9 -0
  66. data/samples/upc/compile_upc_helloworld.rb +10 -0
  67. data/samples/upc/compile_upc_ring.rb +11 -0
  68. data/samples/upc/get_mpi_output.rb +8 -0
  69. data/samples/upc/helloworld/HelloWorld.c +9 -0
  70. data/samples/upc/helloworld/Makefile +3 -0
  71. data/samples/upc/ring/Makefile +3 -0
  72. data/samples/upc/ring/Ring.c +116 -0
  73. data/samples/upc/run_upc_helloworld.rb +12 -0
  74. data/samples/upc/run_upc_ring.rb +12 -0
  75. data/samples/x10/MyPowerMethod +0 -0
  76. data/samples/x10/MyPowerMethod.x10 +236 -0
  77. data/samples/x10/NQueensDist +0 -0
  78. data/samples/x10/NQueensDist.x10 +112 -0
  79. data/samples/x10/compile_x10_nqueens.rb +7 -0
  80. data/samples/x10/compile_x10_ring.rb +12 -0
  81. data/samples/x10/get_x10_output.rb +8 -0
  82. data/samples/x10/ring/Makefile +3 -0
  83. data/samples/x10/ring/Ring.x10 +28 -0
  84. data/samples/x10/ring/RingOld.x10 +68 -0
  85. data/samples/x10/run_x10_nqueens.rb +6 -0
  86. data/samples/x10/run_x10_powermethod.rb +7 -0
  87. data/samples/x10/run_x10_ring.rb +6 -0
  88. data/test/{tc_c.rb → integration/tc_c.rb} +2 -2
  89. data/test/{tc_dfsp.rb → integration/tc_dfsp.rb} +0 -0
  90. data/test/{tc_dwssa.rb → integration/tc_dwssa.rb} +0 -0
  91. data/test/{tc_erlang.rb → integration/tc_erlang.rb} +0 -0
  92. data/test/{tc_mapreduce.rb → integration/tc_mapreduce.rb} +0 -0
  93. data/test/{tc_mpi.rb → integration/tc_mpi.rb} +0 -0
  94. data/test/{tc_storage.rb → integration/tc_storage.rb} +0 -0
  95. data/test/{tc_upc.rb → integration/tc_upc.rb} +0 -0
  96. data/test/{tc_x10.rb → integration/tc_x10.rb} +0 -0
  97. data/test/{test_helper.rb → integration/test_helper.rb} +0 -0
  98. data/test/{ts_neptune.rb → integration/ts_neptune.rb} +2 -2
  99. data/test/unit/test_app_controller_client.rb +106 -0
  100. data/test/unit/test_common_functions.rb +106 -0
  101. data/test/unit/test_neptune.rb +208 -0
  102. data/test/unit/ts_all.rb +6 -0
  103. metadata +91 -15
data/doc/README.html CHANGED
@@ -66,6 +66,8 @@
 
  <li><a href="./CommonFunctions.html">CommonFunctions</a></li>
 
+ <li><a href="./Kernel.html">Kernel</a></li>
+
  <li><a href="./Object.html">Object</a></li>
 
  </ul>
@@ -119,7 +121,9 @@ in the test folder, with the standard naming convention</p>
  <p>cases for each type of job that Neptune offers. Before running ts_neptune,
  you should export the environment variable APPSCALE_HEAD_NODE, which should
  be set to the IP address of the AppScale machine that runs the Shadow
- daemon (a.k.a. the Master AppController).</p>
+ daemon (a.k.a. the Master AppController). Running generate_coverage.sh in
+ the top-level directory will run rcov and generate the coverage reports
+ automatically via unit tests.</p>
 
  <p>Developed by Chris Bunch as part of the AppScale project. See <a
  href="LICENSE.html">LICENSE</a> for the specifics of the New BSD License by
@@ -138,15 +142,32 @@ as adding capabilities for other types of computation. We would also like
  to refactor Neptune to use symbols instead of instance variables for
  running jobs: this will likely appear in a future release as well.</p>
 
- <p>An academic paper describing Neptune is in the works - check in for a link
- to that as it becomes available.</p>
+ <p>Our academic paper on Neptune won best paper at ACM ScienceCloud 2011!
+ &lt;a href=‘<a
+ href="http://www.neptune-lang.org/2011/6/Neptune-Picks-up-Best-Paper-at-ScienceCloud-2011">www.neptune-lang.org/2011/6/Neptune-Picks-up-Best-Paper-at-ScienceCloud-2011</a>’&gt;
+ Here’s a link&lt;/a&gt; to the abstract of the paper and the PDF.</p>
 
  <p>Version History:</p>
 
- <p>April 2, 2001 - 0.0.7 released, adding automatic test suite and many bug
+ <p>November 10, 2011 - 0.1.2 released, adding unit tests and refactoring all
+ around.</p>
+
+ <p>June 6, 2011 - 0.1.1 released, adding support for code written in Go and R</p>
+
+ <p>June 4, 2011 - 0.1.0 released, adding verbose / quiet options for users
+ wishing to suppress stdout from Neptune jobs.</p>
+
+ <p>May 28, 2011 - 0.0.9 released, adding generic SSA support for users wanting
+ to use StochKit and other SSA codes.</p>
+
+ <p>April 8, 2011 - 0.0.8 released, fixing MapReduce support for both regular
+ Hadoop and Hadoop Streaming. Also increased code coverage to cover a number
+ of failure scenarios.</p>
+
+ <p>April 2, 2011 - 0.0.7 released, adding automatic test suite and many bug
  fixes for all scenarios. rcov can also be used to generate test coverage
  information: current coverage stats can be found in coverage directory.
- mapreduce broken at the moment - will fix in next release</p>
+ MapReduce broken at the moment - will fix in next release</p>
 
  <p>March 28, 2011 - 0.0.6 released, adding support for input jobs, so users
  can place data in the datastore without having to run any computation</p>
data/doc/bin/neptune.html CHANGED
@@ -24,7 +24,7 @@
  <div id="metadata">
  <dl>
  <dt class="modified-date">Last Modified</dt>
- <dd class="modified-date">Fri Feb 04 19:49:21 -0800 2011</dd>
+ <dd class="modified-date">Sun May 15 22:52:56 -0700 2011</dd>
 
 
  <dt class="requires">Requires</dt>
data/doc/created.rid CHANGED
@@ -1,7 +1,7 @@
- Sat, 02 Apr 2011 09:03:26 -0700
- ./lib/common_functions.rb Wed, 30 Mar 2011 21:31:10 -0700
+ Thu, 10 Nov 2011 12:27:04 -0800
+ ./lib/common_functions.rb Wed, 26 Oct 2011 10:37:34 -0700
  ./LICENSE Thu, 27 Jan 2011 13:24:30 -0800
- ./lib/app_controller_client.rb Sun, 27 Mar 2011 23:37:41 -0700
- ./lib/neptune.rb Thu, 31 Mar 2011 10:11:31 -0700
- ./README Sat, 02 Apr 2011 09:03:23 -0700
- ./bin/neptune Fri, 04 Feb 2011 19:49:21 -0800
+ ./lib/app_controller_client.rb Tue, 25 Oct 2011 23:47:47 -0700
+ ./lib/neptune.rb Wed, 09 Nov 2011 10:54:28 -0800
+ ./README Thu, 10 Nov 2011 11:48:11 -0800
+ ./bin/neptune Sun, 15 May 2011 22:52:56 -0700
data/doc/index.html CHANGED
@@ -44,6 +44,8 @@
 
  <li class="module"><a href="CommonFunctions.html">CommonFunctions</a></li>
 
+ <li class="module"><a href="Kernel.html">Kernel</a></li>
+
  <li class="class"><a href="Object.html">Object</a></li>
 
  </ul>
@@ -63,12 +65,20 @@
 
  <li><a href="AppControllerClient.html#method-i-compile_code">#compile_code &mdash; AppControllerClient</a></li>
 
+ <li><a href="Object.html#method-i-compile_code">#compile_code &mdash; Object</a></li>
+
  <li><a href="Object.html#method-i-do_preprocessing">#do_preprocessing &mdash; Object</a></li>
 
  <li><a href="AppControllerClient.html#method-i-get_acl">#get_acl &mdash; AppControllerClient</a></li>
 
+ <li><a href="Object.html#method-i-get_input">#get_input &mdash; Object</a></li>
+
+ <li><a href="Object.html#method-i-get_job_data">#get_job_data &mdash; Object</a></li>
+
  <li><a href="AppControllerClient.html#method-i-get_output">#get_output &mdash; AppControllerClient</a></li>
 
+ <li><a href="Object.html#method-i-get_std_out_and_err">#get_std_out_and_err &mdash; Object</a></li>
+
  <li><a href="AppControllerClient.html#method-i-make_call">#make_call &mdash; AppControllerClient</a></li>
 
  <li><a href="Object.html#method-i-neptune">#neptune &mdash; Object</a></li>
@@ -77,16 +87,24 @@
 
  <li><a href="Object.html#method-i-preprocess_erlang">#preprocess_erlang &mdash; Object</a></li>
 
- <li><a href="Object.html#method-i-preprocess_mapreduce">#preprocess_mapreduce &mdash; Object</a></li>
-
  <li><a href="Object.html#method-i-preprocess_mpi">#preprocess_mpi &mdash; Object</a></li>
 
+ <li><a href="Object.html#method-i-preprocess_ssa">#preprocess_ssa &mdash; Object</a></li>
+
  <li><a href="AppControllerClient.html#method-i-put_input">#put_input &mdash; AppControllerClient</a></li>
 
+ <li><a href="Object.html#method-i-run_job">#run_job &mdash; Object</a></li>
+
  <li><a href="AppControllerClient.html#method-i-set_acl">#set_acl &mdash; AppControllerClient</a></li>
 
+ <li><a href="Kernel.html#method-i-shell">#shell &mdash; Kernel</a></li>
+
  <li><a href="AppControllerClient.html#method-i-start_neptune_job">#start_neptune_job &mdash; AppControllerClient</a></li>
 
+ <li><a href="Object.html#method-i-validate_storage_params">#validate_storage_params &mdash; Object</a></li>
+
+ <li><a href="Object.html#method-i-wait_for_compilation_to_finish">#wait_for_compilation_to_finish &mdash; Object</a></li>
+
  </ul>
 
  <div id="validator-badges">
data/doc/lib/app_controller_client_rb.html CHANGED
@@ -24,7 +24,7 @@
  <div id="metadata">
  <dl>
  <dt class="modified-date">Last Modified</dt>
- <dd class="modified-date">Sun Mar 27 23:37:41 -0700 2011</dd>
+ <dd class="modified-date">Tue Oct 25 23:47:47 -0700 2011</dd>
 
 
  <dt class="requires">Requires</dt>
@@ -50,7 +50,7 @@
  <div class="description">
  <h2>Description</h2>
 
- <p>Programmer: Chris Bunch</p>
+ <p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
 
  </div>
 
data/doc/lib/common_functions_rb.html CHANGED
@@ -24,7 +24,7 @@
  <div id="metadata">
  <dl>
  <dt class="modified-date">Last Modified</dt>
- <dd class="modified-date">Wed Mar 30 21:31:10 -0700 2011</dd>
+ <dd class="modified-date">Wed Oct 26 10:37:34 -0700 2011</dd>
 
 
  <dt class="requires">Requires</dt>
@@ -58,7 +58,7 @@
  <div class="description">
  <h2>Description</h2>
 
- <p>Programmer: Chris Bunch</p>
+ <p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
 
  </div>
 
data/doc/lib/neptune_rb.html CHANGED
@@ -24,7 +24,7 @@
  <div id="metadata">
  <dl>
  <dt class="modified-date">Last Modified</dt>
- <dd class="modified-date">Thu Mar 31 10:11:31 -0700 2011</dd>
+ <dd class="modified-date">Wed Nov 09 10:54:28 -0800 2011</dd>
 
 
  <dt class="requires">Requires</dt>
@@ -48,6 +48,8 @@
  <div class="description">
  <h2>Description</h2>
 
+ <p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
+
  </div>
 
  </div>
data/lib/app_controller_client.rb CHANGED
@@ -1,5 +1,5 @@
  #!/usr/bin/ruby -w
- # Programmer: Chris Bunch
+ # Programmer: Chris Bunch (cgb@cs.ucsb.edu)
 
  require 'openssl'
  require 'soap/rpc/driver'
@@ -16,7 +16,7 @@ NO_TIMEOUT = -1
  # platform (here, AppScale). This client is similar to that used in the AppScale
  # Tools, but with non-Neptune SOAP calls removed.
  class AppControllerClient
- attr_reader :conn, :ip, :secret
+ attr_accessor :conn, :ip, :secret
 
  # A constructor that requires both the IP address of the machine to communicate
  # with as well as the secret (string) needed to perform communication.
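Note: the attr_reader to attr_accessor change above exists so that the new unit tests (data/test/unit/test_app_controller_client.rb) can swap out the SOAP driver held in @conn. The sketch below is hypothetical, not the gem's own test; it assumes data/lib is on the load path and only demonstrates that the accessor is now writable.

    #!/usr/bin/ruby -w
    require 'test/unit'
    require 'app_controller_client'   # assumes data/lib is on $LOAD_PATH

    class TestAppControllerClient < Test::Unit::TestCase
      def test_conn_can_be_replaced_with_a_stub
        # Constructing the client sets up a SOAP driver for the given IP;
        # no RPC is issued until one of its methods is actually called.
        client = AppControllerClient.new("192.168.1.2", "secret")

        stub = Object.new        # stands in for the SOAP driver
        client.conn = stub       # possible only with attr_accessor
        assert_equal(stub, client.conn)
      end
    end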
data/lib/common_functions.rb CHANGED
@@ -1,5 +1,5 @@
  #!/usr/bin/ruby -w
- # Programmer: Chris Bunch
+ # Programmer: Chris Bunch (cgb@cs.ucsb.edu)
 
  require 'digest/sha1'
  require 'fileutils'
@@ -9,6 +9,12 @@ require 'socket'
  require 'timeout'
  require 'yaml'
 
+ module Kernel
+ def shell(command)
+ return `#{command}`
+ end
+ end
+
  # A helper module that aggregates functions that are not part of Neptune's
  # core functionality. Specifically, this module contains methods to scp
  # files to other machines and the ability to read YAML files, which are
@@ -21,11 +27,17 @@ module CommonFunctions
  # placed, and the name of the key to use. The keyname is typically
  # specified by the Neptune job given, but defaults to ''appscale''
  # if not provided.
- def self.scp_to_shadow(local_file_loc, remote_file_loc, keyname, is_dir=false)
- shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
- ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
+ def self.scp_to_shadow(local_file_loc,
+ remote_file_loc,
+ keyname,
+ is_dir=false,
+ file=File,
+ get_from_yaml=CommonFunctions.method(:get_from_yaml),
+ scp_file=CommonFunctions.method(:scp_file))
 
- self.scp_file(local_file_loc, remote_file_loc, shadow_ip, ssh_key, is_dir)
+ shadow_ip = get_from_yaml.call(keyname, :shadow, file)
+ ssh_key = file.expand_path("~/.appscale/#{keyname}.key")
+ scp_file.call(local_file_loc, remote_file_loc, shadow_ip, ssh_key, is_dir)
  end
 
  # Performs the actual remote copying of files: given the IP address
@@ -34,33 +46,39 @@ module CommonFunctions
  # if the network is down, if a bad keyname is provided, or if the
  # wrong IP is given. If the user specifies that the file to copy is
  # actually a directory, we append the -r flag to scp as well.
- def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc, is_dir=false)
+ def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
+ is_dir=false, file=File, fileutils=FileUtils, kernel=Kernel)
  cmd = ""
- local_file_loc = File.expand_path(local_file_loc)
+ local_file_loc = file.expand_path(local_file_loc)
 
  ssh_args = "-o StrictHostkeyChecking=no 2>&1"
  ssh_args << " -r " if is_dir
 
- public_key_loc = File.expand_path(public_key_loc)
+ public_key_loc = file.expand_path(public_key_loc)
  cmd = "scp -i #{public_key_loc} #{ssh_args} #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"
  cmd << "; echo $? >> ~/.appscale/retval"
 
- retval_loc = File.expand_path("~/.appscale/retval")
- FileUtils.rm_f(retval_loc)
+ retval_loc = file.expand_path("~/.appscale/retval")
+ fileutils.rm_f(retval_loc)
 
  begin
- Timeout::timeout(-1) { `#{cmd}` }
+ Timeout::timeout(-1) { kernel.shell("#{cmd}") }
  rescue Timeout::Error
- abort("Remotely copying over files failed. Is the destination machine on and reachable from this computer? We tried the following command:\n\n#{cmd}")
+ abort("Remotely copying over files failed. Is the destination machine" +
+ " on and reachable from this computer? We tried the following" +
+ " command:\n\n#{cmd}")
  end
 
  loop {
- break if File.exists?(retval_loc)
+ break if file.exists?(retval_loc)
  sleep(5)
  }
 
- retval = (File.open(retval_loc) { |f| f.read }).chomp
- abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is your environment set up properly?") if retval != "0"
+ retval = (file.open(retval_loc) { |f| f.read }).chomp
+ if retval != "0"
+ abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is " +
+ "your environment set up properly?")
+ end
  return cmd
  end
 
@@ -70,16 +88,20 @@ module CommonFunctions
  # method aborts if the value doesn't exist or the YAML file is malformed.
  # If the required flag is set to false, it returns nil in either scenario
  # instead.
- def self.get_from_yaml(keyname, tag, required=true)
- location_file = File.expand_path("~/.appscale/locations-#{keyname}.yaml")
+ def self.get_from_yaml(keyname, tag, required=true, file=File, yaml=YAML)
+ location_file = file.expand_path("~/.appscale/locations-#{keyname}.yaml")
 
- abort("An AppScale instance is not currently running with the provided keyname, \"#{keyname}\".") unless File.exists?(location_file)
+ if !file.exists?(location_file)
+ abort("An AppScale instance is not currently running with the provided" +
+ " keyname, \"#{keyname}\".")
+ end
 
  begin
- tree = YAML.load_file(location_file)
+ tree = yaml.load_file(location_file)
  rescue ArgumentError
  if required
- abort("The yaml file you provided was malformed. Please correct any errors in it and try again.")
+ abort("The yaml file you provided was malformed. Please correct any" +
+ " errors in it and try again.")
  else
  return nil
  end
@@ -87,15 +109,19 @@ module CommonFunctions
 
  value = tree[tag]
 
- bad_yaml_format_msg = "The file #{location_file} is in the wrong format and doesn't contain a #{tag} tag. Please make sure the file is in the correct format and try again"
- abort(bad_yaml_format_msg) if value.nil? and required
+ if value.nil? and required
+ abort("The file #{location_file} is in the wrong format and doesn't" +
+ " contain a #{tag} tag. Please make sure the file is in the correct" +
+ " format and try again.")
+ end
+
  return value
  end
 
  # Returns the secret key needed for communication with AppScale's
  # Shadow node. This method is a nice frontend to the get_from_yaml
  # function, as the secret is stored in a YAML file.
- def self.get_secret_key(keyname, required=true)
- return CommonFunctions.get_from_yaml(keyname, :secret)
+ def self.get_secret_key(keyname, required=true, file=File, yaml=YAML)
+ return CommonFunctions.get_from_yaml(keyname, :secret, required, file, yaml)
  end
  end
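Note: the extra file, yaml, fileutils, and kernel parameters added throughout common_functions.rb, together with the Kernel#shell wrapper, are dependency-injection hooks for the new unit tests in data/test/unit/test_common_functions.rb. The sketch below is hypothetical and not the gem's own test: it assumes data/lib is on the load path and uses hand-rolled fake classes to show how get_from_yaml can now be exercised without touching the real filesystem.

    #!/usr/bin/ruby -w
    require 'test/unit'
    require 'common_functions'   # assumes data/lib is on $LOAD_PATH

    # Fake stand-ins for File and YAML; only the methods get_from_yaml calls.
    class FakeFile
      def self.expand_path(path); path; end
      def self.exists?(path); true; end
    end

    class FakeYAML
      def self.load_file(path); { :shadow => "192.168.1.2" }; end
    end

    class TestCommonFunctions < Test::Unit::TestCase
      def test_get_from_yaml_reads_the_shadow_ip
        shadow = CommonFunctions.get_from_yaml("appscale", :shadow, true,
                                               FakeFile, FakeYAML)
        assert_equal("192.168.1.2", shadow)
      end
    end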
data/lib/neptune.rb CHANGED
@@ -1,4 +1,5 @@
  #!/usr/bin/ruby
+ # Programmer: Chris Bunch (cgb@cs.ucsb.edu)
 
  require 'app_controller_client'
  require 'common_functions'
@@ -39,6 +40,8 @@ NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
  # support. In the future, it is likely that the only exposed / monkey-patched
  # method should be job, while the others could probably be folded into either
  # a Neptune-specific class or into CommonFunctions.
+ # TODO(cbunch): This doesn't look like it does anything - run the integration
+ # test and confirm one way or the other.
  class Object
  end
 
@@ -48,7 +51,9 @@ end
  # on the type of the job that the user has asked to run.
  def do_preprocessing(job_data)
  job_type = job_data["@type"]
- return unless NEED_PREPROCESSING.include?(job_type)
+ if !NEED_PREPROCESSING.include?(job_type)
+ return
+ end
 
  preprocess = "preprocess_#{job_type}".to_sym
  send(preprocess, job_data)
@@ -57,10 +62,9 @@ end
  # This preprocessing method copies over the user's code to the
  # Shadow node so that it can be compiled there. A future version
  # of this method may also copy over libraries as well.
- def preprocess_compile(job_data)
- verbose = job_data["@verbose"]
+ def preprocess_compile(job_data, shell=Kernel.method(:`))
  code = File.expand_path(job_data["@code"])
- unless File.exists?(code)
+ if !File.exists?(code)
  abort("The source file #{code} does not exist.")
  end
 
@@ -71,94 +75,88 @@ def preprocess_compile(job_data)
 
  ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no root@#{shadow_ip}"
  remove_dir = "ssh #{ssh_args} 'rm -rf #{dest}' 2>&1"
- puts remove_dir if verbose
- `#{remove_dir}`
+ puts remove_dir
+ shell.call(remove_dir)
 
  CommonFunctions.scp_to_shadow(code, dest, keyname, is_dir=true)
 
  job_data["@code"] = dest
  end
 
- def preprocess_erlang(job_data)
- source_code = File.expand_path(job_data["@code"])
- unless File.exists?(source_code)
- file_not_found = "The specified code, #{job_data['@code']}," +
- " didn't exist. Please specify one that exists and try again"
- abort(file_not_found)
+ def preprocess_erlang(job_data, file=File, common_functions=CommonFunctions)
+ if !job_data["@code"]
+ abort("When running Erlang jobs, :code must be specified.")
+ end
+
+ source_code = file.expand_path(job_data["@code"])
+ if !file.exists?(source_code)
+ abort("The specified code, #{job_data['@code']}," +
+ " didn't exist. Please specify one that exists and try again")
  end
  dest_code = "/tmp/"
 
  keyname = job_data["@keyname"]
- CommonFunctions.scp_to_shadow(source_code, dest_code, keyname)
+ common_functions.scp_to_shadow(source_code, dest_code, keyname)
  end
 
- # This preprocessing method copies over the user's MPI
- # code to the master node in AppScale - this node will
- # then copy it to whoever will run the MPI job.
+ # This preprocessing method verifies that the user specified the number of nodes
+ # to use. If they also specified the number of processes to use, we also verify
+ # that this value is at least as many as the number of nodes (that is, nodes
+ # can't be underprovisioned in MPI).
  def preprocess_mpi(job_data)
- verbose = job_data["@verbose"]
+ if !job_data["@nodes_to_use"]
+ abort("When running MPI jobs, :nodes_to_use must be specified.")
+ end
+
+ if !job_data["@procs_to_use"]
+ abort("When running MPI jobs, :procs_to_use must be specified.")
+ end
+
  if job_data["@procs_to_use"]
  p = job_data["@procs_to_use"]
  n = job_data["@nodes_to_use"]
  if p < n
- not_enough_procs = "When specifying both :procs_to_use and :nodes_to_use" +
+ abort("When specifying both :procs_to_use and :nodes_to_use" +
  ", :procs_to_use must be at least as large as :nodes_to_use. Please " +
  "change this and try again. You specified :procs_to_use = #{p} and" +
- ":nodes_to_use = #{n}."
- abort(not_enough_procs)
+ ":nodes_to_use = #{n}.")
  end
  end
 
- # TODO: verify that source_code is in repo
+ return job_data
  end
 
+ # This preprocessing method verifies that the user specified the number of
+ # trajectories to run, via either :trajectories or :simulations. Both should
+ # not be specified - only one or the other, and regardless of which they
+ # specify, convert it to be :trajectories.
  def preprocess_ssa(job_data)
+ if job_data["@simulations"] and job_data["@trajectories"]
+ abort("Both :simulations and :trajectories cannot be specified - use one" +
+ " or the other.")
+ end
+
  if job_data["@simulations"]
  job_data["@trajectories"] = job_data["@simulations"]
+ job_data.delete("@simulations")
  end
 
- unless job_data["@trajectories"]
+ if !job_data["@trajectories"]
  abort(":trajectories needs to be specified when running ssa jobs")
  end
- end
-
- # TODO: actually use me!
- #def validate_args(list)
- # list.each do |item|
- # val = instance_variable_get("@#{item}".to_sym)
- # abort("FATAL: #{item} was not defined") if val.nil?
- # end
- #end
 
- # This method is the heart of Neptune - here, we take
- # blocks of code that the user has written and convert them
- # into HPC job requests. At a high level, the user can
- # request to run a job, retrieve a job's output, or
- # modify the access policy (ACL) for the output of a
- # job. By default, job data is private, but a Neptune
- # job can be used to set it to public later (and
- # vice-versa).
- def neptune(params)
- verbose = params[:verbose]
-
- puts "Received a request to run a job." if verbose
- puts params[:type] if verbose
-
- keyname = params[:keyname] || "appscale"
-
- shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
- secret = CommonFunctions.get_secret_key(keyname)
- controller = AppControllerClient.new(shadow_ip, secret)
- ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
+ return job_data
+ end
 
+ def get_job_data(params)
  job_data = {}
  params.each { |k, v|
  key = "@#{k}"
  job_data[key] = v
  }
 
- job_data["@job"] = nil
- job_data["@keyname"] = keyname || "appscale"
+ job_data.delete("@job")
+ job_data["@keyname"] = params[:keyname] || "appscale"
 
  job_data["@type"] = job_data["@type"].to_s
  type = job_data["@type"]
@@ -182,132 +180,174 @@ def neptune(params)
  end
  end
 
- if job_data["@storage"]
- storage = job_data["@storage"]
- unless ALLOWED_STORAGE_TYPES.include?(storage)
- msg = "Supported storage types are #{ALLOWED_STORAGE_TYPES.join(', ')}" +
- " - we do not support #{storage}."
- abort(msg)
- end
-
- # Our implementation for storing / retrieving via Google Storage
- # and Walrus uses
- # the same library as we do for S3 - so just tell it that it's S3
- if storage == "gstorage" or storage == "walrus"
- storage = "s3"
- job_data["@storage"] = "s3"
- end
+ return job_data
+ end
 
- if storage == "s3"
- ["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
- unless job_data["@#{item}"]
- if ENV[item]
- puts "Using #{item} from environment" if verbose
- job_data["@#{item}"] = ENV[item]
- else
- msg = "When storing data to S3, #{item} must be specified or be in " +
- "your environment. Please do so and try again."
- abort(msg)
- end
- end
- }
- end
- else
+ def validate_storage_params(job_data)
+ if !job_data["@storage"]
  job_data["@storage"] = "appdb"
  end
 
- #if job_data["@can_run_on"].class == Range
- # job_data["@can_run_on"] = job_data["@can_run_on"].to_a
- #elsif job_data["@can_run_on"].class == Fixnum
- # job_data["@can_run_on"] = [job_data["@can_run_on"]]
- #end
+ storage = job_data["@storage"]
+ if !ALLOWED_STORAGE_TYPES.include?(storage)
+ abort("Supported storage types are #{ALLOWED_STORAGE_TYPES.join(', ')}" +
+ " - we do not support #{storage}.")
+ end
 
- puts "job data = #{job_data.inspect}" if verbose
+ # Our implementation for storing / retrieving via Google Storage
+ # and Walrus uses
+ # the same library as we do for S3 - so just tell it that it's S3
+ if storage == "gstorage" or storage == "walrus"
+ storage = "s3"
+ job_data["@storage"] = "s3"
+ end
 
- do_preprocessing(job_data)
+ if storage == "s3"
+ ["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
+ if job_data["@#{item}"]
+ puts "Using specified #{item}"
+ else
+ if ENV[item]
+ puts "Using #{item} from environment"
+ job_data["@#{item}"] = ENV[item]
+ else
+ abort("When storing data to S3, #{item} must be specified or be in " +
+ "your environment. Please do so and try again.")
+ end
+ end
+ }
+ end
 
- ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "
+ return job_data
+ end
 
- # TODO - right now the job is assumed to succeed in many cases
- # need to investigate the various failure scenarios
- result = { :result => :success }
+ # This method takes a file on the local user's computer and stores it remotely
+ # via AppScale. It returns a hash map indicating whether or not the job
+ # succeeded and if it failed, the reason for it.
+ def get_input(job_data, ssh_args, shadow_ip, controller, file=File,
+ shell=Kernel.method(:`))
+ result = {:result => :success}
 
- if type == "input"
- # copy file to remote
- # set location
- local_file = File.expand_path(job_data["@local"])
- if !File.exists?(local_file)
- msg = "the file you specified to copy, #{local_file}, doesn't exist." +
+ if !job_data["@local"]
+ abort("You failed to specify a file to copy over via the :local flag.")
+ end
+
+ local_file = file.expand_path(job_data["@local"])
+ if !file.exists?(local_file)
+ reason = "the file you specified to copy, #{local_file}, doesn't exist." +
  " Please specify a file that exists and try again."
- abort(msg)
+ return {:result => :failure, :reason => reason}
+ end
+
+ remote = "/tmp/neptune-input-#{rand(100000)}"
+ scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
+ puts scp_cmd
+ shell.call(scp_cmd)
+
+ job_data["@local"] = remote
+ puts "job data = #{job_data.inspect}"
+ response = controller.put_input(job_data)
+ if response
+ return {:result => :success}
+ else
+ # TODO - expand this to include the reason why it failed
+ return {:result => :failure}
+ end
+ end
+
+ # This method waits for AppScale to finish compiling the user's code, indicated
+ # by AppScale copying the finished code to a pre-determined location.
+ def wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location,
+ shell=Kernel.method(:`))
+ loop {
+ ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
+ puts ssh_command
+ ssh_result = shell.call(ssh_command)
+ puts "result was [#{ssh_result}]"
+ if ssh_result =~ /No such file or directory/
+ puts "Still waiting for code to be compiled..."
+ else
+ puts "compilation complete! Copying compiled code to #{copy_to}"
+ return
  end
+ sleep(5)
+ }
+ end
 
- remote = "/tmp/neptune-input-#{rand(100000)}"
- scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
- puts scp_cmd if verbose
- `#{scp_cmd}`
+ # This method sends out a request to compile code, waits for it to finish, and
+ # gets the standard out and error returned from the compilation. This method
+ # returns a hash containing the standard out, error, and a result that indicates
+ # whether or not the compilation was successful.
+ def compile_code(job_data, ssh_args, shadow_ip, shell=Kernel.method(:`))
+ compiled_location = controller.compile_code(job_data)
 
- job_data["@local"] = remote
- puts "job data = #{job_data.inspect}" if verbose
- result[:input] = controller.put_input(job_data)
- elsif type == "output"
- result[:output] = controller.get_output(job_data)
- elsif type == "get-acl"
- job_data["@type"] = "acl"
- result[:acl] = controller.get_acl(job_data)
- elsif type == "set-acl"
- job_data["@type"] = "acl"
- result[:acl] = controller.set_acl(job_data)
- elsif type == "compile"
- compiled_location = controller.compile_code(job_data)
-
- copy_to = job_data["@copy_to"]
-
- loop {
- ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
- puts ssh_command if verbose
- result = `#{ssh_command}`
- puts "result was [#{result}]" if verbose
- if result =~ /No such file or directory/
- puts "Still waiting for code to be compiled..." if verbose
- else
- puts "compilation complete! Copying compiled code to #{copy_to}" if verbose
- break
- end
- sleep(5)
- }
+ copy_to = job_data["@copy_to"]
 
- rm_local = "rm -rf #{copy_to}"
- puts rm_local if verbose
- `#{rm_local}`
+ wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location)
 
- scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
- puts scp_command if verbose
- `#{scp_command}`
+ FileUtils.rm_rf(copy_to)
 
- code = job_data["@code"]
- dirs = code.split(/\//)
- remote_dir = "/tmp/" + dirs[-1]
+ scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
+ puts scp_command
+ shell.call(scp_command)
 
- ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_dir}' 2>&1"
- puts ssh_command if verbose
- `#{ssh_command}`
+ code = job_data["@code"]
+ dirs = code.split(/\//)
+ remote_dir = "/tmp/" + dirs[-1]
 
- ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{compiled_location}' 2>&1"
- puts ssh_command if verbose
- `#{ssh_command}`
+ [remote_dir, compiled_location].each { |remote_files|
+ ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_files}' 2>&1"
+ puts ssh_command
+ shell.call(ssh_command)
+ }
 
- out = File.open("#{copy_to}/compile_out") { |f| f.read.chomp! }
- err = File.open("#{copy_to}/compile_err") { |f| f.read.chomp! }
- result = {}
- result[:out] = out
- result[:err] = err
+ return get_std_out_and_err(copy_to)
+ end
 
- if result[:err]
- result[:result] = :failure
- else
- result[:result] = :success
- end
+ # This method returns a hash containing the standard out and standard error
+ # from a completed job, as well as a result field that indicates whether or
+ # not the job completed successfully (success = no errors).
+ def get_std_out_and_err(location)
+ result = {}
+
+ out = File.open("#{location}/compile_out") { |f| f.read.chomp! }
+ result[:out] = out
+
+ err = File.open("#{location}/compile_err") { |f| f.read.chomp! }
+ result[:err] = err
+
+ if result[:err]
+ result[:result] = :failure
+ else
+ result[:result] = :success
+ end
+
+ return result
+ end
+
+ # This method actually runs the Neptune job, given information about the job
+ # as well as information about the node to send the request to.
+ def run_job(job_data, ssh_args, shadow_ip, secret,
+ controller=AppControllerClient, file=File)
+ controller = controller.new(shadow_ip, secret)
+
+ # TODO - right now the job is assumed to succeed in many cases
+ # need to investigate the various failure scenarios
+ result = { :result => :success }
+
+ case job_data["@type"]
+ when "input"
+ result = get_input(job_data, ssh_args, shadow_ip, controller, file)
+ when "output"
+ result[:output] = controller.get_output(job_data)
+ when "get-acl"
+ job_data["@type"] = "acl"
+ result[:acl] = controller.get_acl(job_data)
+ when "set-acl"
+ job_data["@type"] = "acl"
+ result[:acl] = controller.set_acl(job_data)
+ when "compile"
+ result = compile_code(job_data, ssh_args, shadow_ip)
  else
  msg = controller.start_neptune_job(job_data)
  result[:msg] = msg
@@ -317,3 +357,28 @@ def neptune(params)
  return result
  end
 
+ # This method is the heart of Neptune - here, we take
+ # blocks of code that the user has written and convert them
+ # into HPC job requests. At a high level, the user can
+ # request to run a job, retrieve a job's output, or
+ # modify the access policy (ACL) for the output of a
+ # job. By default, job data is private, but a Neptune
+ # job can be used to set it to public later (and
+ # vice-versa).
+ def neptune(params)
+ puts "Received a request to run a job."
+ puts params[:type]
+
+ job_data = get_job_data(params)
+ validate_storage_params(job_data)
+ puts "job data = #{job_data.inspect}"
+ do_preprocessing(job_data)
+ keyname = job_data["@keyname"]
+
+ shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
+ secret = CommonFunctions.get_secret_key(keyname)
+ ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
+ ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "
+
+ return run_job(job_data, ssh_args, shadow_ip, secret)
+ end
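Note: with the refactor above, neptune() is now a thin front end: get_job_data builds the job hash from the symbols you pass in, validate_storage_params fills in or checks :storage, do_preprocessing dispatches to the preprocess_* methods (preprocess_mpi now requires both :nodes_to_use and :procs_to_use), and run_job talks to the AppController. The snippet below is an illustrative sketch, not one of the gem's samples; the code path, node counts, and output location are made up.

    #!/usr/bin/ruby -w
    require 'rubygems'
    require 'neptune'

    # Run a hypothetical MPI job on a running AppScale deployment (keyname
    # defaults to "appscale"). Storage defaults to "appdb" when :storage is
    # not given, per validate_storage_params above.
    result = neptune(:type => "mpi",
                     :code => "/path/to/compiled/Ring",   # hypothetical path
                     :nodes_to_use => 4,
                     :procs_to_use => 4,
                     :output => "/mpi/ring-output")
    puts result.inspect   # e.g. {:result=>:success, :msg=>"..."}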