neptune 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +7 -4
- data/doc/AppControllerClient.html +12 -4
- data/doc/CommonFunctions.html +55 -42
- data/doc/Kernel.html +187 -0
- data/doc/LICENSE.html +2 -0
- data/doc/Object.html +488 -198
- data/doc/README.html +26 -5
- data/doc/bin/neptune.html +1 -1
- data/doc/created.rid +6 -6
- data/doc/index.html +20 -2
- data/doc/lib/app_controller_client_rb.html +2 -2
- data/doc/lib/common_functions_rb.html +2 -2
- data/doc/lib/neptune_rb.html +3 -1
- data/lib/app_controller_client.rb +2 -2
- data/lib/common_functions.rb +50 -24
- data/lib/neptune.rb +224 -159
- data/samples/appscale/add_appserver.rb +10 -0
- data/samples/appscale/add_database.rb +9 -0
- data/samples/appscale/add_loadbalancer.rb +9 -0
- data/samples/appscale/add_slave.rb +9 -0
- data/samples/c/compile_helloworld.rb +10 -0
- data/samples/c/helloworld/helloworld.c +6 -0
- data/samples/erlang/compile_erlang_ring.rb +10 -0
- data/samples/erlang/get_erlang_output.rb +8 -0
- data/samples/erlang/ring/Makefile +3 -0
- data/samples/erlang/ring/ring.erl +90 -0
- data/samples/erlang/run_erlang_ring.rb +6 -0
- data/samples/go/compile_hello.rb +10 -0
- data/samples/go/get_hello_output.rb +6 -0
- data/samples/go/hello/hello.go +8 -0
- data/samples/go/put_input.rb +8 -0
- data/samples/go/run_hello.rb +9 -0
- data/samples/mapreduce/expected-output.txt +7078 -0
- data/samples/mapreduce/get_mapreduce_output.rb +4 -0
- data/samples/mapreduce/hadoop-0.20.0-examples.jar +0 -0
- data/samples/mapreduce/input-10 +64 -0
- data/samples/mapreduce/input-30 +64 -0
- data/samples/mapreduce/input-7 +4 -0
- data/samples/mapreduce/map.rb +48 -0
- data/samples/mapreduce/reduce.rb +48 -0
- data/samples/mapreduce/run_java_mr.rb +14 -0
- data/samples/mapreduce/run_mapreduce.rb +13 -0
- data/samples/mapreduce/the-end-of-time.txt +11256 -0
- data/samples/mpi/Makefile +22 -0
- data/samples/mpi/MpiQueen +0 -0
- data/samples/mpi/compile_mpi_ring.rb +10 -0
- data/samples/mpi/compile_x10_nqueens.rb +8 -0
- data/samples/mpi/cpi +0 -0
- data/samples/mpi/get_mpi_output.rb +5 -0
- data/samples/mpi/get_ring_output.rb +5 -0
- data/samples/mpi/hw2.c +205 -0
- data/samples/mpi/hw2harness.c +84 -0
- data/samples/mpi/hw2harness.h +45 -0
- data/samples/mpi/powermethod +0 -0
- data/samples/mpi/ring/Makefile +2 -0
- data/samples/mpi/ring/Ring.c +76 -0
- data/samples/mpi/run_mpi_cpi.rb +10 -0
- data/samples/mpi/run_mpi_nqueens.np +6 -0
- data/samples/mpi/run_mpi_powermethod.rb +8 -0
- data/samples/mpi/run_mpi_ring.rb +12 -0
- data/samples/r/compile_hello.rb +10 -0
- data/samples/r/get_hello_output.rb +6 -0
- data/samples/r/hello/hello.r +1 -0
- data/samples/r/put_input.rb +8 -0
- data/samples/r/run_hello.rb +9 -0
- data/samples/upc/compile_upc_helloworld.rb +10 -0
- data/samples/upc/compile_upc_ring.rb +11 -0
- data/samples/upc/get_mpi_output.rb +8 -0
- data/samples/upc/helloworld/HelloWorld.c +9 -0
- data/samples/upc/helloworld/Makefile +3 -0
- data/samples/upc/ring/Makefile +3 -0
- data/samples/upc/ring/Ring.c +116 -0
- data/samples/upc/run_upc_helloworld.rb +12 -0
- data/samples/upc/run_upc_ring.rb +12 -0
- data/samples/x10/MyPowerMethod +0 -0
- data/samples/x10/MyPowerMethod.x10 +236 -0
- data/samples/x10/NQueensDist +0 -0
- data/samples/x10/NQueensDist.x10 +112 -0
- data/samples/x10/compile_x10_nqueens.rb +7 -0
- data/samples/x10/compile_x10_ring.rb +12 -0
- data/samples/x10/get_x10_output.rb +8 -0
- data/samples/x10/ring/Makefile +3 -0
- data/samples/x10/ring/Ring.x10 +28 -0
- data/samples/x10/ring/RingOld.x10 +68 -0
- data/samples/x10/run_x10_nqueens.rb +6 -0
- data/samples/x10/run_x10_powermethod.rb +7 -0
- data/samples/x10/run_x10_ring.rb +6 -0
- data/test/{tc_c.rb → integration/tc_c.rb} +2 -2
- data/test/{tc_dfsp.rb → integration/tc_dfsp.rb} +0 -0
- data/test/{tc_dwssa.rb → integration/tc_dwssa.rb} +0 -0
- data/test/{tc_erlang.rb → integration/tc_erlang.rb} +0 -0
- data/test/{tc_mapreduce.rb → integration/tc_mapreduce.rb} +0 -0
- data/test/{tc_mpi.rb → integration/tc_mpi.rb} +0 -0
- data/test/{tc_storage.rb → integration/tc_storage.rb} +0 -0
- data/test/{tc_upc.rb → integration/tc_upc.rb} +0 -0
- data/test/{tc_x10.rb → integration/tc_x10.rb} +0 -0
- data/test/{test_helper.rb → integration/test_helper.rb} +0 -0
- data/test/{ts_neptune.rb → integration/ts_neptune.rb} +2 -2
- data/test/unit/test_app_controller_client.rb +106 -0
- data/test/unit/test_common_functions.rb +106 -0
- data/test/unit/test_neptune.rb +208 -0
- data/test/unit/ts_all.rb +6 -0
- metadata +91 -15
data/doc/README.html
CHANGED
@@ -66,6 +66,8 @@
|
|
66
66
|
|
67
67
|
<li><a href="./CommonFunctions.html">CommonFunctions</a></li>
|
68
68
|
|
69
|
+
<li><a href="./Kernel.html">Kernel</a></li>
|
70
|
+
|
69
71
|
<li><a href="./Object.html">Object</a></li>
|
70
72
|
|
71
73
|
</ul>
|
@@ -119,7 +121,9 @@ in the test folder, with the standard naming convention</p>
|
|
119
121
|
<p>cases for each type of job that Neptune offers. Before running ts_neptune,
|
120
122
|
you should export the environment variable APPSCALE_HEAD_NODE, which should
|
121
123
|
be set to the IP address of the AppScale machine that runs the Shadow
|
122
|
-
daemon (a.k.a. the Master AppController)
|
124
|
+
daemon (a.k.a. the Master AppController). Running generate_coverage.sh in
|
125
|
+
the top-level directory will run rcov and generate the coverage reports
|
126
|
+
automatically via unit tests.</p>
|
123
127
|
|
124
128
|
<p>Developed by Chris Bunch as part of the AppScale project. See <a
|
125
129
|
href="LICENSE.html">LICENSE</a> for the specifics of the New BSD License by
|
@@ -138,15 +142,32 @@ as adding capabilities for other types of computation. We would also like
|
|
138
142
|
to refactor Neptune to use symbols instead of instance variables for
|
139
143
|
running jobs: this will likely appear in a future release as well.</p>
|
140
144
|
|
141
|
-
<p>
|
142
|
-
|
145
|
+
<p>Our academic paper on Neptune won best paper at ACM ScienceCloud 2011!
|
146
|
+
<a
|
147
|
+
href="http://www.neptune-lang.org/2011/6/Neptune-Picks-up-Best-Paper-at-ScienceCloud-2011">
|
148
|
+
Here’s a link</a> to the abstract of the paper and the PDF.</p>
|
143
149
|
|
144
150
|
<p>Version History:</p>
|
145
151
|
|
146
|
-
<p>
|
152
|
+
<p>November 10, 2011 - 0.1.2 released, adding unit tests and refactoring all
|
153
|
+
around.</p>
|
154
|
+
|
155
|
+
<p>June 6, 2011 - 0.1.1 released, adding support for code written in Go and R</p>
|
156
|
+
|
157
|
+
<p>June 4, 2011 - 0.1.0 released, adding verbose / quiet options for users
|
158
|
+
wishing to suppress stdout from Neptune jobs.</p>
|
159
|
+
|
160
|
+
<p>May 28, 2011 - 0.0.9 released, adding generic SSA support for users wanting
|
161
|
+
to use StochKit and other SSA codes.</p>
|
162
|
+
|
163
|
+
<p>April 8, 2011 - 0.0.8 released, fixing MapReduce support for both regular
|
164
|
+
Hadoop and Hadoop Streaming. Also increased code coverage to cover a number
|
165
|
+
of failure scenarios.</p>
|
166
|
+
|
167
|
+
<p>April 2, 2011 - 0.0.7 released, adding automatic test suite and many bug
|
147
168
|
fixes for all scenarios. rcov can also be used to generate test coverage
|
148
169
|
information: current coverage stats can be found in coverage directory.
|
149
|
-
|
170
|
+
MapReduce broken at the moment - will fix in next release</p>
|
150
171
|
|
151
172
|
<p>March 28, 2011 - 0.0.6 released, adding support for input jobs, so users
|
152
173
|
can place data in the datastore without having to run any computation</p>
|
data/doc/bin/neptune.html
CHANGED
data/doc/created.rid
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
2
|
-
./lib/common_functions.rb Wed,
|
1
|
+
Thu, 10 Nov 2011 12:27:04 -0800
|
2
|
+
./lib/common_functions.rb Wed, 26 Oct 2011 10:37:34 -0700
|
3
3
|
./LICENSE Thu, 27 Jan 2011 13:24:30 -0800
|
4
|
-
./lib/app_controller_client.rb
|
5
|
-
./lib/neptune.rb
|
6
|
-
./README
|
7
|
-
./bin/neptune
|
4
|
+
./lib/app_controller_client.rb Tue, 25 Oct 2011 23:47:47 -0700
|
5
|
+
./lib/neptune.rb Wed, 09 Nov 2011 10:54:28 -0800
|
6
|
+
./README Thu, 10 Nov 2011 11:48:11 -0800
|
7
|
+
./bin/neptune Sun, 15 May 2011 22:52:56 -0700
|
data/doc/index.html
CHANGED
@@ -44,6 +44,8 @@
|
|
44
44
|
|
45
45
|
<li class="module"><a href="CommonFunctions.html">CommonFunctions</a></li>
|
46
46
|
|
47
|
+
<li class="module"><a href="Kernel.html">Kernel</a></li>
|
48
|
+
|
47
49
|
<li class="class"><a href="Object.html">Object</a></li>
|
48
50
|
|
49
51
|
</ul>
|
@@ -63,12 +65,20 @@
|
|
63
65
|
|
64
66
|
<li><a href="AppControllerClient.html#method-i-compile_code">#compile_code — AppControllerClient</a></li>
|
65
67
|
|
68
|
+
<li><a href="Object.html#method-i-compile_code">#compile_code — Object</a></li>
|
69
|
+
|
66
70
|
<li><a href="Object.html#method-i-do_preprocessing">#do_preprocessing — Object</a></li>
|
67
71
|
|
68
72
|
<li><a href="AppControllerClient.html#method-i-get_acl">#get_acl — AppControllerClient</a></li>
|
69
73
|
|
74
|
+
<li><a href="Object.html#method-i-get_input">#get_input — Object</a></li>
|
75
|
+
|
76
|
+
<li><a href="Object.html#method-i-get_job_data">#get_job_data — Object</a></li>
|
77
|
+
|
70
78
|
<li><a href="AppControllerClient.html#method-i-get_output">#get_output — AppControllerClient</a></li>
|
71
79
|
|
80
|
+
<li><a href="Object.html#method-i-get_std_out_and_err">#get_std_out_and_err — Object</a></li>
|
81
|
+
|
72
82
|
<li><a href="AppControllerClient.html#method-i-make_call">#make_call — AppControllerClient</a></li>
|
73
83
|
|
74
84
|
<li><a href="Object.html#method-i-neptune">#neptune — Object</a></li>
|
@@ -77,16 +87,24 @@
|
|
77
87
|
|
78
88
|
<li><a href="Object.html#method-i-preprocess_erlang">#preprocess_erlang — Object</a></li>
|
79
89
|
|
80
|
-
<li><a href="Object.html#method-i-preprocess_mapreduce">#preprocess_mapreduce — Object</a></li>
|
81
|
-
|
82
90
|
<li><a href="Object.html#method-i-preprocess_mpi">#preprocess_mpi — Object</a></li>
|
83
91
|
|
92
|
+
<li><a href="Object.html#method-i-preprocess_ssa">#preprocess_ssa — Object</a></li>
|
93
|
+
|
84
94
|
<li><a href="AppControllerClient.html#method-i-put_input">#put_input — AppControllerClient</a></li>
|
85
95
|
|
96
|
+
<li><a href="Object.html#method-i-run_job">#run_job — Object</a></li>
|
97
|
+
|
86
98
|
<li><a href="AppControllerClient.html#method-i-set_acl">#set_acl — AppControllerClient</a></li>
|
87
99
|
|
100
|
+
<li><a href="Kernel.html#method-i-shell">#shell — Kernel</a></li>
|
101
|
+
|
88
102
|
<li><a href="AppControllerClient.html#method-i-start_neptune_job">#start_neptune_job — AppControllerClient</a></li>
|
89
103
|
|
104
|
+
<li><a href="Object.html#method-i-validate_storage_params">#validate_storage_params — Object</a></li>
|
105
|
+
|
106
|
+
<li><a href="Object.html#method-i-wait_for_compilation_to_finish">#wait_for_compilation_to_finish — Object</a></li>
|
107
|
+
|
90
108
|
</ul>
|
91
109
|
|
92
110
|
<div id="validator-badges">
|
@@ -24,7 +24,7 @@
|
|
24
24
|
<div id="metadata">
|
25
25
|
<dl>
|
26
26
|
<dt class="modified-date">Last Modified</dt>
|
27
|
-
<dd class="modified-date">
|
27
|
+
<dd class="modified-date">Tue Oct 25 23:47:47 -0700 2011</dd>
|
28
28
|
|
29
29
|
|
30
30
|
<dt class="requires">Requires</dt>
|
@@ -50,7 +50,7 @@
|
|
50
50
|
<div class="description">
|
51
51
|
<h2>Description</h2>
|
52
52
|
|
53
|
-
<p>Programmer: Chris Bunch</p>
|
53
|
+
<p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
|
54
54
|
|
55
55
|
</div>
|
56
56
|
|
@@ -24,7 +24,7 @@
|
|
24
24
|
<div id="metadata">
|
25
25
|
<dl>
|
26
26
|
<dt class="modified-date">Last Modified</dt>
|
27
|
-
<dd class="modified-date">Wed
|
27
|
+
<dd class="modified-date">Wed Oct 26 10:37:34 -0700 2011</dd>
|
28
28
|
|
29
29
|
|
30
30
|
<dt class="requires">Requires</dt>
|
@@ -58,7 +58,7 @@
|
|
58
58
|
<div class="description">
|
59
59
|
<h2>Description</h2>
|
60
60
|
|
61
|
-
<p>Programmer: Chris Bunch</p>
|
61
|
+
<p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
|
62
62
|
|
63
63
|
</div>
|
64
64
|
|
data/doc/lib/neptune_rb.html
CHANGED
@@ -24,7 +24,7 @@
|
|
24
24
|
<div id="metadata">
|
25
25
|
<dl>
|
26
26
|
<dt class="modified-date">Last Modified</dt>
|
27
|
-
<dd class="modified-date">
|
27
|
+
<dd class="modified-date">Wed Nov 09 10:54:28 -0800 2011</dd>
|
28
28
|
|
29
29
|
|
30
30
|
<dt class="requires">Requires</dt>
|
@@ -48,6 +48,8 @@
|
|
48
48
|
<div class="description">
|
49
49
|
<h2>Description</h2>
|
50
50
|
|
51
|
+
<p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
|
52
|
+
|
51
53
|
</div>
|
52
54
|
|
53
55
|
</div>
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
|
-
# Programmer: Chris Bunch
|
2
|
+
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
3
3
|
|
4
4
|
require 'openssl'
|
5
5
|
require 'soap/rpc/driver'
|
@@ -16,7 +16,7 @@ NO_TIMEOUT = -1
|
|
16
16
|
# platform (here, AppScale). This client is similar to that used in the AppScale
|
17
17
|
# Tools, but with non-Neptune SOAP calls removed.
|
18
18
|
class AppControllerClient
|
19
|
-
|
19
|
+
attr_accessor :conn, :ip, :secret
|
20
20
|
|
21
21
|
# A constructor that requires both the IP address of the machine to communicate
|
22
22
|
# with as well as the secret (string) needed to perform communication.
|
data/lib/common_functions.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
|
-
# Programmer: Chris Bunch
|
2
|
+
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
3
3
|
|
4
4
|
require 'digest/sha1'
|
5
5
|
require 'fileutils'
|
@@ -9,6 +9,12 @@ require 'socket'
|
|
9
9
|
require 'timeout'
|
10
10
|
require 'yaml'
|
11
11
|
|
12
|
+
module Kernel
|
13
|
+
def shell(command)
|
14
|
+
return `#{command}`
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
12
18
|
# A helper module that aggregates functions that are not part of Neptune's
|
13
19
|
# core functionality. Specifically, this module contains methods to scp
|
14
20
|
# files to other machines and the ability to read YAML files, which are
|
@@ -21,11 +27,17 @@ module CommonFunctions
|
|
21
27
|
# placed, and the name of the key to use. The keyname is typically
|
22
28
|
# specified by the Neptune job given, but defaults to ''appscale''
|
23
29
|
# if not provided.
|
24
|
-
def self.scp_to_shadow(local_file_loc,
|
25
|
-
|
26
|
-
|
30
|
+
def self.scp_to_shadow(local_file_loc,
|
31
|
+
remote_file_loc,
|
32
|
+
keyname,
|
33
|
+
is_dir=false,
|
34
|
+
file=File,
|
35
|
+
get_from_yaml=CommonFunctions.method(:get_from_yaml),
|
36
|
+
scp_file=CommonFunctions.method(:scp_file))
|
27
37
|
|
28
|
-
|
38
|
+
shadow_ip = get_from_yaml.call(keyname, :shadow, file)
|
39
|
+
ssh_key = file.expand_path("~/.appscale/#{keyname}.key")
|
40
|
+
scp_file.call(local_file_loc, remote_file_loc, shadow_ip, ssh_key, is_dir)
|
29
41
|
end
|
30
42
|
|
31
43
|
# Performs the actual remote copying of files: given the IP address
|
@@ -34,33 +46,39 @@ module CommonFunctions
|
|
34
46
|
# if the network is down, if a bad keyname is provided, or if the
|
35
47
|
# wrong IP is given. If the user specifies that the file to copy is
|
36
48
|
# actually a directory, we append the -r flag to scp as well.
|
37
|
-
def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
|
49
|
+
def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
|
50
|
+
is_dir=false, file=File, fileutils=FileUtils, kernel=Kernel)
|
38
51
|
cmd = ""
|
39
|
-
local_file_loc =
|
52
|
+
local_file_loc = file.expand_path(local_file_loc)
|
40
53
|
|
41
54
|
ssh_args = "-o StrictHostkeyChecking=no 2>&1"
|
42
55
|
ssh_args << " -r " if is_dir
|
43
56
|
|
44
|
-
public_key_loc =
|
57
|
+
public_key_loc = file.expand_path(public_key_loc)
|
45
58
|
cmd = "scp -i #{public_key_loc} #{ssh_args} #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"
|
46
59
|
cmd << "; echo $? >> ~/.appscale/retval"
|
47
60
|
|
48
|
-
retval_loc =
|
49
|
-
|
61
|
+
retval_loc = file.expand_path("~/.appscale/retval")
|
62
|
+
fileutils.rm_f(retval_loc)
|
50
63
|
|
51
64
|
begin
|
52
|
-
Timeout::timeout(-1) {
|
65
|
+
Timeout::timeout(-1) { kernel.shell("#{cmd}") }
|
53
66
|
rescue Timeout::Error
|
54
|
-
abort("Remotely copying over files failed. Is the destination machine
|
67
|
+
abort("Remotely copying over files failed. Is the destination machine" +
|
68
|
+
" on and reachable from this computer? We tried the following" +
|
69
|
+
" command:\n\n#{cmd}")
|
55
70
|
end
|
56
71
|
|
57
72
|
loop {
|
58
|
-
break if
|
73
|
+
break if file.exists?(retval_loc)
|
59
74
|
sleep(5)
|
60
75
|
}
|
61
76
|
|
62
|
-
retval = (
|
63
|
-
|
77
|
+
retval = (file.open(retval_loc) { |f| f.read }).chomp
|
78
|
+
if retval != "0"
|
79
|
+
abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is " +
|
80
|
+
"your environment set up properly?")
|
81
|
+
end
|
64
82
|
return cmd
|
65
83
|
end
|
66
84
|
|
@@ -70,16 +88,20 @@ module CommonFunctions
|
|
70
88
|
# method aborts if the value doesn't exist or the YAML file is malformed.
|
71
89
|
# If the required flag is set to false, it returns nil in either scenario
|
72
90
|
# instead.
|
73
|
-
def self.get_from_yaml(keyname, tag, required=true)
|
74
|
-
location_file =
|
91
|
+
def self.get_from_yaml(keyname, tag, required=true, file=File, yaml=YAML)
|
92
|
+
location_file = file.expand_path("~/.appscale/locations-#{keyname}.yaml")
|
75
93
|
|
76
|
-
|
94
|
+
if !file.exists?(location_file)
|
95
|
+
abort("An AppScale instance is not currently running with the provided" +
|
96
|
+
" keyname, \"#{keyname}\".")
|
97
|
+
end
|
77
98
|
|
78
99
|
begin
|
79
|
-
tree =
|
100
|
+
tree = yaml.load_file(location_file)
|
80
101
|
rescue ArgumentError
|
81
102
|
if required
|
82
|
-
abort("The yaml file you provided was malformed. Please correct any
|
103
|
+
abort("The yaml file you provided was malformed. Please correct any" +
|
104
|
+
" errors in it and try again.")
|
83
105
|
else
|
84
106
|
return nil
|
85
107
|
end
|
@@ -87,15 +109,19 @@ module CommonFunctions
|
|
87
109
|
|
88
110
|
value = tree[tag]
|
89
111
|
|
90
|
-
|
91
|
-
|
112
|
+
if value.nil? and required
|
113
|
+
abort("The file #{location_file} is in the wrong format and doesn't" +
|
114
|
+
" contain a #{tag} tag. Please make sure the file is in the correct" +
|
115
|
+
" format and try again.")
|
116
|
+
end
|
117
|
+
|
92
118
|
return value
|
93
119
|
end
|
94
120
|
|
95
121
|
# Returns the secret key needed for communication with AppScale's
|
96
122
|
# Shadow node. This method is a nice frontend to the get_from_yaml
|
97
123
|
# function, as the secret is stored in a YAML file.
|
98
|
-
def self.get_secret_key(keyname, required=true)
|
99
|
-
return CommonFunctions.get_from_yaml(keyname, :secret)
|
124
|
+
def self.get_secret_key(keyname, required=true, file=File, yaml=YAML)
|
125
|
+
return CommonFunctions.get_from_yaml(keyname, :secret, required, file, yaml)
|
100
126
|
end
|
101
127
|
end
|
data/lib/neptune.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
2
3
|
|
3
4
|
require 'app_controller_client'
|
4
5
|
require 'common_functions'
|
@@ -39,6 +40,8 @@ NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
|
|
39
40
|
# support. In the future, it is likely that the only exposed / monkey-patched
|
40
41
|
# method should be job, while the others could probably be folded into either
|
41
42
|
# a Neptune-specific class or into CommonFunctions.
|
43
|
+
# TODO(cbunch): This doesn't look like it does anything - run the integration
|
44
|
+
# test and confirm one way or the other.
|
42
45
|
class Object
|
43
46
|
end
|
44
47
|
|
@@ -48,7 +51,9 @@ end
|
|
48
51
|
# on the type of the job that the user has asked to run.
|
49
52
|
def do_preprocessing(job_data)
|
50
53
|
job_type = job_data["@type"]
|
51
|
-
|
54
|
+
if !NEED_PREPROCESSING.include?(job_type)
|
55
|
+
return
|
56
|
+
end
|
52
57
|
|
53
58
|
preprocess = "preprocess_#{job_type}".to_sym
|
54
59
|
send(preprocess, job_data)
|
@@ -57,10 +62,9 @@ end
|
|
57
62
|
# This preprocessing method copies over the user's code to the
|
58
63
|
# Shadow node so that it can be compiled there. A future version
|
59
64
|
# of this method may also copy over libraries as well.
|
60
|
-
def preprocess_compile(job_data)
|
61
|
-
verbose = job_data["@verbose"]
|
65
|
+
def preprocess_compile(job_data, shell=Kernel.method(:`))
|
62
66
|
code = File.expand_path(job_data["@code"])
|
63
|
-
|
67
|
+
if !File.exists?(code)
|
64
68
|
abort("The source file #{code} does not exist.")
|
65
69
|
end
|
66
70
|
|
@@ -71,94 +75,88 @@ def preprocess_compile(job_data)
|
|
71
75
|
|
72
76
|
ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no root@#{shadow_ip}"
|
73
77
|
remove_dir = "ssh #{ssh_args} 'rm -rf #{dest}' 2>&1"
|
74
|
-
puts remove_dir
|
75
|
-
|
78
|
+
puts remove_dir
|
79
|
+
shell.call(remove_dir)
|
76
80
|
|
77
81
|
CommonFunctions.scp_to_shadow(code, dest, keyname, is_dir=true)
|
78
82
|
|
79
83
|
job_data["@code"] = dest
|
80
84
|
end
|
81
85
|
|
82
|
-
def preprocess_erlang(job_data)
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
86
|
+
def preprocess_erlang(job_data, file=File, common_functions=CommonFunctions)
|
87
|
+
if !job_data["@code"]
|
88
|
+
abort("When running Erlang jobs, :code must be specified.")
|
89
|
+
end
|
90
|
+
|
91
|
+
source_code = file.expand_path(job_data["@code"])
|
92
|
+
if !file.exists?(source_code)
|
93
|
+
abort("The specified code, #{job_data['@code']}," +
|
94
|
+
" didn't exist. Please specify one that exists and try again")
|
88
95
|
end
|
89
96
|
dest_code = "/tmp/"
|
90
97
|
|
91
98
|
keyname = job_data["@keyname"]
|
92
|
-
|
99
|
+
common_functions.scp_to_shadow(source_code, dest_code, keyname)
|
93
100
|
end
|
94
101
|
|
95
|
-
# This preprocessing method
|
96
|
-
#
|
97
|
-
#
|
102
|
+
# This preprocessing method verifies that the user specified the number of nodes
|
103
|
+
# to use. If they also specified the number of processes to use, we also verify
|
104
|
+
# that this value is at least as many as the number of nodes (that is, nodes
|
105
|
+
# can't be underprovisioned in MPI).
|
98
106
|
def preprocess_mpi(job_data)
|
99
|
-
|
107
|
+
if !job_data["@nodes_to_use"]
|
108
|
+
abort("When running MPI jobs, :nodes_to_use must be specified.")
|
109
|
+
end
|
110
|
+
|
111
|
+
if !job_data["@procs_to_use"]
|
112
|
+
abort("When running MPI jobs, :procs_to_use must be specified.")
|
113
|
+
end
|
114
|
+
|
100
115
|
if job_data["@procs_to_use"]
|
101
116
|
p = job_data["@procs_to_use"]
|
102
117
|
n = job_data["@nodes_to_use"]
|
103
118
|
if p < n
|
104
|
-
|
119
|
+
abort("When specifying both :procs_to_use and :nodes_to_use" +
|
105
120
|
", :procs_to_use must be at least as large as :nodes_to_use. Please " +
|
106
121
|
"change this and try again. You specified :procs_to_use = #{p} and" +
|
107
|
-
":nodes_to_use = #{n}."
|
108
|
-
abort(not_enough_procs)
|
122
|
+
":nodes_to_use = #{n}.")
|
109
123
|
end
|
110
124
|
end
|
111
125
|
|
112
|
-
|
126
|
+
return job_data
|
113
127
|
end
|
114
128
|
|
129
|
+
# This preprocessing method verifies that the user specified the number of
|
130
|
+
# trajectories to run, via either :trajectories or :simulations. Both should
|
131
|
+
# not be specified - only one or the other, and regardless of which they
|
132
|
+
# specify, convert it to be :trajectories.
|
115
133
|
def preprocess_ssa(job_data)
|
134
|
+
if job_data["@simulations"] and job_data["@trajectories"]
|
135
|
+
abort("Both :simulations and :trajectories cannot be specified - use one" +
|
136
|
+
" or the other.")
|
137
|
+
end
|
138
|
+
|
116
139
|
if job_data["@simulations"]
|
117
140
|
job_data["@trajectories"] = job_data["@simulations"]
|
141
|
+
job_data.delete("@simulations")
|
118
142
|
end
|
119
143
|
|
120
|
-
|
144
|
+
if !job_data["@trajectories"]
|
121
145
|
abort(":trajectories needs to be specified when running ssa jobs")
|
122
146
|
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# TODO: actually use me!
|
126
|
-
#def validate_args(list)
|
127
|
-
# list.each do |item|
|
128
|
-
# val = instance_variable_get("@#{item}".to_sym)
|
129
|
-
# abort("FATAL: #{item} was not defined") if val.nil?
|
130
|
-
# end
|
131
|
-
#end
|
132
147
|
|
133
|
-
|
134
|
-
|
135
|
-
# into HPC job requests. At a high level, the user can
|
136
|
-
# request to run a job, retrieve a job's output, or
|
137
|
-
# modify the access policy (ACL) for the output of a
|
138
|
-
# job. By default, job data is private, but a Neptune
|
139
|
-
# job can be used to set it to public later (and
|
140
|
-
# vice-versa).
|
141
|
-
def neptune(params)
|
142
|
-
verbose = params[:verbose]
|
143
|
-
|
144
|
-
puts "Received a request to run a job." if verbose
|
145
|
-
puts params[:type] if verbose
|
146
|
-
|
147
|
-
keyname = params[:keyname] || "appscale"
|
148
|
-
|
149
|
-
shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
|
150
|
-
secret = CommonFunctions.get_secret_key(keyname)
|
151
|
-
controller = AppControllerClient.new(shadow_ip, secret)
|
152
|
-
ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
|
148
|
+
return job_data
|
149
|
+
end
|
153
150
|
|
151
|
+
def get_job_data(params)
|
154
152
|
job_data = {}
|
155
153
|
params.each { |k, v|
|
156
154
|
key = "@#{k}"
|
157
155
|
job_data[key] = v
|
158
156
|
}
|
159
157
|
|
160
|
-
job_data
|
161
|
-
job_data["@keyname"] = keyname || "appscale"
|
158
|
+
job_data.delete("@job")
|
159
|
+
job_data["@keyname"] = params[:keyname] || "appscale"
|
162
160
|
|
163
161
|
job_data["@type"] = job_data["@type"].to_s
|
164
162
|
type = job_data["@type"]
|
@@ -182,132 +180,174 @@ def neptune(params)
|
|
182
180
|
end
|
183
181
|
end
|
184
182
|
|
185
|
-
|
186
|
-
|
187
|
-
unless ALLOWED_STORAGE_TYPES.include?(storage)
|
188
|
-
msg = "Supported storage types are #{ALLOWED_STORAGE_TYPES.join(', ')}" +
|
189
|
-
" - we do not support #{storage}."
|
190
|
-
abort(msg)
|
191
|
-
end
|
192
|
-
|
193
|
-
# Our implementation for storing / retrieving via Google Storage
|
194
|
-
# and Walrus uses
|
195
|
-
# the same library as we do for S3 - so just tell it that it's S3
|
196
|
-
if storage == "gstorage" or storage == "walrus"
|
197
|
-
storage = "s3"
|
198
|
-
job_data["@storage"] = "s3"
|
199
|
-
end
|
183
|
+
return job_data
|
184
|
+
end
|
200
185
|
|
201
|
-
|
202
|
-
|
203
|
-
unless job_data["@#{item}"]
|
204
|
-
if ENV[item]
|
205
|
-
puts "Using #{item} from environment" if verbose
|
206
|
-
job_data["@#{item}"] = ENV[item]
|
207
|
-
else
|
208
|
-
msg = "When storing data to S3, #{item} must be specified or be in " +
|
209
|
-
"your environment. Please do so and try again."
|
210
|
-
abort(msg)
|
211
|
-
end
|
212
|
-
end
|
213
|
-
}
|
214
|
-
end
|
215
|
-
else
|
186
|
+
def validate_storage_params(job_data)
|
187
|
+
if !job_data["@storage"]
|
216
188
|
job_data["@storage"] = "appdb"
|
217
189
|
end
|
218
190
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
191
|
+
storage = job_data["@storage"]
|
192
|
+
if !ALLOWED_STORAGE_TYPES.include?(storage)
|
193
|
+
abort("Supported storage types are #{ALLOWED_STORAGE_TYPES.join(', ')}" +
|
194
|
+
" - we do not support #{storage}.")
|
195
|
+
end
|
224
196
|
|
225
|
-
|
197
|
+
# Our implementation for storing / retrieving via Google Storage
|
198
|
+
# and Walrus uses
|
199
|
+
# the same library as we do for S3 - so just tell it that it's S3
|
200
|
+
if storage == "gstorage" or storage == "walrus"
|
201
|
+
storage = "s3"
|
202
|
+
job_data["@storage"] = "s3"
|
203
|
+
end
|
226
204
|
|
227
|
-
|
205
|
+
if storage == "s3"
|
206
|
+
["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
|
207
|
+
if job_data["@#{item}"]
|
208
|
+
puts "Using specified #{item}"
|
209
|
+
else
|
210
|
+
if ENV[item]
|
211
|
+
puts "Using #{item} from environment"
|
212
|
+
job_data["@#{item}"] = ENV[item]
|
213
|
+
else
|
214
|
+
abort("When storing data to S3, #{item} must be specified or be in " +
|
215
|
+
"your environment. Please do so and try again.")
|
216
|
+
end
|
217
|
+
end
|
218
|
+
}
|
219
|
+
end
|
228
220
|
|
229
|
-
|
221
|
+
return job_data
|
222
|
+
end
|
230
223
|
|
231
|
-
|
232
|
-
|
233
|
-
|
224
|
+
# This method takes a file on the local user's computer and stores it remotely
|
225
|
+
# via AppScale. It returns a hash map indicating whether or not the job
|
226
|
+
# succeeded and if it failed, the reason for it.
|
227
|
+
def get_input(job_data, ssh_args, shadow_ip, controller, file=File,
|
228
|
+
shell=Kernel.method(:`))
|
229
|
+
result = {:result => :success}
|
234
230
|
|
235
|
-
if
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
231
|
+
if !job_data["@local"]
|
232
|
+
abort("You failed to specify a file to copy over via the :local flag.")
|
233
|
+
end
|
234
|
+
|
235
|
+
local_file = file.expand_path(job_data["@local"])
|
236
|
+
if !file.exists?(local_file)
|
237
|
+
reason = "the file you specified to copy, #{local_file}, doesn't exist." +
|
241
238
|
" Please specify a file that exists and try again."
|
242
|
-
|
239
|
+
return {:result => :failure, :reason => reason}
|
240
|
+
end
|
241
|
+
|
242
|
+
remote = "/tmp/neptune-input-#{rand(100000)}"
|
243
|
+
scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
|
244
|
+
puts scp_cmd
|
245
|
+
shell.call(scp_cmd)
|
246
|
+
|
247
|
+
job_data["@local"] = remote
|
248
|
+
puts "job data = #{job_data.inspect}"
|
249
|
+
response = controller.put_input(job_data)
|
250
|
+
if response
|
251
|
+
return {:result => :success}
|
252
|
+
else
|
253
|
+
# TODO - expand this to include the reason why it failed
|
254
|
+
return {:result => :failure}
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# This method waits for AppScale to finish compiling the user's code, indicated
|
259
|
+
# by AppScale copying the finished code to a pre-determined location.
|
260
|
+
def wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location,
|
261
|
+
shell=Kernel.method(:`))
|
262
|
+
loop {
|
263
|
+
ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
|
264
|
+
puts ssh_command
|
265
|
+
ssh_result = shell.call(ssh_command)
|
266
|
+
puts "result was [#{ssh_result}]"
|
267
|
+
if ssh_result =~ /No such file or directory/
|
268
|
+
puts "Still waiting for code to be compiled..."
|
269
|
+
else
|
270
|
+
puts "compilation complete! Copying compiled code to #{copy_to}"
|
271
|
+
return
|
243
272
|
end
|
273
|
+
sleep(5)
|
274
|
+
}
|
275
|
+
end
|
244
276
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
277
|
+
# Sends out a request to compile code, waits for the compilation to finish,
# copies the results to this machine, and removes the remote copies. The
# standard out and error of the compilation are then read from the local copy.
#
# job_data::   hash describing the job; "@copy_to" (local destination for the
#              compiled code) and "@code" (path of the code being compiled)
#              are read here
# ssh_args::   extra command-line arguments spliced into each ssh / scp command
# shadow_ip::  address of the remote machine (logged into as root)
# shell::      callable that takes a command string and returns its output;
#              defaults to Kernel's backtick method
# controller:: optional object responding to compile_code(job_data) and
#              returning the remote path where the compiled code will appear
#
# Returns the hash produced by get_std_out_and_err for the local copy.
def compile_code(job_data, ssh_args, shadow_ip, shell=Kernel.method(:`),
  controller=nil)
  # No controller is in scope here unless the caller passes one in. Callers
  # that still pass none get the previous behaviour: a call to a `controller`
  # method, which fails if no such method is defined.
  client = controller || controller()
  compiled_location = client.compile_code(job_data)
  copy_to = job_data["@copy_to"]

  # Forward the injected shell so that polling uses the same command runner
  # as the scp / ssh calls below.
  wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location, shell)

  # Clear out whatever is already at the local destination before copying.
  FileUtils.rm_rf(copy_to)

  scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
  puts scp_command
  shell.call(scp_command)

  # NOTE(review): presumably /tmp/<last path component of @code> is where the
  # code was uploaded on the remote machine - confirm against the upload step.
  remote_dir = "/tmp/" + job_data["@code"].split(/\//)[-1]

  [remote_dir, compiled_location].each do |remote_files|
    ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_files}' 2>&1"
    puts ssh_command
    shell.call(ssh_command)
  end

  get_std_out_and_err(copy_to)
end
|
305
306
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
307
|
+
# Reads the standard out and standard error left behind by a completed job.
#
# location:: directory containing the files "compile_out" and "compile_err"
#
# Returns a hash with three keys:
# :out::    contents of compile_out minus one trailing newline, or nil if
#           nothing is left after that
# :err::    contents of compile_err, treated the same way
# :result:: :failure if there was any error output, :success otherwise
#
# Raises whatever File.read raises if either file cannot be read.
def get_std_out_and_err(location)
  # chomp (not chomp!) is used on purpose: chomp! returns nil whenever there
  # is no trailing newline to strip, which discarded output and reported
  # failed jobs as successful. Empty output is still reported as nil.
  read_output = lambda do |name|
    text = File.read("#{location}/#{name}").chomp
    text.empty? ? nil : text
  end

  result = {}
  result[:out] = read_output.call("compile_out")
  result[:err] = read_output.call("compile_err")
  result[:result] = result[:err] ? :failure : :success
  result
end
|
327
|
+
|
328
|
+
# This method actually runs the Neptune job, given information about the job
|
329
|
+
# as well as information about the node to send the request to.
|
330
|
+
def run_job(job_data, ssh_args, shadow_ip, secret,
|
331
|
+
controller=AppControllerClient, file=File)
|
332
|
+
controller = controller.new(shadow_ip, secret)
|
333
|
+
|
334
|
+
# TODO - right now the job is assumed to succeed in many cases
|
335
|
+
# need to investigate the various failure scenarios
|
336
|
+
result = { :result => :success }
|
337
|
+
|
338
|
+
case job_data["@type"]
|
339
|
+
when "input"
|
340
|
+
result = get_input(job_data, ssh_args, shadow_ip, controller, file)
|
341
|
+
when "output"
|
342
|
+
result[:output] = controller.get_output(job_data)
|
343
|
+
when "get-acl"
|
344
|
+
job_data["@type"] = "acl"
|
345
|
+
result[:acl] = controller.get_acl(job_data)
|
346
|
+
when "set-acl"
|
347
|
+
job_data["@type"] = "acl"
|
348
|
+
result[:acl] = controller.set_acl(job_data)
|
349
|
+
when "compile"
|
350
|
+
result = compile_code(job_data, ssh_args, shadow_ip)
|
311
351
|
else
|
312
352
|
msg = controller.start_neptune_job(job_data)
|
313
353
|
result[:msg] = msg
|
@@ -317,3 +357,28 @@ def neptune(params)
|
|
317
357
|
return result
|
318
358
|
end
|
319
359
|
|
360
|
+
# This method is the heart of Neptune - here, we take blocks of code that the
# user has written and convert them into HPC job requests. At a high level,
# the user can request to run a job, retrieve a job's output, or modify the
# access policy (ACL) for the output of a job. By default, job data is
# private, but a Neptune job can be used to set it to public later (and
# vice-versa).
#
# params:: hash of job parameters supplied by the user; params[:type] is
#          printed here and the whole hash is handed to get_job_data
#
# Returns whatever run_job returns for the prepared job.
def neptune(params)
  puts "Received a request to run a job."
  puts params[:type]

  job_data = get_job_data(params)
  validate_storage_params(job_data)
  puts "job data = #{job_data.inspect}"
  do_preprocessing(job_data)

  # The keyname selects which deployment's settings, secret and ssh key to use.
  keyname = job_data["@keyname"]
  shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
  secret = CommonFunctions.get_secret_key(keyname)

  # The key path keeps its leading "~": these arguments are spliced into
  # command strings, so expansion is left to whatever runs those commands.
  ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "

  run_job(job_data, ssh_args, shadow_ip, secret)
end
|