neptune 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +7 -4
- data/doc/AppControllerClient.html +12 -4
- data/doc/CommonFunctions.html +55 -42
- data/doc/Kernel.html +187 -0
- data/doc/LICENSE.html +2 -0
- data/doc/Object.html +488 -198
- data/doc/README.html +26 -5
- data/doc/bin/neptune.html +1 -1
- data/doc/created.rid +6 -6
- data/doc/index.html +20 -2
- data/doc/lib/app_controller_client_rb.html +2 -2
- data/doc/lib/common_functions_rb.html +2 -2
- data/doc/lib/neptune_rb.html +3 -1
- data/lib/app_controller_client.rb +2 -2
- data/lib/common_functions.rb +50 -24
- data/lib/neptune.rb +224 -159
- data/samples/appscale/add_appserver.rb +10 -0
- data/samples/appscale/add_database.rb +9 -0
- data/samples/appscale/add_loadbalancer.rb +9 -0
- data/samples/appscale/add_slave.rb +9 -0
- data/samples/c/compile_helloworld.rb +10 -0
- data/samples/c/helloworld/helloworld.c +6 -0
- data/samples/erlang/compile_erlang_ring.rb +10 -0
- data/samples/erlang/get_erlang_output.rb +8 -0
- data/samples/erlang/ring/Makefile +3 -0
- data/samples/erlang/ring/ring.erl +90 -0
- data/samples/erlang/run_erlang_ring.rb +6 -0
- data/samples/go/compile_hello.rb +10 -0
- data/samples/go/get_hello_output.rb +6 -0
- data/samples/go/hello/hello.go +8 -0
- data/samples/go/put_input.rb +8 -0
- data/samples/go/run_hello.rb +9 -0
- data/samples/mapreduce/expected-output.txt +7078 -0
- data/samples/mapreduce/get_mapreduce_output.rb +4 -0
- data/samples/mapreduce/hadoop-0.20.0-examples.jar +0 -0
- data/samples/mapreduce/input-10 +64 -0
- data/samples/mapreduce/input-30 +64 -0
- data/samples/mapreduce/input-7 +4 -0
- data/samples/mapreduce/map.rb +48 -0
- data/samples/mapreduce/reduce.rb +48 -0
- data/samples/mapreduce/run_java_mr.rb +14 -0
- data/samples/mapreduce/run_mapreduce.rb +13 -0
- data/samples/mapreduce/the-end-of-time.txt +11256 -0
- data/samples/mpi/Makefile +22 -0
- data/samples/mpi/MpiQueen +0 -0
- data/samples/mpi/compile_mpi_ring.rb +10 -0
- data/samples/mpi/compile_x10_nqueens.rb +8 -0
- data/samples/mpi/cpi +0 -0
- data/samples/mpi/get_mpi_output.rb +5 -0
- data/samples/mpi/get_ring_output.rb +5 -0
- data/samples/mpi/hw2.c +205 -0
- data/samples/mpi/hw2harness.c +84 -0
- data/samples/mpi/hw2harness.h +45 -0
- data/samples/mpi/powermethod +0 -0
- data/samples/mpi/ring/Makefile +2 -0
- data/samples/mpi/ring/Ring.c +76 -0
- data/samples/mpi/run_mpi_cpi.rb +10 -0
- data/samples/mpi/run_mpi_nqueens.np +6 -0
- data/samples/mpi/run_mpi_powermethod.rb +8 -0
- data/samples/mpi/run_mpi_ring.rb +12 -0
- data/samples/r/compile_hello.rb +10 -0
- data/samples/r/get_hello_output.rb +6 -0
- data/samples/r/hello/hello.r +1 -0
- data/samples/r/put_input.rb +8 -0
- data/samples/r/run_hello.rb +9 -0
- data/samples/upc/compile_upc_helloworld.rb +10 -0
- data/samples/upc/compile_upc_ring.rb +11 -0
- data/samples/upc/get_mpi_output.rb +8 -0
- data/samples/upc/helloworld/HelloWorld.c +9 -0
- data/samples/upc/helloworld/Makefile +3 -0
- data/samples/upc/ring/Makefile +3 -0
- data/samples/upc/ring/Ring.c +116 -0
- data/samples/upc/run_upc_helloworld.rb +12 -0
- data/samples/upc/run_upc_ring.rb +12 -0
- data/samples/x10/MyPowerMethod +0 -0
- data/samples/x10/MyPowerMethod.x10 +236 -0
- data/samples/x10/NQueensDist +0 -0
- data/samples/x10/NQueensDist.x10 +112 -0
- data/samples/x10/compile_x10_nqueens.rb +7 -0
- data/samples/x10/compile_x10_ring.rb +12 -0
- data/samples/x10/get_x10_output.rb +8 -0
- data/samples/x10/ring/Makefile +3 -0
- data/samples/x10/ring/Ring.x10 +28 -0
- data/samples/x10/ring/RingOld.x10 +68 -0
- data/samples/x10/run_x10_nqueens.rb +6 -0
- data/samples/x10/run_x10_powermethod.rb +7 -0
- data/samples/x10/run_x10_ring.rb +6 -0
- data/test/{tc_c.rb → integration/tc_c.rb} +2 -2
- data/test/{tc_dfsp.rb → integration/tc_dfsp.rb} +0 -0
- data/test/{tc_dwssa.rb → integration/tc_dwssa.rb} +0 -0
- data/test/{tc_erlang.rb → integration/tc_erlang.rb} +0 -0
- data/test/{tc_mapreduce.rb → integration/tc_mapreduce.rb} +0 -0
- data/test/{tc_mpi.rb → integration/tc_mpi.rb} +0 -0
- data/test/{tc_storage.rb → integration/tc_storage.rb} +0 -0
- data/test/{tc_upc.rb → integration/tc_upc.rb} +0 -0
- data/test/{tc_x10.rb → integration/tc_x10.rb} +0 -0
- data/test/{test_helper.rb → integration/test_helper.rb} +0 -0
- data/test/{ts_neptune.rb → integration/ts_neptune.rb} +2 -2
- data/test/unit/test_app_controller_client.rb +106 -0
- data/test/unit/test_common_functions.rb +106 -0
- data/test/unit/test_neptune.rb +208 -0
- data/test/unit/ts_all.rb +6 -0
- metadata +91 -15
data/doc/README.html
CHANGED
@@ -66,6 +66,8 @@
|
|
66
66
|
|
67
67
|
<li><a href="./CommonFunctions.html">CommonFunctions</a></li>
|
68
68
|
|
69
|
+
<li><a href="./Kernel.html">Kernel</a></li>
|
70
|
+
|
69
71
|
<li><a href="./Object.html">Object</a></li>
|
70
72
|
|
71
73
|
</ul>
|
@@ -119,7 +121,9 @@ in the test folder, with the standard naming convention</p>
|
|
119
121
|
<p>cases for each type of job that Neptune offers. Before running ts_neptune,
|
120
122
|
you should export the environment variable APPSCALE_HEAD_NODE, which should
|
121
123
|
be set to the IP address of the AppScale machine that runs the Shadow
|
122
|
-
daemon (a.k.a. the Master AppController)
|
124
|
+
daemon (a.k.a. the Master AppController). Running generate_coverage.sh in
|
125
|
+
the top-level directory will run rcov and generate the coverage reports
|
126
|
+
automatically via unit tests.</p>
|
123
127
|
|
124
128
|
<p>Developed by Chris Bunch as part of the AppScale project. See <a
|
125
129
|
href="LICENSE.html">LICENSE</a> for the specifics of the New BSD License by
|
@@ -138,15 +142,32 @@ as adding capabilities for other types of computation. We would also like
|
|
138
142
|
to refactor Neptune to use symbols instead of instance variables for
|
139
143
|
running jobs: this will likely appear in a future release as well.</p>
|
140
144
|
|
141
|
-
<p>
|
142
|
-
|
145
|
+
<p>Our academic paper on Neptune won best paper at ACM ScienceCloud 2011!
|
146
|
+
<a href=‘<a
|
147
|
+
href="http://www.neptune-lang.org/2011/6/Neptune-Picks-up-Best-Paper-at-ScienceCloud-2011">www.neptune-lang.org/2011/6/Neptune-Picks-up-Best-Paper-at-ScienceCloud-2011</a>’>
|
148
|
+
Here’s a link</a> to the abstract of the paper and the PDF.</p>
|
143
149
|
|
144
150
|
<p>Version History:</p>
|
145
151
|
|
146
|
-
<p>
|
152
|
+
<p>November 10, 2011 - 0.1.2 released, adding unit tests and refactoring all
|
153
|
+
around.</p>
|
154
|
+
|
155
|
+
<p>June 6, 2011 - 0.1.1 released, adding support for code written in Go and R</p>
|
156
|
+
|
157
|
+
<p>June 4, 2011 - 0.1.0 released, adding verbose / quiet options for users
|
158
|
+
wishing to suppress stdout from Neptune jobs.</p>
|
159
|
+
|
160
|
+
<p>May 28, 2011 - 0.0.9 released, adding generic SSA support for users wanting
|
161
|
+
to use StochKit and other SSA codes.</p>
|
162
|
+
|
163
|
+
<p>April 8, 2011 - 0.0.8 released, fixing MapReduce support for both regular
|
164
|
+
Hadoop and Hadoop Streaming. Also increased code coverage to cover a number
|
165
|
+
of failure scenarios.</p>
|
166
|
+
|
167
|
+
<p>April 2, 2011 - 0.0.7 released, adding automatic test suite and many bug
|
147
168
|
fixes for all scenarios. rcov can also be used to generate test coverage
|
148
169
|
information: current coverage stats can be found in coverage directory.
|
149
|
-
|
170
|
+
MapReduce broken at the moment - will fix in next release</p>
|
150
171
|
|
151
172
|
<p>March 28, 2011 - 0.0.6 released, adding support for input jobs, so users
|
152
173
|
can place data in the datastore without having to run any computation</p>
|
data/doc/bin/neptune.html
CHANGED
data/doc/created.rid
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
2
|
-
./lib/common_functions.rb Wed,
|
1
|
+
Thu, 10 Nov 2011 12:27:04 -0800
|
2
|
+
./lib/common_functions.rb Wed, 26 Oct 2011 10:37:34 -0700
|
3
3
|
./LICENSE Thu, 27 Jan 2011 13:24:30 -0800
|
4
|
-
./lib/app_controller_client.rb
|
5
|
-
./lib/neptune.rb
|
6
|
-
./README
|
7
|
-
./bin/neptune
|
4
|
+
./lib/app_controller_client.rb Tue, 25 Oct 2011 23:47:47 -0700
|
5
|
+
./lib/neptune.rb Wed, 09 Nov 2011 10:54:28 -0800
|
6
|
+
./README Thu, 10 Nov 2011 11:48:11 -0800
|
7
|
+
./bin/neptune Sun, 15 May 2011 22:52:56 -0700
|
data/doc/index.html
CHANGED
@@ -44,6 +44,8 @@
|
|
44
44
|
|
45
45
|
<li class="module"><a href="CommonFunctions.html">CommonFunctions</a></li>
|
46
46
|
|
47
|
+
<li class="module"><a href="Kernel.html">Kernel</a></li>
|
48
|
+
|
47
49
|
<li class="class"><a href="Object.html">Object</a></li>
|
48
50
|
|
49
51
|
</ul>
|
@@ -63,12 +65,20 @@
|
|
63
65
|
|
64
66
|
<li><a href="AppControllerClient.html#method-i-compile_code">#compile_code — AppControllerClient</a></li>
|
65
67
|
|
68
|
+
<li><a href="Object.html#method-i-compile_code">#compile_code — Object</a></li>
|
69
|
+
|
66
70
|
<li><a href="Object.html#method-i-do_preprocessing">#do_preprocessing — Object</a></li>
|
67
71
|
|
68
72
|
<li><a href="AppControllerClient.html#method-i-get_acl">#get_acl — AppControllerClient</a></li>
|
69
73
|
|
74
|
+
<li><a href="Object.html#method-i-get_input">#get_input — Object</a></li>
|
75
|
+
|
76
|
+
<li><a href="Object.html#method-i-get_job_data">#get_job_data — Object</a></li>
|
77
|
+
|
70
78
|
<li><a href="AppControllerClient.html#method-i-get_output">#get_output — AppControllerClient</a></li>
|
71
79
|
|
80
|
+
<li><a href="Object.html#method-i-get_std_out_and_err">#get_std_out_and_err — Object</a></li>
|
81
|
+
|
72
82
|
<li><a href="AppControllerClient.html#method-i-make_call">#make_call — AppControllerClient</a></li>
|
73
83
|
|
74
84
|
<li><a href="Object.html#method-i-neptune">#neptune — Object</a></li>
|
@@ -77,16 +87,24 @@
|
|
77
87
|
|
78
88
|
<li><a href="Object.html#method-i-preprocess_erlang">#preprocess_erlang — Object</a></li>
|
79
89
|
|
80
|
-
<li><a href="Object.html#method-i-preprocess_mapreduce">#preprocess_mapreduce — Object</a></li>
|
81
|
-
|
82
90
|
<li><a href="Object.html#method-i-preprocess_mpi">#preprocess_mpi — Object</a></li>
|
83
91
|
|
92
|
+
<li><a href="Object.html#method-i-preprocess_ssa">#preprocess_ssa — Object</a></li>
|
93
|
+
|
84
94
|
<li><a href="AppControllerClient.html#method-i-put_input">#put_input — AppControllerClient</a></li>
|
85
95
|
|
96
|
+
<li><a href="Object.html#method-i-run_job">#run_job — Object</a></li>
|
97
|
+
|
86
98
|
<li><a href="AppControllerClient.html#method-i-set_acl">#set_acl — AppControllerClient</a></li>
|
87
99
|
|
100
|
+
<li><a href="Kernel.html#method-i-shell">#shell — Kernel</a></li>
|
101
|
+
|
88
102
|
<li><a href="AppControllerClient.html#method-i-start_neptune_job">#start_neptune_job — AppControllerClient</a></li>
|
89
103
|
|
104
|
+
<li><a href="Object.html#method-i-validate_storage_params">#validate_storage_params — Object</a></li>
|
105
|
+
|
106
|
+
<li><a href="Object.html#method-i-wait_for_compilation_to_finish">#wait_for_compilation_to_finish — Object</a></li>
|
107
|
+
|
90
108
|
</ul>
|
91
109
|
|
92
110
|
<div id="validator-badges">
|
@@ -24,7 +24,7 @@
|
|
24
24
|
<div id="metadata">
|
25
25
|
<dl>
|
26
26
|
<dt class="modified-date">Last Modified</dt>
|
27
|
-
<dd class="modified-date">
|
27
|
+
<dd class="modified-date">Tue Oct 25 23:47:47 -0700 2011</dd>
|
28
28
|
|
29
29
|
|
30
30
|
<dt class="requires">Requires</dt>
|
@@ -50,7 +50,7 @@
|
|
50
50
|
<div class="description">
|
51
51
|
<h2>Description</h2>
|
52
52
|
|
53
|
-
<p>Programmer: Chris Bunch</p>
|
53
|
+
<p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
|
54
54
|
|
55
55
|
</div>
|
56
56
|
|
@@ -24,7 +24,7 @@
|
|
24
24
|
<div id="metadata">
|
25
25
|
<dl>
|
26
26
|
<dt class="modified-date">Last Modified</dt>
|
27
|
-
<dd class="modified-date">Wed
|
27
|
+
<dd class="modified-date">Wed Oct 26 10:37:34 -0700 2011</dd>
|
28
28
|
|
29
29
|
|
30
30
|
<dt class="requires">Requires</dt>
|
@@ -58,7 +58,7 @@
|
|
58
58
|
<div class="description">
|
59
59
|
<h2>Description</h2>
|
60
60
|
|
61
|
-
<p>Programmer: Chris Bunch</p>
|
61
|
+
<p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
|
62
62
|
|
63
63
|
</div>
|
64
64
|
|
data/doc/lib/neptune_rb.html
CHANGED
@@ -24,7 +24,7 @@
|
|
24
24
|
<div id="metadata">
|
25
25
|
<dl>
|
26
26
|
<dt class="modified-date">Last Modified</dt>
|
27
|
-
<dd class="modified-date">
|
27
|
+
<dd class="modified-date">Wed Nov 09 10:54:28 -0800 2011</dd>
|
28
28
|
|
29
29
|
|
30
30
|
<dt class="requires">Requires</dt>
|
@@ -48,6 +48,8 @@
|
|
48
48
|
<div class="description">
|
49
49
|
<h2>Description</h2>
|
50
50
|
|
51
|
+
<p>Programmer: Chris Bunch (cgb@cs.ucsb.edu)</p>
|
52
|
+
|
51
53
|
</div>
|
52
54
|
|
53
55
|
</div>
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
|
-
# Programmer: Chris Bunch
|
2
|
+
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
3
3
|
|
4
4
|
require 'openssl'
|
5
5
|
require 'soap/rpc/driver'
|
@@ -16,7 +16,7 @@ NO_TIMEOUT = -1
|
|
16
16
|
# platform (here, AppScale). This client is similar to that used in the AppScale
|
17
17
|
# Tools, but with non-Neptune SOAP calls removed.
|
18
18
|
class AppControllerClient
|
19
|
-
|
19
|
+
attr_accessor :conn, :ip, :secret
|
20
20
|
|
21
21
|
# A constructor that requires both the IP address of the machine to communicate
|
22
22
|
# with as well as the secret (string) needed to perform communication.
|
data/lib/common_functions.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
|
-
# Programmer: Chris Bunch
|
2
|
+
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
3
3
|
|
4
4
|
require 'digest/sha1'
|
5
5
|
require 'fileutils'
|
@@ -9,6 +9,12 @@ require 'socket'
|
|
9
9
|
require 'timeout'
|
10
10
|
require 'yaml'
|
11
11
|
|
12
|
+
module Kernel
|
13
|
+
def shell(command)
|
14
|
+
return `#{command}`
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
12
18
|
# A helper module that aggregates functions that are not part of Neptune's
|
13
19
|
# core functionality. Specifically, this module contains methods to scp
|
14
20
|
# files to other machines and the ability to read YAML files, which are
|
@@ -21,11 +27,17 @@ module CommonFunctions
|
|
21
27
|
# placed, and the name of the key to use. The keyname is typically
|
22
28
|
# specified by the Neptune job given, but defaults to ''appscale''
|
23
29
|
# if not provided.
|
24
|
-
def self.scp_to_shadow(local_file_loc,
|
25
|
-
|
26
|
-
|
30
|
+
def self.scp_to_shadow(local_file_loc,
|
31
|
+
remote_file_loc,
|
32
|
+
keyname,
|
33
|
+
is_dir=false,
|
34
|
+
file=File,
|
35
|
+
get_from_yaml=CommonFunctions.method(:get_from_yaml),
|
36
|
+
scp_file=CommonFunctions.method(:scp_file))
|
27
37
|
|
28
|
-
|
38
|
+
shadow_ip = get_from_yaml.call(keyname, :shadow, file)
|
39
|
+
ssh_key = file.expand_path("~/.appscale/#{keyname}.key")
|
40
|
+
scp_file.call(local_file_loc, remote_file_loc, shadow_ip, ssh_key, is_dir)
|
29
41
|
end
|
30
42
|
|
31
43
|
# Performs the actual remote copying of files: given the IP address
|
@@ -34,33 +46,39 @@ module CommonFunctions
|
|
34
46
|
# if the network is down, if a bad keyname is provided, or if the
|
35
47
|
# wrong IP is given. If the user specifies that the file to copy is
|
36
48
|
# actually a directory, we append the -r flag to scp as well.
|
37
|
-
def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
|
49
|
+
def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
|
50
|
+
is_dir=false, file=File, fileutils=FileUtils, kernel=Kernel)
|
38
51
|
cmd = ""
|
39
|
-
local_file_loc =
|
52
|
+
local_file_loc = file.expand_path(local_file_loc)
|
40
53
|
|
41
54
|
ssh_args = "-o StrictHostkeyChecking=no 2>&1"
|
42
55
|
ssh_args << " -r " if is_dir
|
43
56
|
|
44
|
-
public_key_loc =
|
57
|
+
public_key_loc = file.expand_path(public_key_loc)
|
45
58
|
cmd = "scp -i #{public_key_loc} #{ssh_args} #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"
|
46
59
|
cmd << "; echo $? >> ~/.appscale/retval"
|
47
60
|
|
48
|
-
retval_loc =
|
49
|
-
|
61
|
+
retval_loc = file.expand_path("~/.appscale/retval")
|
62
|
+
fileutils.rm_f(retval_loc)
|
50
63
|
|
51
64
|
begin
|
52
|
-
Timeout::timeout(-1) {
|
65
|
+
Timeout::timeout(-1) { kernel.shell("#{cmd}") }
|
53
66
|
rescue Timeout::Error
|
54
|
-
abort("Remotely copying over files failed. Is the destination machine
|
67
|
+
abort("Remotely copying over files failed. Is the destination machine" +
|
68
|
+
" on and reachable from this computer? We tried the following" +
|
69
|
+
" command:\n\n#{cmd}")
|
55
70
|
end
|
56
71
|
|
57
72
|
loop {
|
58
|
-
break if
|
73
|
+
break if file.exists?(retval_loc)
|
59
74
|
sleep(5)
|
60
75
|
}
|
61
76
|
|
62
|
-
retval = (
|
63
|
-
|
77
|
+
retval = (file.open(retval_loc) { |f| f.read }).chomp
|
78
|
+
if retval != "0"
|
79
|
+
abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is " +
|
80
|
+
"your environment set up properly?")
|
81
|
+
end
|
64
82
|
return cmd
|
65
83
|
end
|
66
84
|
|
@@ -70,16 +88,20 @@ module CommonFunctions
|
|
70
88
|
# method aborts if the value doesn't exist or the YAML file is malformed.
|
71
89
|
# If the required flag is set to false, it returns nil in either scenario
|
72
90
|
# instead.
|
73
|
-
def self.get_from_yaml(keyname, tag, required=true)
|
74
|
-
location_file =
|
91
|
+
def self.get_from_yaml(keyname, tag, required=true, file=File, yaml=YAML)
|
92
|
+
location_file = file.expand_path("~/.appscale/locations-#{keyname}.yaml")
|
75
93
|
|
76
|
-
|
94
|
+
if !file.exists?(location_file)
|
95
|
+
abort("An AppScale instance is not currently running with the provided" +
|
96
|
+
" keyname, \"#{keyname}\".")
|
97
|
+
end
|
77
98
|
|
78
99
|
begin
|
79
|
-
tree =
|
100
|
+
tree = yaml.load_file(location_file)
|
80
101
|
rescue ArgumentError
|
81
102
|
if required
|
82
|
-
abort("The yaml file you provided was malformed. Please correct any
|
103
|
+
abort("The yaml file you provided was malformed. Please correct any" +
|
104
|
+
" errors in it and try again.")
|
83
105
|
else
|
84
106
|
return nil
|
85
107
|
end
|
@@ -87,15 +109,19 @@ module CommonFunctions
|
|
87
109
|
|
88
110
|
value = tree[tag]
|
89
111
|
|
90
|
-
|
91
|
-
|
112
|
+
if value.nil? and required
|
113
|
+
abort("The file #{location_file} is in the wrong format and doesn't" +
|
114
|
+
" contain a #{tag} tag. Please make sure the file is in the correct" +
|
115
|
+
" format and try again.")
|
116
|
+
end
|
117
|
+
|
92
118
|
return value
|
93
119
|
end
|
94
120
|
|
95
121
|
# Returns the secret key needed for communication with AppScale's
|
96
122
|
# Shadow node. This method is a nice frontend to the get_from_yaml
|
97
123
|
# function, as the secret is stored in a YAML file.
|
98
|
-
def self.get_secret_key(keyname, required=true)
|
99
|
-
return CommonFunctions.get_from_yaml(keyname, :secret)
|
124
|
+
def self.get_secret_key(keyname, required=true, file=File, yaml=YAML)
|
125
|
+
return CommonFunctions.get_from_yaml(keyname, :secret, required, file, yaml)
|
100
126
|
end
|
101
127
|
end
|
data/lib/neptune.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
2
3
|
|
3
4
|
require 'app_controller_client'
|
4
5
|
require 'common_functions'
|
@@ -39,6 +40,8 @@ NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
|
|
39
40
|
# support. In the future, it is likely that the only exposed / monkey-patched
|
40
41
|
# method should be job, while the others could probably be folded into either
|
41
42
|
# a Neptune-specific class or into CommonFunctions.
|
43
|
+
# TODO(cbunch): This doesn't look like it does anything - run the integration
|
44
|
+
# test and confirm one way or the other.
|
42
45
|
class Object
|
43
46
|
end
|
44
47
|
|
@@ -48,7 +51,9 @@ end
|
|
48
51
|
# on the type of the job that the user has asked to run.
|
49
52
|
def do_preprocessing(job_data)
|
50
53
|
job_type = job_data["@type"]
|
51
|
-
|
54
|
+
if !NEED_PREPROCESSING.include?(job_type)
|
55
|
+
return
|
56
|
+
end
|
52
57
|
|
53
58
|
preprocess = "preprocess_#{job_type}".to_sym
|
54
59
|
send(preprocess, job_data)
|
@@ -57,10 +62,9 @@ end
|
|
57
62
|
# This preprocessing method copies over the user's code to the
|
58
63
|
# Shadow node so that it can be compiled there. A future version
|
59
64
|
# of this method may also copy over libraries as well.
|
60
|
-
def preprocess_compile(job_data)
|
61
|
-
verbose = job_data["@verbose"]
|
65
|
+
def preprocess_compile(job_data, shell=Kernel.method(:`))
|
62
66
|
code = File.expand_path(job_data["@code"])
|
63
|
-
|
67
|
+
if !File.exists?(code)
|
64
68
|
abort("The source file #{code} does not exist.")
|
65
69
|
end
|
66
70
|
|
@@ -71,94 +75,88 @@ def preprocess_compile(job_data)
|
|
71
75
|
|
72
76
|
ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no root@#{shadow_ip}"
|
73
77
|
remove_dir = "ssh #{ssh_args} 'rm -rf #{dest}' 2>&1"
|
74
|
-
puts remove_dir
|
75
|
-
|
78
|
+
puts remove_dir
|
79
|
+
shell.call(remove_dir)
|
76
80
|
|
77
81
|
CommonFunctions.scp_to_shadow(code, dest, keyname, is_dir=true)
|
78
82
|
|
79
83
|
job_data["@code"] = dest
|
80
84
|
end
|
81
85
|
|
82
|
-
def preprocess_erlang(job_data)
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
86
|
+
def preprocess_erlang(job_data, file=File, common_functions=CommonFunctions)
|
87
|
+
if !job_data["@code"]
|
88
|
+
abort("When running Erlang jobs, :code must be specified.")
|
89
|
+
end
|
90
|
+
|
91
|
+
source_code = file.expand_path(job_data["@code"])
|
92
|
+
if !file.exists?(source_code)
|
93
|
+
abort("The specified code, #{job_data['@code']}," +
|
94
|
+
" didn't exist. Please specify one that exists and try again")
|
88
95
|
end
|
89
96
|
dest_code = "/tmp/"
|
90
97
|
|
91
98
|
keyname = job_data["@keyname"]
|
92
|
-
|
99
|
+
common_functions.scp_to_shadow(source_code, dest_code, keyname)
|
93
100
|
end
|
94
101
|
|
95
|
-
# This preprocessing method
|
96
|
-
#
|
97
|
-
#
|
102
|
+
# This preprocessing method verifies that the user specified the number of nodes
|
103
|
+
# to use. If they also specified the number of processes to use, we also verify
|
104
|
+
# that this value is at least as many as the number of nodes (that is, nodes
|
105
|
+
# can't be underprovisioned in MPI).
|
98
106
|
def preprocess_mpi(job_data)
|
99
|
-
|
107
|
+
if !job_data["@nodes_to_use"]
|
108
|
+
abort("When running MPI jobs, :nodes_to_use must be specified.")
|
109
|
+
end
|
110
|
+
|
111
|
+
if !job_data["@procs_to_use"]
|
112
|
+
abort("When running MPI jobs, :procs_to_use must be specified.")
|
113
|
+
end
|
114
|
+
|
100
115
|
if job_data["@procs_to_use"]
|
101
116
|
p = job_data["@procs_to_use"]
|
102
117
|
n = job_data["@nodes_to_use"]
|
103
118
|
if p < n
|
104
|
-
|
119
|
+
abort("When specifying both :procs_to_use and :nodes_to_use" +
|
105
120
|
", :procs_to_use must be at least as large as :nodes_to_use. Please " +
|
106
121
|
"change this and try again. You specified :procs_to_use = #{p} and" +
|
107
|
-
":nodes_to_use = #{n}."
|
108
|
-
abort(not_enough_procs)
|
122
|
+
":nodes_to_use = #{n}.")
|
109
123
|
end
|
110
124
|
end
|
111
125
|
|
112
|
-
|
126
|
+
return job_data
|
113
127
|
end
|
114
128
|
|
129
|
+
# This preprocessing method verifies that the user specified the number of
|
130
|
+
# trajectories to run, via either :trajectories or :simulations. Both should
|
131
|
+
# not be specified - only one or the other, and regardless of which they
|
132
|
+
# specify, convert it to be :trajectories.
|
115
133
|
def preprocess_ssa(job_data)
|
134
|
+
if job_data["@simulations"] and job_data["@trajectories"]
|
135
|
+
abort("Both :simulations and :trajectories cannot be specified - use one" +
|
136
|
+
" or the other.")
|
137
|
+
end
|
138
|
+
|
116
139
|
if job_data["@simulations"]
|
117
140
|
job_data["@trajectories"] = job_data["@simulations"]
|
141
|
+
job_data.delete("@simulations")
|
118
142
|
end
|
119
143
|
|
120
|
-
|
144
|
+
if !job_data["@trajectories"]
|
121
145
|
abort(":trajectories needs to be specified when running ssa jobs")
|
122
146
|
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# TODO: actually use me!
|
126
|
-
#def validate_args(list)
|
127
|
-
# list.each do |item|
|
128
|
-
# val = instance_variable_get("@#{item}".to_sym)
|
129
|
-
# abort("FATAL: #{item} was not defined") if val.nil?
|
130
|
-
# end
|
131
|
-
#end
|
132
147
|
|
133
|
-
|
134
|
-
|
135
|
-
# into HPC job requests. At a high level, the user can
|
136
|
-
# request to run a job, retrieve a job's output, or
|
137
|
-
# modify the access policy (ACL) for the output of a
|
138
|
-
# job. By default, job data is private, but a Neptune
|
139
|
-
# job can be used to set it to public later (and
|
140
|
-
# vice-versa).
|
141
|
-
def neptune(params)
|
142
|
-
verbose = params[:verbose]
|
143
|
-
|
144
|
-
puts "Received a request to run a job." if verbose
|
145
|
-
puts params[:type] if verbose
|
146
|
-
|
147
|
-
keyname = params[:keyname] || "appscale"
|
148
|
-
|
149
|
-
shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
|
150
|
-
secret = CommonFunctions.get_secret_key(keyname)
|
151
|
-
controller = AppControllerClient.new(shadow_ip, secret)
|
152
|
-
ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
|
148
|
+
return job_data
|
149
|
+
end
|
153
150
|
|
151
|
+
def get_job_data(params)
|
154
152
|
job_data = {}
|
155
153
|
params.each { |k, v|
|
156
154
|
key = "@#{k}"
|
157
155
|
job_data[key] = v
|
158
156
|
}
|
159
157
|
|
160
|
-
job_data
|
161
|
-
job_data["@keyname"] = keyname || "appscale"
|
158
|
+
job_data.delete("@job")
|
159
|
+
job_data["@keyname"] = params[:keyname] || "appscale"
|
162
160
|
|
163
161
|
job_data["@type"] = job_data["@type"].to_s
|
164
162
|
type = job_data["@type"]
|
@@ -182,132 +180,174 @@ def neptune(params)
|
|
182
180
|
end
|
183
181
|
end
|
184
182
|
|
185
|
-
|
186
|
-
|
187
|
-
unless ALLOWED_STORAGE_TYPES.include?(storage)
|
188
|
-
msg = "Supported storage types are #{ALLOWED_STORAGE_TYPES.join(', ')}" +
|
189
|
-
" - we do not support #{storage}."
|
190
|
-
abort(msg)
|
191
|
-
end
|
192
|
-
|
193
|
-
# Our implementation for storing / retrieving via Google Storage
|
194
|
-
# and Walrus uses
|
195
|
-
# the same library as we do for S3 - so just tell it that it's S3
|
196
|
-
if storage == "gstorage" or storage == "walrus"
|
197
|
-
storage = "s3"
|
198
|
-
job_data["@storage"] = "s3"
|
199
|
-
end
|
183
|
+
return job_data
|
184
|
+
end
|
200
185
|
|
201
|
-
|
202
|
-
|
203
|
-
unless job_data["@#{item}"]
|
204
|
-
if ENV[item]
|
205
|
-
puts "Using #{item} from environment" if verbose
|
206
|
-
job_data["@#{item}"] = ENV[item]
|
207
|
-
else
|
208
|
-
msg = "When storing data to S3, #{item} must be specified or be in " +
|
209
|
-
"your environment. Please do so and try again."
|
210
|
-
abort(msg)
|
211
|
-
end
|
212
|
-
end
|
213
|
-
}
|
214
|
-
end
|
215
|
-
else
|
186
|
+
def validate_storage_params(job_data)
|
187
|
+
if !job_data["@storage"]
|
216
188
|
job_data["@storage"] = "appdb"
|
217
189
|
end
|
218
190
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
191
|
+
storage = job_data["@storage"]
|
192
|
+
if !ALLOWED_STORAGE_TYPES.include?(storage)
|
193
|
+
abort("Supported storage types are #{ALLOWED_STORAGE_TYPES.join(', ')}" +
|
194
|
+
" - we do not support #{storage}.")
|
195
|
+
end
|
224
196
|
|
225
|
-
|
197
|
+
# Our implementation for storing / retrieving via Google Storage
|
198
|
+
# and Walrus uses
|
199
|
+
# the same library as we do for S3 - so just tell it that it's S3
|
200
|
+
if storage == "gstorage" or storage == "walrus"
|
201
|
+
storage = "s3"
|
202
|
+
job_data["@storage"] = "s3"
|
203
|
+
end
|
226
204
|
|
227
|
-
|
205
|
+
if storage == "s3"
|
206
|
+
["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
|
207
|
+
if job_data["@#{item}"]
|
208
|
+
puts "Using specified #{item}"
|
209
|
+
else
|
210
|
+
if ENV[item]
|
211
|
+
puts "Using #{item} from environment"
|
212
|
+
job_data["@#{item}"] = ENV[item]
|
213
|
+
else
|
214
|
+
abort("When storing data to S3, #{item} must be specified or be in " +
|
215
|
+
"your environment. Please do so and try again.")
|
216
|
+
end
|
217
|
+
end
|
218
|
+
}
|
219
|
+
end
|
228
220
|
|
229
|
-
|
221
|
+
return job_data
|
222
|
+
end
|
230
223
|
|
231
|
-
|
232
|
-
|
233
|
-
|
224
|
+
# This method takes a file on the local user's computer and stores it remotely
|
225
|
+
# via AppScale. It returns a hash map indicating whether or not the job
|
226
|
+
# succeeded and if it failed, the reason for it.
|
227
|
+
def get_input(job_data, ssh_args, shadow_ip, controller, file=File,
|
228
|
+
shell=Kernel.method(:`))
|
229
|
+
result = {:result => :success}
|
234
230
|
|
235
|
-
if
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
231
|
+
if !job_data["@local"]
|
232
|
+
abort("You failed to specify a file to copy over via the :local flag.")
|
233
|
+
end
|
234
|
+
|
235
|
+
local_file = file.expand_path(job_data["@local"])
|
236
|
+
if !file.exists?(local_file)
|
237
|
+
reason = "the file you specified to copy, #{local_file}, doesn't exist." +
|
241
238
|
" Please specify a file that exists and try again."
|
242
|
-
|
239
|
+
return {:result => :failure, :reason => reason}
|
240
|
+
end
|
241
|
+
|
242
|
+
remote = "/tmp/neptune-input-#{rand(100000)}"
|
243
|
+
scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
|
244
|
+
puts scp_cmd
|
245
|
+
shell.call(scp_cmd)
|
246
|
+
|
247
|
+
job_data["@local"] = remote
|
248
|
+
puts "job data = #{job_data.inspect}"
|
249
|
+
response = controller.put_input(job_data)
|
250
|
+
if response
|
251
|
+
return {:result => :success}
|
252
|
+
else
|
253
|
+
# TODO - expand this to include the reason why it failed
|
254
|
+
return {:result => :failure}
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# This method waits for AppScale to finish compiling the user's code, indicated
|
259
|
+
# by AppScale copying the finished code to a pre-determined location.
|
260
|
+
def wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location,
|
261
|
+
shell=Kernel.method(:`))
|
262
|
+
loop {
|
263
|
+
ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
|
264
|
+
puts ssh_command
|
265
|
+
ssh_result = shell.call(ssh_command)
|
266
|
+
puts "result was [#{ssh_result}]"
|
267
|
+
if ssh_result =~ /No such file or directory/
|
268
|
+
puts "Still waiting for code to be compiled..."
|
269
|
+
else
|
270
|
+
puts "compilation complete! Copying compiled code to #{copy_to}"
|
271
|
+
return
|
243
272
|
end
|
273
|
+
sleep(5)
|
274
|
+
}
|
275
|
+
end
|
244
276
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
277
|
+
# This method sends out a request to compile code, waits for it to finish, and
# gets the standard out and error returned from the compilation. This method
# returns a hash containing the standard out, error, and a result that indicates
# whether or not the compilation was successful.
#
# Arguments:
#   job_data::  hash describing the job; "@copy_to" (local destination for the
#               compiled code) and "@code" (path of the code that was
#               compiled) are read here
#   ssh_args::  options string placed directly after "ssh" / "scp -r"
#   shadow_ip:: host that compiled the code; contacted as root
#   shell::     callable that runs a command string; defaults to Kernel's
#               backtick method so a stub can be injected in tests
def compile_code(job_data, ssh_args, shadow_ip, shell=Kernel.method(:`))
  # NOTE(review): controller is neither a parameter nor a local of this
  # method - confirm where it is expected to come from; if nothing defines
  # it, this line raises NameError.
  compiled_location = controller.compile_code(job_data)

  copy_to = job_data["@copy_to"]

  # Forward the injected shell so that polling uses the same command runner
  # as the scp / cleanup steps below.
  wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location, shell)

  # Clear any stale local copy before fetching the fresh results.
  FileUtils.rm_rf(copy_to)

  scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
  puts scp_command
  shell.call(scp_command)

  code = job_data["@code"]
  dirs = code.split(/\//)
  remote_dir = "/tmp/" + dirs[-1]

  # Remove both the /tmp directory named after the code and the compiled
  # results from the remote machine.
  [remote_dir, compiled_location].each { |remote_files|
    ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_files}' 2>&1"
    puts ssh_command
    shell.call(ssh_command)
  }

  return get_std_out_and_err(copy_to)
end
|
305
306
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
307
|
+
# This method returns a hash containing the standard out and standard error
# from a completed job, as well as a result field that indicates whether or
# not the job completed successfully (success = no errors).
#
# Arguments:
#   location:: directory that contains the files compile_out and compile_err
#
# Returns a hash with three keys:
#   :out::    contents of compile_out with one trailing newline removed
#   :err::    contents of compile_err with one trailing newline removed
#   :result:: :success when compile_err holds nothing but whitespace,
#             :failure otherwise
def get_std_out_and_err(location)
  # chomp (not chomp!) is used on purpose: chomp! returns nil when there is
  # no trailing newline to strip, which would throw the file contents away.
  out = File.read("#{location}/compile_out").chomp
  err = File.read("#{location}/compile_err").chomp

  # An empty string is truthy in Ruby, so test for emptiness explicitly.
  status = err.strip.empty? ? :success : :failure

  return { :out => out, :err => err, :result => status }
end
|
327
|
+
|
328
|
+
# This method actually runs the Neptune job, given information about the job
|
329
|
+
# as well as information about the node to send the request to.
|
330
|
+
def run_job(job_data, ssh_args, shadow_ip, secret,
|
331
|
+
controller=AppControllerClient, file=File)
|
332
|
+
controller = controller.new(shadow_ip, secret)
|
333
|
+
|
334
|
+
# TODO - right now the job is assumed to succeed in many cases
|
335
|
+
# need to investigate the various failure scenarios
|
336
|
+
result = { :result => :success }
|
337
|
+
|
338
|
+
case job_data["@type"]
|
339
|
+
when "input"
|
340
|
+
result = get_input(job_data, ssh_args, shadow_ip, controller, file)
|
341
|
+
when "output"
|
342
|
+
result[:output] = controller.get_output(job_data)
|
343
|
+
when "get-acl"
|
344
|
+
job_data["@type"] = "acl"
|
345
|
+
result[:acl] = controller.get_acl(job_data)
|
346
|
+
when "set-acl"
|
347
|
+
job_data["@type"] = "acl"
|
348
|
+
result[:acl] = controller.set_acl(job_data)
|
349
|
+
when "compile"
|
350
|
+
result = compile_code(job_data, ssh_args, shadow_ip)
|
311
351
|
else
|
312
352
|
msg = controller.start_neptune_job(job_data)
|
313
353
|
result[:msg] = msg
|
@@ -317,3 +357,28 @@ def neptune(params)
|
|
317
357
|
return result
|
318
358
|
end
|
319
359
|
|
360
|
+
# This method is the heart of Neptune - here, we take
# blocks of code that the user has written and convert them
# into HPC job requests. At a high level, the user can
# request to run a job, retrieve a job's output, or
# modify the access policy (ACL) for the output of a
# job. By default, job data is private, but a Neptune
# job can be used to set it to public later (and
# vice-versa).
#
# Arguments:
#   params:: the user-supplied job description; params[:type] is printed and
#            the whole value is handed to get_job_data
#
# Returns whatever run_job returns for the resulting job.
def neptune(params)
  puts "Received a request to run a job."
  puts params[:type]

  job_data = get_job_data(params)
  validate_storage_params(job_data)
  puts "job data = #{job_data.inspect}"
  do_preprocessing(job_data)
  keyname = job_data["@keyname"]

  # The keyname selects both the deployment's stored settings (shadow node,
  # secret) and the ssh key file used to reach it.
  shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
  secret = CommonFunctions.get_secret_key(keyname)
  ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "

  return run_job(job_data, ssh_args, shadow_ip, secret)
end
|