neptune 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +4 -0
- data/doc/BabelHelper.html +393 -376
- data/doc/BadConfigurationException.html +121 -127
- data/doc/CommonFunctions.html +237 -265
- data/doc/ExodusHelper.html +820 -0
- data/doc/ExodusTaskInfo.html +263 -0
- data/doc/FileNotFoundException.html +121 -127
- data/doc/NeptuneHelper.html +527 -592
- data/doc/NeptuneManagerClient.html +696 -0
- data/doc/NeptuneManagerException.html +139 -0
- data/doc/Object.html +334 -236
- data/doc/TaskInfo.html +428 -0
- data/doc/created.rid +8 -5
- data/doc/images/add.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/index.html +74 -142
- data/doc/js/darkfish.js +99 -62
- data/doc/js/jquery.js +15 -29
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/table_of_contents.html +226 -0
- data/lib/babel.rb +116 -50
- data/lib/custom_exceptions.rb +2 -2
- data/lib/exodus.rb +311 -0
- data/lib/exodus_task_info.rb +36 -0
- data/lib/neptune.rb +52 -18
- data/lib/{app_controller_client.rb → neptune_manager_client.rb} +54 -38
- data/lib/task_info.rb +155 -0
- data/test/{unit/test_babel.rb → test_babel.rb} +161 -26
- data/test/{unit/test_common_functions.rb → test_common_functions.rb} +1 -1
- data/test/test_exodus.rb +687 -0
- data/test/{unit/test_neptune.rb → test_neptune.rb} +28 -17
- data/test/{unit/test_app_controller_client.rb → test_neptune_manager_client.rb} +15 -16
- data/test/test_task_info.rb +32 -0
- data/test/{unit/ts_all.rb → ts_all.rb} +3 -1
- metadata +30 -34
- data/doc/AppControllerClient.html +0 -702
- data/doc/AppControllerException.html +0 -145
- data/doc/bin/neptune.html +0 -56
- data/doc/js/quicksearch.js +0 -114
- data/doc/js/thickbox-compressed.js +0 -10
- data/doc/lib/app_controller_client_rb.html +0 -60
- data/doc/lib/babel_rb.html +0 -68
- data/doc/lib/common_functions_rb.html +0 -70
- data/doc/lib/custom_exceptions_rb.html +0 -54
- data/doc/lib/neptune_rb.html +0 -60
- data/test/integration/tc_c.rb +0 -57
- data/test/integration/tc_dfsp.rb +0 -37
- data/test/integration/tc_dwssa.rb +0 -38
- data/test/integration/tc_erlang.rb +0 -183
- data/test/integration/tc_mapreduce.rb +0 -282
- data/test/integration/tc_mpi.rb +0 -160
- data/test/integration/tc_storage.rb +0 -209
- data/test/integration/tc_upc.rb +0 -75
- data/test/integration/tc_x10.rb +0 -94
- data/test/integration/test_helper.rb +0 -135
- data/test/integration/ts_neptune.rb +0 -40
@@ -10,47 +10,52 @@ require 'timeout'
|
|
10
10
|
# endlessly timeout and retry, so as a hack, just don't let them timeout.
|
11
11
|
# The next version should replace this and properly timeout and not use
|
12
12
|
# long calls unless necessary.
|
13
|
-
NO_TIMEOUT =
|
13
|
+
NO_TIMEOUT = 100000
|
14
14
|
|
15
15
|
|
16
16
|
# A client that uses SOAP messages to communicate with the underlying cloud
|
17
17
|
# platform (here, AppScale). This client is similar to that used in the AppScale
|
18
18
|
# Tools, but with non-Neptune SOAP calls removed.
|
19
|
-
class
|
19
|
+
class NeptuneManagerClient
|
20
|
+
|
21
|
+
|
22
|
+
# The port that the Neptune Manager runs on, by default.
|
23
|
+
SERVER_PORT = 17445
|
20
24
|
|
21
25
|
|
22
|
-
# The SOAP client that we use to communicate with the
|
26
|
+
# The SOAP client that we use to communicate with the NeptuneManager.
|
23
27
|
attr_accessor :conn
|
24
28
|
|
25
29
|
|
26
|
-
# The IP address of the
|
30
|
+
# The IP address of the NeptuneManager that we will be connecting to.
|
27
31
|
attr_accessor :ip
|
28
32
|
|
29
33
|
|
30
34
|
# The secret string that is used to authenticate this client with
|
31
|
-
#
|
35
|
+
# NeptuneManagers. It is initially generated by appscale-run-instances and can
|
32
36
|
# be found on the machine that ran that tool, or on any AppScale machine.
|
33
37
|
attr_accessor :secret
|
34
38
|
|
35
39
|
|
36
40
|
# A constructor that requires both the IP address of the machine to communicate
|
37
41
|
# with as well as the secret (string) needed to perform communication.
|
38
|
-
#
|
42
|
+
# NeptuneManagers will reject SOAP calls if this secret (basically a password)
|
39
43
|
# is not present - it can be found in the user's .appscale directory, and a
|
40
44
|
# helper method is usually present to fetch this for us.
|
41
45
|
def initialize(ip, secret)
|
42
46
|
@ip = ip
|
43
47
|
@secret = secret
|
44
48
|
|
45
|
-
@conn = SOAP::RPC::Driver.new("https://#{@ip}
|
46
|
-
@conn.add_method("
|
47
|
-
@conn.add_method("
|
48
|
-
@conn.add_method("
|
49
|
-
@conn.add_method("
|
50
|
-
@conn.add_method("
|
51
|
-
@conn.add_method("
|
52
|
-
@conn.add_method("
|
53
|
-
@conn.add_method("
|
49
|
+
@conn = SOAP::RPC::Driver.new("https://#{@ip}:#{SERVER_PORT}")
|
50
|
+
@conn.add_method("start_job", "jobs", "secret")
|
51
|
+
@conn.add_method("put_input", "job_data", "secret")
|
52
|
+
@conn.add_method("get_output", "job_data", "secret")
|
53
|
+
@conn.add_method("get_acl", "job_data", "secret")
|
54
|
+
@conn.add_method("set_acl", "job_data", "secret")
|
55
|
+
@conn.add_method("compile_code", "job_data", "secret")
|
56
|
+
@conn.add_method("get_supported_babel_engines", "job_data", "secret")
|
57
|
+
@conn.add_method("does_file_exist", "file", "job_data", "secret")
|
58
|
+
@conn.add_method("get_profiling_info", "key", "secret")
|
54
59
|
end
|
55
60
|
|
56
61
|
|
@@ -76,7 +81,7 @@ class AppControllerClient
|
|
76
81
|
if retry_on_except
|
77
82
|
retry
|
78
83
|
else
|
79
|
-
raise
|
84
|
+
raise NeptuneManagerException.new("Connection was refused. Is the NeptuneManager running?")
|
80
85
|
end
|
81
86
|
rescue OpenSSL::SSL::SSLError, NotImplementedError, Timeout::Error
|
82
87
|
retry
|
@@ -84,7 +89,7 @@ class AppControllerClient
|
|
84
89
|
if retry_on_except
|
85
90
|
retry
|
86
91
|
else
|
87
|
-
raise
|
92
|
+
raise NeptuneManagerException.new("We saw an unexpected error of the type #{except.class} talking to #{@ip} with the following message:\n#{except}.")
|
88
93
|
end
|
89
94
|
end
|
90
95
|
end
|
@@ -94,16 +99,16 @@ class AppControllerClient
|
|
94
99
|
# or MapReduce), or a scaling job (e.g., for AppScale itself). This method
|
95
100
|
# should not be used for retrieving the output of a job or getting / setting
|
96
101
|
# output ACLs, but just for starting new HPC / scaling jobs. This method
|
97
|
-
# takes a hash containing the parameters of the job to run, and can raise
|
98
|
-
# the
|
102
|
+
# takes a hash containing the parameters of the job to run, and can raise a NeptuneManagerException if
|
103
|
+
# the NeptuneManager it calls returns an error (e.g., if a bad secret is used
|
99
104
|
# or the machine isn't running). Otherwise, the return value of this method
|
100
|
-
# is the result returned from the
|
105
|
+
# is the result returned from the NeptuneManager.
|
101
106
|
def start_neptune_job(job_data)
|
102
107
|
result = ""
|
103
108
|
make_call(NO_TIMEOUT, false) {
|
104
|
-
result = conn.
|
109
|
+
result = conn.start_job(job_data, @secret)
|
105
110
|
}
|
106
|
-
raise
|
111
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
107
112
|
return result
|
108
113
|
end
|
109
114
|
|
@@ -116,9 +121,9 @@ class AppControllerClient
|
|
116
121
|
def put_input(job_data)
|
117
122
|
result = ""
|
118
123
|
make_call(NO_TIMEOUT, false) {
|
119
|
-
result = conn.
|
124
|
+
result = conn.put_input(job_data, @secret)
|
120
125
|
}
|
121
|
-
raise
|
126
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
122
127
|
return result
|
123
128
|
end
|
124
129
|
|
@@ -132,13 +137,13 @@ class AppControllerClient
|
|
132
137
|
# for non-trivial output jobs, the next version of Neptune will add an
|
133
138
|
# additional call to directly copy the output to a file on the local
|
134
139
|
# filesystem. See start_neptune_job for conditions by which this method
|
135
|
-
# can raise
|
140
|
+
# can raise a NeptuneManagerException, as well as the input format used for job_data.
|
136
141
|
def get_output(job_data)
|
137
142
|
result = ""
|
138
143
|
make_call(NO_TIMEOUT, false) {
|
139
|
-
result = conn.
|
144
|
+
result = conn.get_output(job_data, @secret)
|
140
145
|
}
|
141
|
-
raise
|
146
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
142
147
|
return result
|
143
148
|
end
|
144
149
|
|
@@ -151,9 +156,9 @@ class AppControllerClient
|
|
151
156
|
def get_acl(job_data)
|
152
157
|
result = ""
|
153
158
|
make_call(NO_TIMEOUT, false) {
|
154
|
-
result = conn.
|
159
|
+
result = conn.get_acl(job_data, @secret)
|
155
160
|
}
|
156
|
-
raise
|
161
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
157
162
|
return result
|
158
163
|
end
|
159
164
|
|
@@ -166,45 +171,56 @@ class AppControllerClient
|
|
166
171
|
def set_acl(job_data)
|
167
172
|
result = ""
|
168
173
|
make_call(NO_TIMEOUT, false) {
|
169
|
-
result = conn.
|
174
|
+
result = conn.set_acl(job_data, @secret)
|
170
175
|
}
|
171
|
-
raise
|
176
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
172
177
|
return result
|
173
178
|
end
|
174
179
|
|
175
180
|
|
176
|
-
# Instructs the
|
181
|
+
# Instructs the NeptuneManager to fetch the code specified and compile it.
|
177
182
|
# The result should then be placed in a location specified in the job data.
|
178
183
|
def compile_code(job_data)
|
179
184
|
result = ""
|
180
185
|
make_call(NO_TIMEOUT, false) {
|
181
|
-
result = conn.
|
186
|
+
result = conn.compile_code(job_data, @secret)
|
182
187
|
}
|
183
|
-
raise
|
188
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
184
189
|
return result
|
185
190
|
end
|
186
191
|
|
187
192
|
|
188
|
-
# Asks the
|
193
|
+
# Asks the NeptuneManager for a list of all the Babel engines (each of which
|
189
194
|
# is a queue to store jobs and something that executes tasks) that are
|
190
195
|
# supported for the given credentials.
|
191
196
|
def get_supported_babel_engines(job_data)
|
192
197
|
result = []
|
193
198
|
make_call(NO_TIMEOUT, false) {
|
194
|
-
result = conn.
|
199
|
+
result = conn.get_supported_babel_engines(job_data, @secret)
|
195
200
|
}
|
196
201
|
return result
|
197
202
|
end
|
198
203
|
|
199
204
|
|
200
|
-
# Asks the
|
205
|
+
# Asks the NeptuneManager to see if the given file exists in the remote
|
201
206
|
# datastore. If extra credentials are needed for this operation, they are
|
202
207
|
# searched for within the job data.
|
203
208
|
def does_file_exist?(file, job_data)
|
204
209
|
result = false
|
205
210
|
make_call(NO_TIMEOUT, false) {
|
206
|
-
result = conn.
|
211
|
+
result = conn.does_file_exist(file, job_data, @secret)
|
212
|
+
}
|
213
|
+
return result
|
214
|
+
end
|
215
|
+
|
216
|
+
|
217
|
+
def get_profiling_info(key)
|
218
|
+
result = {}
|
219
|
+
make_call(NO_TIMEOUT, false) {
|
220
|
+
result = conn.get_profiling_info(key, @secret)
|
207
221
|
}
|
208
222
|
return result
|
209
223
|
end
|
224
|
+
|
225
|
+
|
210
226
|
end
|
data/lib/task_info.rb
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
# Programmer: Chris Bunch
|
2
|
+
|
3
|
+
|
4
|
+
# Imports from Ruby's stdlib
|
5
|
+
require 'thread' # needed for Mutex
|
6
|
+
|
7
|
+
|
8
|
+
# Imports for RubyGems
|
9
|
+
require 'rubygems'
|
10
|
+
require 'json'
|
11
|
+
|
12
|
+
|
13
|
+
# Imports for other Neptune libraries
|
14
|
+
require 'babel'
|
15
|
+
require 'custom_exceptions'
|
16
|
+
|
17
|
+
|
18
|
+
# TaskInfo represents the result of a babel call, an object with all the
|
19
|
+
# information that the user would be interested in relating to their task.
|
20
|
+
# At the simplest level, this is just the output of their job, but it also
|
21
|
+
# can include profiling information (e.g., performance and cost), as well
|
22
|
+
# as information that may help with debugging (e.g. info about the environment
|
23
|
+
# we executed the task in).
|
24
|
+
class TaskInfo
|
25
|
+
|
26
|
+
|
27
|
+
# A Hash consisting of the parameters that the user passed to babel().
|
28
|
+
attr_accessor :job_data
|
29
|
+
|
30
|
+
|
31
|
+
# Creates a new TaskInfo object, storing the parameters the user gave us to
|
32
|
+
# invoke the job for later use. The user can give us a Hash containing the
|
33
|
+
# parameters that the job was started with, or a String that is the
|
34
|
+
# JSON-dumped version of that data (also obtainable from TaskInfo objects
|
35
|
+
# via to_json).
|
36
|
+
def initialize(job_data)
|
37
|
+
if job_data.class == String
|
38
|
+
begin
|
39
|
+
job_data = JSON.load(job_data)
|
40
|
+
rescue JSON::ParserError
|
41
|
+
raise BadConfigurationException.new("job data not JSONable")
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
if job_data.class != Hash
|
46
|
+
raise BadConfigurationException.new("job data not a Hash")
|
47
|
+
end
|
48
|
+
@job_data = job_data
|
49
|
+
|
50
|
+
# To prevent us from repeatedly grabbing (potentially) large files over the
|
51
|
+
# network, we keep a local, cached copy of the task's standard
|
52
|
+
# output, error, and metadata - initially empty, but pulled in the first
|
53
|
+
# time that the user asks for it. Since we expose this functionality through
|
54
|
+
# the accessor methods below, we should not use attr_accessor or attr_reader
|
55
|
+
# to directly expose these variables.
|
56
|
+
@output = nil
|
57
|
+
@error = nil
|
58
|
+
@metadata = nil
|
59
|
+
|
60
|
+
# To prevent concurrent threads from pulling in output multiple times, we
|
61
|
+
# guard access to remotely grabbing output/error/metadata with this
|
62
|
+
# lock.
|
63
|
+
@lock = Mutex.new
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
# Returns a string with the standard output produced by this Babel task. If
|
68
|
+
# the task has not yet completed, this call blocks until it completes.
|
69
|
+
def stdout
|
70
|
+
if @output.nil?
|
71
|
+
@lock.synchronize {
|
72
|
+
@output = BabelHelper.wait_and_get_output(@job_data,
|
73
|
+
@job_data['@output'])
|
74
|
+
}
|
75
|
+
end
|
76
|
+
|
77
|
+
return @output
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
# Returns a string with the standard error produced by this Babel task. While
|
82
|
+
# all jobs should produce standard output, they may not produce standard
|
83
|
+
# error, so it is reasonable that this could return an empty string to the
|
84
|
+
# user.
|
85
|
+
def stderr
|
86
|
+
if @error.nil?
|
87
|
+
@lock.synchronize {
|
88
|
+
@error = BabelHelper.wait_and_get_output(@job_data, @job_data['@error'])
|
89
|
+
}
|
90
|
+
end
|
91
|
+
|
92
|
+
return @error
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
# An alias for stdout.
|
97
|
+
def to_s
|
98
|
+
return stdout
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
# A common operation that users may perform is asking if the task executed
|
103
|
+
# successfully, indicated by a return value of zero. This method provides
|
104
|
+
# a quick alias for that functionality.
|
105
|
+
def success?
|
106
|
+
return return_value.zero?
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
# Converts this object to JSON, so that it can be written to disk or
|
111
|
+
# passed over the network. Since our stdout/stderr/metadata objects
|
112
|
+
# are all locally cached, we don't need to write them (and thus can
|
113
|
+
# potentially save a lot of space).
|
114
|
+
def to_json
|
115
|
+
return JSON.dump(@job_data)
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
|
122
|
+
# We store all the task information that isn't standard out or standard err
|
123
|
+
# as a JSON-encoded Hash in a metadata file. This function provides easy
|
124
|
+
# access to that hash, retrieving it remotely if needed. It's private since
|
125
|
+
# we intend for other methods in this class to call it, and not the user
|
126
|
+
# directly.
|
127
|
+
def metadata
|
128
|
+
if @metadata.nil?
|
129
|
+
@lock.synchronize {
|
130
|
+
info = BabelHelper.wait_and_get_output(@job_data,
|
131
|
+
@job_data['@metadata'])
|
132
|
+
@metadata = JSON.load(info)
|
133
|
+
}
|
134
|
+
end
|
135
|
+
|
136
|
+
return @metadata
|
137
|
+
end
|
138
|
+
|
139
|
+
|
140
|
+
# We would like to be able to directly call .name on anything in the metadata
|
141
|
+
# hash for a task. One way to avoid having to add all of these method calls
|
142
|
+
# ourselves and keep it in sync with whatever Neptune over AppScale offers
|
143
|
+
# is just to use method_missing and automatically respond to anything that
|
144
|
+
# is a key in the metadata hash.
|
145
|
+
def method_missing(id, *args, &block)
|
146
|
+
methods_available = metadata()
|
147
|
+
if methods_available[id.to_s].nil?
|
148
|
+
super
|
149
|
+
else
|
150
|
+
return methods_available[id.to_s]
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
end
|
@@ -1,9 +1,12 @@
|
|
1
1
|
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
2
2
|
|
3
|
-
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
4
5
|
require 'babel'
|
5
6
|
|
6
|
-
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
require 'flexmock/test_unit'
|
7
10
|
|
8
11
|
|
9
12
|
class TestBabel < Test::Unit::TestCase
|
@@ -19,44 +22,102 @@ class TestBabel < Test::Unit::TestCase
|
|
19
22
|
:EC2_ACCESS_KEY => "boo",
|
20
23
|
:EC2_SECRET_KEY => "baz",
|
21
24
|
:S3_URL => "http://baz.com",
|
22
|
-
:
|
25
|
+
:is_remote => true,
|
26
|
+
:keyname => keyname,
|
27
|
+
:metadata_info => {'time_to_store_inputs' => 0.0}
|
23
28
|
}
|
24
29
|
|
25
30
|
job_data = {}
|
26
31
|
params.each { |k, v|
|
27
32
|
job_data["@#{k}"] = v
|
28
33
|
}
|
29
|
-
job_data["@is_remote"] = true
|
30
34
|
|
31
35
|
output = "/bucket/babel/temp-0123456789"
|
32
36
|
job_data["@output"] = output
|
37
|
+
job_data_no_err = job_data.dup
|
38
|
+
|
39
|
+
error = "/bucket/babel/temp-1111111111"
|
40
|
+
job_data["@error"] = error
|
41
|
+
job_data_no_metadata = job_data.dup
|
42
|
+
|
43
|
+
metadata = "/bucket/babel/temp-2222222222"
|
44
|
+
job_data["@metadata"] = metadata
|
33
45
|
|
34
46
|
run_job_data = job_data.dup
|
35
47
|
run_job_data["@engine"] = "executor-sqs"
|
36
48
|
run_job_data["@run_local"] = true
|
49
|
+
run_job_data["@failed_attempts"] = 0
|
50
|
+
|
51
|
+
run_job_data_second_try = run_job_data.dup
|
52
|
+
run_job_data["@failed_attempts"] = 1
|
37
53
|
|
38
54
|
output_job_data = job_data.dup
|
39
55
|
output_job_data["@type"] = "output"
|
40
56
|
|
57
|
+
error_job_data = output_job_data.dup
|
58
|
+
error_job_data['@output'] = error
|
59
|
+
|
60
|
+
metadata_job_data = output_job_data.dup
|
61
|
+
metadata_job_data['@output'] = metadata
|
62
|
+
json_metadata = JSON.dump({'command' => 'ls /home/baz', 'return_value' => 0})
|
63
|
+
|
41
64
|
kernel = flexmock(Kernel)
|
42
65
|
kernel.should_receive(:puts).and_return()
|
43
|
-
kernel.should_receive(:rand).and_return(0,1,2,3,4,5,6,7,8,9)
|
66
|
+
kernel.should_receive(:rand).and_return(0,1,2,3,4,5,6,7,8,9,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2)
|
44
67
|
kernel.should_receive(:sleep).and_return()
|
45
68
|
|
46
|
-
flexmock(
|
47
|
-
instance.should_receive(:does_file_exist?).with(file, job_data).
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
69
|
+
flexmock(NeptuneManagerClient).new_instances { |instance|
|
70
|
+
instance.should_receive(:does_file_exist?).with(file, job_data).
|
71
|
+
and_return(true)
|
72
|
+
instance.should_receive(:does_file_exist?).with(output, job_data_no_err).
|
73
|
+
and_return(false)
|
74
|
+
instance.should_receive(:does_file_exist?).
|
75
|
+
with(error, job_data_no_metadata).and_return(false)
|
76
|
+
instance.should_receive(:does_file_exist?).with(metadata, job_data).
|
77
|
+
and_return(false)
|
78
|
+
|
79
|
+
# So the first time we start the job, let's say that it failed, so that
|
80
|
+
# we can make sure that the caller properly catches this and tries again.
|
81
|
+
instance.should_receive(:start_neptune_job).with(run_job_data).
|
82
|
+
and_return("error")
|
83
|
+
instance.should_receive(:start_neptune_job).with(run_job_data_second_try).
|
84
|
+
and_return("MPI job is now running")
|
85
|
+
|
86
|
+
instance.should_receive(:get_output).with(output_job_data).
|
87
|
+
and_return("output")
|
88
|
+
instance.should_receive(:get_output).with(error_job_data).
|
89
|
+
and_return("error")
|
90
|
+
instance.should_receive(:get_output).with(metadata_job_data).
|
91
|
+
and_return(json_metadata)
|
53
92
|
}
|
54
93
|
|
55
94
|
commonfunctions = flexmock(CommonFunctions)
|
56
|
-
commonfunctions.should_receive(:get_from_yaml).with(keyname, :shadow).
|
57
|
-
|
58
|
-
|
59
|
-
|
95
|
+
commonfunctions.should_receive(:get_from_yaml).with(keyname, :shadow).
|
96
|
+
and_return("127.0.0.1")
|
97
|
+
commonfunctions.should_receive(:get_secret_key).with(keyname).
|
98
|
+
and_return("secret")
|
99
|
+
|
100
|
+
# Calling either to_s or stdout will return the standard out that the
|
101
|
+
# program produced.
|
102
|
+
expected = "output"
|
103
|
+
actual = babel(params)
|
104
|
+
assert_equal(expected, actual.to_s)
|
105
|
+
assert_equal(expected, actual.stdout)
|
106
|
+
|
107
|
+
# Calling stderr returns the standard error that we are expecting.
|
108
|
+
assert_equal("error", actual.stderr)
|
109
|
+
|
110
|
+
# Calling command returns the command that was remotely exec'ed, hidden in
|
111
|
+
# the metadata.
|
112
|
+
assert_equal("ls /home/baz", actual.command)
|
113
|
+
|
114
|
+
# Calling success? returns true if the command's return value is zero,
|
115
|
+
# also hidden in the metadata
|
116
|
+
assert_equal(true, actual.success?)
|
117
|
+
|
118
|
+
# We're using method_missing under the hood, so make sure that a method call
|
119
|
+
# that doesn't exist fails accordingly
|
120
|
+
assert_raise(NoMethodError) { actual.baz }
|
60
121
|
end
|
61
122
|
|
62
123
|
def test_bad_babel_params
|
@@ -104,7 +165,8 @@ class TestBabel < Test::Unit::TestCase
|
|
104
165
|
|
105
166
|
# Finally, if we run a job and specify remote code, that should be used
|
106
167
|
# as the bucket.
|
107
|
-
job_data_remote_code = {"@code" => "/baz/boo/code.baz", "@storage" => "s3"
|
168
|
+
job_data_remote_code = {"@code" => "/baz/boo/code.baz", "@storage" => "s3",
|
169
|
+
"@is_remote" => true}
|
108
170
|
expected_remote = "/baz/babel/temp-10"
|
109
171
|
|
110
172
|
actual_remote = BabelHelper.generate_output_location(job_data_remote_code)
|
@@ -141,8 +203,8 @@ class TestBabel < Test::Unit::TestCase
|
|
141
203
|
actual_2 = BabelHelper.put_inputs(job_data)
|
142
204
|
assert_equal(job_data, actual_2)
|
143
205
|
|
144
|
-
# If we specify inputs on the file system, they should be uploaded and
|
145
|
-
# with remote file locations
|
206
|
+
# If we specify inputs on the file system, they should be uploaded and
|
207
|
+
# replaced with remote file locations
|
146
208
|
neptune_params = {
|
147
209
|
:type => "input",
|
148
210
|
:local => "/baz",
|
@@ -155,26 +217,31 @@ class TestBabel < Test::Unit::TestCase
|
|
155
217
|
kernel = flexmock(Kernel)
|
156
218
|
kernel.should_receive(:neptune).with(neptune_params)
|
157
219
|
|
220
|
+
time = flexmock(Time)
|
221
|
+
time.should_receive(:now).and_return(0.0)
|
222
|
+
|
158
223
|
job_data["@argv"] = ["boo", "/baz", "gbaz"]
|
159
224
|
expected = job_data.dup
|
160
225
|
expected["@argv"] = ["boo", "/remote/babel/baz", "gbaz"]
|
226
|
+
expected["@metadata_info"] = {"time_to_store_inputs" => 0.0}
|
161
227
|
actual_3 = BabelHelper.put_inputs(job_data.dup)
|
162
228
|
assert_equal(expected, actual_3)
|
163
229
|
end
|
164
230
|
|
165
231
|
def test_run_babel_job
|
166
232
|
# Running a job with no @type specified means it should be a Babel job
|
167
|
-
job_data = {
|
233
|
+
job_data = [{
|
168
234
|
"@code" => "/baz/boo/code.baz",
|
169
235
|
"@argv" => ["boo", "/remote/babel/baz", "gbaz"]
|
170
|
-
}
|
236
|
+
}]
|
171
237
|
|
172
238
|
neptune_params = {
|
173
239
|
:type => "babel",
|
174
240
|
:code => "/baz/boo/code.baz",
|
175
241
|
:argv => ["boo", "/remote/babel/baz", "gbaz"],
|
176
242
|
:run_local => true,
|
177
|
-
:engine => "executor-sqs"
|
243
|
+
:engine => "executor-sqs",
|
244
|
+
:failed_attempts => 0
|
178
245
|
}
|
179
246
|
|
180
247
|
result = { :result => :success }
|
@@ -188,18 +255,19 @@ class TestBabel < Test::Unit::TestCase
|
|
188
255
|
|
189
256
|
def test_run_mpi_job
|
190
257
|
# Running a job with @type specified should preserve the job type
|
191
|
-
job_data = {
|
258
|
+
job_data = [{
|
192
259
|
"@type" => "mpi",
|
193
260
|
"@code" => "/baz/boo/code.baz",
|
194
261
|
"@argv" => ["boo", "/remote/babel/baz", "gbaz"]
|
195
|
-
}
|
262
|
+
}]
|
196
263
|
|
197
264
|
neptune_params = {
|
198
265
|
:type => "mpi",
|
199
266
|
:code => "/baz/boo/code.baz",
|
200
267
|
:argv => ["boo", "/remote/babel/baz", "gbaz"],
|
201
268
|
:run_local => true,
|
202
|
-
:engine => "executor-sqs"
|
269
|
+
:engine => "executor-sqs",
|
270
|
+
:failed_attempts => 0
|
203
271
|
}
|
204
272
|
|
205
273
|
result = { :result => :success }
|
@@ -228,7 +296,74 @@ class TestBabel < Test::Unit::TestCase
|
|
228
296
|
kernel.should_receive(:sleep).and_return()
|
229
297
|
|
230
298
|
expected = "output goes here"
|
231
|
-
actual = BabelHelper.wait_and_get_output(job_data)
|
299
|
+
actual = BabelHelper.wait_and_get_output(job_data, job_data['@output'])
|
232
300
|
assert_equal(expected, actual)
|
233
301
|
end
|
302
|
+
|
303
|
+
def test_batch_tasks_operation
|
304
|
+
# if we give babel an array of hashes, it should give us back
|
305
|
+
# task information for each of the jobs we asked it to run
|
306
|
+
# essentially this saves us the overhead of the repeated SOAP calls
|
307
|
+
# to AppScale
|
308
|
+
|
309
|
+
neptune_params = {
|
310
|
+
:type => "babel",
|
311
|
+
:code => "/baz/boo/code.baz",
|
312
|
+
:argv => ["boo", "/remote/babel/baz", "gbaz"],
|
313
|
+
:output => "/baz/output",
|
314
|
+
:error => "/baz/error",
|
315
|
+
:metadata => "/baz/metadata",
|
316
|
+
:run_local => true,
|
317
|
+
:engine => "executor-sqs",
|
318
|
+
:failed_attempts => 0,
|
319
|
+
:metadata_info => {'time_to_store_inputs' => 0.0},
|
320
|
+
:storage => "appdb",
|
321
|
+
:keyname => "appscale"
|
322
|
+
}
|
323
|
+
tasks = [neptune_params, neptune_params]
|
324
|
+
|
325
|
+
job_data = {}
|
326
|
+
neptune_params.each { |k, v|
|
327
|
+
job_data["@#{k}"] = v
|
328
|
+
}
|
329
|
+
|
330
|
+
# mocks - mock out most of the babel stuff, since we just want to verify
|
331
|
+
# the semantics of passing in an array of hashes instead of just a hash
|
332
|
+
babelhelper = flexmock(BabelHelper)
|
333
|
+
babelhelper.should_receive(:check_output_files).and_return()
|
334
|
+
babelhelper.should_receive(:validate_inputs).and_return()
|
335
|
+
babelhelper.should_receive(:put_code).and_return()
|
336
|
+
babelhelper.should_receive(:put_inputs).and_return()
|
337
|
+
|
338
|
+
# mocks for neptune
|
339
|
+
neptunehelper = flexmock(NeptuneHelper)
|
340
|
+
neptunehelper.should_receive(:require_file_to_exist).and_return()
|
341
|
+
neptunehelper.should_receive(:require_file_to_not_exist).and_return()
|
342
|
+
|
343
|
+
# finally, mock out the libraries that neptune uses
|
344
|
+
commonfunctions = flexmock(CommonFunctions)
|
345
|
+
commonfunctions.should_receive(:get_from_yaml).with("appscale", :shadow).
|
346
|
+
and_return("public_ip")
|
347
|
+
commonfunctions.should_receive(:get_from_yaml).with("appscale", :secret,
|
348
|
+
true).and_return("secret")
|
349
|
+
|
350
|
+
appcontroller = flexmock('appcontroller')
|
351
|
+
appcontroller.should_receive(:get_supported_babel_engines).with(job_data).
|
352
|
+
and_return(["executor-sqs"])
|
353
|
+
appcontroller.should_receive(:start_neptune_job).
|
354
|
+
and_return("babel job is now running")
|
355
|
+
flexmock(NeptuneManagerClient).should_receive(:new).and_return(appcontroller)
|
356
|
+
|
357
|
+
flexmock(TaskInfo).new_instances { |instance|
|
358
|
+
instance.should_receive(:stdout).and_return("output")
|
359
|
+
}
|
360
|
+
|
361
|
+
expected = ["output", "output"]
|
362
|
+
actual = []
|
363
|
+
babel(tasks).each { |task|
|
364
|
+
actual << task.stdout
|
365
|
+
}
|
366
|
+
assert_equal(expected, actual)
|
367
|
+
end
|
368
|
+
|
234
369
|
end
|