neptune 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +4 -0
- data/doc/BabelHelper.html +393 -376
- data/doc/BadConfigurationException.html +121 -127
- data/doc/CommonFunctions.html +237 -265
- data/doc/ExodusHelper.html +820 -0
- data/doc/ExodusTaskInfo.html +263 -0
- data/doc/FileNotFoundException.html +121 -127
- data/doc/NeptuneHelper.html +527 -592
- data/doc/NeptuneManagerClient.html +696 -0
- data/doc/NeptuneManagerException.html +139 -0
- data/doc/Object.html +334 -236
- data/doc/TaskInfo.html +428 -0
- data/doc/created.rid +8 -5
- data/doc/images/add.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/index.html +74 -142
- data/doc/js/darkfish.js +99 -62
- data/doc/js/jquery.js +15 -29
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/table_of_contents.html +226 -0
- data/lib/babel.rb +116 -50
- data/lib/custom_exceptions.rb +2 -2
- data/lib/exodus.rb +311 -0
- data/lib/exodus_task_info.rb +36 -0
- data/lib/neptune.rb +52 -18
- data/lib/{app_controller_client.rb → neptune_manager_client.rb} +54 -38
- data/lib/task_info.rb +155 -0
- data/test/{unit/test_babel.rb → test_babel.rb} +161 -26
- data/test/{unit/test_common_functions.rb → test_common_functions.rb} +1 -1
- data/test/test_exodus.rb +687 -0
- data/test/{unit/test_neptune.rb → test_neptune.rb} +28 -17
- data/test/{unit/test_app_controller_client.rb → test_neptune_manager_client.rb} +15 -16
- data/test/test_task_info.rb +32 -0
- data/test/{unit/ts_all.rb → ts_all.rb} +3 -1
- metadata +30 -34
- data/doc/AppControllerClient.html +0 -702
- data/doc/AppControllerException.html +0 -145
- data/doc/bin/neptune.html +0 -56
- data/doc/js/quicksearch.js +0 -114
- data/doc/js/thickbox-compressed.js +0 -10
- data/doc/lib/app_controller_client_rb.html +0 -60
- data/doc/lib/babel_rb.html +0 -68
- data/doc/lib/common_functions_rb.html +0 -70
- data/doc/lib/custom_exceptions_rb.html +0 -54
- data/doc/lib/neptune_rb.html +0 -60
- data/test/integration/tc_c.rb +0 -57
- data/test/integration/tc_dfsp.rb +0 -37
- data/test/integration/tc_dwssa.rb +0 -38
- data/test/integration/tc_erlang.rb +0 -183
- data/test/integration/tc_mapreduce.rb +0 -282
- data/test/integration/tc_mpi.rb +0 -160
- data/test/integration/tc_storage.rb +0 -209
- data/test/integration/tc_upc.rb +0 -75
- data/test/integration/tc_x10.rb +0 -94
- data/test/integration/test_helper.rb +0 -135
- data/test/integration/ts_neptune.rb +0 -40
@@ -10,47 +10,52 @@ require 'timeout'
|
|
10
10
|
# endlessly timeout and retry, so as a hack, just don't let them timeout.
|
11
11
|
# The next version should replace this and properly timeout and not use
|
12
12
|
# long calls unless necessary.
|
13
|
-
NO_TIMEOUT =
|
13
|
+
NO_TIMEOUT = 100000
|
14
14
|
|
15
15
|
|
16
16
|
# A client that uses SOAP messages to communicate with the underlying cloud
|
17
17
|
# platform (here, AppScale). This client is similar to that used in the AppScale
|
18
18
|
# Tools, but with non-Neptune SOAP calls removed.
|
19
|
-
class
|
19
|
+
class NeptuneManagerClient
|
20
|
+
|
21
|
+
|
22
|
+
# The port that the Neptune Manager runs on, by default.
|
23
|
+
SERVER_PORT = 17445
|
20
24
|
|
21
25
|
|
22
|
-
# The SOAP client that we use to communicate with the
|
26
|
+
# The SOAP client that we use to communicate with the NeptuneManager.
|
23
27
|
attr_accessor :conn
|
24
28
|
|
25
29
|
|
26
|
-
# The IP address of the
|
30
|
+
# The IP address of the NeptuneManager that we will be connecting to.
|
27
31
|
attr_accessor :ip
|
28
32
|
|
29
33
|
|
30
34
|
# The secret string that is used to authenticate this client with
|
31
|
-
#
|
35
|
+
# NeptuneManagers. It is initially generated by appscale-run-instances and can
|
32
36
|
# be found on the machine that ran that tool, or on any AppScale machine.
|
33
37
|
attr_accessor :secret
|
34
38
|
|
35
39
|
|
36
40
|
# A constructor that requires both the IP address of the machine to communicate
|
37
41
|
# with as well as the secret (string) needed to perform communication.
|
38
|
-
#
|
42
|
+
# NeptuneManagers will reject SOAP calls if this secret (basically a password)
|
39
43
|
# is not present - it can be found in the user's .appscale directory, and a
|
40
44
|
# helper method is usually present to fetch this for us.
|
41
45
|
def initialize(ip, secret)
|
42
46
|
@ip = ip
|
43
47
|
@secret = secret
|
44
48
|
|
45
|
-
@conn = SOAP::RPC::Driver.new("https://#{@ip}
|
46
|
-
@conn.add_method("
|
47
|
-
@conn.add_method("
|
48
|
-
@conn.add_method("
|
49
|
-
@conn.add_method("
|
50
|
-
@conn.add_method("
|
51
|
-
@conn.add_method("
|
52
|
-
@conn.add_method("
|
53
|
-
@conn.add_method("
|
49
|
+
@conn = SOAP::RPC::Driver.new("https://#{@ip}:#{SERVER_PORT}")
|
50
|
+
@conn.add_method("start_job", "jobs", "secret")
|
51
|
+
@conn.add_method("put_input", "job_data", "secret")
|
52
|
+
@conn.add_method("get_output", "job_data", "secret")
|
53
|
+
@conn.add_method("get_acl", "job_data", "secret")
|
54
|
+
@conn.add_method("set_acl", "job_data", "secret")
|
55
|
+
@conn.add_method("compile_code", "job_data", "secret")
|
56
|
+
@conn.add_method("get_supported_babel_engines", "job_data", "secret")
|
57
|
+
@conn.add_method("does_file_exist", "file", "job_data", "secret")
|
58
|
+
@conn.add_method("get_profiling_info", "key", "secret")
|
54
59
|
end
|
55
60
|
|
56
61
|
|
@@ -76,7 +81,7 @@ class AppControllerClient
|
|
76
81
|
if retry_on_except
|
77
82
|
retry
|
78
83
|
else
|
79
|
-
raise
|
84
|
+
raise NeptuneManagerException.new("Connection was refused. Is the NeptuneManager running?")
|
80
85
|
end
|
81
86
|
rescue OpenSSL::SSL::SSLError, NotImplementedError, Timeout::Error
|
82
87
|
retry
|
@@ -84,7 +89,7 @@ class AppControllerClient
|
|
84
89
|
if retry_on_except
|
85
90
|
retry
|
86
91
|
else
|
87
|
-
raise
|
92
|
+
raise NeptuneManagerException.new("We saw an unexpected error of the type #{except.class} talking to #{@ip} with the following message:\n#{except}.")
|
88
93
|
end
|
89
94
|
end
|
90
95
|
end
|
@@ -94,16 +99,16 @@ class AppControllerClient
|
|
94
99
|
# or MapReduce), or a scaling job (e.g., for AppScale itself). This method
|
95
100
|
# should not be used for retrieving the output of a job or getting / setting
|
96
101
|
# output ACLs, but just for starting new HPC / scaling jobs. This method
|
97
|
-
# takes a hash containing the parameters of the job to run, and can raise
|
98
|
-
# the
|
102
|
+
# takes a hash containing the parameters of the job to run, and can raise NeptuneManagerException if
|
103
|
+
# the NeptuneManager it calls returns an error (e.g., if a bad secret is used
|
99
104
|
# or the machine isn't running). Otherwise, the return value of this method
|
100
|
-
# is the result returned from the
|
105
|
+
# is the result returned from the NeptuneManager.
|
101
106
|
def start_neptune_job(job_data)
|
102
107
|
result = ""
|
103
108
|
make_call(NO_TIMEOUT, false) {
|
104
|
-
result = conn.
|
109
|
+
result = conn.start_job(job_data, @secret)
|
105
110
|
}
|
106
|
-
raise
|
111
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
107
112
|
return result
|
108
113
|
end
|
109
114
|
|
@@ -116,9 +121,9 @@ class AppControllerClient
|
|
116
121
|
def put_input(job_data)
|
117
122
|
result = ""
|
118
123
|
make_call(NO_TIMEOUT, false) {
|
119
|
-
result = conn.
|
124
|
+
result = conn.put_input(job_data, @secret)
|
120
125
|
}
|
121
|
-
raise
|
126
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
122
127
|
return result
|
123
128
|
end
|
124
129
|
|
@@ -132,13 +137,13 @@ class AppControllerClient
|
|
132
137
|
# for non-trivial output jobs, the next version of Neptune will add an
|
133
138
|
# additional call to directly copy the output to a file on the local
|
134
139
|
# filesystem. See start_neptune_job for conditions by which this method
|
135
|
-
# can raise
|
140
|
+
# can raise NeptuneManagerException as well as the input format used for job_data.
|
136
141
|
def get_output(job_data)
|
137
142
|
result = ""
|
138
143
|
make_call(NO_TIMEOUT, false) {
|
139
|
-
result = conn.
|
144
|
+
result = conn.get_output(job_data, @secret)
|
140
145
|
}
|
141
|
-
raise
|
146
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
142
147
|
return result
|
143
148
|
end
|
144
149
|
|
@@ -151,9 +156,9 @@ class AppControllerClient
|
|
151
156
|
def get_acl(job_data)
|
152
157
|
result = ""
|
153
158
|
make_call(NO_TIMEOUT, false) {
|
154
|
-
result = conn.
|
159
|
+
result = conn.get_acl(job_data, @secret)
|
155
160
|
}
|
156
|
-
raise
|
161
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
157
162
|
return result
|
158
163
|
end
|
159
164
|
|
@@ -166,45 +171,56 @@ class AppControllerClient
|
|
166
171
|
def set_acl(job_data)
|
167
172
|
result = ""
|
168
173
|
make_call(NO_TIMEOUT, false) {
|
169
|
-
result = conn.
|
174
|
+
result = conn.set_acl(job_data, @secret)
|
170
175
|
}
|
171
|
-
raise
|
176
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
172
177
|
return result
|
173
178
|
end
|
174
179
|
|
175
180
|
|
176
|
-
# Instructs the
|
181
|
+
# Instructs the NeptuneManager to fetch the code specified and compile it.
|
177
182
|
# The result should then be placed in a location specified in the job data.
|
178
183
|
def compile_code(job_data)
|
179
184
|
result = ""
|
180
185
|
make_call(NO_TIMEOUT, false) {
|
181
|
-
result = conn.
|
186
|
+
result = conn.compile_code(job_data, @secret)
|
182
187
|
}
|
183
|
-
raise
|
188
|
+
raise NeptuneManagerException.new(result) if result =~ /Error:/
|
184
189
|
return result
|
185
190
|
end
|
186
191
|
|
187
192
|
|
188
|
-
# Asks the
|
193
|
+
# Asks the NeptuneManager for a list of all the Babel engines (each of which
|
189
194
|
# is a queue to store jobs and something that executes tasks) that are
|
190
195
|
# supported for the given credentials.
|
191
196
|
def get_supported_babel_engines(job_data)
|
192
197
|
result = []
|
193
198
|
make_call(NO_TIMEOUT, false) {
|
194
|
-
result = conn.
|
199
|
+
result = conn.get_supported_babel_engines(job_data, @secret)
|
195
200
|
}
|
196
201
|
return result
|
197
202
|
end
|
198
203
|
|
199
204
|
|
200
|
-
# Asks the
|
205
|
+
# Asks the NeptuneManager to see if the given file exists in the remote
|
201
206
|
# datastore. If extra credentials are needed for this operation, they are
|
202
207
|
# searched for within the job data.
|
203
208
|
def does_file_exist?(file, job_data)
|
204
209
|
result = false
|
205
210
|
make_call(NO_TIMEOUT, false) {
|
206
|
-
result = conn.
|
211
|
+
result = conn.does_file_exist(file, job_data, @secret)
|
212
|
+
}
|
213
|
+
return result
|
214
|
+
end
|
215
|
+
|
216
|
+
|
217
|
+
def get_profiling_info(key)
|
218
|
+
result = {}
|
219
|
+
make_call(NO_TIMEOUT, false) {
|
220
|
+
result = conn.get_profiling_info(key, @secret)
|
207
221
|
}
|
208
222
|
return result
|
209
223
|
end
|
224
|
+
|
225
|
+
|
210
226
|
end
|
data/lib/task_info.rb
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
# Programmer: Chris Bunch
|
2
|
+
|
3
|
+
|
4
|
+
# Imports from Ruby's stdlib
|
5
|
+
require 'thread' # needed for Mutex
|
6
|
+
|
7
|
+
|
8
|
+
# Imports for RubyGems
|
9
|
+
require 'rubygems'
|
10
|
+
require 'json'
|
11
|
+
|
12
|
+
|
13
|
+
# Imports for other Neptune libraries
|
14
|
+
require 'babel'
|
15
|
+
require 'custom_exceptions'
|
16
|
+
|
17
|
+
|
18
|
+
# TaskInfo represents the result of a babel call, an object with all the
|
19
|
+
# information that the user would be interested in relating to their task.
|
20
|
+
# At the simplest level, this is just the output of their job, but it also
|
21
|
+
# can include profiling information (e.g., performance and cost), as well
|
22
|
+
# as information that may help with debugging (e.g. info about the environment
|
23
|
+
# we executed the task in).
|
24
|
+
class TaskInfo
|
25
|
+
|
26
|
+
|
27
|
+
# A Hash consisting of the parameters that the user passed to babel().
|
28
|
+
attr_accessor :job_data
|
29
|
+
|
30
|
+
|
31
|
+
# Creates a new TaskInfo object, storing the parameters the user gave us to
|
32
|
+
# invoke the job for later use. The user can give us a Hash containing the
|
33
|
+
# parameters that the job was started with, or a String that is the
|
34
|
+
# JSON-dumped version of that data (also obtainable from TaskInfo objects
|
35
|
+
# via to_json).
|
36
|
+
def initialize(job_data)
|
37
|
+
if job_data.class == String
|
38
|
+
begin
|
39
|
+
job_data = JSON.load(job_data)
|
40
|
+
rescue JSON::ParserError
|
41
|
+
raise BadConfigurationException.new("job data not JSONable")
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
if job_data.class != Hash
|
46
|
+
raise BadConfigurationException.new("job data not a Hash")
|
47
|
+
end
|
48
|
+
@job_data = job_data
|
49
|
+
|
50
|
+
# To prevent us from repeatedly grabbing (potentially) large files over the
|
51
|
+
# network, we keep a local, cached copy of the task's standard
|
52
|
+
# output, error, and metadata - initially empty, but pulled in the first
|
53
|
+
# time that the user asks for it. Since we expose this functionality through
|
54
|
+
# the accessor methods below, we should not use attr_accessor or attr_reader
|
55
|
+
# to directly expose these variables.
|
56
|
+
@output = nil
|
57
|
+
@error = nil
|
58
|
+
@metadata = nil
|
59
|
+
|
60
|
+
# To prevent concurrent threads from pulling in output multiple times, we
|
61
|
+
# guard access to remotely grabbing output/error/metadata with this
|
62
|
+
# lock.
|
63
|
+
@lock = Mutex.new
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
# Returns a string with the standard output produced by this Babel task. If
|
68
|
+
# the task has not yet completed, this call blocks until it completes.
|
69
|
+
def stdout
|
70
|
+
if @output.nil?
|
71
|
+
@lock.synchronize {
|
72
|
+
@output = BabelHelper.wait_and_get_output(@job_data,
|
73
|
+
@job_data['@output'])
|
74
|
+
}
|
75
|
+
end
|
76
|
+
|
77
|
+
return @output
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
# Returns a string with the standard error produced by this Babel task. While
|
82
|
+
# all jobs should produce standard output, they may not produce standard
|
83
|
+
# error, so it is reasonable that this could return an empty string to the
|
84
|
+
# user.
|
85
|
+
def stderr
|
86
|
+
if @error.nil?
|
87
|
+
@lock.synchronize {
|
88
|
+
@error = BabelHelper.wait_and_get_output(@job_data, @job_data['@error'])
|
89
|
+
}
|
90
|
+
end
|
91
|
+
|
92
|
+
return @error
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
# An alias for stdout.
|
97
|
+
def to_s
|
98
|
+
return stdout
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
# A common operation that users may perform is asking if the task executed
|
103
|
+
# successfully, indicated by a return value of zero. This method provides
|
104
|
+
# a quick alias for that functionality.
|
105
|
+
def success?
|
106
|
+
return return_value.zero?
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
# Converts this object to JSON, so that it can be written to disk or
|
111
|
+
# passed over the network. Since our stdout/stderr/metadata objects
|
112
|
+
# are all locally cached, we don't need to write them (and thus can
|
113
|
+
# potentially save a lot of space).
|
114
|
+
def to_json
|
115
|
+
return JSON.dump(@job_data)
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
|
122
|
+
# We store all the task information that isn't standard out or standard err
|
123
|
+
# as a JSON-encoded Hash in a metadata file. This function provides easy
|
124
|
+
# access to that hash, retrieving it remotely if needed. It's private since
|
125
|
+
# we intend for other methods in this class to call it, and not the user
|
126
|
+
# directly.
|
127
|
+
def metadata
|
128
|
+
if @metadata.nil?
|
129
|
+
@lock.synchronize {
|
130
|
+
info = BabelHelper.wait_and_get_output(@job_data,
|
131
|
+
@job_data['@metadata'])
|
132
|
+
@metadata = JSON.load(info)
|
133
|
+
}
|
134
|
+
end
|
135
|
+
|
136
|
+
return @metadata
|
137
|
+
end
|
138
|
+
|
139
|
+
|
140
|
+
# We would like to be able to directly call .name on anything in the metadata
|
141
|
+
# hash for a task. One way to avoid having to add all of these method calls
|
142
|
+
# ourselves and keep it in sync with whatever Neptune over AppScale offers
|
143
|
+
# is just to use method_missing and automatically respond to anything that
|
144
|
+
# is a key in the metadata hash.
|
145
|
+
def method_missing(id, *args, &block)
|
146
|
+
methods_available = metadata()
|
147
|
+
if methods_available[id.to_s].nil?
|
148
|
+
super
|
149
|
+
else
|
150
|
+
return methods_available[id.to_s]
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
end
|
@@ -1,9 +1,12 @@
|
|
1
1
|
# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
|
2
2
|
|
3
|
-
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
4
5
|
require 'babel'
|
5
6
|
|
6
|
-
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
require 'flexmock/test_unit'
|
7
10
|
|
8
11
|
|
9
12
|
class TestBabel < Test::Unit::TestCase
|
@@ -19,44 +22,102 @@ class TestBabel < Test::Unit::TestCase
|
|
19
22
|
:EC2_ACCESS_KEY => "boo",
|
20
23
|
:EC2_SECRET_KEY => "baz",
|
21
24
|
:S3_URL => "http://baz.com",
|
22
|
-
:
|
25
|
+
:is_remote => true,
|
26
|
+
:keyname => keyname,
|
27
|
+
:metadata_info => {'time_to_store_inputs' => 0.0}
|
23
28
|
}
|
24
29
|
|
25
30
|
job_data = {}
|
26
31
|
params.each { |k, v|
|
27
32
|
job_data["@#{k}"] = v
|
28
33
|
}
|
29
|
-
job_data["@is_remote"] = true
|
30
34
|
|
31
35
|
output = "/bucket/babel/temp-0123456789"
|
32
36
|
job_data["@output"] = output
|
37
|
+
job_data_no_err = job_data.dup
|
38
|
+
|
39
|
+
error = "/bucket/babel/temp-1111111111"
|
40
|
+
job_data["@error"] = error
|
41
|
+
job_data_no_metadata = job_data.dup
|
42
|
+
|
43
|
+
metadata = "/bucket/babel/temp-2222222222"
|
44
|
+
job_data["@metadata"] = metadata
|
33
45
|
|
34
46
|
run_job_data = job_data.dup
|
35
47
|
run_job_data["@engine"] = "executor-sqs"
|
36
48
|
run_job_data["@run_local"] = true
|
49
|
+
run_job_data["@failed_attempts"] = 0
|
50
|
+
|
51
|
+
run_job_data_second_try = run_job_data.dup
|
52
|
+
run_job_data["@failed_attempts"] = 1
|
37
53
|
|
38
54
|
output_job_data = job_data.dup
|
39
55
|
output_job_data["@type"] = "output"
|
40
56
|
|
57
|
+
error_job_data = output_job_data.dup
|
58
|
+
error_job_data['@output'] = error
|
59
|
+
|
60
|
+
metadata_job_data = output_job_data.dup
|
61
|
+
metadata_job_data['@output'] = metadata
|
62
|
+
json_metadata = JSON.dump({'command' => 'ls /home/baz', 'return_value' => 0})
|
63
|
+
|
41
64
|
kernel = flexmock(Kernel)
|
42
65
|
kernel.should_receive(:puts).and_return()
|
43
|
-
kernel.should_receive(:rand).and_return(0,1,2,3,4,5,6,7,8,9)
|
66
|
+
kernel.should_receive(:rand).and_return(0,1,2,3,4,5,6,7,8,9,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2)
|
44
67
|
kernel.should_receive(:sleep).and_return()
|
45
68
|
|
46
|
-
flexmock(
|
47
|
-
instance.should_receive(:does_file_exist?).with(file, job_data).
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
69
|
+
flexmock(NeptuneManagerClient).new_instances { |instance|
|
70
|
+
instance.should_receive(:does_file_exist?).with(file, job_data).
|
71
|
+
and_return(true)
|
72
|
+
instance.should_receive(:does_file_exist?).with(output, job_data_no_err).
|
73
|
+
and_return(false)
|
74
|
+
instance.should_receive(:does_file_exist?).
|
75
|
+
with(error, job_data_no_metadata).and_return(false)
|
76
|
+
instance.should_receive(:does_file_exist?).with(metadata, job_data).
|
77
|
+
and_return(false)
|
78
|
+
|
79
|
+
# So the first time we start the job, let's say that it failed, so that
|
80
|
+
# we can make sure that the caller properly catches this and tries again.
|
81
|
+
instance.should_receive(:start_neptune_job).with(run_job_data).
|
82
|
+
and_return("error")
|
83
|
+
instance.should_receive(:start_neptune_job).with(run_job_data_second_try).
|
84
|
+
and_return("MPI job is now running")
|
85
|
+
|
86
|
+
instance.should_receive(:get_output).with(output_job_data).
|
87
|
+
and_return("output")
|
88
|
+
instance.should_receive(:get_output).with(error_job_data).
|
89
|
+
and_return("error")
|
90
|
+
instance.should_receive(:get_output).with(metadata_job_data).
|
91
|
+
and_return(json_metadata)
|
53
92
|
}
|
54
93
|
|
55
94
|
commonfunctions = flexmock(CommonFunctions)
|
56
|
-
commonfunctions.should_receive(:get_from_yaml).with(keyname, :shadow).
|
57
|
-
|
58
|
-
|
59
|
-
|
95
|
+
commonfunctions.should_receive(:get_from_yaml).with(keyname, :shadow).
|
96
|
+
and_return("127.0.0.1")
|
97
|
+
commonfunctions.should_receive(:get_secret_key).with(keyname).
|
98
|
+
and_return("secret")
|
99
|
+
|
100
|
+
# Calling either to_s or stdout will return the standard out that the
|
101
|
+
# program produced.
|
102
|
+
expected = "output"
|
103
|
+
actual = babel(params)
|
104
|
+
assert_equal(expected, actual.to_s)
|
105
|
+
assert_equal(expected, actual.stdout)
|
106
|
+
|
107
|
+
# Calling stderr returns the standard error that we are expecting.
|
108
|
+
assert_equal("error", actual.stderr)
|
109
|
+
|
110
|
+
# Calling command returns the command that was remotely exec'ed, hidden in
|
111
|
+
# the metadata.
|
112
|
+
assert_equal("ls /home/baz", actual.command)
|
113
|
+
|
114
|
+
# Calling success? returns true if the command's return value is zero,
|
115
|
+
# also hidden in the metadata
|
116
|
+
assert_equal(true, actual.success?)
|
117
|
+
|
118
|
+
# We're using method_missing under the hood, so make sure that a method call
|
119
|
+
# that doesn't exist fails accordingly
|
120
|
+
assert_raise(NoMethodError) { actual.baz }
|
60
121
|
end
|
61
122
|
|
62
123
|
def test_bad_babel_params
|
@@ -104,7 +165,8 @@ class TestBabel < Test::Unit::TestCase
|
|
104
165
|
|
105
166
|
# Finally, if we run a job and specify remote code, that should be used
|
106
167
|
# as the bucket.
|
107
|
-
job_data_remote_code = {"@code" => "/baz/boo/code.baz", "@storage" => "s3"
|
168
|
+
job_data_remote_code = {"@code" => "/baz/boo/code.baz", "@storage" => "s3",
|
169
|
+
"@is_remote" => true}
|
108
170
|
expected_remote = "/baz/babel/temp-10"
|
109
171
|
|
110
172
|
actual_remote = BabelHelper.generate_output_location(job_data_remote_code)
|
@@ -141,8 +203,8 @@ class TestBabel < Test::Unit::TestCase
|
|
141
203
|
actual_2 = BabelHelper.put_inputs(job_data)
|
142
204
|
assert_equal(job_data, actual_2)
|
143
205
|
|
144
|
-
# If we specify inputs on the file system, they should be uploaded and
|
145
|
-
# with remote file locations
|
206
|
+
# If we specify inputs on the file system, they should be uploaded and
|
207
|
+
# replaced with remote file locations
|
146
208
|
neptune_params = {
|
147
209
|
:type => "input",
|
148
210
|
:local => "/baz",
|
@@ -155,26 +217,31 @@ class TestBabel < Test::Unit::TestCase
|
|
155
217
|
kernel = flexmock(Kernel)
|
156
218
|
kernel.should_receive(:neptune).with(neptune_params)
|
157
219
|
|
220
|
+
time = flexmock(Time)
|
221
|
+
time.should_receive(:now).and_return(0.0)
|
222
|
+
|
158
223
|
job_data["@argv"] = ["boo", "/baz", "gbaz"]
|
159
224
|
expected = job_data.dup
|
160
225
|
expected["@argv"] = ["boo", "/remote/babel/baz", "gbaz"]
|
226
|
+
expected["@metadata_info"] = {"time_to_store_inputs" => 0.0}
|
161
227
|
actual_3 = BabelHelper.put_inputs(job_data.dup)
|
162
228
|
assert_equal(expected, actual_3)
|
163
229
|
end
|
164
230
|
|
165
231
|
def test_run_babel_job
|
166
232
|
# Running a job with no @type specified means it should be a Babel job
|
167
|
-
job_data = {
|
233
|
+
job_data = [{
|
168
234
|
"@code" => "/baz/boo/code.baz",
|
169
235
|
"@argv" => ["boo", "/remote/babel/baz", "gbaz"]
|
170
|
-
}
|
236
|
+
}]
|
171
237
|
|
172
238
|
neptune_params = {
|
173
239
|
:type => "babel",
|
174
240
|
:code => "/baz/boo/code.baz",
|
175
241
|
:argv => ["boo", "/remote/babel/baz", "gbaz"],
|
176
242
|
:run_local => true,
|
177
|
-
:engine => "executor-sqs"
|
243
|
+
:engine => "executor-sqs",
|
244
|
+
:failed_attempts => 0
|
178
245
|
}
|
179
246
|
|
180
247
|
result = { :result => :success }
|
@@ -188,18 +255,19 @@ class TestBabel < Test::Unit::TestCase
|
|
188
255
|
|
189
256
|
def test_run_mpi_job
|
190
257
|
# Running a job with @type specified should preserve the job type
|
191
|
-
job_data = {
|
258
|
+
job_data = [{
|
192
259
|
"@type" => "mpi",
|
193
260
|
"@code" => "/baz/boo/code.baz",
|
194
261
|
"@argv" => ["boo", "/remote/babel/baz", "gbaz"]
|
195
|
-
}
|
262
|
+
}]
|
196
263
|
|
197
264
|
neptune_params = {
|
198
265
|
:type => "mpi",
|
199
266
|
:code => "/baz/boo/code.baz",
|
200
267
|
:argv => ["boo", "/remote/babel/baz", "gbaz"],
|
201
268
|
:run_local => true,
|
202
|
-
:engine => "executor-sqs"
|
269
|
+
:engine => "executor-sqs",
|
270
|
+
:failed_attempts => 0
|
203
271
|
}
|
204
272
|
|
205
273
|
result = { :result => :success }
|
@@ -228,7 +296,74 @@ class TestBabel < Test::Unit::TestCase
|
|
228
296
|
kernel.should_receive(:sleep).and_return()
|
229
297
|
|
230
298
|
expected = "output goes here"
|
231
|
-
actual = BabelHelper.wait_and_get_output(job_data)
|
299
|
+
actual = BabelHelper.wait_and_get_output(job_data, job_data['@output'])
|
232
300
|
assert_equal(expected, actual)
|
233
301
|
end
|
302
|
+
|
303
|
+
def test_batch_tasks_operation
|
304
|
+
# if we give babel an array of hashes, it should give us back
|
305
|
+
# task information for each of the jobs we asked it to run
|
306
|
+
# essentially this saves us the overhead of the repeated SOAP calls
|
307
|
+
# to AppScale
|
308
|
+
|
309
|
+
neptune_params = {
|
310
|
+
:type => "babel",
|
311
|
+
:code => "/baz/boo/code.baz",
|
312
|
+
:argv => ["boo", "/remote/babel/baz", "gbaz"],
|
313
|
+
:output => "/baz/output",
|
314
|
+
:error => "/baz/error",
|
315
|
+
:metadata => "/baz/metadata",
|
316
|
+
:run_local => true,
|
317
|
+
:engine => "executor-sqs",
|
318
|
+
:failed_attempts => 0,
|
319
|
+
:metadata_info => {'time_to_store_inputs' => 0.0},
|
320
|
+
:storage => "appdb",
|
321
|
+
:keyname => "appscale"
|
322
|
+
}
|
323
|
+
tasks = [neptune_params, neptune_params]
|
324
|
+
|
325
|
+
job_data = {}
|
326
|
+
neptune_params.each { |k, v|
|
327
|
+
job_data["@#{k}"] = v
|
328
|
+
}
|
329
|
+
|
330
|
+
# mocks - mock out most of the babel stuff, since we just want to verify
|
331
|
+
# the semantics of passing in an array of hashes instead of just a hash
|
332
|
+
babelhelper = flexmock(BabelHelper)
|
333
|
+
babelhelper.should_receive(:check_output_files).and_return()
|
334
|
+
babelhelper.should_receive(:validate_inputs).and_return()
|
335
|
+
babelhelper.should_receive(:put_code).and_return()
|
336
|
+
babelhelper.should_receive(:put_inputs).and_return()
|
337
|
+
|
338
|
+
# mocks for neptune
|
339
|
+
neptunehelper = flexmock(NeptuneHelper)
|
340
|
+
neptunehelper.should_receive(:require_file_to_exist).and_return()
|
341
|
+
neptunehelper.should_receive(:require_file_to_not_exist).and_return()
|
342
|
+
|
343
|
+
# finally, mock out the libraries that neptune uses
|
344
|
+
commonfunctions = flexmock(CommonFunctions)
|
345
|
+
commonfunctions.should_receive(:get_from_yaml).with("appscale", :shadow).
|
346
|
+
and_return("public_ip")
|
347
|
+
commonfunctions.should_receive(:get_from_yaml).with("appscale", :secret,
|
348
|
+
true).and_return("secret")
|
349
|
+
|
350
|
+
appcontroller = flexmock('appcontroller')
|
351
|
+
appcontroller.should_receive(:get_supported_babel_engines).with(job_data).
|
352
|
+
and_return(["executor-sqs"])
|
353
|
+
appcontroller.should_receive(:start_neptune_job).
|
354
|
+
and_return("babel job is now running")
|
355
|
+
flexmock(NeptuneManagerClient).should_receive(:new).and_return(appcontroller)
|
356
|
+
|
357
|
+
flexmock(TaskInfo).new_instances { |instance|
|
358
|
+
instance.should_receive(:stdout).and_return("output")
|
359
|
+
}
|
360
|
+
|
361
|
+
expected = ["output", "output"]
|
362
|
+
actual = []
|
363
|
+
babel(tasks).each { |task|
|
364
|
+
actual << task.stdout
|
365
|
+
}
|
366
|
+
assert_equal(expected, actual)
|
367
|
+
end
|
368
|
+
|
234
369
|
end
|