neptune 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +4 -0
- data/bin/neptune +7 -0
- data/doc/AppControllerClient.html +113 -22
- data/doc/{Kernel.html → AppControllerException.html} +23 -55
- data/doc/BabelHelper.html +707 -0
- data/doc/BadConfigurationException.html +142 -0
- data/doc/CommonFunctions.html +121 -33
- data/doc/FileNotFoundException.html +142 -0
- data/doc/NeptuneHelper.html +1102 -0
- data/doc/Object.html +94 -749
- data/doc/bin/neptune.html +3 -1
- data/doc/created.rid +7 -5
- data/doc/index.html +73 -25
- data/doc/lib/app_controller_client_rb.html +1 -1
- data/doc/lib/babel_rb.html +68 -0
- data/doc/lib/common_functions_rb.html +3 -1
- data/doc/lib/custom_exceptions_rb.html +54 -0
- data/doc/lib/neptune_rb.html +3 -1
- data/lib/app_controller_client.rb +28 -10
- data/lib/babel.rb +260 -0
- data/lib/common_functions.rb +42 -28
- data/lib/custom_exceptions.rb +10 -0
- data/lib/neptune.rb +371 -304
- data/test/unit/test_app_controller_client.rb +9 -9
- data/test/unit/test_babel.rb +154 -0
- data/test/unit/test_common_functions.rb +39 -75
- data/test/unit/test_neptune.rb +168 -76
- data/test/unit/ts_all.rb +5 -0
- metadata +32 -8
data/lib/babel.rb
ADDED
@@ -0,0 +1,260 @@
+#!/usr/bin/ruby
+# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
+
+require 'app_controller_client'
+require 'common_functions'
+require 'custom_exceptions'
+require 'neptune'
+
+
+# The promise gem gives us futures / promises out-of-the-box, which we need
+# to hide the fact that babel jobs are asynchronous.
+require 'rubygems'
+require 'promise'
+require 'future'
+
+
+# If the user doesn't give us enough info to infer what bucket we should place
+# their code in, this message is displayed and execution aborts.
+NEEDS_BUCKET_INFO = "When running Babel jobs with local inputs / code, the " +
+  "bucket to store them in must be specified by either the :bucket_name " +
+  "parameter or the BABEL_BUCKET_NAME environment variable."
+
+
+# The constant string that a Neptune output job returns if the output does not
+# yet exist.
+DOES_NOT_EXIST = "error: output does not exist"
+
+
+# The initial amount of time, in seconds, to sleep between output job requests.
+# An exponential backoff is used with this value as the starting sleep time.
+SLEEP_TIME = 5 # seconds
+
+
+# The maximum amount of time that we should sleep to, when waiting for output
+# job requests.
+MAX_SLEEP_TIME = 60 # seconds
+
+# Babel provides a nice wrapper around Neptune jobs. Instead of making users
+# write multiple Neptune jobs to actually run code (e.g., putting input in the
+# datastore, run the job, get the output back), Babel automatically handles
+# this.
+def babel(params)
+  # Since this whole function should run asynchronously, we run it as a future.
+  # It automatically starts running in a new thread, and attempting to get the
+  # value of what this returns causes it to block until the job completes.
+  future {
+    job_data = BabelHelper.convert_from_neptune_params(params)
+    NeptuneHelper.validate_storage_params(job_data) # adds in S3 storage params
+
+    # :code is the only required parameter - everything else can use default vals
+    NeptuneHelper.require_param("@code", job_data)
+
+    if job_data["@output"].nil? or job_data["@output"].empty?
+      job_data["@output"] = BabelHelper.generate_output_location(job_data)
+    end
+    BabelHelper.ensure_output_does_not_exist(job_data)
+
+    if job_data["@is_remote"]
+      BabelHelper.validate_inputs(job_data)
+    else
+      BabelHelper.put_code(job_data)
+      BabelHelper.put_inputs(job_data)
+    end
+
+    BabelHelper.run_job(job_data)
+    # So actually retrieving the job's output is done via a promise, so only if
+    # the user actually uses the value do we actually go and poll for output.
+    # The running of the job is done above, outside of the promise, so
+    # the job is always run, regardless of whether or not we get its output.
+    BabelHelper.wait_and_get_output(job_data)
+    # promise { BabelHelper.wait_and_get_output(job_data) }
+  }
+end
+
+
+# This module provides convenience functions for babel().
+module BabelHelper
+  # If the user fails to give us an output location, this function will generate
+  # one for them, based on either the location of their code (for remotely
+  # specified code), or a babel parameter (for locally specified code).
+  def self.generate_output_location(job_data)
+    if job_data["@is_remote"]
+      # We already know the bucket name - the same one that the user
+      # has told us their code is located in.
+      prefix = job_data["@code"].scan(/\/(.*?)\//)[0].to_s
+    else
+      prefix = self.get_bucket_for_local_data(job_data)
+    end
+
+    return "/#{prefix}/babel/temp-#{CommonFunctions.get_random_alphanumeric()}"
+  end
+
+  # Provides a common way for callers to get the name of the bucket that
+  # should be used for Neptune jobs where the code is stored locally.
+  def self.get_bucket_for_local_data(job_data)
+    bucket_name = job_data["@bucket_name"] || ENV['BABEL_BUCKET_NAME']
+
+    if bucket_name.nil?
+      raise BadConfigurationException.new(NEEDS_BUCKET_INFO)
+    end
+
+    # If the bucket name starts with a slash, remove it
+    if bucket_name[0].chr == "/"
+      bucket_name = bucket_name[1, bucket_name.length]
+    end
+
+    return bucket_name
+  end
+
+  # For jobs where the code is stored remotely, this method ensures that
+  # the code and any possible inputs actually do exist, before attempting to
+  # use them for computation.
+  def self.validate_inputs(job_data)
+    controller = self.get_appcontroller(job_data)
+
+    # First, make sure the code exists
+    NeptuneHelper.require_file_to_exist(job_data["@code"], job_data, controller)
+
+    if job_data["@argv"].nil? or job_data["@argv"].empty?
+      return
+    end
+
+    # We assume anything that begins with a slash is a remote file
+    job_data["@argv"].each { |arg|
+      if arg[0].chr == "/"
+        NeptuneHelper.require_file_to_exist(arg, job_data, controller)
+      end
+    }
+  end
+
+  # To avoid accidentally overwriting outputs from previous jobs, we first
+  # check to make sure an output file doesn't exist before starting a new job
+  # with the given name.
+  def self.ensure_output_does_not_exist(job_data)
+    file = job_data["@output"]
+    controller = self.get_appcontroller(job_data)
+    puts job_data.inspect
+    NeptuneHelper.require_file_to_not_exist(file, job_data, controller)
+  end
+
+  # Returns an AppControllerClient for the given job data.
+  def self.get_appcontroller(job_data)
+    keyname = job_data["@keyname"] || "appscale"
+    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
+    secret = CommonFunctions.get_secret_key(keyname)
+    return AppControllerClient.new(shadow_ip, secret)
+  end
+
+  # Stores the user's code (and the directory it's in, and directories in the
+  # same directory as the user's code, since there could be libraries used)
+  # in the remote datastore.
+  def self.put_code(job_data)
+    code_dir = File.dirname(job_data["@code"])
+    code = File.basename(job_data["@code"])
+    remote_code_dir = self.put_file(code_dir, job_data)
+    job_data["@code"] = remote_code_dir + "/" + code
+    return job_data["@code"]
+  end
+
+  # If any input files are specified, they are copied to the remote datastore
+  # via Neptune 'input' jobs. Inputs are assumed to be files on the local
+  # filesystem if they begin with a slash, and job_data gets updated with
+  # the remote location of these files.
+  def self.put_inputs(job_data)
+    if job_data["@argv"].nil? or job_data["@argv"].empty?
+      return job_data
+    end
+
+    job_data["@argv"].each_index { |i|
+      arg = job_data["@argv"][i]
+      if arg[0].chr == "/"
+        job_data["@argv"][i] = self.put_file(arg, job_data)
+      end
+    }
+
+    return job_data
+  end
+
+  # If the user gives us local code or local inputs, this function will
+  # run a Neptune 'input' job to store the data remotely.
+  def self.put_file(local_path, job_data)
+    input_data = self.convert_to_neptune_params(job_data)
+    input_data[:type] = "input"
+    input_data[:local] = local_path
+
+    bucket_name = self.get_bucket_for_local_data(job_data)
+    input_data[:remote] = "/#{bucket_name}/babel#{local_path}"
+
+    Kernel.neptune(input_data)
+
+    return input_data[:remote]
+  end
+
+  # Neptune internally uses job_data with keys of the form @name, but since the
+  # user has given them to us in the form :name, we convert it here.
+  # TODO(cgb): It looks like this conversion to/from may be unnecessary since
+  # neptune() just re-converts it - how can we remove it?
+  def self.convert_from_neptune_params(params)
+    job_data = {}
+    params.each { |k, v|
+      key = "@#{k}"
+      job_data[key] = v
+    }
+    return job_data
+  end
+
+  # Neptune input jobs expect keys of the form :name, but since we've already
+  # converted them to the form @name, this function reverses that conversion.
+  def self.convert_to_neptune_params(job_data)
+    neptune_params = {}
+
+    job_data.each { |k, v|
+      key = k.delete("@").to_sym
+      neptune_params[key] = v
+    }
+
+    return neptune_params
+  end
+
+  # Constructs a Neptune job to run the user's code as a Babel job (task queue)
+  # from the given parameters.
+  def self.run_job(job_data)
+    run_data = self.convert_to_neptune_params(job_data)
+    run_data[:type] = "babel"
+
+    # TODO(cgb): Once AppScale+Babel gets support for RabbitMQ, change this to
+    # exec tasks over it, instead of locally.
+    if job_data["@run_local"].nil?
+      run_data[:run_local] = true
+      run_data[:engine] = "executor-sqs"
+    end
+
+    return Kernel.neptune(run_data)
+  end
+
+  # Constructs a Neptune job to get the output of a Babel job. If the job is not
+  # yet finished, this function waits until it does, and then returns the output
+  # of the job.
+  def self.wait_and_get_output(job_data)
+    output_data = self.convert_to_neptune_params(job_data)
+    output_data[:type] = "output"
+
+    output = ""
+    time_to_sleep = SLEEP_TIME
+    loop {
+      output = Kernel.neptune(output_data)[:output]
+      if output == DOES_NOT_EXIST
+        # Exponentially back off, up to a limit of MAX_SLEEP_TIME
+        Kernel.sleep(time_to_sleep)
+        if time_to_sleep < MAX_SLEEP_TIME
+          time_to_sleep *= 2
+        end
+      else
+        break
+      end
+    }
+
+    return output
+  end
+end
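Usage sketch: the new babel() call wraps the put-input / run / get-output cycle in a single future. This is a minimal example, assuming a running AppScale deployment reachable under the default "appscale" keyname; the file path and bucket name below are hypothetical:

  require 'babel'

  # :code is the only required parameter. Local code and any leading-slash
  # args are copied into the bucket via Neptune 'input' jobs first.
  output = babel(:code => "/home/user/factorial.rb",   # hypothetical path
                 :argv => ["10"],
                 :bucket_name => "mybucket")           # or set BABEL_BUCKET_NAME

  # babel() returns a future; reading its value blocks until the job has run
  # and wait_and_get_output() has polled for output with exponential backoff.
  puts output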
data/lib/common_functions.rb
CHANGED
@@ -9,11 +9,7 @@ require 'socket'
 require 'timeout'
 require 'yaml'
 
-
-def shell(command)
-  return `#{command}`
-end
-end
+require 'custom_exceptions'
 
 # A helper module that aggregates functions that are not part of Neptune's
 # core functionality. Specifically, this module contains methods to scp
@@ -21,6 +17,27 @@ end
 # often needed to determine which machine should be used for computation
 # or to copy over code and input files.
 module CommonFunctions
+  # Executes a command and returns the result. Is needed to get around
+  # Flexmock's inability to mock out Kernel:` (the standard shell exec
+  # method).
+  def self.shell(cmd)
+    return `#{cmd}`
+  end
+
+  # Returns a random string composed of alphanumeric characters, as long
+  # as the user requests.
+  def self.get_random_alphanumeric(length=10)
+    random = ""
+    possible = "0123456789abcdefghijklmnopqrstuvxwyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    possibleLength = possible.length
+
+    length.times { |index|
+      random << possible[rand(possibleLength)]
+    }
+
+    return random
+  end
+
   # Copies a file to the Shadow node (head node) within AppScale.
   # The caller specifies
   # the local file location, the destination where the file should be
@@ -30,14 +47,11 @@ module CommonFunctions
   def self.scp_to_shadow(local_file_loc,
                          remote_file_loc,
                          keyname,
-                         is_dir=false
-                         file=File,
-                         get_from_yaml=CommonFunctions.method(:get_from_yaml),
-                         scp_file=CommonFunctions.method(:scp_file))
+                         is_dir=false)
 
-    shadow_ip = get_from_yaml
-    ssh_key =
-    scp_file
+    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
+    ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
+    CommonFunctions.scp_file(local_file_loc, remote_file_loc, shadow_ip, ssh_key, is_dir)
   end
 
   # Performs the actual remote copying of files: given the IP address
@@ -47,22 +61,22 @@ module CommonFunctions
   # wrong IP is given. If the user specifies that the file to copy is
   # actually a directory, we append the -r flag to scp as well.
   def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
-                    is_dir=false
+                    is_dir=false)
     cmd = ""
-    local_file_loc =
+    local_file_loc = File.expand_path(local_file_loc)
 
     ssh_args = "-o StrictHostkeyChecking=no 2>&1"
    ssh_args << " -r " if is_dir
 
-    public_key_loc =
+    public_key_loc = File.expand_path(public_key_loc)
    cmd = "scp -i #{public_key_loc} #{ssh_args} #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"
    cmd << "; echo $? >> ~/.appscale/retval"
 
-    retval_loc =
-
+    retval_loc = File.expand_path("~/.appscale/retval")
+    FileUtils.rm_f(retval_loc)
 
    begin
-      Timeout::timeout(-1) {
+      Timeout::timeout(-1) { CommonFunctions.shell("#{cmd}") }
    rescue Timeout::Error
      abort("Remotely copying over files failed. Is the destination machine" +
        " on and reachable from this computer? We tried the following" +
@@ -70,11 +84,11 @@ module CommonFunctions
    end
 
    loop {
-      break if
+      break if File.exists?(retval_loc)
      sleep(5)
    }
 
-    retval = (
+    retval = (File.open(retval_loc) { |f| f.read }).chomp
    if retval != "0"
      abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is " +
        "your environment set up properly?")
@@ -88,16 +102,16 @@ module CommonFunctions
   # method aborts if the value doesn't exist or the YAML file is malformed.
   # If the required flag is set to false, it returns nil in either scenario
   # instead.
-  def self.get_from_yaml(keyname, tag, required=true
-    location_file =
+  def self.get_from_yaml(keyname, tag, required=true)
+    location_file = File.expand_path("~/.appscale/locations-#{keyname}.yaml")
 
-    if !
-
-      " keyname, \"#{keyname}\".")
+    if !File.exists?(location_file)
+      raise BadConfigurationException.new("An AppScale instance is not " +
+        "currently running with the provided keyname, \"#{keyname}\".")
    end
 
    begin
-      tree =
+      tree = YAML.load_file(location_file)
    rescue ArgumentError
      if required
        abort("The yaml file you provided was malformed. Please correct any" +
@@ -121,7 +135,7 @@ module CommonFunctions
   # Returns the secret key needed for communication with AppScale's
   # Shadow node. This method is a nice frontend to the get_from_yaml
   # function, as the secret is stored in a YAML file.
-  def self.get_secret_key(keyname, required=true
-    return CommonFunctions.get_from_yaml(keyname, :secret, required
+  def self.get_secret_key(keyname, required=true)
+    return CommonFunctions.get_from_yaml(keyname, :secret, required)
   end
 end
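Usage sketch: the two helpers added to CommonFunctions above give callers (and tests) one mockable seam instead of raw Kernel backticks, and give babel.rb a way to generate temp output names. A short illustration, with hypothetical output:

  require 'common_functions'

  # Same result as backticks, but tests can stub CommonFunctions.shell.
  uname = CommonFunctions.shell("uname -a")

  # Ten alphanumeric characters by default, e.g. "a8Bz0qL2kd".
  token = CommonFunctions.get_random_alphanumeric()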
data/lib/neptune.rb
CHANGED
@@ -3,6 +3,7 @@
 
 require 'app_controller_client'
 require 'common_functions'
+require 'custom_exceptions'
 
 # Setting verbose to nil here suppresses the otherwise
 # excessive SSL cert warning messages that will pollute
@@ -20,6 +21,12 @@ $VERBOSE = nil
 #MR_RUN_JOB_REQUIRED = %w{ }
 #MR_REQUIRED = %w{ output }
 
+# A list of all the Neptune job types that we support
+ALLOWED_JOB_TYPES = %w{acl cicero compile erlang mpi input output ssa babel upc x10}
+
+# The string to display for disallowed job types.
+JOB_TYPE_NOT_ALLOWED = "The job type you specified is not supported."
+
 # A list of Neptune jobs that do not require nodes to be spawned
 # up for computation
 NO_NODES_NEEDED = ["acl", "input", "output", "compile"]
@@ -34,7 +41,7 @@ ALLOWED_STORAGE_TYPES = ["appdb", "gstorage", "s3", "walrus"]
 
 # A list of jobs that require some kind of work to be done before
 # the actual computation can be performed.
-NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
+NEED_PREPROCESSING = ["babel", "compile", "erlang", "mpi", "ssa"]
 
 # A set of methods and constants that we've monkey-patched to enable Neptune
 # support. In the future, it is likely that the only exposed / monkey-patched
@@ -45,385 +52,443 @@ NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
 class Object
 end
 
-
-#
-#
-#
-
-
-
-
-
-
-  preprocess = "preprocess_#{job_type}".to_sym
-  send(preprocess, job_data)
-end
+module NeptuneHelper
+  # Certain types of jobs need steps to be taken before they
+  # can be started (e.g., copying input data or code over).
+  # This method dispatches the right method to use based
+  # on the type of the job that the user has asked to run.
+  def self.do_preprocessing(job_data, controller)
+    job_type = job_data["@type"]
+    if !NEED_PREPROCESSING.include?(job_type)
+      return
+    end
 
-#
-#
-
-
-  code = File.expand_path(job_data["@code"])
-  if !File.exists?(code)
-    abort("The source file #{code} does not exist.")
+    # Don't worry about adding on the self. prefix - send will resolve
+    # it the right way
+    preprocess = "preprocess_#{job_type}".to_sym
+    send(preprocess, job_data, controller)
   end
 
-
-
-
-
-
-
-
-
-  shell.call(remove_dir)
+  # This preprocessing method copies over the user's code to the
+  # Shadow node so that it can be compiled there. A future version
+  # of this method may also copy over libraries as well.
+  def self.preprocess_compile(job_data, controller)
+    code = File.expand_path(job_data["@code"])
+    if !File.exists?(code)
+      raise BadConfigurationException.new("The source file #{code} does not exist.")
+    end
 
-
+    suffix = code.split('/')[-1]
+    dest = "/tmp/#{suffix}"
+    keyname = job_data["@keyname"]
+    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
 
-
-
+    ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no root@#{shadow_ip}"
+    remove_dir = "ssh #{ssh_args} 'rm -rf #{dest}' 2>&1"
+    Kernel.puts remove_dir
+    CommonFunctions.shell(remove_dir)
+    CommonFunctions.scp_to_shadow(code, dest, keyname, is_dir=true)
 
-
-  if !job_data["@code"]
-    abort("When running Erlang jobs, :code must be specified.")
+    job_data["@code"] = dest
   end
 
-
-
-    abort("The specified code, #{job_data['@code']}," +
-      " didn't exist. Please specify one that exists and try again")
-  end
-  dest_code = "/tmp/"
+  def self.preprocess_erlang(job_data, controller)
+    self.require_param("@code", job_data)
 
-
-
-
+    source_code = File.expand_path(job_data["@code"])
+    if !File.exists?(source_code)
+      raise BadConfigurationException.new("The specified code, #{job_data['@code']}," +
+        " didn't exist. Please specify one that exists and try again")
+    end
+    dest_code = "/tmp/"
 
-
-
-# that this value is at least as many as the number of nodes (that is, nodes
-# can't be underprovisioned in MPI).
-def preprocess_mpi(job_data)
-  if !job_data["@nodes_to_use"]
-    abort("When running MPI jobs, :nodes_to_use must be specified.")
+    keyname = job_data["@keyname"]
+    CommonFunctions.scp_to_shadow(source_code, dest_code, keyname)
   end
 
-
-
-
+  # This preprocessing method verifies that the user specified the number of nodes
+  # to use. If they also specified the number of processes to use, we also verify
+  # that this value is at least as many as the number of nodes (that is, nodes
+  # can't be underprovisioned in MPI).
+  def self.preprocess_mpi(job_data, controller)
+    self.require_param("@nodes_to_use", job_data)
+    self.require_param("@procs_to_use", job_data)
+
+    if job_data["@procs_to_use"]
+      p = job_data["@procs_to_use"]
+      n = job_data["@nodes_to_use"]
+      if p < n
+        raise BadConfigurationException.new(":procs_to_use must be at least as " +
+          "large as :nodes_to_use.")
+      end
+    end
 
-
-
-
-
-
-
-    "
-
+    if job_data["@argv"]
+      argv = job_data["@argv"]
+
+      if argv.class == String
+        job_data["@argv"] = argv
+      elsif argv.class == Array
+        job_data["@argv"] = argv.join(' ')
+      else
+        raise BadConfigurationException.new(":argv must be either a String or Array")
+      end
     end
+
+    return job_data
   end
 
-
-
-
-
+  # This preprocessing method verifies that the user specified the number of
+  # trajectories to run, via either :trajectories or :simulations. Both should
+  # not be specified - only one or the other, and regardless of which they
+  # specify, convert it to be :trajectories.
+  def self.preprocess_ssa(job_data, controller)
+    if job_data["@simulations"] and job_data["@trajectories"]
+      raise BadConfigurationException.new(":simulations and :trajectories " +
+        "not both be specified.")
    end
 
-    if
-      job_data["@
+    if job_data["@simulations"]
+      job_data["@trajectories"] = job_data["@simulations"]
+      job_data.delete("@simulations")
    end
-  end
 
-
-
+    self.require_param("@trajectories", job_data)
+    return job_data
+  end
 
-
-
-#
-
-def preprocess_ssa(job_data)
-  if job_data["@simulations"] and job_data["@trajectories"]
-    abort("Both :simulations and :trajectories cannot be specified - use one" +
-      " or the other.")
+  def self.require_param(param, job_data)
+    if !job_data[param]
+      raise BadConfigurationException.new("#{param} must be specified")
+    end
   end
 
-
-
-
+  def self.require_file_to_exist(file, job_data, controller)
+    if controller.does_file_exist?(file, job_data)
+      return
+    else
+      raise FileNotFoundException
+    end
   end
 
-
-
+  def self.require_file_to_not_exist(file, job_data, controller)
+    begin
+      self.require_file_to_exist(file, job_data, controller)
+      # no exception thrown previously means that the output file exists
+      raise BadConfigurationException.new('Output specified already exists')
+    rescue FileNotFoundException
+      return
+    end
   end
 
-
-
+  # This preprocessing method verifies that the user specified code that
+  # should be run, where the output should be placed, and an engine to run over.
+  # It also verifies that all files to be used are actually reachable.
+  # Supported engines can be found by contacting an AppScale node.
+  def self.preprocess_babel(job_data, controller)
+    self.require_param("@code", job_data)
+    self.require_param("@engine", job_data)
+    self.require_param("@output", job_data)
+
+    # For most code types, the file's name given is the thing to exec.
+    # For Java, the actual file to search for is whatever the user gives
+    # us, with a .class extension.
+    code_file_name = job_data["@code"]
+    if !job_data["@executable"].nil? and job_data["@executable"] == "java"
+      code_file_name += ".class"
+    end
+
+    self.require_file_to_exist(code_file_name, job_data, controller)
+    self.require_file_to_not_exist(job_data["@output"], job_data, controller)
+
+    if job_data["@argv"]
+      argv = job_data["@argv"]
+      if argv.class != Array
+        raise BadConfigurationException.new("argv must be an array")
+      end
 
-
-
-
-
-
-
+      argv.each { |arg|
+        if arg =~ /\/.*\/.*/
+          self.require_file_to_exist(arg, job_data, controller)
+        end
+      }
+    end
 
-
-
+    if job_data["@appcfg_cookies"]
+      self.require_file_to_exist(job_data["@appcfg_cookies"], job_data, controller)
+    end
 
-
-  type = job_data["@type"]
+    user_specified_engine = job_data["@engine"]
 
-
-
-
+    # validate the engine here
+    engines = controller.get_supported_babel_engines(job_data)
+    if !engines.include?(user_specified_engine)
+      raise BadConfigurationException.new("The engine you specified, " +
+        "#{user_specified_engine}, is not a supported engine. Supported engines" +
+        " are: #{engines.join(', ')}")
+    end
   end
 
-
-
-
-
-
+  def self.get_job_data(params)
+    job_data = {}
+    params.each { |k, v|
+      key = "@#{k}"
+      job_data[key] = v
+    }
 
-  job_data
-
+    job_data.delete("@job")
+    job_data["@keyname"] = params[:keyname] || "appscale"
 
-
-
-end
+    job_data["@type"] = job_data["@type"].to_s
+    type = job_data["@type"]
 
-
-
-    abort("Job output must be specified")
+    if !ALLOWED_JOB_TYPES.include?(type)
+      raise BadConfigurationException.new(JOB_TYPE_NOT_ALLOWED)
    end
 
-    if
-
+    if type == "upc" or type == "x10"
+      job_data["@type"] = "mpi"
+      type = "mpi"
    end
-  end
 
-
-
+    # kdt jobs also run as mpi jobs, but need to pass along an executable
+    # parameter to let mpiexec know to use python to exec it
+    if type == "kdt"
+      job_data["@type"] = "mpi"
+      type = "mpi"
 
-
-
-    job_data["@storage"] = "appdb"
-  end
+      job_data["@executable"] = "python"
+    end
 
-
-
-
-      " - we do not support #{storage}.")
-    end
+    if job_data["@nodes_to_use"].class == Hash
+      job_data["@nodes_to_use"] = job_data["@nodes_to_use"].to_a.flatten
+    end
 
-
-
-
-
-      storage = "s3"
-      job_data["@storage"] = "s3"
-    end
+    if !NO_OUTPUT_NEEDED.include?(type)
+      if (job_data["@output"].nil? or job_data["@output"].empty?)
+        raise BadConfigurationException.new("Job output must be specified")
+      end
 
-
-
-      if job_data["@#{item}"]
-        puts "Using specified #{item}"
-      else
-        if ENV[item]
-          puts "Using #{item} from environment"
-          job_data["@#{item}"] = ENV[item]
-        else
-          abort("When storing data to S3, #{item} must be specified or be in " +
-            "your environment. Please do so and try again.")
-        end
+      if job_data["@output"][0].chr != "/"
+        raise BadConfigurationException.new("Job output must begin with a slash ('/')")
      end
-
+    end
+
+    return job_data
   end
 
-
-
+  def self.validate_storage_params(job_data)
+    job_data["@storage"] ||= "appdb"
 
-
-
-
-
-
-  result = {:result => :success}
+    storage = job_data["@storage"]
+    if !ALLOWED_STORAGE_TYPES.include?(storage)
+      raise BadConfigurationException.new("Supported storage types are " +
+        "#{ALLOWED_STORAGE_TYPES.join(', ')} - #{storage} is not supported.")
+    end
 
-
-
-
+    # Our implementation for storing / retrieving via Google Storage
+    # and Walrus uses
+    # the same library as we do for S3 - so just tell it that it's S3
+    if storage == "gstorage" or storage == "walrus"
+      storage = "s3"
+      job_data["@storage"] = "s3"
+    end
 
-
-
-
-
-
-
+    if storage == "s3"
+      ["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
+        if job_data["@#{item}"]
+          Kernel.puts "Using specified #{item}"
+        else
+          if ENV[item]
+            Kernel.puts "Using #{item} from environment"
+            job_data["@#{item}"] = ENV[item]
+          else
+            raise BadConfigurationException.new("When storing data to S3, #{item} must be specified or be in " +
+              "your environment. Please do so and try again.")
+          end
+        end
+      }
+    end
 
-
-  scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
-  puts scp_cmd
-  shell.call(scp_cmd)
-
-  job_data["@local"] = remote
-  puts "job data = #{job_data.inspect}"
-  response = controller.put_input(job_data)
-  if response
-    return {:result => :success}
-  else
-    # TODO - expand this to include the reason why it failed
-    return {:result => :failure}
+    return job_data
   end
-end
 
-# This method
-#
-
-
-
-    ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
-    puts ssh_command
-    ssh_result = shell.call(ssh_command)
-    puts "result was [#{ssh_result}]"
-    if ssh_result =~ /No such file or directory/
-      puts "Still waiting for code to be compiled..."
-    else
-      puts "compilation complete! Copying compiled code to #{copy_to}"
-      return
-    end
-    sleep(5)
-  }
-end
+  # This method takes a file on the local user's computer and stores it remotely
+  # via AppScale. It returns a hash map indicating whether or not the job
+  # succeeded and if it failed, the reason for it.
+  def self.get_input(job_data, ssh_args, shadow_ip, controller)
+    result = {:result => :success}
 
-
-# gets the standard out and error returned from the compilation. This method
-# returns a hash containing the standard out, error, and a result that indicates
-# whether or not the compilation was successful.
-def compile_code(job_data, ssh_args, shadow_ip, shell=Kernel.method(:`))
-  compiled_location = controller.compile_code(job_data)
+    self.require_param("@local", job_data)
 
-
+    local_file = File.expand_path(job_data["@local"])
+    if !File.exists?(local_file)
+      reason = "the file you specified to copy, #{local_file}, doesn't exist." +
+        " Please specify a file that exists and try again."
+      return {:result => :failure, :reason => reason}
+    end
 
-
+    remote = "/tmp/neptune-input-#{rand(100000)}"
+    scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
+    Kernel.puts scp_cmd
+    CommonFunctions.shell(scp_cmd)
 
-
+    job_data["@local"] = remote
+    Kernel.puts "job data = #{job_data.inspect}"
+    response = controller.put_input(job_data)
+    if response
+      return {:result => :success}
+    else
+      # TODO - expand this to include the reason why it failed
+      return {:result => :failure}
+    end
+  end
 
-
-
-
+  # This method waits for AppScale to finish compiling the user's code, indicated
+  # by AppScale copying the finished code to a pre-determined location.
+  def self.wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location)
+    loop {
+      ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
+      Kernel.puts ssh_command
+      ssh_result = CommonFunctions.shell(ssh_command)
+      Kernel.puts "result was [#{ssh_result}]"
+      if ssh_result =~ /No such file or directory/
+        Kernel.puts "Still waiting for code to be compiled..."
+      else
+        Kernel.puts "compilation complete! Copying compiled code to #{copy_to}"
+        return
+      end
+      sleep(5)
+    }
+  end
 
-  code
-
-
+  # This method sends out a request to compile code, waits for it to finish, and
+  # gets the standard out and error returned from the compilation. This method
+  # returns a hash containing the standard out, error, and a result that indicates
+  # whether or not the compilation was successful.
+  def self.compile_code(job_data, ssh_args, shadow_ip)
+    compiled_location = controller.compile_code(job_data)
+    copy_to = job_data["@copy_to"]
+    self.wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location)
+
+    FileUtils.rm_rf(copy_to)
+
+    scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
+    Kernel.puts scp_command
+    CommonFunctions.shell(scp_command)
+
+    code = job_data["@code"]
+    dirs = code.split(/\//)
+    remote_dir = "/tmp/" + dirs[-1]
+
+    [remote_dir, compiled_location].each { |remote_files|
+      ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_files}' 2>&1"
+      Kernel.puts ssh_command
+      CommonFunctions.shell(ssh_command)
+    }
 
-
-
-    puts ssh_command
-    shell.call(ssh_command)
-  }
+    return get_std_out_and_err(copy_to)
+  end
 
-
-
+  # This method returns a hash containing the standard out and standard error
+  # from a completed job, as well as a result field that indicates whether or
+  # not the job completed successfully (success = no errors).
+  def self.get_std_out_and_err(location)
+    result = {}
 
-
-
-# not the job completed successfully (success = no errors).
-def get_std_out_and_err(location)
-  result = {}
+    out = File.open("#{location}/compile_out") { |f| f.read.chomp! }
+    result[:out] = out
 
-
-
+    err = File.open("#{location}/compile_err") { |f| f.read.chomp! }
+    result[:err] = err
 
-
-
+    if result[:err]
+      result[:result] = :failure
+    else
+      result[:result] = :success
+    end
 
-
-
-  else
-    result[:result] = :success
-  end
+    return result
+  end
 
-
-
+  def self.upload_app_for_cicero(job_data)
+    if !job_data["@app"]
+      Kernel.puts "No app specified, not uploading..."
+      return
+    end
 
-
-
-
-
-
+    app_location = File.expand_path(job_data["@app"])
+    if !File.exists?(app_location)
+      raise BadConfigurationException.new("The app you specified, #{app_location}, does not exist." +
+        "Please specify one that does and try again.")
+    end
 
-
-
-
-
-
+    keyname = job_data["@keyname"] || "appscale"
+    if job_data["@appscale_tools"]
+      upload_app = File.expand_path(job_data["@appscale_tools"]) +
+        File::SEPARATOR + "bin" + File::SEPARATOR + "appscale-upload-app"
+    else
+      upload_app = "appscale-upload-app"
+    end
 
-
-
-
-
-  else
-    upload_app = "appscale-upload-app"
+    Kernel.puts "Uploading AppEngine app at #{app_location}"
+    upload_command = "#{upload_app} --file #{app_location} --test --keyname #{keyname}"
+    Kernel.puts upload_command
+    Kernel.puts `#{upload_command}`
   end
 
-
-
-
-
-
+  # This method actually runs the Neptune job, given information about the job
+  # as well as information about the node to send the request to.
+  def self.run_job(job_data, ssh_args, shadow_ip, secret)
+    controller = AppControllerClient.new(shadow_ip, secret)
+
+    # TODO - right now the job is assumed to succeed in many cases
+    # need to investigate the various failure scenarios
+    result = { :result => :success }
+
+    case job_data["@type"]
+    when "input"
+      result = self.get_input(job_data, ssh_args, shadow_ip, controller)
+    when "output"
+      result[:output] = controller.get_output(job_data)
+    when "get-acl"
+      job_data["@type"] = "acl"
+      result[:acl] = controller.get_acl(job_data)
+    when "set-acl"
+      job_data["@type"] = "acl"
+      result[:acl] = controller.set_acl(job_data)
+    when "compile"
+      result = self.compile_code(job_data, ssh_args, shadow_ip)
+    when "cicero"
+      self.upload_app_for_cicero(job_data)
+      msg = controller.start_neptune_job(job_data)
+      result[:msg] = msg
+      result[:result] = :failure if result[:msg] !~ /job is now running\Z/
+    else
+      msg = controller.start_neptune_job(job_data)
+      result[:msg] = msg
+      result[:result] = :failure if result[:msg] !~ /job is now running\Z/
+    end
 
-
-# as well as information about the node to send the request to.
-def run_job(job_data, ssh_args, shadow_ip, secret,
-    controller=AppControllerClient, file=File)
-  controller = controller.new(shadow_ip, secret)
-
-  # TODO - right now the job is assumed to succeed in many cases
-  # need to investigate the various failure scenarios
-  result = { :result => :success }
-
-  case job_data["@type"]
-  when "input"
-    result = get_input(job_data, ssh_args, shadow_ip, controller, file)
-  when "output"
-    result[:output] = controller.get_output(job_data)
-  when "get-acl"
-    job_data["@type"] = "acl"
-    result[:acl] = controller.get_acl(job_data)
-  when "set-acl"
-    job_data["@type"] = "acl"
-    result[:acl] = controller.set_acl(job_data)
-  when "compile"
-    result = compile_code(job_data, ssh_args, shadow_ip)
-  when "cicero"
-    upload_app_for_cicero(job_data)
-    msg = controller.start_neptune_job(job_data)
-    result[:msg] = msg
-    result[:result] = :failure if result[:msg] !~ /job is now running\Z/
-  else
-    msg = controller.start_neptune_job(job_data)
-    result[:msg] = msg
-    result[:result] = :failure if result[:msg] !~ /job is now running\Z/
+    return result
   end
-
-  return result
 end
 
-#
-
-
-#
-#
-#
-#
-# vice-versa).
+# Make neptune() public so that babel() can call it
+public
+
+# This method is the heart of Neptune - here, we take blocks of code that the
+# user has written and convert them into HPC job requests. At a high level,
+# the user can request to run a job, retrieve a job's output, or modify the
+# access policy (ACL) for the output of a job. By default, job data is private,
+# but a Neptune job can be used to set it to public later (and vice-versa).
 def neptune(params)
-  puts "Received a request to run a job."
-  puts params[:type]
+  Kernel.puts "Received a request to run a job."
+  Kernel.puts params[:type]
 
-  job_data = get_job_data(params)
-  validate_storage_params(job_data)
-  puts "job data = #{job_data.inspect}"
-  do_preprocessing(job_data)
+  job_data = NeptuneHelper.get_job_data(params)
+  NeptuneHelper.validate_storage_params(job_data)
+  Kernel.puts "job data = #{job_data.inspect}"
   keyname = job_data["@keyname"]
 
   shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
@@ -431,5 +496,7 @@ def neptune(params)
   ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
   ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "
 
-
+  controller = AppControllerClient.new(shadow_ip, secret)
+  NeptuneHelper.do_preprocessing(job_data, controller)
+  return NeptuneHelper.run_job(job_data, ssh_args, shadow_ip, secret)
 end
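Usage sketch: driving the reworked neptune() directly. Job types are now validated against ALLOWED_JOB_TYPES, and preprocess_mpi requires both :nodes_to_use and :procs_to_use (with procs at least as large as nodes). The paths and bucket names below are hypothetical, and a running AppScale deployment is assumed:

  require 'neptune'

  # Store an input file remotely via an 'input' job.
  neptune(:type => "input",
          :local => "/home/user/input.txt",
          :remote => "/mybucket/input.txt")

  # Run an MPI job over it; :output must begin with a slash.
  neptune(:type => "mpi",
          :code => "/code/powermethod",      # hypothetical remote path
          :nodes_to_use => 4,
          :procs_to_use => 4,
          :output => "/mybucket/output.txt")

  # Retrieve the result once the job has written it.
  result = neptune(:type => "output", :output => "/mybucket/output.txt")
  puts result[:output]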