neptune 0.1.4 → 0.2.0
- data/README +4 -0
- data/bin/neptune +7 -0
- data/doc/AppControllerClient.html +113 -22
- data/doc/{Kernel.html → AppControllerException.html} +23 -55
- data/doc/BabelHelper.html +707 -0
- data/doc/BadConfigurationException.html +142 -0
- data/doc/CommonFunctions.html +121 -33
- data/doc/FileNotFoundException.html +142 -0
- data/doc/NeptuneHelper.html +1102 -0
- data/doc/Object.html +94 -749
- data/doc/bin/neptune.html +3 -1
- data/doc/created.rid +7 -5
- data/doc/index.html +73 -25
- data/doc/lib/app_controller_client_rb.html +1 -1
- data/doc/lib/babel_rb.html +68 -0
- data/doc/lib/common_functions_rb.html +3 -1
- data/doc/lib/custom_exceptions_rb.html +54 -0
- data/doc/lib/neptune_rb.html +3 -1
- data/lib/app_controller_client.rb +28 -10
- data/lib/babel.rb +260 -0
- data/lib/common_functions.rb +42 -28
- data/lib/custom_exceptions.rb +10 -0
- data/lib/neptune.rb +371 -304
- data/test/unit/test_app_controller_client.rb +9 -9
- data/test/unit/test_babel.rb +154 -0
- data/test/unit/test_common_functions.rb +39 -75
- data/test/unit/test_neptune.rb +168 -76
- data/test/unit/ts_all.rb +5 -0
- metadata +32 -8
data/lib/babel.rb
ADDED
@@ -0,0 +1,260 @@
+#!/usr/bin/ruby
+# Programmer: Chris Bunch (cgb@cs.ucsb.edu)
+
+require 'app_controller_client'
+require 'common_functions'
+require 'custom_exceptions'
+require 'neptune'
+
+
+# The promise gem gives us futures / promises out-of-the-box, which we need
+# to hide the fact that babel jobs are asynchronous.
+require 'rubygems'
+require 'promise'
+require 'future'
+
+
+# If the user doesn't give us enough info to infer what bucket we should place
+# their code in, this message is displayed and execution aborts.
+NEEDS_BUCKET_INFO = "When running Babel jobs with local inputs / code, the " +
+  "bucket to store them in must be specified by either the :bucket_name " +
+  "parameter or the BABEL_BUCKET_NAME environment variable."
+
+
+# The constant string that a Neptune output job returns if the output does not
+# yet exist.
+DOES_NOT_EXIST = "error: output does not exist"
+
+
+# The initial amount of time, in seconds, to sleep between output job requests.
+# An exponential backoff is used with this value as the starting sleep time.
+SLEEP_TIME = 5 # seconds
+
+
+# The maximum amount of time, in seconds, that we should sleep between output
+# job requests.
+MAX_SLEEP_TIME = 60 # seconds
+
+# Babel provides a convenient wrapper around Neptune jobs. Instead of making
+# users write multiple Neptune jobs to actually run code (e.g., putting input
+# in the datastore, running the job, getting the output back), Babel handles
+# this automatically.
+def babel(params)
+  # Since this whole function should run asynchronously, we run it as a future.
+  # It automatically starts running in a new thread, and attempting to get the
+  # value of what this returns causes it to block until the job completes.
+  future {
+    job_data = BabelHelper.convert_from_neptune_params(params)
+    NeptuneHelper.validate_storage_params(job_data) # adds in S3 storage params
+
+    # :code is the only required parameter - everything else can use default vals
+    NeptuneHelper.require_param("@code", job_data)
+
+    if job_data["@output"].nil? or job_data["@output"].empty?
+      job_data["@output"] = BabelHelper.generate_output_location(job_data)
+    end
+    BabelHelper.ensure_output_does_not_exist(job_data)
+
+    if job_data["@is_remote"]
+      BabelHelper.validate_inputs(job_data)
+    else
+      BabelHelper.put_code(job_data)
+      BabelHelper.put_inputs(job_data)
+    end
+
+    BabelHelper.run_job(job_data)
+    # Retrieving the job's output could be done via a promise, so that we
+    # would only poll for output if the user actually uses the value. The job
+    # itself is run above, outside of the promise, so it always runs
+    # regardless of whether or not we fetch its output.
+    BabelHelper.wait_and_get_output(job_data)
+    # promise { BabelHelper.wait_and_get_output(job_data) }
+  }
+end
+
+
+# This module provides convenience functions for babel().
+module BabelHelper
+  # If the user fails to give us an output location, this function will generate
+  # one for them, based on either the location of their code (for remotely
+  # specified code), or a babel parameter (for locally specified code).
+  def self.generate_output_location(job_data)
+    if job_data["@is_remote"]
+      # We already know the bucket name - the same one that the user
+      # has told us their code is located in.
+      prefix = job_data["@code"].scan(/\/(.*?)\//)[0].to_s
+    else
+      prefix = self.get_bucket_for_local_data(job_data)
+    end
+
+    return "/#{prefix}/babel/temp-#{CommonFunctions.get_random_alphanumeric()}"
+  end
+
+  # Provides a common way for callers to get the name of the bucket that
+  # should be used for Neptune jobs where the code is stored locally.
+  def self.get_bucket_for_local_data(job_data)
+    bucket_name = job_data["@bucket_name"] || ENV['BABEL_BUCKET_NAME']
+
+    if bucket_name.nil?
+      raise BadConfigurationException.new(NEEDS_BUCKET_INFO)
+    end
+
+    # If the bucket name starts with a slash, remove it
+    if bucket_name[0].chr == "/"
+      bucket_name = bucket_name[1, bucket_name.length]
+    end
+
+    return bucket_name
+  end
+
+  # For jobs where the code is stored remotely, this method ensures that
+  # the code and any possible inputs actually do exist, before attempting to
+  # use them for computation.
+  def self.validate_inputs(job_data)
+    controller = self.get_appcontroller(job_data)
+
+    # First, make sure the code exists
+    NeptuneHelper.require_file_to_exist(job_data["@code"], job_data, controller)
+
+    if job_data["@argv"].nil? or job_data["@argv"].empty?
+      return
+    end
+
+    # We assume anything that begins with a slash is a remote file
+    job_data["@argv"].each { |arg|
+      if arg[0].chr == "/"
+        NeptuneHelper.require_file_to_exist(arg, job_data, controller)
+      end
+    }
+  end
+
+  # To avoid accidentally overwriting outputs from previous jobs, we first
+  # check to make sure an output file doesn't exist before starting a new job
+  # with the given name.
+  def self.ensure_output_does_not_exist(job_data)
+    file = job_data["@output"]
+    controller = self.get_appcontroller(job_data)
+    puts job_data.inspect
+    NeptuneHelper.require_file_to_not_exist(file, job_data, controller)
+  end
+
+  # Returns an AppControllerClient for the given job data.
+  def self.get_appcontroller(job_data)
+    keyname = job_data["@keyname"] || "appscale"
+    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
+    secret = CommonFunctions.get_secret_key(keyname)
+    return AppControllerClient.new(shadow_ip, secret)
+  end
+
+  # Stores the user's code (and the directory it's in, and directories in the
+  # same directory as the user's code, since there could be libraries used)
+  # in the remote datastore.
+  def self.put_code(job_data)
+    code_dir = File.dirname(job_data["@code"])
+    code = File.basename(job_data["@code"])
+    remote_code_dir = self.put_file(code_dir, job_data)
+    job_data["@code"] = remote_code_dir + "/" + code
+    return job_data["@code"]
+  end
+
+  # If any input files are specified, they are copied to the remote datastore
+  # via Neptune 'input' jobs. Inputs are assumed to be files on the local
+  # filesystem if they begin with a slash, and job_data gets updated with
+  # the remote location of these files.
+  def self.put_inputs(job_data)
+    if job_data["@argv"].nil? or job_data["@argv"].empty?
+      return job_data
+    end
+
+    job_data["@argv"].each_index { |i|
+      arg = job_data["@argv"][i]
+      if arg[0].chr == "/"
+        job_data["@argv"][i] = self.put_file(arg, job_data)
+      end
+    }
+
+    return job_data
+  end
+
+  # If the user gives us local code or local inputs, this function will
+  # run a Neptune 'input' job to store the data remotely.
+  def self.put_file(local_path, job_data)
+    input_data = self.convert_to_neptune_params(job_data)
+    input_data[:type] = "input"
+    input_data[:local] = local_path
+
+    bucket_name = self.get_bucket_for_local_data(job_data)
+    input_data[:remote] = "/#{bucket_name}/babel#{local_path}"
+
+    Kernel.neptune(input_data)
+
+    return input_data[:remote]
+  end
+
+  # Neptune internally uses job_data with keys of the form @name, but since the
+  # user has given them to us in the form :name, we convert it here.
+  # TODO(cgb): It looks like this conversion to/from may be unnecessary since
+  # neptune() just re-converts it - how can we remove it?
+  def self.convert_from_neptune_params(params)
+    job_data = {}
+    params.each { |k, v|
+      key = "@#{k}"
+      job_data[key] = v
+    }
+    return job_data
+  end
+
+  # Neptune input jobs expect keys of the form :name, but since we've already
+  # converted them to the form @name, this function reverses that conversion.
+  def self.convert_to_neptune_params(job_data)
+    neptune_params = {}
+
+    job_data.each { |k, v|
+      key = k.delete("@").to_sym
+      neptune_params[key] = v
+    }
+
+    return neptune_params
+  end
+
+  # Constructs a Neptune job to run the user's code as a Babel job (task queue)
+  # from the given parameters.
+  def self.run_job(job_data)
+    run_data = self.convert_to_neptune_params(job_data)
+    run_data[:type] = "babel"
+
+    # TODO(cgb): Once AppScale+Babel gets support for RabbitMQ, change this to
+    # exec tasks over it, instead of locally.
+    if job_data["@run_local"].nil?
+      run_data[:run_local] = true
+      run_data[:engine] = "executor-sqs"
+    end
+
+    return Kernel.neptune(run_data)
+  end
+
+  # Constructs a Neptune job to get the output of a Babel job. If the job is
+  # not yet finished, this function waits until it is, and then returns the
+  # output of the job.
+  def self.wait_and_get_output(job_data)
+    output_data = self.convert_to_neptune_params(job_data)
+    output_data[:type] = "output"
+
+    output = ""
+    time_to_sleep = SLEEP_TIME
+    loop {
+      output = Kernel.neptune(output_data)[:output]
+      if output == DOES_NOT_EXIST
+        # Exponentially back off, up to a limit of MAX_SLEEP_TIME
+        Kernel.sleep(time_to_sleep)
+        if time_to_sleep < MAX_SLEEP_TIME
+          time_to_sleep *= 2
+        end
+      else
+        break
+      end
+    }
+
+    return output
+  end
+end
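For orientation, here is a minimal sketch of how the new babel() entry point can be driven. The script path, input path, and bucket name are hypothetical, and a running AppScale deployment under the default "appscale" keyname is assumed; forcing the returned future (for example, by printing it) blocks until wait_and_get_output() stops seeing DOES_NOT_EXIST.

# Hedged usage sketch - not part of this release.
require 'rubygems'
require 'babel'

# :code is the only required parameter; :bucket_name (or the
# BABEL_BUCKET_NAME environment variable) tells Babel where to stage
# locally supplied code and inputs.
output = babel(:code => "/home/user/jobs/wordcount.rb",  # hypothetical path
               :argv => ["/home/user/jobs/input.txt"],   # hypothetical path
               :bucket_name => "my-babel-bucket")        # hypothetical bucket

# babel() returns a future; using its value forces evaluation and blocks
# until the underlying Neptune output job reports that output exists.
puts output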
data/lib/common_functions.rb
CHANGED
@@ -9,11 +9,7 @@ require 'socket'
 require 'timeout'
 require 'yaml'

-module Kernel
-  def shell(command)
-    return `#{command}`
-  end
-end
+require 'custom_exceptions'

 # A helper module that aggregates functions that are not part of Neptune's
 # core functionality. Specifically, this module contains methods to scp
@@ -21,6 +17,27 @@ end
 # often needed to determine which machine should be used for computation
 # or to copy over code and input files.
 module CommonFunctions
+  # Executes a command and returns the result. This is needed to get around
+  # Flexmock's inability to mock out Kernel:` (the standard shell exec
+  # method).
+  def self.shell(cmd)
+    return `#{cmd}`
+  end
+
+  # Returns a random string of alphanumeric characters, of whatever length
+  # the user requests.
+  def self.get_random_alphanumeric(length=10)
+    random = ""
+    possible = "0123456789abcdefghijklmnopqrstuvxwyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    possibleLength = possible.length
+
+    length.times { |index|
+      random << possible[rand(possibleLength)]
+    }
+
+    return random
+  end
+
   # Copies a file to the Shadow node (head node) within AppScale.
   # The caller specifies
   # the local file location, the destination where the file should be
@@ -30,14 +47,11 @@ module CommonFunctions
   def self.scp_to_shadow(local_file_loc,
                          remote_file_loc,
                          keyname,
-                         is_dir=false,
-                         file=File,
-                         get_from_yaml=CommonFunctions.method(:get_from_yaml),
-                         scp_file=CommonFunctions.method(:scp_file))
+                         is_dir=false)

-    shadow_ip = get_from_yaml
-    ssh_key =
-    scp_file
+    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
+    ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
+    CommonFunctions.scp_file(local_file_loc, remote_file_loc, shadow_ip, ssh_key, is_dir)
   end

   # Performs the actual remote copying of files: given the IP address
@@ -47,22 +61,22 @@ module CommonFunctions
   # wrong IP is given. If the user specifies that the file to copy is
   # actually a directory, we append the -r flag to scp as well.
   def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc,
-    is_dir=false
+    is_dir=false)
     cmd = ""
-    local_file_loc =
+    local_file_loc = File.expand_path(local_file_loc)

     ssh_args = "-o StrictHostkeyChecking=no 2>&1"
     ssh_args << " -r " if is_dir

-    public_key_loc =
+    public_key_loc = File.expand_path(public_key_loc)
     cmd = "scp -i #{public_key_loc} #{ssh_args} #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"
     cmd << "; echo $? >> ~/.appscale/retval"

-    retval_loc =
-
+    retval_loc = File.expand_path("~/.appscale/retval")
+    FileUtils.rm_f(retval_loc)

     begin
-      Timeout::timeout(-1) {
+      Timeout::timeout(-1) { CommonFunctions.shell("#{cmd}") }
     rescue Timeout::Error
       abort("Remotely copying over files failed. Is the destination machine" +
         " on and reachable from this computer? We tried the following" +
@@ -70,11 +84,11 @@ module CommonFunctions
     end

     loop {
-      break if
+      break if File.exists?(retval_loc)
       sleep(5)
     }

-    retval = (
+    retval = (File.open(retval_loc) { |f| f.read }).chomp
     if retval != "0"
       abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is " +
         "your environment set up properly?")
@@ -88,16 +102,16 @@ module CommonFunctions
   # method aborts if the value doesn't exist or the YAML file is malformed.
   # If the required flag is set to false, it returns nil in either scenario
   # instead.
-  def self.get_from_yaml(keyname, tag, required=true
-    location_file =
+  def self.get_from_yaml(keyname, tag, required=true)
+    location_file = File.expand_path("~/.appscale/locations-#{keyname}.yaml")

-    if !
-
-      " keyname, \"#{keyname}\".")
+    if !File.exists?(location_file)
+      raise BadConfigurationException.new("An AppScale instance is not " +
+        "currently running with the provided keyname, \"#{keyname}\".")
     end

     begin
-      tree =
+      tree = YAML.load_file(location_file)
     rescue ArgumentError
       if required
         abort("The yaml file you provided was malformed. Please correct any" +
@@ -121,7 +135,7 @@ module CommonFunctions
   # Returns the secret key needed for communication with AppScale's
   # Shadow node. This method is a nice frontend to the get_from_yaml
   # function, as the secret is stored in a YAML file.
-  def self.get_secret_key(keyname, required=true
-    return CommonFunctions.get_from_yaml(keyname, :secret, required
+  def self.get_secret_key(keyname, required=true)
+    return CommonFunctions.get_from_yaml(keyname, :secret, required)
   end
 end
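The new CommonFunctions.shell exists for testability: Flexmock cannot stub Kernel's backtick method directly, but it can stub a module method. A minimal sketch of the kind of unit test this refactoring enables (assuming the flexmock gem; the command and return value are illustrative only):

require 'rubygems'
require 'flexmock/test_unit'
require 'common_functions'

class TestShellIsMockable < Test::Unit::TestCase
  def test_shell_can_be_stubbed
    # Stub the shell call so that no real command is executed.
    flexmock(CommonFunctions).should_receive(:shell).
      with("uname").and_return("Linux\n")
    assert_equal("Linux\n", CommonFunctions.shell("uname"))
  end
end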
data/lib/neptune.rb
CHANGED
@@ -3,6 +3,7 @@

 require 'app_controller_client'
 require 'common_functions'
+require 'custom_exceptions'

 # Setting verbose to nil here suppresses the otherwise
 # excessive SSL cert warning messages that will pollute
@@ -20,6 +21,12 @@ $VERBOSE = nil
 #MR_RUN_JOB_REQUIRED = %w{ }
 #MR_REQUIRED = %w{ output }

+# A list of all the Neptune job types that we support
+ALLOWED_JOB_TYPES = %w{acl cicero compile erlang mpi input output ssa babel upc x10}
+
+# The string to display for disallowed job types.
+JOB_TYPE_NOT_ALLOWED = "The job type you specified is not supported."
+
 # A list of Neptune jobs that do not require nodes to be spawned
 # up for computation
 NO_NODES_NEEDED = ["acl", "input", "output", "compile"]
@@ -34,7 +41,7 @@ ALLOWED_STORAGE_TYPES = ["appdb", "gstorage", "s3", "walrus"]

 # A list of jobs that require some kind of work to be done before
 # the actual computation can be performed.
-NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
+NEED_PREPROCESSING = ["babel", "compile", "erlang", "mpi", "ssa"]

 # A set of methods and constants that we've monkey-patched to enable Neptune
 # support. In the future, it is likely that the only exposed / monkey-patched
@@ -45,385 +52,443 @@ NEED_PREPROCESSING = ["compile", "erlang", "mpi", "ssa"]
 class Object
 end

-
-#
-#
-#
-
-
-
-
-
-
-  preprocess = "preprocess_#{job_type}".to_sym
-  send(preprocess, job_data)
-end
+module NeptuneHelper
+  # Certain types of jobs need steps to be taken before they
+  # can be started (e.g., copying input data or code over).
+  # This method dispatches the right method to use based
+  # on the type of the job that the user has asked to run.
+  def self.do_preprocessing(job_data, controller)
+    job_type = job_data["@type"]
+    if !NEED_PREPROCESSING.include?(job_type)
+      return
+    end

-#
-#
-
-
-  code = File.expand_path(job_data["@code"])
-  if !File.exists?(code)
-    abort("The source file #{code} does not exist.")
+    # Don't worry about adding on the self. prefix - send will resolve
+    # it the right way
+    preprocess = "preprocess_#{job_type}".to_sym
+    send(preprocess, job_data, controller)
   end

-
-
-
-
-
-
-
-
-  shell.call(remove_dir)
+  # This preprocessing method copies over the user's code to the
+  # Shadow node so that it can be compiled there. A future version
+  # of this method may also copy over libraries as well.
+  def self.preprocess_compile(job_data, controller)
+    code = File.expand_path(job_data["@code"])
+    if !File.exists?(code)
+      raise BadConfigurationException.new("The source file #{code} does not exist.")
+    end

-
+    suffix = code.split('/')[-1]
+    dest = "/tmp/#{suffix}"
+    keyname = job_data["@keyname"]
+    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)

-
-
+    ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no root@#{shadow_ip}"
+    remove_dir = "ssh #{ssh_args} 'rm -rf #{dest}' 2>&1"
+    Kernel.puts remove_dir
+    CommonFunctions.shell(remove_dir)
+    CommonFunctions.scp_to_shadow(code, dest, keyname, is_dir=true)

-
-  if !job_data["@code"]
-    abort("When running Erlang jobs, :code must be specified.")
+    job_data["@code"] = dest
   end

-
-
-    abort("The specified code, #{job_data['@code']}," +
-      " didn't exist. Please specify one that exists and try again")
-  end
-  dest_code = "/tmp/"
+  def self.preprocess_erlang(job_data, controller)
+    self.require_param("@code", job_data)

-
-
-
+    source_code = File.expand_path(job_data["@code"])
+    if !File.exists?(source_code)
+      raise BadConfigurationException.new("The specified code, #{job_data['@code']}," +
+        " didn't exist. Please specify one that exists and try again")
+    end
+    dest_code = "/tmp/"

-
-
-# that this value is at least as many as the number of nodes (that is, nodes
-# can't be underprovisioned in MPI).
-def preprocess_mpi(job_data)
-  if !job_data["@nodes_to_use"]
-    abort("When running MPI jobs, :nodes_to_use must be specified.")
+    keyname = job_data["@keyname"]
+    CommonFunctions.scp_to_shadow(source_code, dest_code, keyname)
   end

-
-
-
+  # This preprocessing method verifies that the user specified the number of
+  # nodes to use. If they also specified the number of processes to use, we
+  # also verify that this value is at least as many as the number of nodes
+  # (that is, nodes can't be underprovisioned in MPI).
+  def self.preprocess_mpi(job_data, controller)
+    self.require_param("@nodes_to_use", job_data)
+    self.require_param("@procs_to_use", job_data)
+
+    if job_data["@procs_to_use"]
+      p = job_data["@procs_to_use"]
+      n = job_data["@nodes_to_use"]
+      if p < n
+        raise BadConfigurationException.new(":procs_to_use must be at least as " +
+          "large as :nodes_to_use.")
+      end
+    end

-
-
-
-
-
-
-    "
-
+    if job_data["@argv"]
+      argv = job_data["@argv"]
+
+      if argv.class == String
+        job_data["@argv"] = argv
+      elsif argv.class == Array
+        job_data["@argv"] = argv.join(' ')
+      else
+        raise BadConfigurationException.new(":argv must be either a String or Array")
+      end
     end
+
+    return job_data
   end

-
-
-
-
+  # This preprocessing method verifies that the user specified the number of
+  # trajectories to run, via either :trajectories or :simulations. Only one of
+  # the two should be specified, and regardless of which one the user gives
+  # us, we convert it to :trajectories.
+  def self.preprocess_ssa(job_data, controller)
+    if job_data["@simulations"] and job_data["@trajectories"]
+      raise BadConfigurationException.new(":simulations and :trajectories can" +
+        "not both be specified.")
     end

-    if
-      job_data["@
+    if job_data["@simulations"]
+      job_data["@trajectories"] = job_data["@simulations"]
+      job_data.delete("@simulations")
     end
-end

-
-
+    self.require_param("@trajectories", job_data)
+    return job_data
+  end

-
-
-#
-
-def preprocess_ssa(job_data)
-  if job_data["@simulations"] and job_data["@trajectories"]
-    abort("Both :simulations and :trajectories cannot be specified - use one" +
-      " or the other.")
+  def self.require_param(param, job_data)
+    if !job_data[param]
+      raise BadConfigurationException.new("#{param} must be specified")
+    end
   end

-
-
-
+  def self.require_file_to_exist(file, job_data, controller)
+    if controller.does_file_exist?(file, job_data)
+      return
+    else
+      raise FileNotFoundException
+    end
   end

-
-
+  def self.require_file_to_not_exist(file, job_data, controller)
+    begin
+      self.require_file_to_exist(file, job_data, controller)
+      # no exception thrown previously means that the output file exists
+      raise BadConfigurationException.new('Output specified already exists')
+    rescue FileNotFoundException
+      return
+    end
   end

-
-
+  # This preprocessing method verifies that the user specified code that
+  # should be run, where the output should be placed, and an engine to run
+  # over. It also verifies that all files to be used are actually reachable.
+  # Supported engines can be found by contacting an AppScale node.
+  def self.preprocess_babel(job_data, controller)
+    self.require_param("@code", job_data)
+    self.require_param("@engine", job_data)
+    self.require_param("@output", job_data)
+
+    # For most code types, the file's name given is the thing to exec.
+    # For Java, the actual file to search for is whatever the user gives
+    # us, with a .class extension.
+    code_file_name = job_data["@code"]
+    if !job_data["@executable"].nil? and job_data["@executable"] == "java"
+      code_file_name += ".class"
+    end
+
+    self.require_file_to_exist(code_file_name, job_data, controller)
+    self.require_file_to_not_exist(job_data["@output"], job_data, controller)
+
+    if job_data["@argv"]
+      argv = job_data["@argv"]
+      if argv.class != Array
+        raise BadConfigurationException.new("argv must be an array")
+      end

-
-
-
-
-
-
+      argv.each { |arg|
+        if arg =~ /\/.*\/.*/
+          self.require_file_to_exist(arg, job_data, controller)
+        end
+      }
+    end

-
-
+    if job_data["@appcfg_cookies"]
+      self.require_file_to_exist(job_data["@appcfg_cookies"], job_data, controller)
+    end

-
-  type = job_data["@type"]
+    user_specified_engine = job_data["@engine"]

-
-
-
+    # validate the engine here
+    engines = controller.get_supported_babel_engines(job_data)
+    if !engines.include?(user_specified_engine)
+      raise BadConfigurationException.new("The engine you specified, " +
+        "#{user_specified_engine}, is not a supported engine. Supported engines" +
+        " are: #{engines.join(', ')}")
+    end
   end

-
-
-
-
-
+  def self.get_job_data(params)
+    job_data = {}
+    params.each { |k, v|
+      key = "@#{k}"
+      job_data[key] = v
+    }

-  job_data
-
+    job_data.delete("@job")
+    job_data["@keyname"] = params[:keyname] || "appscale"

-
-
-end
+    job_data["@type"] = job_data["@type"].to_s
+    type = job_data["@type"]

-
-
-    abort("Job output must be specified")
+    if !ALLOWED_JOB_TYPES.include?(type)
+      raise BadConfigurationException.new(JOB_TYPE_NOT_ALLOWED)
     end

-    if
-
+    if type == "upc" or type == "x10"
+      job_data["@type"] = "mpi"
+      type = "mpi"
     end
-  end

-
-
+    # kdt jobs also run as mpi jobs, but need to pass along an executable
+    # parameter to let mpiexec know to use python to exec it
+    if type == "kdt"
+      job_data["@type"] = "mpi"
+      type = "mpi"

-
-
-      job_data["@storage"] = "appdb"
-    end
+      job_data["@executable"] = "python"
+    end

-
-
-
-      " - we do not support #{storage}.")
-    end
+    if job_data["@nodes_to_use"].class == Hash
+      job_data["@nodes_to_use"] = job_data["@nodes_to_use"].to_a.flatten
+    end

-
-
-
-
-      storage = "s3"
-      job_data["@storage"] = "s3"
-    end
+    if !NO_OUTPUT_NEEDED.include?(type)
+      if (job_data["@output"].nil? or job_data["@output"].empty?)
+        raise BadConfigurationException.new("Job output must be specified")
+      end

-
-
-      if job_data["@#{item}"]
-        puts "Using specified #{item}"
-      else
-        if ENV[item]
-          puts "Using #{item} from environment"
-          job_data["@#{item}"] = ENV[item]
-        else
-          abort("When storing data to S3, #{item} must be specified or be in " +
-            "your environment. Please do so and try again.")
-        end
+      if job_data["@output"][0].chr != "/"
+        raise BadConfigurationException.new("Job output must begin with a slash ('/')")
       end
-
+    end
+
+    return job_data
   end

-
-
+  def self.validate_storage_params(job_data)
+    job_data["@storage"] ||= "appdb"

-
-
-
-
-
-    result = {:result => :success}
+    storage = job_data["@storage"]
+    if !ALLOWED_STORAGE_TYPES.include?(storage)
+      raise BadConfigurationException.new("Supported storage types are " +
+        "#{ALLOWED_STORAGE_TYPES.join(', ')} - #{storage} is not supported.")
+    end

-
-
-
+    # Our implementation for storing / retrieving via Google Storage and
+    # Walrus uses the same library as we do for S3 - so just tell it that
+    # it's S3
+    if storage == "gstorage" or storage == "walrus"
+      storage = "s3"
+      job_data["@storage"] = "s3"
+    end

-
-
-
-
-
-
+    if storage == "s3"
+      ["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
+        if job_data["@#{item}"]
+          Kernel.puts "Using specified #{item}"
+        else
+          if ENV[item]
+            Kernel.puts "Using #{item} from environment"
+            job_data["@#{item}"] = ENV[item]
+          else
+            raise BadConfigurationException.new("When storing data to S3, #{item} must be specified or be in " +
+              "your environment. Please do so and try again.")
+          end
+        end
+      }
+    end

-
-    scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
-    puts scp_cmd
-    shell.call(scp_cmd)
-
-    job_data["@local"] = remote
-    puts "job data = #{job_data.inspect}"
-    response = controller.put_input(job_data)
-    if response
-      return {:result => :success}
-    else
-      # TODO - expand this to include the reason why it failed
-      return {:result => :failure}
+    return job_data
   end
-  end

-  # This method
-  #
-
-
-
-    ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
-    puts ssh_command
-    ssh_result = shell.call(ssh_command)
-    puts "result was [#{ssh_result}]"
-    if ssh_result =~ /No such file or directory/
-      puts "Still waiting for code to be compiled..."
-    else
-      puts "compilation complete! Copying compiled code to #{copy_to}"
-      return
-    end
-    sleep(5)
-  }
-end
+  # This method takes a file on the local user's computer and stores it remotely
+  # via AppScale. It returns a hash map indicating whether or not the job
+  # succeeded and if it failed, the reason for it.
+  def self.get_input(job_data, ssh_args, shadow_ip, controller)
+    result = {:result => :success}

-
-  # gets the standard out and error returned from the compilation. This method
-  # returns a hash containing the standard out, error, and a result that indicates
-  # whether or not the compilation was successful.
-  def compile_code(job_data, ssh_args, shadow_ip, shell=Kernel.method(:`))
-    compiled_location = controller.compile_code(job_data)
+    self.require_param("@local", job_data)

-
+    local_file = File.expand_path(job_data["@local"])
+    if !File.exists?(local_file)
+      reason = "the file you specified to copy, #{local_file}, doesn't exist." +
+        " Please specify a file that exists and try again."
+      return {:result => :failure, :reason => reason}
+    end

-
+    remote = "/tmp/neptune-input-#{rand(100000)}"
+    scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
+    Kernel.puts scp_cmd
+    CommonFunctions.shell(scp_cmd)

-
+    job_data["@local"] = remote
+    Kernel.puts "job data = #{job_data.inspect}"
+    response = controller.put_input(job_data)
+    if response
+      return {:result => :success}
+    else
+      # TODO - expand this to include the reason why it failed
+      return {:result => :failure}
+    end
+  end

-
-
-
+  # This method waits for AppScale to finish compiling the user's code,
+  # indicated by AppScale copying the finished code to a pre-determined
+  # location.
+  def self.wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location)
+    loop {
+      ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
+      Kernel.puts ssh_command
+      ssh_result = CommonFunctions.shell(ssh_command)
+      Kernel.puts "result was [#{ssh_result}]"
+      if ssh_result =~ /No such file or directory/
+        Kernel.puts "Still waiting for code to be compiled..."
+      else
+        Kernel.puts "compilation complete! Copying compiled code to #{copy_to}"
+        return
+      end
+      sleep(5)
+    }
+  end

-  code
-
-
+  # This method sends out a request to compile code, waits for it to finish,
+  # and gets the standard out and error returned from the compilation. This
+  # method returns a hash containing the standard out, error, and a result
+  # that indicates whether or not the compilation was successful.
+  def self.compile_code(job_data, ssh_args, shadow_ip)
+    compiled_location = controller.compile_code(job_data)
+    copy_to = job_data["@copy_to"]
+    self.wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location)
+
+    FileUtils.rm_rf(copy_to)
+
+    scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
+    Kernel.puts scp_command
+    CommonFunctions.shell(scp_command)
+
+    code = job_data["@code"]
+    dirs = code.split(/\//)
+    remote_dir = "/tmp/" + dirs[-1]
+
+    [remote_dir, compiled_location].each { |remote_files|
+      ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_files}' 2>&1"
+      Kernel.puts ssh_command
+      CommonFunctions.shell(ssh_command)
+    }

-
-
-    puts ssh_command
-    shell.call(ssh_command)
-  }
+    return get_std_out_and_err(copy_to)
+  end

-
-
+  # This method returns a hash containing the standard out and standard error
+  # from a completed job, as well as a result field that indicates whether or
+  # not the job completed successfully (success = no errors).
+  def self.get_std_out_and_err(location)
+    result = {}

-
-
-  # not the job completed successfully (success = no errors).
-  def get_std_out_and_err(location)
-    result = {}
+    out = File.open("#{location}/compile_out") { |f| f.read.chomp! }
+    result[:out] = out

-
-
+    err = File.open("#{location}/compile_err") { |f| f.read.chomp! }
+    result[:err] = err

-
-
+    if result[:err]
+      result[:result] = :failure
+    else
+      result[:result] = :success
+    end

-
-
-    else
-      result[:result] = :success
-    end
+    return result
+  end

-
-
+  def self.upload_app_for_cicero(job_data)
+    if !job_data["@app"]
+      Kernel.puts "No app specified, not uploading..."
+      return
+    end

-
-
-
-
-
+    app_location = File.expand_path(job_data["@app"])
+    if !File.exists?(app_location)
+      raise BadConfigurationException.new("The app you specified, #{app_location}, does not exist." +
+        "Please specify one that does and try again.")
+    end

-
-
-
-
-
+    keyname = job_data["@keyname"] || "appscale"
+    if job_data["@appscale_tools"]
+      upload_app = File.expand_path(job_data["@appscale_tools"]) +
+        File::SEPARATOR + "bin" + File::SEPARATOR + "appscale-upload-app"
+    else
+      upload_app = "appscale-upload-app"
+    end

-
-
-
-
-    else
-      upload_app = "appscale-upload-app"
+    Kernel.puts "Uploading AppEngine app at #{app_location}"
+    upload_command = "#{upload_app} --file #{app_location} --test --keyname #{keyname}"
+    Kernel.puts upload_command
+    Kernel.puts `#{upload_command}`
   end

-
-
-
-
-
+  # This method actually runs the Neptune job, given information about the job
+  # as well as information about the node to send the request to.
+  def self.run_job(job_data, ssh_args, shadow_ip, secret)
+    controller = AppControllerClient.new(shadow_ip, secret)
+
+    # TODO - right now the job is assumed to succeed in many cases
+    # need to investigate the various failure scenarios
+    result = { :result => :success }
+
+    case job_data["@type"]
+    when "input"
+      result = self.get_input(job_data, ssh_args, shadow_ip, controller)
+    when "output"
+      result[:output] = controller.get_output(job_data)
+    when "get-acl"
+      job_data["@type"] = "acl"
+      result[:acl] = controller.get_acl(job_data)
+    when "set-acl"
+      job_data["@type"] = "acl"
+      result[:acl] = controller.set_acl(job_data)
+    when "compile"
+      result = self.compile_code(job_data, ssh_args, shadow_ip)
+    when "cicero"
+      self.upload_app_for_cicero(job_data)
+      msg = controller.start_neptune_job(job_data)
+      result[:msg] = msg
+      result[:result] = :failure if result[:msg] !~ /job is now running\Z/
+    else
+      msg = controller.start_neptune_job(job_data)
+      result[:msg] = msg
+      result[:result] = :failure if result[:msg] !~ /job is now running\Z/
+    end

-
-  # as well as information about the node to send the request to.
-  def run_job(job_data, ssh_args, shadow_ip, secret,
-    controller=AppControllerClient, file=File)
-    controller = controller.new(shadow_ip, secret)
-
-    # TODO - right now the job is assumed to succeed in many cases
-    # need to investigate the various failure scenarios
-    result = { :result => :success }
-
-    case job_data["@type"]
-    when "input"
-      result = get_input(job_data, ssh_args, shadow_ip, controller, file)
-    when "output"
-      result[:output] = controller.get_output(job_data)
-    when "get-acl"
-      job_data["@type"] = "acl"
-      result[:acl] = controller.get_acl(job_data)
-    when "set-acl"
-      job_data["@type"] = "acl"
-      result[:acl] = controller.set_acl(job_data)
-    when "compile"
-      result = compile_code(job_data, ssh_args, shadow_ip)
-    when "cicero"
-      upload_app_for_cicero(job_data)
-      msg = controller.start_neptune_job(job_data)
-      result[:msg] = msg
-      result[:result] = :failure if result[:msg] !~ /job is now running\Z/
-    else
-      msg = controller.start_neptune_job(job_data)
-      result[:msg] = msg
-      result[:result] = :failure if result[:msg] !~ /job is now running\Z/
+    return result
   end
-
-  return result
 end

-#
-
-
-#
-#
-#
-#
-# vice-versa).
+# Make neptune() public so that babel() can call it
+public
+
+# This method is the heart of Neptune - here, we take blocks of code that the
+# user has written and convert them into HPC job requests. At a high level,
+# the user can request to run a job, retrieve a job's output, or modify the
+# access policy (ACL) for the output of a job. By default, job data is private,
+# but a Neptune job can be used to set it to public later (and vice-versa).
 def neptune(params)
-  puts "Received a request to run a job."
-  puts params[:type]
+  Kernel.puts "Received a request to run a job."
+  Kernel.puts params[:type]

-  job_data = get_job_data(params)
-  validate_storage_params(job_data)
-  puts "job data = #{job_data.inspect}"
-  do_preprocessing(job_data)
+  job_data = NeptuneHelper.get_job_data(params)
+  NeptuneHelper.validate_storage_params(job_data)
+  Kernel.puts "job data = #{job_data.inspect}"
   keyname = job_data["@keyname"]

   shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
@@ -431,5 +496,7 @@ def neptune(params)
   ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
   ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "

-
+  controller = AppControllerClient.new(shadow_ip, secret)
+  NeptuneHelper.do_preprocessing(job_data, controller)
+  return NeptuneHelper.run_job(job_data, ssh_args, shadow_ip, secret)
 end
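A minimal sketch of a neptune() call under the 0.2.0 validation rules; the code path and output location below are hypothetical, and a running AppScale deployment is assumed. Job types outside ALLOWED_JOB_TYPES, unsupported storage backends, and missing or malformed :output values now raise BadConfigurationException rather than abort()ing the calling process, so callers can rescue them:

require 'rubygems'
require 'neptune'

begin
  result = neptune(:type => :mpi,
                   :code => "/home/user/ring/Ring",     # hypothetical path
                   :nodes_to_use => 4,
                   :procs_to_use => 4,
                   :output => "/mybucket/ring-output")  # must start with "/"
  puts result[:msg]
rescue BadConfigurationException => e
  puts "Job rejected: #{e}"
end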