neptune 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +39 -0
- data/README +50 -0
- data/bin/neptune +16 -0
- data/doc/AppControllerClient.html +468 -0
- data/doc/CommonFunctions.html +368 -0
- data/doc/LICENSE.html +131 -0
- data/doc/Object.html +431 -0
- data/doc/README.html +135 -0
- data/doc/bin/neptune.html +58 -0
- data/doc/created.rid +7 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +90 -0
- data/doc/js/darkfish.js +116 -0
- data/doc/js/jquery.js +32 -0
- data/doc/js/quicksearch.js +114 -0
- data/doc/js/thickbox-compressed.js +10 -0
- data/doc/lib/app_controller_client_rb.html +60 -0
- data/doc/lib/common_functions_rb.html +68 -0
- data/doc/lib/job_rb.html +55 -0
- data/doc/scripts/appscale/add_appserver_rb.html +52 -0
- data/doc/scripts/appscale/add_database_rb.html +52 -0
- data/doc/scripts/appscale/add_loadbalancer_rb.html +52 -0
- data/doc/scripts/appscale/add_slave_rb.html +52 -0
- data/doc/scripts/cewssa/cewssa-output.html +104 -0
- data/doc/scripts/cewssa/get_cewssa_output_rb.html +52 -0
- data/doc/scripts/cewssa/run_cewssa_rb.html +52 -0
- data/doc/scripts/cewssa/test_dfsp_rb.html +52 -0
- data/doc/scripts/dfsp/get_dfsp_output_rb.html +52 -0
- data/doc/scripts/dfsp/make_output_public_rb.html +52 -0
- data/doc/scripts/dfsp/run_dfsp_rb.html +52 -0
- data/doc/scripts/dfsp/see_acl_rb.html +52 -0
- data/doc/scripts/dfsp/test_dfsp_rb.html +52 -0
- data/doc/scripts/mapreduce/get_mapreduce_output_rb.html +52 -0
- data/doc/scripts/mapreduce/input-10.html +114 -0
- data/doc/scripts/mapreduce/input-30.html +114 -0
- data/doc/scripts/mapreduce/input-7.html +104 -0
- data/doc/scripts/mapreduce/map_rb.html +56 -0
- data/doc/scripts/mapreduce/reduce_rb.html +56 -0
- data/doc/scripts/mapreduce/run_java_mr_rb.html +52 -0
- data/doc/scripts/mapreduce/run_mapreduce_rb.html +52 -0
- data/doc/scripts/mpi/Makefile.html +125 -0
- data/doc/scripts/mpi/get_mpi_output_rb.html +52 -0
- data/doc/scripts/mpi/hw2_c.html +52 -0
- data/doc/scripts/mpi/hw2harness_c.html +52 -0
- data/doc/scripts/mpi/hw2harness_h.html +52 -0
- data/doc/scripts/mpi/run_mpi_cpi_rb.html +52 -0
- data/doc/scripts/mpi/run_mpi_powermethod_rb.html +52 -0
- data/doc/scripts/mpi/test_rb.html +52 -0
- data/doc/scripts/x10/Makefile.html +106 -0
- data/doc/scripts/x10/get_x10_output_rb.html +52 -0
- data/doc/scripts/x10/run_x10_kmeans_rb.html +52 -0
- data/doc/scripts/x10/run_x10_montypi_rb.html +52 -0
- data/doc/scripts/x10/run_x10_nqueens_rb.html +52 -0
- data/doc/scripts/x10/run_x10_powermethod_rb.html +52 -0
- data/doc/scripts/x10/run_x10_qsort_rb.html +52 -0
- data/doc/scripts/x10/test_rb.html +52 -0
- data/lib/app_controller_client.rb +135 -0
- data/lib/common_functions.rb +107 -0
- data/lib/job.rb +161 -0
- metadata +145 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: run_x10_qsort.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">Mon Dec 27 19:27:35 -0800 2010</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
</ul>
|
35
|
+
</dd>
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
</dl>
|
40
|
+
</div>
|
41
|
+
|
42
|
+
<div id="documentation">
|
43
|
+
|
44
|
+
<div class="description">
|
45
|
+
<h2>Description</h2>
|
46
|
+
|
47
|
+
</div>
|
48
|
+
|
49
|
+
</div>
|
50
|
+
</body>
|
51
|
+
</html>
|
52
|
+
|
@@ -0,0 +1,52 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: test.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">Wed Dec 29 14:05:27 -0800 2010</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
</ul>
|
35
|
+
</dd>
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
</dl>
|
40
|
+
</div>
|
41
|
+
|
42
|
+
<div id="documentation">
|
43
|
+
|
44
|
+
<div class="description">
|
45
|
+
<h2>Description</h2>
|
46
|
+
|
47
|
+
</div>
|
48
|
+
|
49
|
+
</div>
|
50
|
+
</body>
|
51
|
+
</html>
|
52
|
+
|
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Programmer: Chris Bunch
|
3
|
+
|
4
|
+
require 'openssl'
|
5
|
+
require 'soap/rpc/driver'
|
6
|
+
require 'timeout'
|
7
|
+
|
8
|
+
# Sometimes SOAP calls take a long time if large amounts of data are being
|
9
|
+
# sent over the network: for this first version we don't want these calls to
|
10
|
+
# endlessly timeout and retry, so as a hack, just don't let them timeout.
|
11
|
+
# The next version should replace this and properly timeout and not use
|
12
|
+
# long calls unless necessary.
|
13
|
+
# Sentinel meaning "do not time out". Every SOAP wrapper in
# AppControllerClient passes it as the first argument of make_call.
NO_TIMEOUT = -1
|
14
|
+
|
15
|
+
# A client that uses SOAP messages to communicate with the underlying cloud
|
16
|
+
# platform (here, AppScale). This client is similar to that used in the AppScale
|
17
|
+
# Tools, but with non-Neptune SOAP calls removed.
|
18
|
+
class AppControllerClient
  attr_reader :conn, :ip, :secret

  # Builds a client for the AppController running at the given IP address.
  # The secret (a string, basically a password) is sent along with every
  # SOAP call; AppControllers reject calls that do not carry it. It can be
  # found in the user's .appscale directory, and a helper method is usually
  # present to fetch it for us.
  #
  # The SOAP driver is pointed at https://<ip>:17443 and the four Neptune
  # remote methods are registered on it.
  def initialize(ip, secret)
    @ip = ip
    @secret = secret

    @conn = SOAP::RPC::Driver.new("https://#{@ip}:17443")
    @conn.add_method("neptune_start_job", "job_data", "secret")
    @conn.add_method("neptune_get_output", "job_data", "secret")
    @conn.add_method("neptune_get_acl", "job_data", "secret")
    @conn.add_method("neptune_set_acl", "job_data", "secret")
  end

  # Runs the given block (normally a single SOAP call) and returns its
  # result, applying a common timeout and error policy.
  #
  # time::            number of seconds to allow, or NO_TIMEOUT / nil / any
  #                   negative number for no limit at all.
  # retry_on_except:: when true, refused connections and unexpected errors
  #                   are retried; when false they abort the program with a
  #                   message.
  #
  # SSL errors, NotImplementedError and timeouts are always retried,
  # regardless of retry_on_except, since they can occur when the machine is
  # not yet running or is too busy to handle the request. Retries are
  # immediate and unbounded.
  def make_call(time, retry_on_except)
    # Timeout::timeout only treats nil (or zero) as "no deadline"; a negative
    # number is either rejected or expires at once depending on the Ruby
    # version, so the NO_TIMEOUT sentinel is translated here.
    limit = (time.nil? || time < 0) ? nil : time

    begin
      Timeout::timeout(limit) {
        yield if block_given?
      }
    rescue Errno::ECONNREFUSED
      retry if retry_on_except
      abort("Connection was refused. Is the AppController running?")
    rescue OpenSSL::SSL::SSLError, NotImplementedError, Timeout::Error
      retry
    rescue StandardError => except
      # StandardError rather than Exception: Ctrl-C (Interrupt) and exit
      # requests (SystemExit) must not be swallowed or retried.
      retry if retry_on_except
      abort("We saw an unexpected error of the type #{except.class} with the following message:\n#{except}.")
    end
  end

  # Initiates the start of a Neptune job, whether it be a HPC job (MPI, X10,
  # or MapReduce), or a scaling job (e.g., for AppScale itself). This method
  # should not be used for retrieving the output of a job or getting / setting
  # output ACLs, but just for starting new HPC / scaling jobs. It takes a hash
  # containing the parameters of the job to run and aborts if the
  # AppController's reply contains "Error:" (e.g., if a bad secret is used).
  # Otherwise the AppController's reply is returned.
  def start_neptune_job(job_data)
    checked_call { conn.neptune_start_job(job_data, @secret) }
  end

  # Retrieves the output of a Neptune job, stored in an underlying
  # database. Within AppScale, a special application runs, referred to as the
  # Repository, which provides a key-value interface to Neptune job data.
  # Data is stored as though it were on a file system, therefore output
  # locations should be of the usual form /folder/filename . Currently the
  # contents of the file are returned as a string to the caller. See
  # start_neptune_job for the conditions under which this method aborts and
  # for the input format used for job_data.
  def get_output(job_data)
    checked_call { conn.neptune_get_output(job_data, @secret) }
  end

  # Returns the ACL associated with the named piece of data stored
  # in the underlying cloud platform. Right now, data can only be
  # public or private. Input, output, and abort conditions mirror those of
  # start_neptune_job.
  def get_acl(job_data)
    checked_call { conn.neptune_get_acl(job_data, @secret) }
  end

  # Sets the ACL of a specified piece of data stored in the underlying
  # cloud platform. As is the case with get_acl, ACLs can be either
  # public or private right now. Input, output, and abort conditions mirror
  # those of start_neptune_job.
  def set_acl(job_data)
    checked_call { conn.neptune_set_acl(job_data, @secret) }
  end

  private

  # Shared body of the four SOAP wrappers: performs the yielded call through
  # make_call with no timeout and no retry on unexpected errors, aborts with
  # the reply as the message if it contains "Error:", and otherwise returns
  # the reply.
  def checked_call
    result = ""
    make_call(NO_TIMEOUT, false) {
      result = yield
    }
    # to_s keeps the check safe should the remote side reply with a
    # non-String value.
    abort(result) if result.to_s =~ /Error:/
    result
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Programmer: Chris Bunch
|
3
|
+
|
4
|
+
require 'digest/sha1'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'net/http'
|
7
|
+
require 'openssl'
|
8
|
+
require 'socket'
|
9
|
+
require 'timeout'
|
10
|
+
require 'yaml'
|
11
|
+
|
12
|
+
# A helper module that aggregates functions that are not part of Neptune's
|
13
|
+
# core functionality. Specifically, this module contains methods to scp
|
14
|
+
# files to other machines and the ability to read YAML files, which are
|
15
|
+
# often needed to determine which machine should be used for computation
|
16
|
+
# or to copy over code and input files.
|
17
|
+
module CommonFunctions

  # Exception classes that mean "the YAML file could not be parsed".
  # ArgumentError is what the original code rescued; Psych signals malformed
  # or disallowed content with subclasses of Psych::Exception.
  YAML_LOAD_ERRORS = [ArgumentError]
  YAML_LOAD_ERRORS << Psych::Exception if defined?(Psych::Exception)

  # Copies a file to the Shadow node (head node) within AppScale.
  # The caller specifies the local file location, the destination where the
  # file should be placed, and the name of the key to use. The keyname is
  # typically specified by the Neptune job given, but defaults to "appscale"
  # if not provided. The Shadow's IP is read from the locations YAML file
  # for that keyname and the SSH key from ~/.appscale/<keyname>.key.
  # Returns whatever scp_file returns (the command that was run).
  def self.scp_to_shadow(local_file_loc, remote_file_loc, keyname="appscale")
    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
    ssh_key = File.expand_path("~/.appscale/#{keyname}.key")

    self.scp_file(local_file_loc, remote_file_loc, shadow_ip, ssh_key)
  end

  # Performs the actual remote copying of files: uses scp to copy
  # local_file_loc to root@target_ip:remote_file_loc. public_key_loc may be
  # a single key path or an Array of key paths; each is passed to scp with
  # its own -i flag.
  #
  # The shell command appends scp's exit status to ~/.appscale/retval, which
  # is removed beforehand and read back afterwards. Aborts if that status is
  # missing or is not 0, which can occur if the network is down, if a bad
  # keyname is provided, or if the wrong IP is given. Returns the command
  # string that was executed.
  def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc)
    local_file_loc = File.expand_path(local_file_loc)

    # map (not each) so that the expanded paths are the ones actually used.
    keys = [public_key_loc].flatten.map { |key| File.expand_path(key) }
    identities = keys.map { |key| "-i #{key}" }.join(' ')

    cmd = "scp #{identities} -o StrictHostkeyChecking=no 2>&1 #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"
    cmd << "; echo $? >> ~/.appscale/retval"

    retval_loc = File.expand_path("~/.appscale/retval")
    FileUtils.rm_f(retval_loc)

    begin
      # nil means "no deadline"; a negative value is not portable across
      # Ruby versions.
      Timeout::timeout(nil) { `#{cmd}` }
    rescue Timeout::Error
      abort("Remotely copying over files failed. Is the destination machine on and reachable from this computer? We tried the following command:\n\n#{cmd}")
    end

    # The backticks above only return once the shell has finished, so the
    # status file is either there now or it never will be; waiting for it
    # would hang forever.
    unless File.exist?(retval_loc)
      abort("\n\n[#{cmd}] did not record its exit status in #{retval_loc}. Is your environment set up properly?")
    end

    retval = File.read(retval_loc).chomp
    abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is your environment set up properly?") if retval != "0"
    return cmd
  end

  # Given the AppScale keyname, reads ~/.appscale/locations-<keyname>.yaml
  # and returns the contents of the given tag. Aborts if that file does not
  # exist. The required flag (default value is true) indicates whether a
  # value must exist for this tag: if set to true, this method aborts if the
  # value doesn't exist or the YAML file is malformed. If the required flag
  # is set to false, it returns nil in either scenario instead.
  def self.get_from_yaml(keyname, tag, required=true)
    location_file = File.expand_path("~/.appscale/locations-#{keyname}.yaml")

    abort("An AppScale instance is not currently running with the provided keyname, \"#{keyname}\".") unless File.exist?(location_file)

    begin
      tree = load_locations_file(location_file)
    rescue *YAML_LOAD_ERRORS
      if required
        abort("The yaml file you provided was malformed. Please correct any errors in it and try again.")
      else
        return nil
      end
    end

    # An empty or scalar document has no tags at all.
    value = tree.is_a?(Hash) ? tree[tag] : nil

    bad_yaml_format_msg = "The file #{location_file} is in the wrong format and doesn't contain a #{tag} tag. Please make sure the file is in the correct format and try again"
    abort(bad_yaml_format_msg) if value.nil? && required
    return value
  end

  # Returns the secret key needed for communication with AppScale's
  # Shadow node. This method is a nice frontend to the get_from_yaml
  # function, as the secret is stored in a YAML file under the :secret tag.
  # The required flag is forwarded to get_from_yaml.
  def self.get_secret_key(keyname, required=true)
    return CommonFunctions.get_from_yaml(keyname, :secret, required)
  end

  # Parses the locations file. Tags are looked up as Symbols (:shadow,
  # :secret), so Symbols must be permitted when the safe loader is
  # available; older YAML libraries fall back to plain load_file.
  def self.load_locations_file(location_file)
    if YAML.respond_to?(:safe_load_file)
      YAML.safe_load_file(location_file, :permitted_classes => [Symbol])
    else
      YAML.load_file(location_file)
    end
  end
  private_class_method :load_locations_file
end
|
data/lib/job.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# Setting verbose to nil here suppresses the otherwise
|
4
|
+
# excessive SSL cert warning messages that will pollute
|
5
|
+
# stderr and worry users unnecessarily.
|
6
|
+
# $VERBOSE is a global, so this silences warnings for the whole process,
# not only for code in this file.
$VERBOSE = nil
|
7
|
+
|
8
|
+
#MPI_RUN_JOB_REQUIRED = %w{ input output code filesystem }
|
9
|
+
#MPI_REQUIRED = %w{ output }
|
10
|
+
#X10_RUN_JOB_REQUIRED = %w{ input output code filesystem }
|
11
|
+
#X10_REQUIRED = %w{ output }
|
12
|
+
#DFSP_RUN_JOB_REQUIRED = %w{ output simulations }
|
13
|
+
#DFSP_REQUIRED = %w{ output }
|
14
|
+
#CEWSSA_RUN_JOB_REQUIRED = %w{ output simulations }
|
15
|
+
#CEWSSA_REQUIRED = %w{ output }
|
16
|
+
#MR_RUN_JOB_REQUIRED = %w{ }
|
17
|
+
#MR_REQUIRED = %w{ output }
|
18
|
+
|
19
|
+
# A list of jobs that require some kind of work to be done before
|
20
|
+
# the actual computation can be performed.
|
21
|
+
# Frozen so the list cannot be altered by accident at runtime; each entry
# must have a matching preprocess_<type> method for do_preprocessing to call.
NEED_PREPROCESSING = %w{ mpi mapreduce }.freeze
|
22
|
+
|
23
|
+
# A set of methods and constants that we've monkey-patched to enable Neptune
|
24
|
+
# support. In the future, it is likely that the only exposed / monkey-patched
|
25
|
+
# method should be job, while the others could probably be folded into either
|
26
|
+
# a Neptune-specific class or into CommonFunctions.
|
27
|
+
# Reopens Object without adding anything to it.
class Object; end
|
29
|
+
|
30
|
+
# Some job types need work done up front (e.g., copying input data or code
# over) before they can be started. If the job's "@type" is listed in
# NEED_PREPROCESSING, this calls the matching preprocess_<type> method with
# job_data and returns its result; for every other type it does nothing and
# returns nil.
def do_preprocessing(job_data)
  kind = job_data["@type"]
  if NEED_PREPROCESSING.include?(kind)
    send(:"preprocess_#{kind}", job_data)
  end
end
|
41
|
+
|
42
|
+
# This preprocessing method copies over the user's MPI
# code to the master node in AppScale - this node will
# then copy it to whoever will run the MPI job.
#
# Reads the local path from job_data["@code"] and the key name from
# job_data["@keyname"], and copies the file to /tmp/thempicode on the
# Shadow node via CommonFunctions.scp_to_shadow. Aborts if no code was
# given or the file does not exist locally.
def preprocess_mpi(job_data)
  code = job_data["@code"]
  # File.expand_path(nil) would raise a TypeError, so an unspecified path is
  # treated the same way as a path that does not exist.
  source_code = code.nil? ? nil : File.expand_path(code)

  unless source_code && File.exist?(source_code)
    file_not_found = "The specified code, #{job_data['@code']}," +
      " didn't exist. Please specify one that exists and try again"
    abort(file_not_found)
  end
  dest_code = "/tmp/thempicode"

  keyname = job_data["@keyname"]
  CommonFunctions.scp_to_shadow(source_code, dest_code, keyname)
end
|
57
|
+
|
58
|
+
# This preprocessing method handles copying data for regular
# Hadoop MapReduce and Hadoop MapReduce Streaming. For the former
# case, we copy over just the JAR the user has given us, and
# in the latter case, we copy over the Map and Reduce files
# that have been specified. If both are given, the JAR wins. In either
# case, if the user has asked us (via "@copy_input") to copy over an input
# file, we do that as well.
#
# Each file is copied to /tmp/<basename> on the Shadow node and the
# corresponding job_data entry is rewritten to that remote path. Aborts if
# a file to copy was not specified or does not exist locally.
def preprocess_mapreduce(job_data)
  # Start from an empty list so that a job naming neither a JAR nor a
  # Map/Reduce pair does not fail on nil.
  items_to_copy = []
  items_to_copy = ["@map", "@reduce"] if job_data["@map"] && job_data["@reduce"]
  items_to_copy = ["@mapreducejar"] if job_data["@mapreducejar"]
  items_to_copy << "@input" if job_data["@copy_input"]

  keyname = job_data["@keyname"]

  items_to_copy.each { |item|
    abort("The #{item} file was not specified.") if job_data[item].nil?

    source = File.expand_path(job_data[item])
    unless File.exist?(source)
      abort("The #{item} file #{source} does not exist.")
    end

    dest = "/tmp/#{File.basename(source)}"
    CommonFunctions.scp_to_shadow(source, dest, keyname)

    job_data[item] = dest
  }
end
|
84
|
+
|
85
|
+
# TODO: actually use me!
|
86
|
+
#def validate_args(list)
|
87
|
+
# list.each do |item|
|
88
|
+
# val = instance_variable_get("@#{item}".to_sym)
|
89
|
+
# abort("FATAL: #{item} was not defined") if val.nil?
|
90
|
+
# end
|
91
|
+
#end
|
92
|
+
|
93
|
+
# This method is the heart of Neptune - here, we take
# blocks of code that the user has written and convert them
# into HPC job requests. At a high level, the user can
# request to run a job, retrieve a job's output, or
# modify the access policy (ACL) for the output of a
# job.
#
# name::  the job type. "output", "get-acl" and "set-acl" are handled
#         specially; anything else is submitted as a new job.
# block:: run first; it is expected to set instance variables (@output,
#         @keyname, @code, ...) which become the job's parameters.
#
# Every instance variable of the receiver is copied into the job data under
# its name as a String (e.g. "@output"). The keyname defaults to "appscale"
# when @keyname is not set. Aborts unless "@output" is given and begins
# with a slash.
#
# Returns the controller's reply for "output" / "get-acl" / "set-acl".
# For any other type, returns :success if the reply ends with
# "job is now running" and :failure otherwise.
def job(name, &block)
  puts "Received a request to run a job."
  puts name
  block.call() if block

  keyname = @keyname.nil? ? "appscale" : @keyname

  shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
  secret = CommonFunctions.get_secret_key(keyname)
  controller = AppControllerClient.new(shadow_ip, secret)

  job_data = {}
  instance_variables.each { |var|
    # instance_variables yields Strings on Ruby 1.8 but Symbols on later
    # versions; every lookup below uses String keys, so normalise here.
    job_data[var.to_s] = instance_variable_get(var)
  }

  job_data["@job"] = nil
  job_data["@type"] = name
  job_data["@keyname"] = keyname

  output = job_data["@output"]
  if output.nil? || output == ""
    abort("Job output must be specified")
  end

  if output[0, 1] != "/"
    abort("Job output must begin with a slash ('/')")
  end

  #if job_data["@can_run_on"].class == Range
  #  job_data["@can_run_on"] = job_data["@can_run_on"].to_a
  #elsif job_data["@can_run_on"].class == Fixnum
  #  job_data["@can_run_on"] = [job_data["@can_run_on"]]
  #end

  puts "job data = #{job_data.inspect}"

  do_preprocessing(job_data)

  case job_data["@type"]
  when "output"
    return controller.get_output(job_data)
  when "get-acl"
    job_data["@type"] = "acl"
    return controller.get_acl(job_data)
  when "set-acl"
    job_data["@type"] = "acl"
    return controller.set_acl(job_data)
  else
    result = controller.start_neptune_job(job_data)
    if result =~ /job is now running\Z/
      return :success
    else
      return :failure
    end
  end
end
|
161
|
+
|