neptune 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +39 -0
- data/README +50 -0
- data/bin/neptune +16 -0
- data/doc/AppControllerClient.html +468 -0
- data/doc/CommonFunctions.html +368 -0
- data/doc/LICENSE.html +131 -0
- data/doc/Object.html +431 -0
- data/doc/README.html +135 -0
- data/doc/bin/neptune.html +58 -0
- data/doc/created.rid +7 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +90 -0
- data/doc/js/darkfish.js +116 -0
- data/doc/js/jquery.js +32 -0
- data/doc/js/quicksearch.js +114 -0
- data/doc/js/thickbox-compressed.js +10 -0
- data/doc/lib/app_controller_client_rb.html +60 -0
- data/doc/lib/common_functions_rb.html +68 -0
- data/doc/lib/job_rb.html +55 -0
- data/doc/scripts/appscale/add_appserver_rb.html +52 -0
- data/doc/scripts/appscale/add_database_rb.html +52 -0
- data/doc/scripts/appscale/add_loadbalancer_rb.html +52 -0
- data/doc/scripts/appscale/add_slave_rb.html +52 -0
- data/doc/scripts/cewssa/cewssa-output.html +104 -0
- data/doc/scripts/cewssa/get_cewssa_output_rb.html +52 -0
- data/doc/scripts/cewssa/run_cewssa_rb.html +52 -0
- data/doc/scripts/cewssa/test_dfsp_rb.html +52 -0
- data/doc/scripts/dfsp/get_dfsp_output_rb.html +52 -0
- data/doc/scripts/dfsp/make_output_public_rb.html +52 -0
- data/doc/scripts/dfsp/run_dfsp_rb.html +52 -0
- data/doc/scripts/dfsp/see_acl_rb.html +52 -0
- data/doc/scripts/dfsp/test_dfsp_rb.html +52 -0
- data/doc/scripts/mapreduce/get_mapreduce_output_rb.html +52 -0
- data/doc/scripts/mapreduce/input-10.html +114 -0
- data/doc/scripts/mapreduce/input-30.html +114 -0
- data/doc/scripts/mapreduce/input-7.html +104 -0
- data/doc/scripts/mapreduce/map_rb.html +56 -0
- data/doc/scripts/mapreduce/reduce_rb.html +56 -0
- data/doc/scripts/mapreduce/run_java_mr_rb.html +52 -0
- data/doc/scripts/mapreduce/run_mapreduce_rb.html +52 -0
- data/doc/scripts/mpi/Makefile.html +125 -0
- data/doc/scripts/mpi/get_mpi_output_rb.html +52 -0
- data/doc/scripts/mpi/hw2_c.html +52 -0
- data/doc/scripts/mpi/hw2harness_c.html +52 -0
- data/doc/scripts/mpi/hw2harness_h.html +52 -0
- data/doc/scripts/mpi/run_mpi_cpi_rb.html +52 -0
- data/doc/scripts/mpi/run_mpi_powermethod_rb.html +52 -0
- data/doc/scripts/mpi/test_rb.html +52 -0
- data/doc/scripts/x10/Makefile.html +106 -0
- data/doc/scripts/x10/get_x10_output_rb.html +52 -0
- data/doc/scripts/x10/run_x10_kmeans_rb.html +52 -0
- data/doc/scripts/x10/run_x10_montypi_rb.html +52 -0
- data/doc/scripts/x10/run_x10_nqueens_rb.html +52 -0
- data/doc/scripts/x10/run_x10_powermethod_rb.html +52 -0
- data/doc/scripts/x10/run_x10_qsort_rb.html +52 -0
- data/doc/scripts/x10/test_rb.html +52 -0
- data/lib/app_controller_client.rb +135 -0
- data/lib/common_functions.rb +107 -0
- data/lib/job.rb +161 -0
- metadata +145 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
4
|
+
|
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
6
|
+
<head>
|
|
7
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
|
8
|
+
|
|
9
|
+
<title>File: run_x10_qsort.rb [RDoc Documentation]</title>
|
|
10
|
+
|
|
11
|
+
<link type="text/css" media="screen" href="../../rdoc.css" rel="stylesheet" />
|
|
12
|
+
|
|
13
|
+
<script src="../../js/jquery.js" type="text/javascript"
|
|
14
|
+
charset="utf-8"></script>
|
|
15
|
+
<script src="../../js/thickbox-compressed.js" type="text/javascript"
|
|
16
|
+
charset="utf-8"></script>
|
|
17
|
+
<script src="../../js/quicksearch.js" type="text/javascript"
|
|
18
|
+
charset="utf-8"></script>
|
|
19
|
+
<script src="../../js/darkfish.js" type="text/javascript"
|
|
20
|
+
charset="utf-8"></script>
|
|
21
|
+
</head>
|
|
22
|
+
|
|
23
|
+
<body class="file file-popup">
|
|
24
|
+
<div id="metadata">
|
|
25
|
+
<dl>
|
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
|
27
|
+
<dd class="modified-date">Mon Dec 27 19:27:35 -0800 2010</dd>
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
<dt class="requires">Requires</dt>
|
|
31
|
+
<dd class="requires">
|
|
32
|
+
<ul>
|
|
33
|
+
|
|
34
|
+
</ul>
|
|
35
|
+
</dd>
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
</dl>
|
|
40
|
+
</div>
|
|
41
|
+
|
|
42
|
+
<div id="documentation">
|
|
43
|
+
|
|
44
|
+
<div class="description">
|
|
45
|
+
<h2>Description</h2>
|
|
46
|
+
|
|
47
|
+
</div>
|
|
48
|
+
|
|
49
|
+
</div>
|
|
50
|
+
</body>
|
|
51
|
+
</html>
|
|
52
|
+
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
4
|
+
|
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
6
|
+
<head>
|
|
7
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
|
8
|
+
|
|
9
|
+
<title>File: test.rb [RDoc Documentation]</title>
|
|
10
|
+
|
|
11
|
+
<link type="text/css" media="screen" href="../../rdoc.css" rel="stylesheet" />
|
|
12
|
+
|
|
13
|
+
<script src="../../js/jquery.js" type="text/javascript"
|
|
14
|
+
charset="utf-8"></script>
|
|
15
|
+
<script src="../../js/thickbox-compressed.js" type="text/javascript"
|
|
16
|
+
charset="utf-8"></script>
|
|
17
|
+
<script src="../../js/quicksearch.js" type="text/javascript"
|
|
18
|
+
charset="utf-8"></script>
|
|
19
|
+
<script src="../../js/darkfish.js" type="text/javascript"
|
|
20
|
+
charset="utf-8"></script>
|
|
21
|
+
</head>
|
|
22
|
+
|
|
23
|
+
<body class="file file-popup">
|
|
24
|
+
<div id="metadata">
|
|
25
|
+
<dl>
|
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
|
27
|
+
<dd class="modified-date">Wed Dec 29 14:05:27 -0800 2010</dd>
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
<dt class="requires">Requires</dt>
|
|
31
|
+
<dd class="requires">
|
|
32
|
+
<ul>
|
|
33
|
+
|
|
34
|
+
</ul>
|
|
35
|
+
</dd>
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
</dl>
|
|
40
|
+
</div>
|
|
41
|
+
|
|
42
|
+
<div id="documentation">
|
|
43
|
+
|
|
44
|
+
<div class="description">
|
|
45
|
+
<h2>Description</h2>
|
|
46
|
+
|
|
47
|
+
</div>
|
|
48
|
+
|
|
49
|
+
</div>
|
|
50
|
+
</body>
|
|
51
|
+
</html>
|
|
52
|
+
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Programmer: Chris Bunch
|
|
3
|
+
|
|
4
|
+
require 'openssl'
|
|
5
|
+
require 'soap/rpc/driver'
|
|
6
|
+
require 'timeout'
|
|
7
|
+
|
|
8
|
+
# Sometimes SOAP calls take a long time if large amounts of data are being
|
|
9
|
+
# sent over the network: for this first version we don't want these calls to
|
|
10
|
+
# endlessly time out and retry, so as a hack, just don't let them time out.
|
|
11
|
+
# The next version should replace this and properly timeout and not use
|
|
12
|
+
# long calls unless necessary.
|
|
13
|
+
NO_TIMEOUT = -1
|
|
14
|
+
|
|
15
|
+
# A client that uses SOAP messages to communicate with the underlying cloud
# platform (here, AppScale). This client is similar to that used in the AppScale
# Tools, but with non-Neptune SOAP calls removed.
class AppControllerClient
  # conn is the SOAP driver built in the constructor; ip and secret are the
  # values the constructor was given.
  attr_reader :conn, :ip, :secret

  # A constructor that requires both the IP address of the machine to
  # communicate with as well as the secret (string) needed to perform
  # communication. AppControllers will reject SOAP calls if this secret
  # (basically a password) is not present - it can be found in the user's
  # .appscale directory, and a helper method is usually present to fetch
  # this for us.
  #
  # The driver is pointed at https://<ip>:17443 and the four Neptune SOAP
  # methods are registered on it, each taking (job_data, secret).
  def initialize(ip, secret)
    @ip = ip
    @secret = secret

    @conn = SOAP::RPC::Driver.new("https://#{@ip}:17443")
    @conn.add_method("neptune_start_job", "job_data", "secret")
    @conn.add_method("neptune_get_output", "job_data", "secret")
    @conn.add_method("neptune_get_acl", "job_data", "secret")
    @conn.add_method("neptune_set_acl", "job_data", "secret")
  end

  # A helper method to make SOAP calls for us. This method is mainly here to
  # reduce code duplication: all SOAP calls expect a certain timeout and can
  # tolerate certain exceptions, so we consolidate this code into this method.
  #
  # time            - the timeout for the call in seconds. nil or any negative
  #                   value (such as NO_TIMEOUT) means "never time out".
  # retry_on_except - whether a refused connection or an unexpected error
  #                   should be retried (true) or should abort the program
  #                   with a message (false).
  #
  # SSL errors, NotImplementedError and timeouts are always retried,
  # regardless of retry_on_except. Retries are not bounded.
  #
  # Returns the result of the given block, or nil if no block was given.
  def make_call(time, retry_on_except)
    # Timeout::timeout only documents nil (or 0) as "no timeout"; a negative
    # number is not a valid duration, so translate it before making the call.
    limit = (time.nil? || time < 0) ? nil : time

    begin
      Timeout::timeout(limit) {
        yield if block_given?
      }
    rescue Errno::ECONNREFUSED
      if retry_on_except
        retry
      else
        abort("Connection was refused. Is the AppController running?")
      end
    rescue OpenSSL::SSL::SSLError, NotImplementedError, Timeout::Error
      retry
    rescue StandardError => except
      # StandardError rather than Exception: Interrupt (Ctrl-C) and SystemExit
      # must be allowed to end the program instead of being retried or
      # reported as an "unexpected error".
      if retry_on_except
        retry
      else
        abort("We saw an unexpected error of the type #{except.class} with the following message:\n#{except}.")
      end
    end
  end

  # Initiates the start of a Neptune job, whether it be a HPC job (MPI, X10,
  # or MapReduce), or a scaling job (e.g., for AppScale itself). This method
  # should not be used for retrieving the output of a job or getting / setting
  # output ACLs, but just for starting new HPC / scaling jobs. This method
  # takes a hash containing the parameters of the job to run, and aborts if
  # the AppController it calls returns a string containing "Error:" or if the
  # connection is refused. Otherwise, the return value of this method
  # is the result returned from the AppController.
  def start_neptune_job(job_data)
    checked_soap_call { conn.neptune_start_job(job_data, @secret) }
  end

  # Retrieves the output of a Neptune job, stored in an underlying
  # database. Within AppScale, a special application runs, referred to as the
  # Repository, which provides a key-value interface to Neptune job data.
  # Data is stored as though it were on a file system, therefore output
  # should be of the usual form /folder/filename . Currently the contents of
  # the file are returned as a string to the caller, but as this is
  # inefficient for non-trivial output jobs, the next version of Neptune will
  # add an additional call to directly copy the output to a file on the local
  # filesystem. See start_neptune_job for conditions by which this method
  # can abort as well as the input format used for job_data.
  def get_output(job_data)
    checked_soap_call { conn.neptune_get_output(job_data, @secret) }
  end

  # Returns the ACL associated with the named piece of data stored
  # in the underlying cloud platform. Right now, data can only be
  # public or private, but future versions will add individual user
  # support. Input, output, and exceptions mirror that of
  # start_neptune_job.
  def get_acl(job_data)
    checked_soap_call { conn.neptune_get_acl(job_data, @secret) }
  end

  # Sets the ACL of a specified piece of data stored in the underlying
  # cloud platform. As is the case with get_acl, ACLs can be either
  # public or private right now, but this will be expanded upon in
  # the future. As with the other SOAP calls, input, output, and exceptions
  # mirror that of start_neptune_job.
  def set_acl(job_data)
    checked_soap_call { conn.neptune_set_acl(job_data, @secret) }
  end

  private

  # Runs the given SOAP call through make_call with no timeout and no retry
  # on refused connections, aborts with the response as the message if it is
  # a string containing "Error:", and otherwise returns the response.
  def checked_soap_call
    result = make_call(NO_TIMEOUT, false) { yield }
    # Only strings are pattern-matched, so a non-string response is handed
    # back untouched instead of raising on =~.
    abort(result) if result.is_a?(String) && result =~ /Error:/
    result
  end
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Programmer: Chris Bunch
|
|
3
|
+
|
|
4
|
+
require 'digest/sha1'
|
|
5
|
+
require 'fileutils'
|
|
6
|
+
require 'net/http'
|
|
7
|
+
require 'openssl'
|
|
8
|
+
require 'socket'
|
|
9
|
+
require 'timeout'
|
|
10
|
+
require 'yaml'
|
|
11
|
+
|
|
12
|
+
# A helper module that aggregates functions that are not part of Neptune's
# core functionality. Specifically, this module contains methods to scp
# files to other machines and the ability to read YAML files, which are
# often needed to determine which machine should be used for computation
# or to copy over code and input files.
module CommonFunctions

  # Copies a file to the Shadow node (head node) within AppScale.
  #
  # local_file_loc  - path of the file on this machine
  # remote_file_loc - path the file should be written to on the Shadow node
  # keyname         - AppScale keyname; used to look up the Shadow's IP in
  #                   ~/.appscale/locations-<keyname>.yaml and to select the
  #                   SSH key ~/.appscale/<keyname>.key
  #
  # Returns the scp command that was run (see scp_file).
  def self.scp_to_shadow(local_file_loc, remote_file_loc, keyname)
    shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
    ssh_key = File.expand_path("~/.appscale/#{keyname}.key")

    self.scp_file(local_file_loc, remote_file_loc, shadow_ip, ssh_key)
  end

  # Performs the actual remote copying of files: given the IP address
  # and other information from scp_to_shadow, uses scp to copy the file
  # over as root. Aborts if scp exits with a non-zero status, which can
  # occur if the network is down, if a bad keyname is provided, or if the
  # wrong IP is given.
  #
  # public_key_loc may be a single key path or an Array of key paths; each
  # one is passed to scp with its own -i flag.
  #
  # Returns the scp command line that was executed.
  def self.scp_file(local_file_loc, remote_file_loc, target_ip, public_key_loc)
    local_file_loc = File.expand_path(local_file_loc)

    # map (not each) so that the expanded paths are the ones actually used.
    key_paths = Array(public_key_loc).map { |key| File.expand_path(key) }
    identity_flags = key_paths.map { |key| "-i #{key}" }.join(' ')

    # NOTE(review): the paths are interpolated into a shell command without
    # quoting, so paths containing spaces or shell metacharacters will not
    # work as intended - confirm whether callers can supply such paths.
    cmd = "scp #{identity_flags} -o StrictHostkeyChecking=no 2>&1 #{local_file_loc} root@#{target_ip}:#{remote_file_loc}"

    # Backticks block until scp finishes and leave its exit status in $?,
    # so no timeout wrapper or status file is needed.
    `#{cmd}`
    retval = $?.exitstatus

    abort("\n\n[#{cmd}] returned #{retval} instead of 0 as expected. Is your environment set up properly?") unless $?.success?
    return cmd
  end

  # Given the AppScale keyname, reads the associated YAML file and returns
  # the contents of the given tag. The required flag (default value is true)
  # indicates whether a value must exist for this tag: if set to true, this
  # method aborts if the value doesn't exist or the YAML file is malformed.
  # If the required flag is set to false, it returns nil in either scenario
  # instead. Regardless of the flag, this method aborts if the locations
  # file for the keyname does not exist.
  def self.get_from_yaml(keyname, tag, required=true)
    location_file = File.expand_path("~/.appscale/locations-#{keyname}.yaml")

    abort("An AppScale instance is not currently running with the provided keyname, \"#{keyname}\".") unless File.exist?(location_file)

    begin
      tree = load_locations_file(location_file)
    rescue ArgumentError, Psych::Exception
      # Older YAML parsers report bad input with ArgumentError; Psych uses
      # its own exception hierarchy.
      if required
        abort("The yaml file you provided was malformed. Please correct any errors in it and try again.")
      else
        return nil
      end
    end

    # An empty file or a non-mapping document has no tags to look up.
    value = tree.is_a?(Hash) ? tree[tag] : nil

    bad_yaml_format_msg = "The file #{location_file} is in the wrong format and doesn't contain a #{tag} tag. Please make sure the file is in the correct format and try again"
    abort(bad_yaml_format_msg) if value.nil? and required
    return value
  end

  # Returns the secret key needed for communication with AppScale's
  # Shadow node. This method is a nice frontend to the get_from_yaml
  # function, as the secret is stored in a YAML file under the :secret tag.
  # The required flag has the same meaning as in get_from_yaml.
  def self.get_secret_key(keyname, required=true)
    return CommonFunctions.get_from_yaml(keyname, :secret, required)
  end

  # Parses the given YAML file. Tags are looked up as Symbols (:shadow,
  # :secret), so Symbols must be permitted where the YAML library loads
  # safely by default; aliases are permitted as well since YAML dumps of
  # repeated values may contain them.
  def self.load_locations_file(path)
    if YAML.respond_to?(:safe_load_file)
      YAML.safe_load_file(path, :permitted_classes => [Symbol], :aliases => true)
    else
      YAML.load_file(path)
    end
  end
  private_class_method :load_locations_file
end
|
data/lib/job.rb
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
|
|
3
|
+
# Setting verbose to nil here suppresses the otherwise
|
|
4
|
+
# excessive SSL cert warning messages that will pollute
|
|
5
|
+
# stderr and worry users unnecessarily.
|
|
6
|
+
$VERBOSE = nil
|
|
7
|
+
|
|
8
|
+
#MPI_RUN_JOB_REQUIRED = %w{ input output code filesystem }
|
|
9
|
+
#MPI_REQUIRED = %w{ output }
|
|
10
|
+
#X10_RUN_JOB_REQUIRED = %w{ input output code filesystem }
|
|
11
|
+
#X10_REQUIRED = %w{ output }
|
|
12
|
+
#DFSP_RUN_JOB_REQUIRED = %w{ output simulations }
|
|
13
|
+
#DFSP_REQUIRED = %w{ output }
|
|
14
|
+
#CEWSSA_RUN_JOB_REQUIRED = %w{ output simulations }
|
|
15
|
+
#CEWSSA_REQUIRED = %w{ output }
|
|
16
|
+
#MR_RUN_JOB_REQUIRED = %w{ }
|
|
17
|
+
#MR_REQUIRED = %w{ output }
|
|
18
|
+
|
|
19
|
+
# A list of jobs that require some kind of work to be done before
|
|
20
|
+
# the actual computation can be performed.
|
|
21
|
+
NEED_PREPROCESSING = %w{ mpi mapreduce }
|
|
22
|
+
|
|
23
|
+
# A set of methods and constants that we've monkey-patched to enable Neptune
# support. In the future, it is likely that the only exposed / monkey-patched
# method should be job, while the others could probably be folded into either
# a Neptune-specific class or into CommonFunctions.
#
# The reopened class body itself is empty: the methods that follow are
# defined at the top level of the file rather than inside this class.
class Object; end
|
|
29
|
+
|
|
30
|
+
# Some job types need work done before they can be started (e.g., copying
# input data or code over). For a job whose "@type" is listed in
# NEED_PREPROCESSING, this calls the method named preprocess_<type> with
# the job data and returns its result; for any other type it does nothing
# and returns nil.
def do_preprocessing(job_data)
  kind = job_data["@type"]

  if NEED_PREPROCESSING.include?(kind)
    # The whitelist check above bounds which method names can be built here.
    handler = "preprocess_#{kind}".to_sym
    send(handler, job_data)
  end
end
|
|
41
|
+
|
|
42
|
+
# This preprocessing method copies over the user's MPI
# code to the master node in AppScale - this node will
# then copy it to whoever will run the MPI job.
#
# Reads the local code location from job_data["@code"] and the keyname
# from job_data["@keyname"], and copies the code to /tmp/thempicode on
# the Shadow node. Aborts if no code was specified or if the specified
# file does not exist locally.
def preprocess_mpi(job_data)
  code = job_data["@code"]
  # A missing "@code" is reported with the same message as a missing file
  # rather than failing inside File.expand_path.
  source_code = code.nil? ? nil : File.expand_path(code)

  unless source_code && File.exist?(source_code)
    file_not_found = "The specified code, #{job_data['@code']}," +
      " didn't exist. Please specify one that exists and try again"
    abort(file_not_found)
  end
  dest_code = "/tmp/thempicode"

  keyname = job_data["@keyname"]
  CommonFunctions.scp_to_shadow(source_code, dest_code, keyname)
end
|
|
57
|
+
|
|
58
|
+
# This preprocessing method handles copying data for regular
# Hadoop MapReduce and Hadoop MapReduce Streaming. For the former
# case, we copy over just the JAR the user has given us, and
# in the latter case, we copy over the Map and Reduce files
# that have been specified. In either case, if the user has
# specified to us to copy over an input file, we do that as well:
# AppScale will copy it into HDFS for us.
#
# If both a JAR ("@mapreducejar") and Map / Reduce files ("@map",
# "@reduce") are given, only the JAR is copied. Each copied file is placed
# in /tmp on the Shadow node under its own file name, and the matching
# entry in job_data is rewritten to that remote path. Aborts if neither
# form of code was given, or if a file to copy is unspecified or does not
# exist locally.
def preprocess_mapreduce(job_data)
  items_to_copy = []
  items_to_copy = ["@map", "@reduce"] if job_data["@map"] and job_data["@reduce"]
  items_to_copy = ["@mapreducejar"] if job_data["@mapreducejar"]

  if items_to_copy.empty?
    abort("MapReduce jobs must specify either a map and a reduce file, or a mapreducejar.")
  end

  items_to_copy << "@input" if job_data["@copy_input"]

  keyname = job_data["@keyname"]
  items_to_copy.each { |item|
    # Only "@input" can be unset at this point (copy requested without a file).
    abort("The #{item} file was not specified.") if job_data[item].nil?

    source = File.expand_path(job_data[item])
    unless File.exist?(source)
      abort("The #{item} file #{source} does not exist.")
    end

    dest = "/tmp/#{File.basename(source)}"
    CommonFunctions.scp_to_shadow(source, dest, keyname)

    job_data[item] = dest
  }
end
|
|
84
|
+
|
|
85
|
+
# TODO: actually use me!
|
|
86
|
+
#def validate_args(list)
|
|
87
|
+
# list.each do |item|
|
|
88
|
+
# val = instance_variable_get("@#{item}".to_sym)
|
|
89
|
+
# abort("FATAL: #{item} was not defined") if val.nil?
|
|
90
|
+
# end
|
|
91
|
+
#end
|
|
92
|
+
|
|
93
|
+
# This method is the heart of Neptune - here, we take
# blocks of code that the user has written and convert them
# into HPC job requests. At a high level, the user can
# request to run a job, retrieve a job's output, or
# modify the access policy (ACL) for the output of a
# job. By default, job data is private, but a Neptune
# job can be used to set it to public later (and
# vice-versa).
#
# name  - the type of job: "output", "get-acl" and "set-acl" are handled
#         by the matching AppController call, anything else is started as
#         a new job.
# block - code that sets instance variables (@output, @code, @keyname, ...)
#         describing the job; every instance variable visible on self
#         after the block runs is sent as job data, keyed by its name
#         (including the leading "@").
#
# Aborts if @output is missing, empty, or does not begin with a slash.
# Returns the AppController's response for "output" / "get-acl" /
# "set-acl" requests; for other jobs returns :success if the response ends
# with "job is now running" and :failure otherwise.
def job(name, &block)
  puts "Received a request to run a job."
  puts name
  block.call if block

  keyname = @keyname.nil? ? "appscale" : @keyname

  shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
  secret = CommonFunctions.get_secret_key(keyname)
  controller = AppControllerClient.new(shadow_ip, secret)

  job_data = {}
  instance_variables.each { |var|
    # instance_variables yields Symbols on Ruby 1.9+ and Strings on 1.8;
    # the rest of this method (and the preprocessing methods) use String keys.
    job_data[var.to_s] = instance_variable_get(var)
  }

  job_data["@job"] = nil
  job_data["@type"] = name
  job_data["@keyname"] = keyname

  output = job_data["@output"]
  abort("Job output must be specified") if output.nil? || output == ""
  abort("Job output must begin with a slash ('/')") unless output[0, 1] == "/"

  #if job_data["@can_run_on"].class == Range
  #  job_data["@can_run_on"] = job_data["@can_run_on"].to_a
  #elsif job_data["@can_run_on"].class == Fixnum
  #  job_data["@can_run_on"] = [job_data["@can_run_on"]]
  #end

  puts "job data = #{job_data.inspect}"

  do_preprocessing(job_data)

  case job_data["@type"]
  when "output"
    return controller.get_output(job_data)
  when "get-acl"
    # Both ACL requests are sent with the type "acl"; which SOAP call is
    # made is what distinguishes them.
    job_data["@type"] = "acl"
    return controller.get_acl(job_data)
  when "set-acl"
    job_data["@type"] = "acl"
    return controller.set_acl(job_data)
  else
    result = controller.start_neptune_job(job_data)
    return result.to_s =~ /job is now running\Z/ ? :success : :failure
  end
end
|
|
161
|
+
|