neptune 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,18 @@
  # Programmer: Chris Bunch

+ # A special class of exceptions that are thrown whenever the AppController
+ # experiences an unexpected result.
  class AppControllerException < Exception
  end

+
+ # A class of exceptions that are thrown when the user tries to run a Neptune
+ # job but fails to give us the correct parameters to do so.
  class BadConfigurationException < Exception
  end

+
+ # An exception that is thrown whenever the user specifies a file to use
+ # that does not exist.
  class FileNotFoundException < Exception
  end
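
The comments added above describe exception classes that the rest of the gem raises and rescues. As a rough sketch (illustrative, not part of the package), code that drives the helper methods shown later in this diff can handle them like so:

require 'neptune'

job_data = {}  # deliberately missing the "@code" parameter
begin
  NeptuneHelper.require_param("@code", job_data)
rescue BadConfigurationException => e
  # require_param raises when a required job parameter is absent
  Kernel.puts e.message
end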
@@ -5,54 +5,81 @@ require 'app_controller_client'
  require 'common_functions'
  require 'custom_exceptions'

+
  # Setting verbose to nil here suppresses the otherwise
  # excessive SSL cert warning messages that will pollute
  # stderr and worry users unnecessarily.
  $VERBOSE = nil

- #MPI_RUN_JOB_REQUIRED = %w{ input output code filesystem }
- #MPI_REQUIRED = %w{ output }
- #X10_RUN_JOB_REQUIRED = %w{ input output code filesystem }
- #X10_REQUIRED = %w{ output }
- #DFSP_RUN_JOB_REQUIRED = %w{ output simulations }
- #DFSP_REQUIRED = %w{ output }
- #CEWSSA_RUN_JOB_REQUIRED = %w{ output simulations }
- #CEWSSA_REQUIRED = %w{ output }
- #MR_RUN_JOB_REQUIRED = %w{ }
- #MR_REQUIRED = %w{ output }

  # A list of all the Neptune job types that we support
  ALLOWED_JOB_TYPES = %w{acl cicero compile erlang mpi input output ssa babel upc x10}

+
  # The string to display for disallowed job types.
  JOB_TYPE_NOT_ALLOWED = "The job type you specified is not supported."

+
  # A list of Neptune jobs that do not require nodes to be spawned
  # up for computation
  NO_NODES_NEEDED = ["acl", "input", "output", "compile"]

+
  # A list of Neptune jobs that do not require the output to be
  # specified beforehand
  NO_OUTPUT_NEEDED = ["input"]

+
  # A list of storage mechanisms that we can use to store and retrieve
  # data to for Neptune jobs.
  ALLOWED_STORAGE_TYPES = ["appdb", "gstorage", "s3", "walrus"]

+
  # A list of jobs that require some kind of work to be done before
  # the actual computation can be performed.
  NEED_PREPROCESSING = ["babel", "compile", "erlang", "mpi", "ssa"]

- # A set of methods and constants that we've monkey-patched to enable Neptune
- # support. In the future, it is likely that the only exposed / monkey-patched
- # method should be job, while the others could probably be folded into either
- # a Neptune-specific class or into CommonFunctions.
- # TODO(cbunch): This doesn't look like it does anything - run the integration
- # test and confirm one way or the other.
+
+ # Since we're monkeypatching Object to add neptune() and babel(), a short
+ # blurb is necessary here to make rdoc happy.
  class Object
  end

+
+ # Make neptune() public so that babel() can call it
+ public
+
+
+ # This method is the heart of Neptune - here, we take blocks of code that the
+ # user has written and convert them into HPC job requests. At a high level,
+ # the user can request to run a job, retrieve a job's output, or modify the
+ # access policy (ACL) for the output of a job. By default, job data is private,
+ # but a Neptune job can be used to set it to public later (and vice-versa).
+ def neptune(params)
+   # Kernel.puts "Received a request to run a job."
+   # Kernel.puts params[:type]
+
+   job_data = NeptuneHelper.get_job_data(params)
+   NeptuneHelper.validate_storage_params(job_data)
+   # Kernel.puts "job data = #{job_data.inspect}"
+   keyname = job_data["@keyname"]
+
+   shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
+   secret = CommonFunctions.get_secret_key(keyname)
+   ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
+   ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "
+
+   controller = AppControllerClient.new(shadow_ip, secret)
+   NeptuneHelper.do_preprocessing(job_data, controller)
+   return NeptuneHelper.run_job(job_data, ssh_args, shadow_ip, secret)
+ end
+
+
+ # NeptuneHelper provides methods that are used by neptune() and babel() to
+ # validate parameters and run the user's job.
  module NeptuneHelper
+
+
    # Certain types of jobs need steps to be taken before they
    # can be started (e.g., copying input data or code over).
    # This method dispatches the right method to use based
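
To make the monkeypatched entry point concrete, here is a minimal usage sketch (illustrative, not part of the package); the paths and keyname are placeholders, and the parameter names follow the test suite later in this diff. neptune() converts the {:a => "b"} style hash into the {"@a" => "b"} form that the AppController expects.

require 'neptune'

# Run an MPI job on an existing AppScale deployment.
result = neptune(
  :type => "mpi",
  :code => "/bucket/file.py",    # placeholder path to code already in remote storage
  :executable => "python",
  :nodes_to_use => 1,
  :procs_to_use => 1,
  :output => "/bucket/output",   # placeholder output location
  :keyname => "appscale"         # placeholder AppScale keyname
)

# A follow-up job of type "output" retrieves that job's output.
output_result = neptune(
  :type => "output",
  :output => "/bucket/output",
  :keyname => "appscale"
)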
@@ -69,6 +96,7 @@ module NeptuneHelper
      send(preprocess, job_data, controller)
    end

+
    # This preprocessing method copies over the user's code to the
    # Shadow node so that it can be compiled there. A future version
    # of this method may also copy over libraries as well.
@@ -85,13 +113,16 @@ module NeptuneHelper

      ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no root@#{shadow_ip}"
      remove_dir = "ssh #{ssh_args} 'rm -rf #{dest}' 2>&1"
-     Kernel.puts remove_dir
+     # Kernel.puts remove_dir
      CommonFunctions.shell(remove_dir)
      CommonFunctions.scp_to_shadow(code, dest, keyname, is_dir=true)

      job_data["@code"] = dest
    end

+
+   # This preprocessing method makes sure that the user's Erlang code exists
+   # and copies it over to the AppScale Shadow node.
    def self.preprocess_erlang(job_data, controller)
      self.require_param("@code", job_data)

@@ -106,6 +137,7 @@ module NeptuneHelper
      CommonFunctions.scp_to_shadow(source_code, dest_code, keyname)
    end

+
    # This preprocessing method verifies that the user specified the number of nodes
    # to use. If they also specified the number of processes to use, we also verify
    # that this value is at least as many as the number of nodes (that is, nodes
@@ -138,6 +170,7 @@ module NeptuneHelper
      return job_data
    end

+
    # This preprocessing method verifies that the user specified the number of
    # trajectories to run, via either :trajectories or :simulations. Both should
    # not be specified - only one or the other, and regardless of which they
@@ -157,12 +190,18 @@ module NeptuneHelper
      return job_data
    end

+
+   # This helper method aborts if the given parameter is not present in the
+   # job data provided.
    def self.require_param(param, job_data)
      if !job_data[param]
        raise BadConfigurationException.new("#{param} must be specified")
      end
    end

+
+   # This helper method asks the AppController if the named file exists,
+   # and if it does not, throws an exception.
    def self.require_file_to_exist(file, job_data, controller)
      if controller.does_file_exist?(file, job_data)
        return
@@ -171,6 +210,9 @@ module NeptuneHelper
      end
    end

+
+   # This helper method performs the opposite function of require_file_to_exist,
+   # raising an exception if the named file does exist.
    def self.require_file_to_not_exist(file, job_data, controller)
      begin
        self.require_file_to_exist(file, job_data, controller)
@@ -181,6 +223,7 @@ module NeptuneHelper
      end
    end

+
    # This preprocessing method verifies that the user specified code that
    # should be run, where the output should be placed, and an engine to run over.
    # It also verifies that all files to be used are actually reachable.
@@ -229,6 +272,10 @@ module NeptuneHelper
      end
    end

+
+   # This method takes in a hash in the format that users write neptune/babel
+   # jobs in {:a => "b"} and converts it to the legacy format that Neptune
+   # used to use {"@a" => "b"}, and is understood by the AppController.
    def self.get_job_data(params)
      job_data = {}
      params.each { |k, v|
@@ -277,6 +324,11 @@ module NeptuneHelper
      return job_data
    end

+
+   # This method looks through the given job data and makes sure that the correct
+   # parameters are present for the storage mechanism specified. It throws an
+   # exception if there are errors in the job data or if a needed parameter is
+   # missing.
    def self.validate_storage_params(job_data)
      job_data["@storage"] ||= "appdb"

@@ -297,10 +349,10 @@ module NeptuneHelper
      if storage == "s3"
        ["EC2_ACCESS_KEY", "EC2_SECRET_KEY", "S3_URL"].each { |item|
          if job_data["@#{item}"]
-           Kernel.puts "Using specified #{item}"
+           # Kernel.puts "Using specified #{item}"
          else
            if ENV[item]
-             Kernel.puts "Using #{item} from environment"
+             # Kernel.puts "Using #{item} from environment"
              job_data["@#{item}"] = ENV[item]
            else
              raise BadConfigurationException.new("When storing data to S3, #{item} must be specified or be in " +
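
The S3 branch above accepts credentials either inline in the job parameters or from the environment. A rough sketch of the two equivalent call styles (all values are placeholders):

# Credentials passed explicitly with the job...
neptune(:type => "output", :output => "/bucket/out", :keyname => "appscale",
        :storage => "s3", :EC2_ACCESS_KEY => "my-access-key",
        :EC2_SECRET_KEY => "my-secret-key", :S3_URL => "http://s3.example.com")

# ...or picked up from the environment when the parameters omit them.
ENV['EC2_ACCESS_KEY'] = "my-access-key"
ENV['EC2_SECRET_KEY'] = "my-secret-key"
ENV['S3_URL'] = "http://s3.example.com"
neptune(:type => "output", :output => "/bucket/out", :keyname => "appscale",
        :storage => "s3")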
@@ -313,6 +365,7 @@ module NeptuneHelper
      return job_data
    end

+
    # This method takes a file on the local user's computer and stores it remotely
    # via AppScale. It returns a hash map indicating whether or not the job
    # succeeded and if it failed, the reason for it.
@@ -330,11 +383,11 @@ module NeptuneHelper

      remote = "/tmp/neptune-input-#{rand(100000)}"
      scp_cmd = "scp -r #{ssh_args} #{local_file} root@#{shadow_ip}:#{remote}"
-     Kernel.puts scp_cmd
+     # Kernel.puts scp_cmd
      CommonFunctions.shell(scp_cmd)

      job_data["@local"] = remote
-     Kernel.puts "job data = #{job_data.inspect}"
+     # Kernel.puts "job data = #{job_data.inspect}"
      response = controller.put_input(job_data)
      if response
        return {:result => :success}
@@ -344,24 +397,26 @@ module NeptuneHelper
      end
    end

+
    # This method waits for AppScale to finish compiling the user's code, indicated
    # by AppScale copying the finished code to a pre-determined location.
    def self.wait_for_compilation_to_finish(ssh_args, shadow_ip, compiled_location)
      loop {
        ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'ls #{compiled_location}' 2>&1"
-       Kernel.puts ssh_command
+       # Kernel.puts ssh_command
        ssh_result = CommonFunctions.shell(ssh_command)
-       Kernel.puts "result was [#{ssh_result}]"
+       # Kernel.puts "result was [#{ssh_result}]"
        if ssh_result =~ /No such file or directory/
-         Kernel.puts "Still waiting for code to be compiled..."
+         # Kernel.puts "Still waiting for code to be compiled..."
        else
-         Kernel.puts "compilation complete! Copying compiled code to #{copy_to}"
+         # Kernel.puts "compilation complete! Copying compiled code to #{copy_to}"
          return
        end
        sleep(5)
      }
    end

+
    # This method sends out a request to compile code, waits for it to finish, and
    # gets the standard out and error returned from the compilation. This method
    # returns a hash containing the standard out, error, and a result that indicates
@@ -374,7 +429,7 @@ module NeptuneHelper
      FileUtils.rm_rf(copy_to)

      scp_command = "scp -r #{ssh_args} root@#{shadow_ip}:#{compiled_location} #{copy_to} 2>&1"
-     Kernel.puts scp_command
+     # Kernel.puts scp_command
      CommonFunctions.shell(scp_command)

      code = job_data["@code"]
@@ -383,13 +438,14 @@ module NeptuneHelper

      [remote_dir, compiled_location].each { |remote_files|
        ssh_command = "ssh #{ssh_args} root@#{shadow_ip} 'rm -rf #{remote_files}' 2>&1"
-       Kernel.puts ssh_command
+       # Kernel.puts ssh_command
        CommonFunctions.shell(ssh_command)
      }

      return get_std_out_and_err(copy_to)
    end

+
    # This method returns a hash containing the standard out and standard error
    # from a completed job, as well as a result field that indicates whether or
    # not the job completed successfully (success = no errors).
@@ -411,9 +467,12 @@ module NeptuneHelper
      return result
    end

+
+   # This method uploads a Google App Engine application into AppScale, for use
+   # with Cicero jobs. It requires the AppScale tools to be installed.
    def self.upload_app_for_cicero(job_data)
      if !job_data["@app"]
-       Kernel.puts "No app specified, not uploading..."
+       # Kernel.puts "No app specified, not uploading..."
        return
      end

@@ -431,12 +490,13 @@ module NeptuneHelper
        upload_app = "appscale-upload-app"
      end

-     Kernel.puts "Uploading AppEngine app at #{app_location}"
+     # Kernel.puts "Uploading AppEngine app at #{app_location}"
      upload_command = "#{upload_app} --file #{app_location} --test --keyname #{keyname}"
-     Kernel.puts upload_command
-     Kernel.puts `#{upload_command}`
+     # Kernel.puts upload_command
+     # Kernel.puts `#{upload_command}`
    end

+
    # This method actually runs the Neptune job, given information about the job
    # as well as information about the node to send the request to.
    def self.run_job(job_data, ssh_args, shadow_ip, secret)
@@ -473,30 +533,3 @@ module NeptuneHelper
      return result
    end
  end
-
- # Make neptune() public so that babel() can call it
- public
-
- # This method is the heart of Neptune - here, we take blocks of code that the
- # user has written and convert them into HPC job requests. At a high level,
- # the user can request to run a job, retrieve a job's output, or modify the
- # access policy (ACL) for the output of a job. By default, job data is private,
- # but a Neptune job can be used to set it to public later (and vice-versa).
- def neptune(params)
-   Kernel.puts "Received a request to run a job."
-   Kernel.puts params[:type]
-
-   job_data = NeptuneHelper.get_job_data(params)
-   NeptuneHelper.validate_storage_params(job_data)
-   Kernel.puts "job data = #{job_data.inspect}"
-   keyname = job_data["@keyname"]
-
-   shadow_ip = CommonFunctions.get_from_yaml(keyname, :shadow)
-   secret = CommonFunctions.get_secret_key(keyname)
-   ssh_key = File.expand_path("~/.appscale/#{keyname}.key")
-   ssh_args = "-i ~/.appscale/#{keyname}.key -o StrictHostkeyChecking=no "
-
-   controller = AppControllerClient.new(shadow_ip, secret)
-   NeptuneHelper.do_preprocessing(job_data, controller)
-   return NeptuneHelper.run_job(job_data, ssh_args, shadow_ip, secret)
- end
@@ -7,6 +7,58 @@ require 'test/unit'


  class TestBabel < Test::Unit::TestCase
+   def test_babel_mpi_job
+     keyname = "appscale"
+     file = "/bucket/file.py"
+     params = { :type => "mpi",
+       :code => file,
+       :executable => 'python',
+       :procs_to_use => 1,
+       :nodes_to_use => 1,
+       :storage => "s3",
+       :EC2_ACCESS_KEY => "boo",
+       :EC2_SECRET_KEY => "baz",
+       :S3_URL => "http://baz.com",
+       :keyname => keyname
+     }
+
+     job_data = {}
+     params.each { |k, v|
+       job_data["@#{k}"] = v
+     }
+     job_data["@is_remote"] = true
+
+     output = "/bucket/babel/temp-0123456789"
+     job_data["@output"] = output
+
+     run_job_data = job_data.dup
+     run_job_data["@engine"] = "executor-sqs"
+     run_job_data["@run_local"] = true
+
+     output_job_data = job_data.dup
+     output_job_data["@type"] = "output"
+
+     kernel = flexmock(Kernel)
+     kernel.should_receive(:puts).and_return()
+     kernel.should_receive(:rand).and_return(0,1,2,3,4,5,6,7,8,9)
+     kernel.should_receive(:sleep).and_return()
+
+     flexmock(AppControllerClient).new_instances { |instance|
+       instance.should_receive(:does_file_exist?).with(file, job_data).and_return(true)
+       instance.should_receive(:does_file_exist?).with(output, job_data).and_return(false)
+
+       instance.should_receive(:start_neptune_job).with(run_job_data).and_return("MPI job is now running")
+
+       instance.should_receive(:get_output).with(output_job_data).and_return("output")
+     }
+
+     commonfunctions = flexmock(CommonFunctions)
+     commonfunctions.should_receive(:get_from_yaml).with(keyname, :shadow).and_return("127.0.0.1")
+     commonfunctions.should_receive(:get_secret_key).with(keyname).and_return("secret")
+
+     assert_equal("output", babel(params))
+   end
+
    def test_bad_babel_params
      job_data_no_code_param = {}
      assert_raise(BadConfigurationException) {
@@ -42,15 +94,17 @@ class TestBabel < Test::Unit::TestCase
      ENV['BABEL_BUCKET_NAME'] = "/baz"
      actual_local_2 = BabelHelper.generate_output_location(job_data_local_code)
      assert_equal(expected_local, actual_local_2)
+     ENV['BABEL_BUCKET_NAME'] = nil

      # Not putting the initial slash on the bucket name should be fine too.
      ENV['BABEL_BUCKET_NAME'] = "baz"
      actual_local_3 = BabelHelper.generate_output_location(job_data_local_code)
      assert_equal(expected_local, actual_local_3)
+     ENV['BABEL_BUCKET_NAME'] = nil

      # Finally, if we run a job and specify remote code, that should be used
      # as the bucket.
-     job_data_remote_code = {"@code" => "/baz/boo/code.baz", "@is_remote" => true}
+     job_data_remote_code = {"@code" => "/baz/boo/code.baz", "@storage" => "s3"}
      expected_remote = "/baz/babel/temp-10"

      actual_remote = BabelHelper.generate_output_location(job_data_remote_code)
@@ -108,7 +162,8 @@ class TestBabel < Test::Unit::TestCase
      assert_equal(expected, actual_3)
    end

-   def test_run_job
+   def test_run_babel_job
+     # Running a job with no @type specified means it should be a Babel job
      job_data = {
        "@code" => "/baz/boo/code.baz",
        "@argv" => ["boo", "/remote/babel/baz", "gbaz"]
@@ -131,6 +186,31 @@ class TestBabel < Test::Unit::TestCase
      assert_equal(expected, actual)
    end

+   def test_run_mpi_job
+     # Running a job with @type specified should preserve the job type
+     job_data = {
+       "@type" => "mpi",
+       "@code" => "/baz/boo/code.baz",
+       "@argv" => ["boo", "/remote/babel/baz", "gbaz"]
+     }
+
+     neptune_params = {
+       :type => "mpi",
+       :code => "/baz/boo/code.baz",
+       :argv => ["boo", "/remote/babel/baz", "gbaz"],
+       :run_local => true,
+       :engine => "executor-sqs"
+     }
+
+     result = { :result => :success }
+     kernel = flexmock(Kernel)
+     kernel.should_receive(:neptune).with(neptune_params).and_return(result)
+
+     expected = :success
+     actual = BabelHelper.run_job(job_data)[:result]
+     assert_equal(expected, actual)
+   end
+
    def test_get_output
      job_data = {
        "@output" => "/baz/boo/code.baz"