ruby-cute 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.yardopts +2 -0
- data/Gemfile +6 -0
- data/README.md +137 -6
- data/Rakefile +48 -0
- data/bin/cute +22 -0
- data/debian/changelog +5 -0
- data/debian/compat +1 -0
- data/debian/control +15 -0
- data/debian/copyright +33 -0
- data/debian/ruby-cute.docs +2 -0
- data/debian/ruby-tests.rb +2 -0
- data/debian/rules +19 -0
- data/debian/source/format +1 -0
- data/debian/watch +2 -0
- data/examples/distem-bootstrap +516 -0
- data/examples/g5k_exp1.rb +41 -0
- data/examples/g5k_exp_virt.rb +129 -0
- data/lib/cute.rb +7 -2
- data/lib/cute/bash.rb +337 -0
- data/lib/cute/configparser.rb +404 -0
- data/lib/cute/execute.rb +272 -0
- data/lib/cute/extensions.rb +38 -0
- data/lib/cute/g5k_api.rb +1190 -0
- data/lib/cute/net-ssh.rb +144 -0
- data/lib/cute/net.rb +29 -0
- data/lib/cute/synchronization.rb +89 -0
- data/lib/cute/taktuk.rb +554 -0
- data/lib/cute/version.rb +3 -0
- data/ruby-cute.gemspec +32 -0
- data/spec/extensions_spec.rb +17 -0
- data/spec/g5k_api_spec.rb +192 -0
- data/spec/spec_helper.rb +66 -0
- data/spec/taktuk_spec.rb +129 -0
- data/test/test_bash.rb +71 -0
- metadata +204 -47
@@ -0,0 +1,38 @@
|
|
1
|
+
# Extends the class String to support timespan formats
# (e.g. "01:30:00", "90", "2h", "forever").
class String

  # Converts a timespan description into a number of seconds.
  #
  # Accepted forms:
  # - 'always', 'forever', 'infinitely' => Float::INFINITY
  # - "HH:MM" or "HH:MM:SS"            => Integer number of seconds
  # - "<number><unit>" where unit is '', 's', 'm', 'h' or 'd' => Float
  #
  # @return [Numeric] the number of seconds
  # @raise [RuntimeError] if the string cannot be parsed or the unit is unknown
  def to_secs
    # Float::INFINITY is used so that this method does not depend on a
    # top-level Infinity constant being defined somewhere else.
    return Float::INFINITY if [ 'always', 'forever', 'infinitely' ].include?(self.to_s)

    # String#to_i never raises (non-numeric parts become 0), so only the
    # number of colon-separated fields decides whether this branch applies.
    parts = self.split(':').map { |x| x.to_i }
    if [ 2, 3 ].include?(parts.length)
      # zip stops at the shortest list: "HH:MM" only uses the 3600 and 60 weights
      return parts.zip([ 3600, 60, 1 ]).map { |x, y| x * y }.reduce(:+)
    end

    m = /^(\d+|\d+\.\d*)\s*(\w*)?$/.match(self)
    raise "Invalid timespan: '#{self}'" if m.nil?
    num, unit = m.captures
    mul = case unit
          when '' then 1
          when 's' then 1
          when 'm' then 60
          when 'h' then 60 * 60
          when 'd' then 24 * 60 * 60
          else nil
          end
    raise "Unknown timespan unit: '#{unit}' in #{self}" if mul.nil?
    return num.to_f * mul
  end

  # Converts a timespan (any form accepted by #to_secs) into "HH:MM",
  # rounding up to the next full minute.
  #
  # @return [String] the timespan formatted as hours and minutes
  def to_time
    # Round up before splitting into hours and minutes so that the carry
    # propagates to the hours (3599 seconds => "01:00", not "00:60").
    total_minutes = (self.to_secs.to_i + 59) / 60
    hours, minutes = total_minutes.divmod(60)
    return '%.02d:%.02d' % [ hours, minutes ]
  end

  # @return [Boolean] true if the whole string is an optionally signed integer
  def is_i?
    /\A[-+]?\d+\z/ === self
  end

end
|
data/lib/cute/g5k_api.rb
ADDED
@@ -0,0 +1,1190 @@
|
|
1
|
+
require 'restclient'
|
2
|
+
require 'yaml'
|
3
|
+
require 'json'
|
4
|
+
require 'ipaddress'
|
5
|
+
require 'uri'
|
6
|
+
|
7
|
+
module Cute
|
8
|
+
module G5K
|
9
|
+
|
10
|
+
# = {Cute::G5K} exceptions
|
11
|
+
#
|
12
|
+
# The generated exceptions are divided in 5 groups:
|
13
|
+
#
|
14
|
+
# - {Cute::G5K::BadRequest BadRequest} it means that the syntax you passed to some {Cute::G5K::API G5K::API} method is not correct from
|
15
|
+
# the Grid'5000 services point of view.
|
16
|
+
# - {Cute::G5K::RequestFailed RequestFailed} it means that there is a server problem or there is nothing the user can do to solve the problem.
|
17
|
+
# - {Cute::G5K::NotFound} it means that the requested resources do not exist.
|
18
|
+
# - {Cute::G5K::Unauthorized} it means that there is an authentication problem.
|
19
|
+
# - {Cute::G5K::EventTimeout} this exception is triggered by the methods that wait for events such as:
|
20
|
+
# job submission and environment deployment.
|
21
|
+
class Error < Exception
  # NOTE: this class derives from Exception (not StandardError), so a bare
  # `rescue` does not catch it; the superclass is left as is for compatibility.

  attr_accessor :orig # Original exception

  # @param message [String] human readable description of the problem
  # @param object [Object] the original exception wrapped by this error
  def initialize(message = nil, object = nil)
    super(message)
    self.orig = object
  end

  # Forwards any unknown message (with its arguments and block) to the
  # wrapped original exception, so that its attributes can be read
  # directly on the wrapper.
  def method_missing(method, *args, &block)
    # Without a wrapped object there is nothing to delegate to
    return super if orig.nil?
    return orig.send(method, *args, &block)
  end

  # Keeps respond_to? consistent with the delegation done in method_missing.
  def respond_to_missing?(method, include_private = false)
    (!orig.nil? && orig.respond_to?(method, include_private)) || super
  end
end
|
33
|
+
|
34
|
+
# It wraps the http response 400 that corresponds to a bad request.
|
35
|
+
# When using the {Cute::G5K::API#reserve reserve} or {Cute::G5K::API#deploy deploy} methods this could mean:
|
36
|
+
# a bad syntax in the request, not valid properties in the request,
|
37
|
+
# not enough resources to supply the request, non existing environment, etc.
|
38
|
+
#
|
39
|
+
# = Example
|
40
|
+
#
|
41
|
+
# You can handle this exception and decide what to do with your experiment.
|
42
|
+
# In the example below, we iterate over all sites until a site has resources with the property 'ib20g' set to 'YES'.
|
43
|
+
#
|
44
|
+
# require 'cute'
|
45
|
+
#
|
46
|
+
# g5k = Cute::G5K::API.new()
|
47
|
+
#
|
48
|
+
# sites = g5k.site_uids
|
49
|
+
#
|
50
|
+
# sites.each do |site|
|
51
|
+
#
|
52
|
+
# begin
|
53
|
+
# job = g5k.reserve(:site => site, :resources => "{ib20g='YES'}/nodes=2/core=1",:walltime => '00:30:00', :keys => "~/my_ssh_jobkey" )
|
54
|
+
# rescue Cute::G5K::BadRequest
|
55
|
+
# puts "Resource not available in this site, trying with another one"
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# end
|
59
|
+
class BadRequest < Error; end
|
61
|
+
|
62
|
+
# It wraps all Restclient exceptions with http codes: 403, 405,406, 412, 415, 500, 502, 503 and 504.
|
63
|
+
class RequestFailed < Error; end
|
65
|
+
|
66
|
+
# It wraps the exceptions generated by Timeout::Error
|
67
|
+
class EventTimeout < Error; end
|
69
|
+
|
70
|
+
# It wraps the Restclient exception 404
|
71
|
+
class NotFound < Error; end
|
73
|
+
|
74
|
+
# It wraps the Restclient exception RestClient::Unauthorized
|
75
|
+
class Unauthorized < Error; end
|
77
|
+
|
78
|
+
|
79
|
+
# @api private
|
80
|
+
class G5KArray < Array

  # Collects the 'uid' entry of every element of the array.
  def uids
    map { |element| element['uid'] }
  end

  # Shortcut for rel('self').
  def rel_self
    rel('self')
  end

  # Looks up the 'href' of the link whose 'rel' equals r.
  # NOTE(review): this indexes the array with the String 'links'; Array#[]
  # expects integer/range indices, so this call appears to raise TypeError.
  # Confirm whether this method is ever used on arrays.
  def rel(r)
    links = self['links']
    matching = links.detect { |link| link['rel'] == r }
    matching['href']
  end

end
|
95
|
+
|
96
|
+
# Provides an abstraction for handling G5K responses.
|
97
|
+
# @api private
|
98
|
+
# @see https://api.grid5000.fr/doc/3.0/reference/grid5000-media-types.html
|
99
|
+
# When this structure is used to describe jobs, it is expected to have the
|
100
|
+
# following fields which depend on the version of the API.
|
101
|
+
# {"uid"=>604692,
|
102
|
+
# "user_uid"=>"name",
|
103
|
+
# "user"=>"name",
|
104
|
+
# "walltime"=>3600,
|
105
|
+
# "queue"=>"default",
|
106
|
+
# "state"=>"running",
|
107
|
+
# "project"=>"default",
|
108
|
+
# "name"=>"rubyCute job",
|
109
|
+
# "types"=>["deploy"],
|
110
|
+
# "mode"=>"PASSIVE",
|
111
|
+
# "command"=>"./oarapi.subscript.ZzvnM",
|
112
|
+
# "submitted_at"=>1423575384,
|
113
|
+
# "scheduled_at"=>1423575386,
|
114
|
+
# "started_at"=>1423575386,
|
115
|
+
# "message"=>"FIFO scheduling OK",
|
116
|
+
# "properties"=>"(deploy = 'YES') AND maintenance = 'NO'",
|
117
|
+
# "directory"=>"/home/name",
|
118
|
+
# "events"=>[],
|
119
|
+
# "links"=>[{"rel"=>"self", "href"=>"/sid/sites/nancy/jobs/604692", "type"=>"application/vnd.grid5000.item+json"},
|
120
|
+
# {"rel"=>"parent", "href"=>"/sid/sites/nancy", "type"=>"application/vnd.grid5000.item+json"}],
|
121
|
+
# "resources_by_type"=>
|
122
|
+
# {"cores"=>
|
123
|
+
# ["griffon-8.nancy.grid5000.fr",
|
124
|
+
# "griffon-8.nancy.grid5000.fr",
|
125
|
+
# "griffon-8.nancy.grid5000.fr",
|
126
|
+
# "griffon-8.nancy.grid5000.fr",
|
127
|
+
# "griffon-9.nancy.grid5000.fr",
|
128
|
+
# "griffon-9.nancy.grid5000.fr",
|
129
|
+
# "griffon-9.nancy.grid5000.fr",
|
130
|
+
# "griffon-9.nancy.grid5000.fr",
|
131
|
+
# "griffon-77.nancy.grid5000.fr",
|
132
|
+
# "griffon-77.nancy.grid5000.fr",
|
133
|
+
# "griffon-77.nancy.grid5000.fr",
|
134
|
+
# "griffon-77.nancy.grid5000.fr",
|
135
|
+
# "vlans"=>["5"]},
|
136
|
+
# "assigned_nodes"=>["griffon-8.nancy.grid5000.fr", "griffon-9.nancy.grid5000.fr", "griffon-77.nancy.grid5000.fr"],
|
137
|
+
# "deploy"=>
|
138
|
+
# {"created_at"=>1423575401,
|
139
|
+
# "environment"=>"http://public.sophia.grid5000.fr/~nniclausse/openmx.dsc",
|
140
|
+
# "key"=>"https://api.grid5000.fr/sid/sites/nancy/files/cruizsanabria-key-84f3f1dbb1279bc1bddcd618e26c960307d653c5",
|
141
|
+
# "nodes"=>["griffon-8.nancy.grid5000.fr", "griffon-9.nancy.grid5000.fr", "griffon-77.nancy.grid5000.fr"],
|
142
|
+
# "site_uid"=>"nancy",
|
143
|
+
# "status"=>"processing",
|
144
|
+
# "uid"=>"D-751096de-0c33-461a-9d27-56be1b2dd980",
|
145
|
+
# "updated_at"=>1423575401,
|
146
|
+
# "user_uid"=>"cruizsanabria",
|
147
|
+
# "vlan"=>5,
|
148
|
+
# "links"=>
|
149
|
+
# [{"rel"=>"self", "href"=>"/sid/sites/nancy/deployments/D-751096de-0c33-461a-9d27-56be1b2dd980", "type"=>"application/vnd.grid5000.item+json"},
|
150
|
+
class G5KJSON < Hash

  # Value stored under the 'items' key.
  def items
    self['items']
  end

  # Value stored under the 'nodes' key.
  def nodes
    self['nodes']
  end

  # Value stored under 'resources_by_type', or an empty Hash when it is nil.
  def resources
    by_type = self['resources_by_type']
    by_type.nil? ? Hash.new : by_type
  end

  # 'href' of the entry in 'links' whose 'rel' equals r.
  def rel(r)
    link = self['links'].detect { |candidate| candidate['rel'] == r }
    link['href']
  end

  # Value stored under the 'uid' key.
  def uid
    self['uid']
  end

  # 'href' of the link with rel 'self'.
  def rel_self
    rel('self')
  end

  # 'href' of the link with rel 'parent'.
  def rel_parent
    rel('parent')
  end

  # Fetches this resource again through g5k.get_json using its 'self' link.
  def refresh(g5k)
    g5k.get_json(rel_self)
  end

  # Parses a JSON document, building G5KJSON objects for JSON objects and
  # G5KArray objects for JSON arrays.
  def self.parse(s)
    JSON.parse(s, :object_class => G5KJSON, :array_class => G5KArray)
  end

end
|
189
|
+
|
190
|
+
# Manages the low level operations for communicating with the REST API.
|
191
|
+
# @api private
|
192
|
+
class G5KRest

  attr_reader :user

  # Initializes a REST connection
  # @param uri [String] resource identifier which normally is the URL of the Rest API
  # @param api_version [String] version of the API to use, "sid" is used when nil
  # @param user [String] user if authentication is needed
  # @param pass [String] password if authentication is needed
  # @param on_error [Symbol] option to deactivate the {Cute::G5K::RequestFailed RequestFailed} exceptions
  def initialize(uri,api_version,user,pass,on_error)
    @user = user
    @pass = pass
    @api_version = api_version.nil? ? "sid" : api_version
    if user.nil? || pass.nil?
      @endpoint = uri # Inside Grid'5000
    else
      # Credentials are embedded in the URL, hence the escaping
      user_escaped = CGI.escape(user)
      pass_escaped = CGI.escape(pass)
      @endpoint = "https://#{user_escaped}:#{pass_escaped}@#{uri.split("https://")[1]}"
    end

    machine =`uname -ov`.chop
    @user_agent = "ruby-cute/#{VERSION} (#{machine}) Ruby #{RUBY_VERSION}"
    @api = RestClient::Resource.new(@endpoint, :timeout => 30)
    @on_error = on_error
    test_connection
  end

  # Returns a resource object
  # @param path [String] this complements the URI to address to a specific resource
  def resource(path)
    path = path[1..-1] if path.start_with?('/')
    return @api[path]
  end

  # @return [Hash] the HTTP response, or nil if the request failed and the
  #   failure was ignored (on_error set to :ignore)
  # @param path [String] this complements the URI to address to a specific resource
  def get_json(path)
    begin
      r = resource(path).get(:content_type => "application/json",
                             :user_agent => @user_agent)
    rescue => e
      handle_exception(e)
    end
    # r is only nil here when handle_exception chose to ignore the failure
    return nil if r.nil?
    return G5KJSON.parse(r)
  end

  # Creates a resource on the server
  # @param path [String] this complements the URI to address to a specific resource
  # @param json [Hash] contains the characteristics of the resources to be created.
  # @return [Hash] the HTTP response, or nil if the request failed and the
  #   failure was ignored (on_error set to :ignore)
  def post_json(path, json)
    begin
      r = resource(path).post(json.to_json,
                              :content_type => "application/json",
                              :accept => "application/json",
                              :user_agent => @user_agent)
    rescue => e
      handle_exception(e)
    end
    # r is only nil here when handle_exception chose to ignore the failure
    return nil if r.nil?
    return G5KJSON.parse(r)
  end

  # Deletes a resource on the server
  # @param path [String] this complements the URI to address to a specific resource
  def delete_json(path)
    begin
      return resource(path).delete()
    rescue RestClient::InternalServerError => e
      raise RequestFailed.new("Service internal error", e)
    end
  end

  # @return the parent link
  def follow_parent(obj)
    get_json(obj.rel_parent)
  end

  private

  # Tests the connection and raises an error in case of a problem
  def test_connection
    begin
      return get_json("/#{@api_version}/")
    rescue Cute::G5K::Unauthorized
      raise "Your Grid'5000 credentials are not recognized"
    end
  end

  # Issues a Cute::G5K exception according to the http status code
  # @param e [Exception] the exception rescued while performing the request
  # @return [nil] only when the error is ignored because on_error is :ignore
  def handle_exception(e)
    # Exceptions that do not expose an HTTP status are re-raised untouched
    # instead of being hidden behind a NoMethodError on #http_code.
    raise e unless e.respond_to?(:http_code)

    case e.http_code
    when 500
      # This part deals with bug: https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=5912
      # Grid'5000 returns 500 error code even though the error was generated by a bad request
      if embedded_error_code(e) == 400
        raise BadRequest.new("Bad request", e)
      else
        raise RequestFailed.new("Service internal error", e)
      end
    when 400
      raise BadRequest.new("Bad request", e)
    when 404
      raise NotFound.new("Resource not found", e)
    when 401
      raise Unauthorized.new("Authentication problem",e)
    else
      if @on_error == :ignore
        return nil
      else
        raise RequestFailed.new("Service internal error", e)
      end
    end
  end

  # Extracts the "code" entry from the first line of the response body that
  # mentions it. Returns nil when the body is missing or cannot be parsed.
  def embedded_error_code(e)
    code_line = e.http_body.to_s.split("\n").find { |x| x.include?("code") }
    JSON.parse("{#{code_line}}")["code"]
  rescue JSON::ParserError
    nil
  end

end
|
309
|
+
|
310
|
+
# This class helps you to access Grid'5000 REST API.
|
311
|
+
# Thus, the most common actions such as reservation of nodes and deployment can be easily scripted.
|
312
|
+
# To simplify the use of the module, it is better to create a file with the following information:
|
313
|
+
#
|
314
|
+
# $ cat > ~/.grid5000_api.yml << EOF
|
315
|
+
# $ uri: https://api.grid5000.fr/
|
316
|
+
# $ username: user
|
317
|
+
# $ password: **********
|
318
|
+
# $ version: sid
|
319
|
+
# $ EOF
|
320
|
+
#
|
321
|
+
# The *username* and *password* are not necessary if you are using the module from inside Grid'5000.
|
322
|
+
# You can take a look at the {Cute::G5K::API#initialize G5K::API constructor} to see more details for
|
323
|
+
# this configuration.
|
324
|
+
#
|
325
|
+
# = Getting started
|
326
|
+
#
|
327
|
+
# As already said, the goal of {Cute::G5K::API G5K::API} class is to present a high level abstraction to manage the most common activities
|
328
|
+
# in Grid'5000 such as: the reservation of resources and the deployment of environments.
|
329
|
+
# Consequently, these activities can be easily scripted using Ruby.
|
330
|
+
# The advantage of this is that you can use all Ruby constructs (e.g., loops, conditionals, blocks, iterators, etc) to script your experiments.
|
331
|
+
# In the presence of error, {Cute::G5K::API G5K::API} raises exceptions (see {Cute::G5K::Error G5K exceptions}),
|
332
|
+
# that you can handle to decide the workflow of your experiment
|
333
|
+
# (see {Cute::G5K::API#wait_for_deploy wait_for_deploy} and {Cute::G5K::API#wait_for_job wait_for_job}).
|
334
|
+
# Let's show how {Cute::G5K::API G5K::API} is used through an example, suppose we want to reserve 3 nodes in Nancy site for 1 hour.
|
335
|
+
# In order to do that we would write something like this:
|
336
|
+
#
|
337
|
+
# require 'cute'
|
338
|
+
#
|
339
|
+
# g5k = Cute::G5K::API.new()
|
340
|
+
#
|
341
|
+
# job = g5k.reserve(:nodes => 3, :site => 'nancy', :walltime => '01:00:00')
|
342
|
+
#
|
343
|
+
# puts "Assigned nodes : #{job['assigned_nodes']}"
|
344
|
+
#
|
345
|
+
# If that is all you want to do, you can write that into a file, let's say *example.rb* and execute it using the Ruby interpreter.
|
346
|
+
#
|
347
|
+
# $ ruby example.rb
|
348
|
+
#
|
349
|
+
# The execution will block until you got the reservation. Then, you can interact with the nodes you reserved the way you used to or
|
350
|
+
# add more code to the previous script for controlling your experiment with Ruby-Cute as shown in this
|
351
|
+
# {http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/file/examples/g5k_exp_virt.rb example}.
|
352
|
+
# We have just used the method {Cute::G5K::API#reserve reserve} that allow us to reserve resources in Grid'5000.
|
353
|
+
# This method can be used to reserve resources in deployment mode and deploy our own software environment on them using
|
354
|
+
# {http://kadeploy3.gforge.inria.fr/ Kadeploy}. For this we use the option *:env* of the {Cute::G5K::API#reserve reserve} method.
|
355
|
+
# Therefore, it will first reserve the resources and then deploy the specified environment.
|
356
|
+
# The method {Cute::G5K::API#reserve reserve} will block until the deployment is done.
|
357
|
+
# The following Ruby script illustrates all we have just said.
|
358
|
+
#
|
359
|
+
# require 'cute'
|
360
|
+
#
|
361
|
+
# g5k = Cute::G5K::API.new()
|
362
|
+
#
|
363
|
+
# job = g5k.reserve(:nodes => 1, :site => 'grenoble', :walltime => '00:40:00', :env => 'wheezy-x64-base')
|
364
|
+
#
|
365
|
+
# puts "Assigned nodes : #{job['assigned_nodes']}"
|
366
|
+
#
|
367
|
+
# Your public ssh key located in ~/.ssh will be copied by default on the deployed machines,
|
368
|
+
# you can specify another path for your keys with the option *:keys*.
|
369
|
+
# In order to deploy your own environment, you have to put the tar file that contains the operating system you want to deploy and
|
370
|
+
# the environment description file, under the public directory of a given site.
|
371
|
+
# *VLANS* are supported by adding the parameter :vlan => type where type can be: *:routed*, *:local*, *:global*.
|
372
|
+
# The following example, reserves 10 nodes in the Lille site, starts the deployment of a custom environment over the nodes
|
373
|
+
# and puts the nodes under a routed VLAN. We used the method {Cute::G5K::API#get_vlan_nodes get_vlan_nodes} to get the
|
374
|
+
# new hostnames assigned to your nodes.
|
375
|
+
#
|
376
|
+
# require 'cute'
|
377
|
+
#
|
378
|
+
# g5k = Cute::G5K::API.new()
|
379
|
+
#
|
380
|
+
# job = g5k.reserve(:site => "lille", :nodes => 10,
|
381
|
+
# :env => 'https://public.lyon.grid5000.fr/~user/debian_custom_img.yaml',
|
382
|
+
# :vlan => :routed, :keys => "~/my_ssh_key")
|
383
|
+
#
|
384
|
+
#
|
385
|
+
# puts "Log in into the nodes using the following hostnames: #{g5k.get_vlan_nodes(job)}"
|
386
|
+
#
|
387
|
+
# If you do not want that the method {Cute::G5K::API#reserve reserve} perform the deployment for you, you have to use the option :type => :deploy.
|
388
|
+
# This can be useful when deploying different environments in your reserved nodes. For example deploying the environments for a small HPC cluster.
|
389
|
+
# You have to use the method {Cute::G5K::API#deploy deploy} for performing the deploy.
|
390
|
+
# This method does not block by default, which is why you have to use the method {Cute::G5K::API#wait_for_deploy wait_for_deploy} in order to block the execution
|
391
|
+
# until the deployment is done.
|
392
|
+
#
|
393
|
+
# require 'cute'
|
394
|
+
#
|
395
|
+
# g5k = Cute::G5K::API.new()
|
396
|
+
#
|
397
|
+
# job = g5k.reserve(:site => "lyon", :nodes => 5, :walltime => "03:00:00", :type => :deploy)
|
398
|
+
#
|
399
|
+
# nodes = job["assigned_nodes"]
|
400
|
+
#
|
401
|
+
# slaves = nodes[1..4]
|
402
|
+
# master = nodes-slaves
|
403
|
+
#
|
404
|
+
# g5k.deploy(job,:nodes => master, :env => 'https://public.lyon.grid5000.fr/~user/debian_master_img.yaml')
|
405
|
+
# g5k.deploy(job,:nodes => slaves, :env => 'https://public.lyon.grid5000.fr/~user/debian_slaves_img.yaml')
|
406
|
+
#
|
407
|
+
# g5k.wait_for_deploy(job)
|
408
|
+
#
|
409
|
+
# puts "master node: #{master}"
|
410
|
+
# puts "slaves nodes: #{slaves}"
|
411
|
+
#
|
412
|
+
# You can check out the documentation of {Cute::G5K::API#reserve reserve} and {Cute::G5K::API#deploy deploy} methods
|
413
|
+
# to know all the parameters supported and more complex uses.
|
414
|
+
#
|
415
|
+
# == Other useful methods
|
416
|
+
#
|
417
|
+
# Let's use *pry* to show other useful methods. As shown in {file:README.md Ruby Cute} the *cute* command will open a
|
418
|
+
# pry shell with some modules preloaded and it will create the variable $g5k to access {Cute::G5K::API G5K::API} class.
|
419
|
+
# Therefore, we can consult the name of the cluster available in a specific site.
|
420
|
+
#
|
421
|
+
# [4] pry(main)> $g5k.cluster_uids("grenoble")
|
422
|
+
# => ["adonis", "edel", "genepi"]
|
423
|
+
#
|
424
|
+
# As well as the deployable environments:
|
425
|
+
#
|
426
|
+
# [6] pry(main)> $g5k.environment_uids("grenoble")
|
427
|
+
# => ["squeeze-x64-base", "squeeze-x64-big", "squeeze-x64-nfs", "wheezy-x64-base", "wheezy-x64-big", "wheezy-x64-min", "wheezy-x64-nfs", "wheezy-x64-xen"]
|
428
|
+
#
|
429
|
+
# For getting a list of sites available in Grid'5000 you can use:
|
430
|
+
#
|
431
|
+
# [7] pry(main)> $g5k.site_uids()
|
432
|
+
# => ["grenoble", "lille", "luxembourg", "lyon",...]
|
433
|
+
#
|
434
|
+
# We can get the status of nodes in a given site by using:
|
435
|
+
#
|
436
|
+
# [8] pry(main)> $g5k.nodes_status("lyon")
|
437
|
+
# => {"taurus-2.lyon.grid5000.fr"=>"besteffort", "taurus-16.lyon.grid5000.fr"=>"besteffort", "taurus-15.lyon.grid5000.fr"=>"besteffort", ...}
|
438
|
+
#
|
439
|
+
# We can get information about our submitted jobs by using:
|
440
|
+
#
|
441
|
+
# [11] pry(main)> $g5k.get_my_jobs("grenoble")
|
442
|
+
# => [{"uid"=>1679094,
|
443
|
+
# "user_uid"=>"cruizsanabria",
|
444
|
+
# "user"=>"cruizsanabria",
|
445
|
+
# "walltime"=>3600,
|
446
|
+
# "queue"=>"default",
|
447
|
+
# "state"=>"running", ...}, ...]
|
448
|
+
#
|
449
|
+
# If we are done with our experiment, we can release the submitted job or all jobs in a given site as follows:
|
450
|
+
#
|
451
|
+
# [12] pry(main)> $g5k.release(job)
|
452
|
+
# [13] pry(main)> $g5k.release_all("grenoble")
|
453
|
+
class API
|
454
|
+
|
455
|
+
# Assigns a logger
|
456
|
+
#
|
457
|
+
# = Examples
|
458
|
+
# You can use this attribute to control how to log all messages produce by {Cute::G5K::API G5K::API}.
|
459
|
+
# For example, below we use the logger available in Ruby standard library.
|
460
|
+
#
|
461
|
+
# require 'cute'
|
462
|
+
# require 'logger'
|
463
|
+
#
|
464
|
+
# g5k = Cute::G5K::API.new()
|
465
|
+
#
|
466
|
+
# g5k.logger = Logger.new(File.new('experiment_1.log'))
|
467
|
+
attr_accessor :logger
|
468
|
+
# Initializes a REST connection for Grid'5000 API
|
469
|
+
#
|
470
|
+
# = Example
|
471
|
+
# You can specify another configuration file using the option *:conf_file*, for example:
|
472
|
+
#
|
473
|
+
# g5k = Cute::G5K::API.new(:conf_file =>"config file path")
|
474
|
+
#
|
475
|
+
# You can specify other parameter to use:
|
476
|
+
#
|
477
|
+
# g5k = Cute::G5K::API.new(:uri => "https://api.grid5000.fr", :version => "sid")
|
478
|
+
#
|
479
|
+
# If you want to ignore {Cute::G5K::RequestFailed RequestFailed} exceptions you can use:
|
480
|
+
#
|
481
|
+
# g5k = Cute::G5K::API.new(:on_error => :ignore)
|
482
|
+
#
|
483
|
+
# @param [Hash] params Contains initialization parameters.
|
484
|
+
# @option params [String] :conf_file Path for configuration file
|
485
|
+
# @option params [String] :uri REST API URI to contact
|
486
|
+
# @option params [String] :version Version of the REST API to use
|
487
|
+
# @option params [String] :user Username to access the REST API
|
488
|
+
# @option params [String] :pass Password to access the REST API
|
489
|
+
# @option params [Symbol] :on_error Set to :ignore if you want to ignore {Cute::G5K::RequestFailed RequestFailed} exceptions.
|
490
|
+
def initialize(params={})
  default_file = "#{ENV['HOME']}/.grid5000_api.yml"

  # Fall back to the default configuration file when none was given.
  # A local variable is used so that the caller's hash is not modified.
  conf_file = params[:conf_file]
  conf_file = default_file if conf_file.nil? && File.exist?(default_file)

  # YAML.load_file opens and closes the file itself (no leaked handle);
  # an empty file yields a non-Hash value, in which case no settings are used.
  config = conf_file.nil? ? {} : (YAML.load_file(conf_file) || {})
  @user = params[:user] || config["username"]
  @pass = params[:pass] || config["password"]
  @uri = params[:uri] || config["uri"]
  @api_version = params[:version] || config["version"] || "sid"
  @logger = nil

  begin
    @g5k_connection = G5KRest.new(@uri,@api_version,@user,@pass,params[:on_error])
  rescue
    msg_create_file = ""
    # Only suggest creating the default file when no configuration was available at all
    if !File.exist?(default_file) && params[:conf_file].nil?
      msg_create_file = "Please create the file: ~/.grid5000_api.yml and\n" \
                        "put the necessary credentials or use the option\n" \
                        ":conf_file to indicate another file for the credentials"
    end
    raise "Unable to authorize against the Grid'5000 API.\n#{msg_create_file}"
  end
end
|
519
|
+
|
520
|
+
# It returns the site name. Example:
|
521
|
+
# site #=> "rennes"
|
522
|
+
# This will only work when {Cute::G5K::API G5K::API} is used within Grid'5000.
|
523
|
+
# In the other cases it will return *nil*
|
524
|
+
# @return [String] the site name where the method is called on
|
525
|
+
def site
  # Output of the `hostname` command without its last character
  host = `hostname`.chop
  matched = /^.*\.(.*).*\.grid5000.fr/.match(host)
  # nil when the host name does not match the pattern above
  matched.nil? ? nil : matched[1]
end
|
530
|
+
|
531
|
+
# @api private
|
532
|
+
# @return the rest point for performing low level REST requests
|
533
|
+
def rest
  # Exposes the connection object created in the constructor
  return @g5k_connection
end
|
536
|
+
|
537
|
+
# @return [String] Grid'5000 user
|
538
|
+
def g5k_user
  # Without a configured user, fall back to the USER environment variable
  return ENV['USER'] if @user.nil?
  @user
end
|
541
|
+
|
542
|
+
# Returns all sites identifiers
|
543
|
+
#
|
544
|
+
# = Example:
|
545
|
+
# site_uids #=> ["grenoble", "lille", "luxembourg", "lyon",...]
|
546
|
+
#
|
547
|
+
# @return [Array] all site identifiers
|
548
|
+
def site_uids
  # Delegates to the uids helper of the collection returned by #sites
  all_sites = sites
  all_sites.uids
end
|
551
|
+
|
552
|
+
# Returns all cluster identifiers
|
553
|
+
#
|
554
|
+
# = Example:
|
555
|
+
# cluster_uids("grenoble") #=> ["adonis", "edel", "genepi"]
|
556
|
+
#
|
557
|
+
# @return [Array] cluster identifiers
|
558
|
+
def cluster_uids(site)
  # Delegates to the uids helper of the collection returned by #clusters
  site_clusters = clusters(site)
  site_clusters.uids
end
|
561
|
+
|
562
|
+
# Returns the name of the environments deployable in a given site.
|
563
|
+
# These can be used with {Cute::G5K::API#reserve reserve} and {Cute::G5K::API#deploy deploy} methods
|
564
|
+
#
|
565
|
+
# = Example:
|
566
|
+
# environment_uids("nancy") #=> ["squeeze-x64-base", "squeeze-x64-big", "squeeze-x64-nfs", ...]
|
567
|
+
#
|
568
|
+
# @return [Array] environment identifiers
|
569
|
+
def environment_uids(site)
  # The API returns versioned names such as squeeze-x64-big-1.8; everything
  # after the last dash is dropped. Names without a dash become "".
  names = environments(site).uids.map do |uid|
    versioned = /(.*)-(.*)/.match(uid)
    versioned.nil? ? "" : versioned[1]
  end

  names.uniq
end
|
579
|
+
|
580
|
+
# @return [Hash] all the status information of a given Grid'5000 site
|
581
|
+
# @param site [String] a valid Grid'5000 site name
|
582
|
+
def site_status(site)
  # Status resource of the given site
  status_path = api_uri("sites/#{site}/status")
  @g5k_connection.get_json(status_path)
end
|
585
|
+
|
586
|
+
# @return [Hash] the nodes state (e.g, free, busy, etc) that belong to a given Grid'5000 site
|
587
|
+
# @param site [String] a valid Grid'5000 site name
|
588
|
+
def nodes_status(site)
  # Each entry is indexed as [name, details]; only the "soft" state is kept
  site_status(site).nodes.each_with_object({}) do |node, statuses|
    statuses[node[0]] = node[1]["soft"]
  end
end
|
597
|
+
|
598
|
+
# @return [Array] the description of all Grid'5000 sites
|
599
|
+
def sites
  # Items of the "sites" collection
  listing = @g5k_connection.get_json(api_uri("sites"))
  listing.items
end
|
602
|
+
|
603
|
+
# @return [Array] the description of clusters that belong to a given Grid'5000 site
|
604
|
+
# @param site [String] a valid Grid'5000 site name
|
605
|
+
def clusters(site)
  # Items of the clusters collection of the given site
  listing = @g5k_connection.get_json(api_uri("sites/#{site}/clusters"))
  listing.items
end
|
608
|
+
|
609
|
+
# @return [Array] the description of all environments registered in a Grid'5000 site
|
610
|
+
def environments(site)
  # Items of the environments collection of the given site
  listing = @g5k_connection.get_json(api_uri("sites/#{site}/environments"))
  listing.items
end
|
613
|
+
|
614
|
+
# @return [Hash] all the jobs submitted in a given Grid'5000 site,
|
615
|
+
# if a uid is provided only the jobs owned by the user are shown.
|
616
|
+
# @param site [String] a valid Grid'5000 site name
|
617
|
+
# @param uid [String] user name in Grid'5000
|
618
|
+
# @param state [String] jobs state: running, waiting
|
619
|
+
def get_jobs(site, uid = nil, state = nil)
  # Only the criteria actually given are put in the query string
  criteria = []
  criteria << ["state", state] unless state.nil?
  criteria << ["user", uid] unless uid.nil?
  # This request could take a little long when all jobs are requested.
  # The API returns 50 by default, the limit was set to 25 (e.g., 23 seconds).
  criteria << ["limit", 25] if criteria.empty?

  # encode_www_form joins the pairs with '&' and escapes the values
  filter = "?" + URI.encode_www_form(criteria)
  jobs = @g5k_connection.get_json(api_uri("/sites/#{site}/jobs/#{filter}")).items
  # The listing is only used for its links: each job is fetched individually
  jobs.map{ |j| @g5k_connection.get_json(j.rel_self)}
end
|
629
|
+
|
630
|
+
# @return [Hash] the last 50 deployments performed in a Grid'5000 site
|
631
|
+
# @param site [String] a valid Grid'5000 site name
|
632
|
+
# @param uid [String] user name in Grid'5000
|
633
|
+
def get_deployments(site, uid = nil)
  path = "sites/#{site}/deployments/"
  # The user filter is only sent when a user was given; previously an empty
  # "user=" filter was sent when uid was nil.
  path += "?user=#{uid}" unless uid.nil?
  @g5k_connection.get_json(api_uri(path)).items
end
|
636
|
+
|
637
|
+
# @return [Hash] information concerning a given job submitted in a Grid'5000 site
|
638
|
+
# @param site [String] a valid Grid'5000 site name
|
639
|
+
# @param jid [Fixnum] a valid job identifier
|
640
|
+
def get_job(site, jid)
  # Resource of one job, addressed by site and job identifier
  job_path = api_uri("/sites/#{site}/jobs/#{jid}")
  @g5k_connection.get_json(job_path)
end
|
643
|
+
|
644
|
+
# @return [Hash] switches information available in a given Grid'5000 site.
|
645
|
+
# @param site [String] a valid Grid'5000 site name
|
646
|
+
def get_switches(site)
  equipments = @g5k_connection.get_json(api_uri("/sites/#{site}/network_equipments")).items
  switches = equipments.select { |equipment| equipment['kind'] == 'switch' }

  # Attach to every switch the names of the nodes plugged into it
  switches.each do |switch|
    node_card = switch['linecards'].detect { |card| card['kind'] == 'node' }
    next if node_card.nil? # IB switches for example

    used_ports = node_card['ports'].reject { |port| port == {} }
    switch['nodes'] = used_ports.map { |port| "#{port['uid']}.#{site}.grid5000.fr" }
  end

  # Switches without a node linecard are left out of the result
  switches.select { |switch| switch.key?('nodes') }
end
|
661
|
+
|
662
|
+
# @return [Hash] information of a specific switch available in a given Grid'5000 site.
|
663
|
+
# @param site [String] a valid Grid'5000 site name
|
664
|
+
# @param name [String] a valid switch name
|
665
|
+
def get_switch(site, name)
  # First switch of the site whose uid equals the requested name
  found = get_switches(site).detect { |switch| switch.uid == name }
  return found unless found.nil?
  raise "Unknown switch '#{name}'"
end
|
670
|
+
|
671
|
+
# Returns information of all my jobs submitted in a given site.
|
672
|
+
# By default it only shows the jobs in state *running*.
|
673
|
+
# You can specify another state like this:
|
674
|
+
#
|
675
|
+
# = Example
|
676
|
+
# get_my_jobs("nancy", state="waiting")
|
677
|
+
# Valid states are specified in {https://api.grid5000.fr/doc/4.0/reference/spec.html Grid'5000 API spec}
|
678
|
+
# @return [Array] all my submitted jobs to a given site and their associated deployments.
|
679
|
+
# @param site [String] a valid Grid'5000 site name
|
680
|
+
def get_my_jobs(site, state = "running")
  my_jobs = get_jobs(site, g5k_user, state)
  my_deployments = get_deployments(site, g5k_user)

  # Deployments are only attached to jobs in state running: each such job
  # gets the deployments created after the job started.
  my_jobs.each do |job|
    next unless job["state"] == "running"

    job["deploy"] = my_deployments.select { |dep| dep["created_at"] > job["started_at"] }
  end

  my_jobs
end
|
687
|
+
|
688
|
+
# Returns an Array with all subnets reserved by a given job.
|
689
|
+
# Each element of the Array is a {https://github.com/bluemonk/ipaddress IPAddress::IPv4} object which we can interact with to obtain
|
690
|
+
# the details of our reserved subnets:
|
691
|
+
#
|
692
|
+
# = Example
|
693
|
+
# require 'cute'
|
694
|
+
#
|
695
|
+
# g5k = Cute::G5K::API.new()
|
696
|
+
#
|
697
|
+
# job = g5k.reserve(:site => "lyon", :resources => "/slash_22=1+{virtual!='none'}/nodes=1")
|
698
|
+
#
|
699
|
+
# subnet = g5k.get_subnets(job).first #=> we use 'first' because it is an array and we only reserved one subnet.
|
700
|
+
#
|
701
|
+
# ips = subnet.map{ |ip| ip.to_s }
|
702
|
+
#
|
703
|
+
# @return [Array] all the subnets defined in a given job
|
704
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
705
|
+
def get_subnets(job)
  subnets = job.resources["subnets"]
  # No "subnets" entry in the job resources: report no subnets instead of
  # failing with NoMethodError on nil (same nil guard as get_vlan_nodes).
  return [] if subnets.nil?

  # Wrap every reserved subnet in an IPAddress::IPv4 object.
  subnets.map { |subnet| IPAddress::IPv4.new(subnet) }
end
|
709
|
+
|
710
|
+
# @return [Array] all the nodes in the VLAN
|
711
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
712
|
+
def get_vlan_nodes(job)
  vlans = job.resources["vlans"]
  # No VLAN entry in the job resources: nothing to translate.
  return nil if vlans.nil?

  vlan_id = vlans.first
  # Splits "<host>-<number>" from the rest of the name (".site.domain").
  hostname = /^(\w+-\d+)(\..*)$/

  job["assigned_nodes"].map do |name|
    # Match once per node; names that do not match yield nil in the result.
    parts = hostname.match(name)
    next if parts.nil?

    "#{parts[1]}-kavlan-#{vlan_id}#{parts[2]}"
  end
end
|
724
|
+
|
725
|
+
# Releases all jobs on a site
|
726
|
+
# @param site [String] a valid Grid'5000 site name
|
727
|
+
def release_all(site)
  # The whole listing + release sequence is bounded to 20 seconds.
  Timeout.timeout(20) do
    running = get_my_jobs(site, "running")
    waiting = get_my_jobs(site, "waiting")
    pending = running + waiting

    unless pending.empty?
      begin
        pending.each { |job| release(job) }
      rescue Cute::G5K::RequestFailed => e
        # A job that was already killed is not an error here.
        raise unless e.response.include?('already killed')
      end
    end
  end

  true
end
|
739
|
+
|
740
|
+
# Releases a resource, it can be a job or a deploy.
|
741
|
+
def release(r)
  # Issue a DELETE on the resource's own link.
  @g5k_connection.delete_json(r.rel_self)
rescue Cute::G5K::RequestFailed => e
  # Deleting something that is already killed is tolerated (returns nil);
  # any other request failure is propagated.
  raise unless e.response.include?('already killed')
end
|
748
|
+
|
749
|
+
# Performs a reservation in Grid'5000.
|
750
|
+
#
|
751
|
+
# = Examples
|
752
|
+
#
|
753
|
+
# By default this method blocks until the reservation is ready,
|
754
|
+
# if we want this method to return after creating the reservation we set the option *:wait* to *false*.
|
755
|
+
# Then, you can use the method {Cute::G5K::API#wait_for_job wait_for_job} to wait for the reservation.
|
756
|
+
#
|
757
|
+
# job = g5k.reserve(:nodes => 25, :site => 'luxembourg', :walltime => '01:00:00', :wait => false)
|
758
|
+
#
|
759
|
+
# job = g5k.wait_for_job(job, :wait_time => 100)
|
760
|
+
#
|
761
|
+
# == Reserving with properties
|
762
|
+
#
|
763
|
+
# job = g5k.reserve(:site => 'lyon', :nodes => 2, :properties => "wattmeter='YES'")
|
764
|
+
#
|
765
|
+
# job = g5k.reserve(:site => 'nancy', :nodes => 1, :properties => "switch='sgraphene1'")
|
766
|
+
#
|
767
|
+
# job = g5k.reserve(:site => 'nancy', :nodes => 1, :properties => "cputype='Intel Xeon E5-2650'")
|
768
|
+
#
|
769
|
+
# == Subnet reservation
|
770
|
+
#
|
771
|
+
# The example below reserves 2 nodes in the cluster *chirloute* located in Lille for 1 hour as well as 2 /22 subnets.
|
772
|
+
# We will get 2048 IP addresses that can be used, for example, in virtual machines.
|
773
|
+
# If walltime is not specified, 1 hour walltime will be assigned to the reservation.
|
774
|
+
#
|
775
|
+
# job = g5k.reserve(:site => 'lille', :cluster => 'chirloute', :nodes => 2,
|
776
|
+
# :env => 'wheezy-x64-xen', :keys => "~/my_ssh_jobkey",
|
777
|
+
# :subnets => [22,2])
|
778
|
+
#
|
779
|
+
# == Before using OAR hierarchy
|
780
|
+
# All non-deploy reservations are submitted by default with the OAR option "-allow_classic_ssh"
|
781
|
+
# which does not take advantage of the CPU/core management level.
|
782
|
+
# Therefore, in order to take advantage of this capability, SSH keys have to be specified at the moment of reserving resources.
|
783
|
+
# This has to be used whenever we perform a reservation with cpu and core hierarchy.
|
784
|
+
# Users are encouraged to create a pair of SSH keys for managing jobs, for instance the following command can be used:
|
785
|
+
#
|
786
|
+
# ssh-keygen -N "" -t rsa -f ~/my_ssh_jobkey
|
787
|
+
#
|
788
|
+
# The reserved nodes can be accessed using "oarsh" or by configuring the SSH connection as shown in {https://www.grid5000.fr/mediawiki/index.php/OAR2 OAR2}.
|
789
|
+
# You have to specify different keys per reservation if you want several jobs running at the same time in the same site.
|
790
|
+
# Example using the OAR hierarchy:
|
791
|
+
#
|
792
|
+
# job = g5k.reserve(:site => "grenoble", :switches => 3, :nodes => 1, :cpus => 1, :cores => 1, :keys => "~/my_ssh_jobkey")
|
793
|
+
#
|
794
|
+
# == Using OAR syntax
|
795
|
+
#
|
796
|
+
# The parameter *:resources* can be used instead of parameters such as: *:cluster*, *:nodes*, *:cpus*, *:walltime*, *:vlan*, *:subnets*, *:properties*, etc,
|
797
|
+
# which are shortcuts for OAR syntax. These shortcuts are ignored if the parameter *:resources* is used.
|
798
|
+
# Using the parameter *:resources* allows to express more flexible and complex reservations by using directly the OAR syntax.
|
799
|
+
# Therefore, the two examples shown below are equivalent:
|
800
|
+
#
|
801
|
+
# job = g5k.reserve(:site => "grenoble", :switches => 3, :nodes => 1, :cpus => 1, :cores => 1, :keys => "~/my_ssh_jobkey")
|
802
|
+
# job = g5k.reserve(:site => "grenoble", :resources => "/switch=3/nodes=1/cpu=1/core=1", :keys => "~/my_ssh_jobkey")
|
803
|
+
#
|
804
|
+
# Combining OAR hierarchy with properties:
|
805
|
+
#
|
806
|
+
# job = g5k.reserve(:site => "grenoble", :resources => "{ib10g='YES' and memnode=24160}/cluster=1/nodes=2/core=1", :keys => "~/my_ssh_jobkey")
|
807
|
+
#
|
808
|
+
# If we want 2 nodes with the following constraints:
|
809
|
+
# 1) nodes on 2 different clusters of the same site, 2) nodes with virtualization capability enabled
|
810
|
+
# 3) 1 /22 subnet. The reservation will be like:
|
811
|
+
#
|
812
|
+
# job = g5k.reserve(:site => "rennes", :resources => "/slash_22=1+{virtual!='none'}/cluster=2/nodes=1")
|
813
|
+
#
|
814
|
+
# Another reservation for two clusters:
|
815
|
+
#
|
816
|
+
# job = g5k.reserve(:site => "nancy", :resources => "{cluster='graphene'}/nodes=2+{cluster='griffon'}/nodes=3")
|
817
|
+
#
|
818
|
+
# Reservation using a local VLAN
|
819
|
+
#
|
820
|
+
# job = g5k.reserve(:site => 'nancy', :resources => "{type='kavlan-local'}/vlan=1,nodes=1", :env => 'wheezy-x64-xen')
|
821
|
+
#
|
822
|
+
# @return [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
823
|
+
# @param [Hash] opts Options for reservation in Grid'5000
|
824
|
+
# @option opts [Numeric] :nodes Number of nodes to reserve
|
825
|
+
# @option opts [String] :walltime Walltime of the reservation
|
826
|
+
# @option opts [String] :site Grid'5000 site
|
827
|
+
# @option opts [Symbol] :type Type of reservation: :deploy, :allow_classic
|
828
|
+
# @option opts [String] :name Reservation name
|
829
|
+
# @option opts [String] :cmd The command to execute when the job starts (e.g. ./my-script.sh).
|
830
|
+
# @option opts [String] :cluster Valid Grid'5000 cluster
|
831
|
+
# @option opts [Array] :subnets 1) prefix_size, 2) number of subnets
|
832
|
+
# @option opts [String] :env Environment name for {http://kadeploy3.gforge.inria.fr/ Kadeploy}
|
833
|
+
# @option opts [Symbol] :vlan Vlan type: :routed, :local, :global
|
834
|
+
# @option opts [String] :properties OAR properties defined in the cluster
|
835
|
+
# @option opts [String] :resources OAR syntax for complex submissions
|
836
|
+
# @option opts [String] :reservation Request a job to be scheduled a specific date.
|
837
|
+
# The date format is "YYYY-MM-DD HH:MM:SS".
|
838
|
+
# @option opts [Boolean] :wait Whether or not to wait until the job is running (default is true)
|
839
|
+
def reserve(opts)
  # Work on a private copy: the :wait default written below and the :nodes
  # removal before deploying must not leak into the caller's Hash (a reused
  # options Hash would otherwise lose :nodes after the first reservation).
  opts = opts.dup

  # checking valid options
  valid_opts = [:site, :cluster, :switches, :cpus, :cores, :nodes, :walltime, :cmd,
                :type, :name, :subnets, :env, :vlan, :properties, :resources, :reservation, :wait, :keys]
  unre_opts = opts.keys - valid_opts
  raise ArgumentError, "Unrecognized option #{unre_opts}" unless unre_opts.empty?

  nodes = opts.fetch(:nodes, 1)
  walltime = opts.fetch(:walltime, '01:00:00')
  site = opts[:site]
  name = opts.fetch(:name, 'rubyCute job')
  command = opts[:cmd]
  opts[:wait] = true if opts[:wait].nil?
  cluster = opts[:cluster]
  switches = opts[:switches]
  cpus = opts[:cpus]
  cores = opts[:cores]
  subnets = opts[:subnets]
  properties = opts[:properties]
  reservation = opts[:reservation]
  resources = opts.fetch(:resources, "")
  # Asking for an environment forces a deploy job, whatever :type says.
  type = opts[:env] ? :deploy : opts[:type]
  keys = opts[:keys]

  # Maps the :vlan shortcut to the OAR resource type.
  vlan_opts = {:routed => "kavlan",:global => "kavlan-global",:local => "kavlan-local"}
  vlan = nil
  unless opts[:vlan].nil?
    raise ArgumentError, 'Option for vlan not recognized' unless vlan_opts.key?(opts[:vlan])
    vlan = vlan_opts.fetch(opts[:vlan])
  end

  raise 'At least nodes, time and site must be given' if [nodes, walltime, site].any?(&:nil?)

  secs = walltime.to_secs
  walltime = walltime.to_time

  raise 'Nodes must be an integer.' unless nodes.is_a?(Integer)

  # Without an explicit command the job just sleeps for the whole walltime.
  command = "sleep #{secs}" if command.nil?
  type = type.to_sym unless type.nil?

  # The shortcut options are only used when no explicit OAR :resources string is given.
  if resources == ""
    resources = "/switch=#{switches}" unless switches.nil?
    resources += "/nodes=#{nodes}"
    resources += "/cpu=#{cpus}" unless cpus.nil?
    resources += "/core=#{cores}" unless cores.nil?
    resources = "{cluster='#{cluster}'}" + resources unless cluster.nil?
    resources = "{type='#{vlan}'}/vlan=1+" + resources unless vlan.nil?
    resources = "slash_#{subnets[0]}=#{subnets[1]}+" + resources unless subnets.nil?
  end

  resources += ",walltime=#{walltime}" unless resources.include?("walltime")

  payload = {
    'resources' => resources,
    'name' => name,
    'command' => command
  }

  info "Reserving resources: #{resources} (type: #{type}) (in #{site})"

  payload['properties'] = properties unless properties.nil?
  payload['types'] = [ type.to_s ] unless type.nil?

  unless type == :deploy
    if opts[:keys]
      payload['import-job-key-from-file'] = [ File.expand_path(keys) ]
    else
      # NOTE(review): this replaces any non-deploy :type given by the caller
      # with 'allow_classic_ssh' -- confirm that dropping it is intended.
      payload['types'] = [ 'allow_classic_ssh' ]
    end
  end

  if reservation
    payload['reservation'] = reservation
    info "Starting this reservation at #{reservation}"
  end

  begin
    # Support for the option "import-job-key-from-file"
    # The request has to be redirected to the OAR API given that Grid'5000 API
    # does not support some OAR options.
    if payload['import-job-key-from-file']
      # Adding double quotes otherwise we have a syntax error from OAR API
      payload["resources"] = "\"#{payload["resources"]}\""
      temp = @g5k_connection.post_json(api_uri("sites/#{site}/internal/oarapi/jobs"), payload)
      sleep 1 # This is for being sure that our job appears on the list
      r = get_my_jobs(site, nil).find { |j| j["uid"] == temp["id"] }
    else
      r = @g5k_connection.post_json(api_uri("sites/#{site}/jobs"), payload) # This makes reference to the same class
    end
  rescue Error => e
    info "Fail to submit job"
    info e.message
    e.http_body.split("\\n").each { |line| info line }
    raise
  end

  job = @g5k_connection.get_json(r.rel_self)
  job = wait_for_job(job) if opts[:wait] == true
  opts.delete(:nodes) # to not collapse with deploy options
  deploy(job, opts) unless opts[:env].nil?
  job
end
|
948
|
+
|
949
|
+
# Blocks until job is in *running* state
|
950
|
+
#
|
951
|
+
# = Example
|
952
|
+
# You can pass the parameter *:wait_time* that allows you to timeout the submission (by default is 10h).
|
953
|
+
# The method will throw a {Cute::G5K::EventTimeout Timeout} exception
|
954
|
+
# that you can catch and react upon.
|
955
|
+
# The following example shows how it can be used. Let's suppose we want to find 5 nodes available for
|
956
|
+
# 3 hours. We can try in each site using the script below.
|
957
|
+
#
|
958
|
+
# require 'cute'
|
959
|
+
#
|
960
|
+
# g5k = Cute::G5K::API.new()
|
961
|
+
#
|
962
|
+
# sites = g5k.site_uids
|
963
|
+
#
|
964
|
+
# sites.each{ |site|
|
965
|
+
# job = g5k.reserve(:site => site, :nodes => 5, :wait => false, :walltime => "03:00:00")
|
966
|
+
# begin
|
967
|
+
# job = g5k.wait_for_job(job, :wait_time => 60)
|
968
|
+
# puts "Nodes assigned #{job['assigned_nodes']}"
|
969
|
+
# break
|
970
|
+
# rescue Cute::G5K::EventTimeout
|
971
|
+
# puts "We waited too long in site #{site} let's release the job and try in another site"
|
972
|
+
# g5k.release(job)
|
973
|
+
# end
|
974
|
+
# }
|
975
|
+
#
|
976
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
977
|
+
# @param opts [Hash] options
|
978
|
+
def wait_for_job(job, opts = {})
  # Read the timeout into a local (default 36000 s = 10 h) instead of writing
  # the default back into the caller's Hash.
  wait_time = opts[:wait_time].nil? ? 36000 : opts[:wait_time]
  jid = job['uid']
  info "Waiting for reservation #{jid}"

  begin
    Timeout.timeout(wait_time) do
      # Poll every 5 seconds until the job is running.
      loop do
        job = job.refresh(@g5k_connection)
        t = job['scheduled_at']
        unless t.nil?
          t = Time.at(t)
          secs = [ t - Time.now, 0 ].max.to_i
          info "Reservation #{jid} should be available at #{t} (#{secs} s)"
        end
        break if job['state'] == 'running'
        raise "Job is finishing." if job['state'] == 'finishing'
        Kernel.sleep(5)
      end
    end
  rescue Timeout::Error
    # Translate the stdlib timeout into the library's own exception.
    raise EventTimeout.new("Event timeout")
  end

  info "Reservation #{jid} ready"
  job
end
|
1004
|
+
|
1005
|
+
# Deploys an environment in a set of reserved nodes using {http://kadeploy3.gforge.inria.fr/ Kadeploy}.
|
1006
|
+
# A job structure returned by {Cute::G5K::API#reserve reserve} or {Cute::G5K::API#get_my_jobs get_my_jobs} methods
|
1007
|
+
# is mandatory as a parameter as well as the environment to deploy.
|
1008
|
+
# By default this method does not block, for that you have to set the option *:wait* to *true*.
|
1009
|
+
#
|
1010
|
+
# = Examples
|
1011
|
+
# Deploying the production environment *wheezy-x64-base* on all the reserved nodes and wait until the deployment is done:
|
1012
|
+
#
|
1013
|
+
# deploy(job, :env => "wheezy-x64-base", :wait => true)
|
1014
|
+
#
|
1015
|
+
# Other parameters you can specify are *:nodes* [Array] for deploying on specific nodes within a job and
|
1016
|
+
# *:keys* [String] to specify the public key to use during the deployment.
|
1017
|
+
#
|
1018
|
+
# deploy(job, :nodes => ["genepi-2.grid5000.fr"], :env => "wheezy-x64-xen", :keys => "~/my_key")
|
1019
|
+
#
|
1020
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
1021
|
+
# @param [Hash] opts Deploy options
|
1022
|
+
# @option opts [String] :env {http://kadeploy3.gforge.inria.fr/ Kadeploy} environment to deploy
|
1023
|
+
# @option opts [Array] :nodes Specifies the nodes to deploy on (defaults to all the nodes assigned to the job)
|
1024
|
+
# @option opts [String] :keys Specifies the SSH keys to copy for the deployment
|
1025
|
+
# @option opts [Boolean] :wait Whether or not to wait until the deployment is done (default is false)
|
1026
|
+
# @return [G5KJSON] a job with deploy information as described in {Cute::G5K::G5KJSON job}
|
1027
|
+
def deploy(job, opts = {})
  # checking valid options, same as reserve option even though some option dont make any sense
  accepted = [:site, :cluster, :switches, :cpus, :cores, :nodes, :walltime, :cmd,
              :type, :name, :subnets, :env, :vlan, :properties, :resources, :reservation, :wait, :keys]
  unknown = opts.keys - accepted
  raise ArgumentError, "Unrecognized option #{unknown}" unless unknown.empty?

  raise ArgumentError, "Unrecognized job format" unless job.is_a?(G5KJSON)

  environment = opts[:env]
  raise ArgumentError, "Environment must be given" if environment.nil?

  # Deploy on every node assigned to the job unless specific nodes were requested.
  targets = opts[:nodes].nil? ? job['assigned_nodes'] : opts[:nodes]
  raise "Unrecognized nodes format, use an Array" unless targets.is_a?(Array)

  site = @g5k_connection.follow_parent(job).uid

  # Public key sent with the deployment: "<:keys>.pub" when :keys is given,
  # otherwise ~/.ssh/id_rsa.pub if that file exists (empty string if not).
  if opts[:keys].nil?
    default_key = File.expand_path("~/.ssh/id_rsa.pub")
    public_key = File.exist?(default_key) ? File.read(default_key) : ""
  else
    public_key = File.read("#{File.expand_path(opts[:keys])}.pub")
  end

  payload = {
    'nodes' => targets,
    'environment' => environment,
    'key' => public_key,
  }

  # When the job holds a VLAN, its first id is passed along with the deployment.
  unless job.resources["vlans"].nil?
    vlan = job.resources["vlans"].first
    payload['vlan'] = vlan
    info "Found VLAN with uid = #{vlan}"
  end

  info "Creating deployment"

  begin
    deployment = @g5k_connection.post_json(api_uri("sites/#{site}/deployments"), payload)
  rescue Error => e
    info "Fail to deploy"
    info e.message
    e.http_body.split("\\n").each { |line| info line }
    raise
  end

  # Record the new deployment on the job itself.
  job["deploy"] = [] if job["deploy"].nil?
  job["deploy"].push(deployment)

  job = wait_for_deploy(job) if opts[:wait] == true

  job
end
|
1085
|
+
|
1086
|
+
# Returns the status of all deployments performed within a job.
|
1087
|
+
# The results can be filtered using a Hash with valid deployment properties
|
1088
|
+
# described in {https://api.grid5000.fr/doc/4.0/reference/spec.html Grid'5000 API spec}.
|
1089
|
+
#
|
1090
|
+
# = Example
|
1091
|
+
#
|
1092
|
+
# deploy_status(job, :nodes => ["adonis-10.grenoble.grid5000.fr"], :status => "terminated")
|
1093
|
+
#
|
1094
|
+
# @return [Array] status of deploys within a job
|
1095
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
1096
|
+
# @param filter [Hash] filter the deployments to be returned.
|
1097
|
+
def deploy_status(job, filter = {})
  # Refresh every deployment recorded on the job (replaced in place).
  job["deploy"].map! { |d| d.refresh(@g5k_connection) }

  # Ignore nil/false criteria, working on a copy so the caller's Hash is
  # left untouched.
  criteria = filter.select { |_key, value| value }

  deployments = job["deploy"]
  unless criteria.empty?
    # Keep only the deployments whose properties equal every criterion.
    deployments = deployments.select do |d|
      criteria.all? { |key, value| d[key.to_s] == value }
    end
  end

  deployments.map { |d| d["status"] }.compact
end
|
1110
|
+
|
1111
|
+
# Blocks until deployments have *terminated* status
|
1112
|
+
#
|
1113
|
+
# = Examples
|
1114
|
+
# This method requires a job as a parameter and it will block by default until all deployments
|
1115
|
+
# within the job pass from *processing* status to *terminated* status.
|
1116
|
+
#
|
1117
|
+
# wait_for_deploy(job)
|
1118
|
+
#
|
1119
|
+
# You can wait for specific deployments using the option *:nodes*. This can be useful when performing different deployments on the reserved resources.
|
1120
|
+
#
|
1121
|
+
# wait_for_deploy(job, :nodes => ["adonis-10.grenoble.grid5000.fr"])
|
1122
|
+
#
|
1123
|
+
# Another parameter you can specify is *:wait_time* that allows you to timeout the deployment (by default is 10h).
|
1124
|
+
# The method will throw a {Cute::G5K::EventTimeout Timeout} exception
|
1125
|
+
# that you can catch and react upon. This example illustrates how this can be used.
|
1126
|
+
#
|
1127
|
+
# require 'cute'
|
1128
|
+
#
|
1129
|
+
# g5k = Cute::G5K::API.new()
|
1130
|
+
#
|
1131
|
+
# job = g5k.reserve(:nodes => 1, :site => 'lyon', :env => 'wheezy-x64-base')
|
1132
|
+
#
|
1133
|
+
# begin
|
1134
|
+
# g5k.wait_for_deploy(job,:wait_time => 100)
|
1135
|
+
# rescue Cute::G5K::EventTimeout
|
1136
|
+
# puts "We waited too long let's release the job"
|
1137
|
+
# g5k.release(job)
|
1138
|
+
# end
|
1139
|
+
#
|
1140
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
1141
|
+
# @param opts [Hash] options
|
1142
|
+
def wait_for_deploy(job, opts = {})
  raise "Deploy information not present in the given job" if job["deploy"].nil?

  # Read the timeout into a local (default 36000 s = 10 h) instead of merging
  # the default into the caller's Hash.
  wait_time = opts[:wait_time].nil? ? 36000 : opts[:wait_time]
  nodes = opts[:nodes]

  begin
    Timeout.timeout(wait_time) do
      # it will ask just for processing status
      status = deploy_status(job, { :nodes => nodes, :status => "processing" })
      until status.empty?
        info "Waiting for #{status.length} deployment"
        sleep 4
        status = deploy_status(job, { :nodes => nodes, :status => "processing" })
      end
      info "Deployment finished"
      return job
    end
  rescue Timeout::Error
    # Translate the stdlib timeout into the library's own exception.
    raise EventTimeout.new("Timeout triggered")
  end
end
|
1166
|
+
|
1167
|
+
private
|
1168
|
+
# Handles the output of messages within the module
|
1169
|
+
# @param msg [String] message to show
|
1170
|
+
def info(msg)
  # A configured logger takes precedence over standard output.
  return @logger.info(msg) unless @logger.nil?

  # No logger: print the message prefixed with a millisecond timestamp.
  stamp = Time.now.strftime('%Y-%m-%d %H:%M:%S.%L')
  puts "#{stamp} => #{msg}"
end
|
1179
|
+
|
1180
|
+
# @return a valid Grid'5000 resource
|
1181
|
+
# it avoids "//"
|
1182
|
+
def api_uri(path)
  # Strip a single leading slash so joining with the API version never yields "//".
  relative = path.start_with?('/') ? path[1..-1] : path
  "#{@api_version}/#{relative}"
end
|
1186
|
+
|
1187
|
+
end
|
1188
|
+
|
1189
|
+
end
|
1190
|
+
end
|