ruby-cute 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.yardopts +2 -0
- data/Gemfile +6 -0
- data/README.md +137 -6
- data/Rakefile +48 -0
- data/bin/cute +22 -0
- data/debian/changelog +5 -0
- data/debian/compat +1 -0
- data/debian/control +15 -0
- data/debian/copyright +33 -0
- data/debian/ruby-cute.docs +2 -0
- data/debian/ruby-tests.rb +2 -0
- data/debian/rules +19 -0
- data/debian/source/format +1 -0
- data/debian/watch +2 -0
- data/examples/distem-bootstrap +516 -0
- data/examples/g5k_exp1.rb +41 -0
- data/examples/g5k_exp_virt.rb +129 -0
- data/lib/cute.rb +7 -2
- data/lib/cute/bash.rb +337 -0
- data/lib/cute/configparser.rb +404 -0
- data/lib/cute/execute.rb +272 -0
- data/lib/cute/extensions.rb +38 -0
- data/lib/cute/g5k_api.rb +1190 -0
- data/lib/cute/net-ssh.rb +144 -0
- data/lib/cute/net.rb +29 -0
- data/lib/cute/synchronization.rb +89 -0
- data/lib/cute/taktuk.rb +554 -0
- data/lib/cute/version.rb +3 -0
- data/ruby-cute.gemspec +32 -0
- data/spec/extensions_spec.rb +17 -0
- data/spec/g5k_api_spec.rb +192 -0
- data/spec/spec_helper.rb +66 -0
- data/spec/taktuk_spec.rb +129 -0
- data/test/test_bash.rb +71 -0
- metadata +204 -47
@@ -0,0 +1,38 @@
|
|
1
|
+
# Extends the class string for supporting timespan formats
|
2
|
+
class String

  # Converts a timespan description into a number of seconds.
  #
  # Accepted formats:
  # - 'always', 'forever' or 'infinitely' (returns Float::INFINITY)
  # - 'HH:MM' or 'HH:MM:SS' (returns an Integer)
  # - a number followed by an optional unit s, m, h or d,
  #   e.g. '90', '10m', '1.5h', '2 d' (returns a Float)
  #
  # @return [Numeric] the timespan expressed in seconds
  # @raise [RuntimeError] if the string is not a timespan or uses an unknown unit
  def to_secs
    return Float::INFINITY if [ 'always', 'forever', 'infinitely' ].include?(self.to_s)

    # String#to_i never raises, so every colon-separated part becomes an Integer
    # (non-numeric parts count as 0).
    parts = self.split(':').map { |x| x.to_i }
    if [ 2, 3 ].include?(parts.length)
      # zip stops at the shortest list, so 'HH:MM' simply has no seconds term
      return parts.zip([ 3600, 60, 1 ]).map { |x, y| x * y }.reduce(:+)
    end

    m = /\A(\d+(?:\.\d*)?)\s*(\w*)\Z/.match(self)
    raise "Invalid timespan: '#{self}'" if m.nil?
    num, unit = m.captures
    mul = case unit
          when '', 's' then 1
          when 'm' then 60
          when 'h' then 60 * 60
          when 'd' then 24 * 60 * 60
          end
    raise "Unknown timespan unit: '#{unit}' in #{self}" if mul.nil?
    return num.to_f * mul
  end

  # Formats the timespan as 'HH:MM', rounding any started minute up.
  #
  # Infinite timespans cannot be formatted: converting Float::INFINITY to an
  # Integer raises FloatDomainError.
  #
  # @return [String] the timespan as hours and minutes
  def to_time
    secs = self.to_secs.to_i
    # Round up to whole minutes first so that the carry reaches the hours
    # (e.g. 00:59:30 becomes 01:00 instead of 00:60).
    total_minutes = (secs + 59) / 60
    hours, minutes = total_minutes.divmod(60)
    return '%.02d:%.02d' % [ hours, minutes ]
  end

  # @return [Boolean] true if the whole string is an optionally signed integer
  def is_i?
    /\A[-+]?\d+\z/ === self
  end

end
|
data/lib/cute/g5k_api.rb
ADDED
@@ -0,0 +1,1190 @@
|
|
1
|
+
require 'restclient'
require 'yaml'
require 'json'
require 'ipaddress'
require 'uri'
require 'cgi'
|
6
|
+
|
7
|
+
module Cute
|
8
|
+
module G5K
|
9
|
+
|
10
|
+
# = {Cute::G5K} exceptions
|
11
|
+
#
|
12
|
+
# The generated exceptions are divided in 5 groups:
|
13
|
+
#
|
14
|
+
# - {Cute::G5K::BadRequest BadRequest} it means that the syntax you passed to some {Cute::G5K::API G5K::API} method is not correct from
|
15
|
+
# the Grid'5000 services point of view.
|
16
|
+
# - {Cute::G5K::RequestFailed RequestFailed} it means that there is a server problem or there is nothing the user can do to solve the problem.
|
17
|
+
# - {Cute::G5K::NotFound} it means that the requested resources do not exist.
|
18
|
+
# - {Cute::G5K::Unauthorized} it means that there is an authentication problem.
|
19
|
+
# - {Cute::G5K::EventTimeout} this exception is triggered by the methods that wait for events such as:
|
20
|
+
# job submission and environment deployment.
|
21
|
+
# Base class of all {Cute::G5K} exceptions. It keeps the exception that caused
# the failure in +orig+ and forwards unknown method calls to it.
#
# NOTE(review): this class derives from Exception, not StandardError, so a
# bare `rescue` does not catch it; rescue the class explicitly.
class Error < Exception
  attr_accessor :orig # Original exception

  # @param message [String] description of the failure
  # @param object [Object] the original exception, if any
  def initialize(message = nil, object = nil)
    super(message)
    self.orig = object
  end

  # Forwards any method this class does not define to the wrapped exception.
  # Arguments and block are passed along. When nothing is wrapped, or the
  # wrapped object does not know the method, the usual NoMethodError is
  # raised on this object.
  def method_missing(method, *args, &block)
    return super unless orig.respond_to?(method, true)
    return orig.send(method, *args, &block)
  end

  # Keeps respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    orig.respond_to?(method, include_private) || super
  end
end
|
33
|
+
|
34
|
+
# It wraps the http response 400 that corresponds to a bad request.
|
35
|
+
# When using the {Cute::G5K::API#reserve reserve} or {Cute::G5K::API#deploy deploy} methods this could mean:
|
36
|
+
# a bad syntax in the request, not valid properties in the request,
|
37
|
+
# not enough resources to supply the request, non existing environment, etc.
|
38
|
+
#
|
39
|
+
# = Example
|
40
|
+
#
|
41
|
+
# You can handle this exception and decide what to do with your experiment.
|
42
|
+
# In the example below, we iterate over all sites until a site has resources with the property 'ib20g' set to 'YES'.
|
43
|
+
#
|
44
|
+
# require 'cute'
|
45
|
+
#
|
46
|
+
# g5k = Cute::G5K::API.new()
|
47
|
+
#
|
48
|
+
# sites = g5k.site_uids
|
49
|
+
#
|
50
|
+
# sites.each do |site|
|
51
|
+
#
|
52
|
+
# begin
|
53
|
+
# job = g5k.reserve(:site => site, :resources => "{ib20g='YES'}/nodes=2/core=1",:walltime => '00:30:00', :keys => "~/my_ssh_jobkey" )
|
54
|
+
# rescue Cute::G5K::BadRequest
|
55
|
+
# puts "Resource not available in this site, trying with another one"
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# end
|
59
|
+
class BadRequest < Error
|
60
|
+
end
|
61
|
+
|
62
|
+
# It wraps all Restclient exceptions with http codes: 403, 405,406, 412, 415, 500, 502, 503 and 504.
|
63
|
+
class RequestFailed < Error
|
64
|
+
end
|
65
|
+
|
66
|
+
# It wraps the exceptions generated by Timeout::Error
|
67
|
+
class EventTimeout < Error
|
68
|
+
end
|
69
|
+
|
70
|
+
# It wraps the Restclient exception 404
|
71
|
+
class NotFound < Error
|
72
|
+
end
|
73
|
+
|
74
|
+
# It wraps the Restclient exception RestClient::Unauthorized
|
75
|
+
class Unauthorized < Error
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
# @api private
|
80
|
+
# Array subclass with helpers for lists of API items.
# G5KJSON.parse passes it to JSON.parse as :array_class.
class G5KArray < Array
|
81
|
+
|
82
|
+
# @return [Array] the 'uid' entry of every element
def uids
|
83
|
+
return self.map { |it| it['uid'] }
|
84
|
+
end
|
85
|
+
|
86
|
+
# @return the 'href' of the link whose 'rel' is 'self' (delegates to #rel)
def rel_self
|
87
|
+
return rel('self')
|
88
|
+
end
|
89
|
+
|
90
|
+
# Looks up the 'href' of the link whose 'rel' equals +r+.
# NOTE(review): self['links'] indexes an Array with a String, which raises
# TypeError in Ruby, so this method (and #rel_self) cannot succeed as written.
# Confirm whether it is meant to exist only on G5KJSON.
def rel(r)
|
91
|
+
return self['links'].detect { |x| x['rel'] == r }['href']
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
# Provides an abstraction for handling G5K responses.
|
97
|
+
# @api private
|
98
|
+
# @see https://api.grid5000.fr/doc/3.0/reference/grid5000-media-types.html
|
99
|
+
# When this structure is used to describe jobs, it is expected to have the
|
100
|
+
# following fields which depend on the version of the API.
|
101
|
+
# {"uid"=>604692,
|
102
|
+
# "user_uid"=>"name",
|
103
|
+
# "user"=>"name",
|
104
|
+
# "walltime"=>3600,
|
105
|
+
# "queue"=>"default",
|
106
|
+
# "state"=>"running",
|
107
|
+
# "project"=>"default",
|
108
|
+
# "name"=>"rubyCute job",
|
109
|
+
# "types"=>["deploy"],
|
110
|
+
# "mode"=>"PASSIVE",
|
111
|
+
# "command"=>"./oarapi.subscript.ZzvnM",
|
112
|
+
# "submitted_at"=>1423575384,
|
113
|
+
# "scheduled_at"=>1423575386,
|
114
|
+
# "started_at"=>1423575386,
|
115
|
+
# "message"=>"FIFO scheduling OK",
|
116
|
+
# "properties"=>"(deploy = 'YES') AND maintenance = 'NO'",
|
117
|
+
# "directory"=>"/home/name",
|
118
|
+
# "events"=>[],
|
119
|
+
# "links"=>[{"rel"=>"self", "href"=>"/sid/sites/nancy/jobs/604692", "type"=>"application/vnd.grid5000.item+json"},
|
120
|
+
# {"rel"=>"parent", "href"=>"/sid/sites/nancy", "type"=>"application/vnd.grid5000.item+json"}],
|
121
|
+
# "resources_by_type"=>
|
122
|
+
# {"cores"=>
|
123
|
+
# ["griffon-8.nancy.grid5000.fr",
|
124
|
+
# "griffon-8.nancy.grid5000.fr",
|
125
|
+
# "griffon-8.nancy.grid5000.fr",
|
126
|
+
# "griffon-8.nancy.grid5000.fr",
|
127
|
+
# "griffon-9.nancy.grid5000.fr",
|
128
|
+
# "griffon-9.nancy.grid5000.fr",
|
129
|
+
# "griffon-9.nancy.grid5000.fr",
|
130
|
+
# "griffon-9.nancy.grid5000.fr",
|
131
|
+
# "griffon-77.nancy.grid5000.fr",
|
132
|
+
# "griffon-77.nancy.grid5000.fr",
|
133
|
+
# "griffon-77.nancy.grid5000.fr",
|
134
|
+
# "griffon-77.nancy.grid5000.fr",
|
135
|
+
# "vlans"=>["5"]},
|
136
|
+
# "assigned_nodes"=>["griffon-8.nancy.grid5000.fr", "griffon-9.nancy.grid5000.fr", "griffon-77.nancy.grid5000.fr"],
|
137
|
+
# "deploy"=>
|
138
|
+
# {"created_at"=>1423575401,
|
139
|
+
# "environment"=>"http://public.sophia.grid5000.fr/~nniclausse/openmx.dsc",
|
140
|
+
# "key"=>"https://api.grid5000.fr/sid/sites/nancy/files/cruizsanabria-key-84f3f1dbb1279bc1bddcd618e26c960307d653c5",
|
141
|
+
# "nodes"=>["griffon-8.nancy.grid5000.fr", "griffon-9.nancy.grid5000.fr", "griffon-77.nancy.grid5000.fr"],
|
142
|
+
# "site_uid"=>"nancy",
|
143
|
+
# "status"=>"processing",
|
144
|
+
# "uid"=>"D-751096de-0c33-461a-9d27-56be1b2dd980",
|
145
|
+
# "updated_at"=>1423575401,
|
146
|
+
# "user_uid"=>"cruizsanabria",
|
147
|
+
# "vlan"=>5,
|
148
|
+
# "links"=>
|
149
|
+
# [{"rel"=>"self", "href"=>"/sid/sites/nancy/deployments/D-751096de-0c33-461a-9d27-56be1b2dd980", "type"=>"application/vnd.grid5000.item+json"},
|
150
|
+
# Hash subclass with accessors for the fields commonly found in
# Grid'5000 API responses.
class G5KJSON < Hash

  # @return the 'items' entry (the elements of a collection response)
  def items
    return self['items']
  end

  # @return the 'nodes' entry
  def nodes
    return self['nodes']
  end

  # @return [Hash] the 'resources_by_type' entry, or an empty Hash when absent
  def resources
    return self['resources_by_type'] || {}
  end

  # Looks up a link of this resource by its relation name.
  #
  # @param r [String] relation name, e.g. 'self' or 'parent'
  # @return [String] the 'href' of the matching link
  # @raise [KeyError] if the resource has no link with that relation
  def rel(r)
    # Array() turns a missing 'links' entry into an empty list
    link = Array(self['links']).find { |x| x['rel'] == r }
    raise KeyError, "No link with rel '#{r}' in this resource" if link.nil?
    return link['href']
  end

  # @return the 'uid' entry
  def uid
    return self['uid']
  end

  # @return [String] the 'href' of the 'self' link
  def rel_self
    return rel('self')
  end

  # @return [String] the 'href' of the 'parent' link
  def rel_parent
    return rel('parent')
  end

  # Fetches this resource again through its 'self' link.
  #
  # @param g5k an object responding to get_json(path)
  def refresh(g5k)
    return g5k.get_json(rel_self)
  end

  # Parses a JSON document, building objects as G5KJSON and arrays as G5KArray.
  #
  # @param s [String] JSON text
  def self.parse(s)
    return JSON.parse(s, :object_class => G5KJSON, :array_class => G5KArray)
  end

end
|
189
|
+
|
190
|
+
# Manages the low level operations for communicating with the REST API.
|
191
|
+
# @api private
|
192
|
+
class G5KRest

  attr_reader :user

  # Initializes a REST connection and checks it by requesting the API root.
  #
  # @param uri [String] resource identifier which normally is the URL of the Rest API
  # @param api_version [String, nil] version of the API to use, "sid" when nil
  # @param user [String] user if authentication is needed
  # @param pass [String] password if authentication is needed
  # @param on_error [Symbol] :ignore makes requests return nil instead of raising
  #   {Cute::G5K::RequestFailed RequestFailed} for otherwise unhandled HTTP errors
  def initialize(uri,api_version,user,pass,on_error)
    @user = user
    @pass = pass
    @api_version = api_version.nil? ? "sid" : api_version
    if user.nil? || pass.nil?
      @endpoint = uri # Inside Grid'5000
    else
      user_escaped = CGI.escape(user)
      pass_escaped = CGI.escape(pass)
      # Credentials are embedded in the URL, so drop the scheme before re-adding it.
      host = uri.sub(%r{\Ahttps://}, '')
      @endpoint = "https://#{user_escaped}:#{pass_escaped}@#{host}"
    end

    machine = `uname -ov`.strip
    @user_agent = "ruby-cute/#{VERSION} (#{machine}) Ruby #{RUBY_VERSION}"
    @api = RestClient::Resource.new(@endpoint, :timeout => 30)
    @on_error = on_error
    test_connection
  end

  # Returns a resource object
  # @param path [String] this complements the URI to address to a specific resource
  def resource(path)
    path = path[1..-1] if path.start_with?('/')
    return @api[path]
  end

  # Performs a GET request and parses the answer.
  #
  # @return [G5KJSON, nil] the parsed HTTP response, or nil when the request
  #   failed and errors are ignored (on_error set to :ignore)
  # @param path [String] this complements the URI to address to a specific resource
  def get_json(path)
    response = begin
      resource(path).get(:content_type => "application/json",
                         :user_agent => @user_agent)
    rescue => e
      handle_exception(e)
    end
    # handle_exception returns nil only when the error is ignored: nothing to parse
    return response.nil? ? nil : G5KJSON.parse(response)
  end

  # Creates a resource on the server
  #
  # @return [G5KJSON, nil] the parsed HTTP response, or nil when the request
  #   failed and errors are ignored (on_error set to :ignore)
  # @param path [String] this complements the URI to address to a specific resource
  # @param json [Hash] contains the characteristics of the resources to be created.
  def post_json(path, json)
    response = begin
      resource(path).post(json.to_json,
                          :content_type => "application/json",
                          :accept => "application/json",
                          :user_agent => @user_agent)
    rescue => e
      handle_exception(e)
    end
    return response.nil? ? nil : G5KJSON.parse(response)
  end

  # Deletes a resource on the server.
  # Only internal server errors are translated into RequestFailed here;
  # any other exception raised by the HTTP client propagates unchanged.
  #
  # @param path [String] this complements the URI to address to a specific resource
  def delete_json(path)
    begin
      return resource(path).delete()
    rescue RestClient::InternalServerError => e
      raise RequestFailed.new("Service internal error", e)
    end
  end

  # @return the resource reached by following the 'parent' link of +obj+
  def follow_parent(obj)
    get_json(obj.rel_parent)
  end

  private

  # Tests the connection and raises an error in case of a problem
  def test_connection
    begin
      return get_json("/#{@api_version}/")
    rescue Cute::G5K::Unauthorized
      raise "Your Grid'5000 credentials are not recognized"
    end
  end

  # Issues a Cute::G5K exception according to the http status code.
  #
  # @param e [Exception] the exception rescued around an HTTP request
  # @return [nil] only when the error is ignored (on_error set to :ignore)
  def handle_exception(e)
    # Failures without an HTTP status (DNS, refused connection, ...) are
    # re-raised as they are instead of being hidden behind a NoMethodError.
    raise e unless e.respond_to?(:http_code)

    case e.http_code
    when 500
      # This part deals with bug: https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=5912
      # Grid'5000 returns 500 error code even though the error was generated by a bad request
      if embedded_error_code(e) == 400
        raise BadRequest.new("Bad request", e)
      else
        raise RequestFailed.new("Service internal error", e)
      end
    when 400
      raise BadRequest.new("Bad request", e)
    when 404
      raise NotFound.new("Resource not found", e)
    when 401
      raise Unauthorized.new("Authentication problem",e)
    else
      if @on_error == :ignore
        return nil
      else
        raise RequestFailed.new("Service internal error", e)
      end
    end
  end

  # Extracts the "code" value from the first line of the error body that
  # mentions it. Returns nil when the body is missing or cannot be parsed.
  def embedded_error_code(e)
    line = e.http_body.to_s.split("\n").find { |x| x.include?("code") }
    return JSON.parse("{#{line}}")["code"]
  rescue JSON::ParserError
    return nil
  end

end
|
309
|
+
|
310
|
+
# This class helps you to access Grid'5000 REST API.
|
311
|
+
# Thus, the most common actions such as reservation of nodes and deployment can be easily scripted.
|
312
|
+
# To simplify the use of the module, it is better to create a file with the following information:
|
313
|
+
#
|
314
|
+
# $ cat > ~/.grid5000_api.yml << EOF
|
315
|
+
# $ uri: https://api.grid5000.fr/
|
316
|
+
# $ username: user
|
317
|
+
# $ password: **********
|
318
|
+
# $ version: sid
|
319
|
+
# $ EOF
|
320
|
+
#
|
321
|
+
# The *username* and *password* are not necessary if you are using the module from inside Grid'5000.
|
322
|
+
# You can take a look at the {Cute::G5K::API#initialize G5K::API constructor} to see more details for
|
323
|
+
# this configuration.
|
324
|
+
#
|
325
|
+
# = Getting started
|
326
|
+
#
|
327
|
+
# As already said, the goal of {Cute::G5K::API G5K::API} class is to present a high level abstraction to manage the most common activities
|
328
|
+
# in Grid'5000 such as: the reservation of resources and the deployment of environments.
|
329
|
+
# Consequently, these activities can be easily scripted using Ruby.
|
330
|
+
# The advantage of this is that you can use all Ruby constructs (e.g., loops, conditionals, blocks, iterators, etc) to script your experiments.
|
331
|
+
# In the presence of error, {Cute::G5K::API G5K::API} raises exceptions (see {Cute::G5K::Error G5K exceptions}),
|
332
|
+
# that you can handle to decide the workflow of your experiment
|
333
|
+
# (see {Cute::G5K::API#wait_for_deploy wait_for_deploy} and {Cute::G5K::API#wait_for_job wait_for_job}).
|
334
|
+
# Let's show how {Cute::G5K::API G5K::API} is used through an example, suppose we want to reserve 3 nodes in Nancy site for 1 hour.
|
335
|
+
# In order to do that we would write something like this:
|
336
|
+
#
|
337
|
+
# require 'cute'
|
338
|
+
#
|
339
|
+
# g5k = Cute::G5K::API.new()
|
340
|
+
#
|
341
|
+
# job = g5k.reserve(:nodes => 3, :site => 'nancy', :walltime => '01:00:00')
|
342
|
+
#
|
343
|
+
# puts "Assigned nodes : #{job['assigned_nodes']}"
|
344
|
+
#
|
345
|
+
# If that is all you want to do, you can write that into a file, let's say *example.rb* and execute it using the Ruby interpreter.
|
346
|
+
#
|
347
|
+
# $ ruby example.rb
|
348
|
+
#
|
349
|
+
# The execution will block until you got the reservation. Then, you can interact with the nodes you reserved the way you used to or
|
350
|
+
# add more code to the previous script for controlling your experiment with Ruby-Cute as shown in this
|
351
|
+
# {http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/file/examples/g5k_exp_virt.rb example}.
|
352
|
+
# We have just used the method {Cute::G5K::API#reserve reserve} that allows us to reserve resources in Grid'5000.
|
353
|
+
# This method can be used to reserve resources in deployment mode and deploy our own software environment on them using
|
354
|
+
# {http://kadeploy3.gforge.inria.fr/ Kadeploy}. For this we use the option *:env* of the {Cute::G5K::API#reserve reserve} method.
|
355
|
+
# Therefore, it will first reserve the resources and then deploy the specified environment.
|
356
|
+
# The method {Cute::G5K::API#reserve reserve} will block until the deployment is done.
|
357
|
+
# The following Ruby script illustrates all we have just said.
|
358
|
+
#
|
359
|
+
# require 'cute'
|
360
|
+
#
|
361
|
+
# g5k = Cute::G5K::API.new()
|
362
|
+
#
|
363
|
+
# job = g5k.reserve(:nodes => 1, :site => 'grenoble', :walltime => '00:40:00', :env => 'wheezy-x64-base')
|
364
|
+
#
|
365
|
+
# puts "Assigned nodes : #{job['assigned_nodes']}"
|
366
|
+
#
|
367
|
+
# Your public ssh key located in ~/.ssh will be copied by default on the deployed machines,
|
368
|
+
# you can specify another path for your keys with the option *:keys*.
|
369
|
+
# In order to deploy your own environment, you have to put the tar file that contains the operating system you want to deploy and
|
370
|
+
# the environment description file, under the public directory of a given site.
|
371
|
+
# *VLANS* are supported by adding the parameter :vlan => type where type can be: *:routed*, *:local*, *:global*.
|
372
|
+
# The following example, reserves 10 nodes in the Lille site, starts the deployment of a custom environment over the nodes
|
373
|
+
# and puts the nodes under a routed VLAN. We used the method {Cute::G5K::API#get_vlan_nodes get_vlan_nodes} to get the
|
374
|
+
# new hostnames assigned to your nodes.
|
375
|
+
#
|
376
|
+
# require 'cute'
|
377
|
+
#
|
378
|
+
# g5k = Cute::G5K::API.new()
|
379
|
+
#
|
380
|
+
# job = g5k.reserve(:site => "lille", :nodes => 10,
|
381
|
+
# :env => 'https://public.lyon.grid5000.fr/~user/debian_custom_img.yaml',
|
382
|
+
# :vlan => :routed, :keys => "~/my_ssh_key")
|
383
|
+
#
|
384
|
+
#
|
385
|
+
# puts "Log in into the nodes using the following hostnames: #{g5k.get_vlan_nodes(job)}"
|
386
|
+
#
|
387
|
+
# If you do not want the method {Cute::G5K::API#reserve reserve} to perform the deployment for you, you have to use the option :type => :deploy.
|
388
|
+
# This can be useful when deploying different environments in your reserved nodes. For example deploying the environments for a small HPC cluster.
|
389
|
+
# You have to use the method {Cute::G5K::API#deploy deploy} for performing the deploy.
|
390
|
+
# This method does not block by default, that is why you have to use the method {Cute::G5K::API#wait_for_deploy wait_for_deploy} in order to block the execution
|
391
|
+
# until the deployment is done.
|
392
|
+
#
|
393
|
+
# require 'cute'
|
394
|
+
#
|
395
|
+
# g5k = Cute::G5K::API.new()
|
396
|
+
#
|
397
|
+
# job = g5k.reserve(:site => "lyon", :nodes => 5, :walltime => "03:00:00", :type => :deploy)
|
398
|
+
#
|
399
|
+
# nodes = job["assigned_nodes"]
|
400
|
+
#
|
401
|
+
# slaves = nodes[1..4]
|
402
|
+
# master = nodes-slaves
|
403
|
+
#
|
404
|
+
# g5k.deploy(job,:nodes => master, :env => 'https://public.lyon.grid5000.fr/~user/debian_master_img.yaml')
|
405
|
+
# g5k.deploy(job,:nodes => slaves, :env => 'https://public.lyon.grid5000.fr/~user/debian_slaves_img.yaml')
|
406
|
+
#
|
407
|
+
# g5k.wait_for_deploy(job)
|
408
|
+
#
|
409
|
+
# puts "master node: #{master}"
|
410
|
+
# puts "slaves nodes: #{slaves}"
|
411
|
+
#
|
412
|
+
# You can check out the documentation of {Cute::G5K::API#reserve reserve} and {Cute::G5K::API#deploy deploy} methods
|
413
|
+
# to know all the parameters supported and more complex uses.
|
414
|
+
#
|
415
|
+
# == Other useful methods
|
416
|
+
#
|
417
|
+
# Let's use *pry* to show other useful methods. As shown in {file:README.md Ruby Cute} the *cute* command will open a
|
418
|
+
# pry shell with some modules preloaded and it will create the variable $g5k to access {Cute::G5K::API G5K::API} class.
|
419
|
+
# Therefore, we can consult the name of the cluster available in a specific site.
|
420
|
+
#
|
421
|
+
# [4] pry(main)> $g5k.cluster_uids("grenoble")
|
422
|
+
# => ["adonis", "edel", "genepi"]
|
423
|
+
#
|
424
|
+
# As well as the deployable environments:
|
425
|
+
#
|
426
|
+
# [6] pry(main)> $g5k.environment_uids("grenoble")
|
427
|
+
# => ["squeeze-x64-base", "squeeze-x64-big", "squeeze-x64-nfs", "wheezy-x64-base", "wheezy-x64-big", "wheezy-x64-min", "wheezy-x64-nfs", "wheezy-x64-xen"]
|
428
|
+
#
|
429
|
+
# For getting a list of sites available in Grid'5000 you can use:
|
430
|
+
#
|
431
|
+
# [7] pry(main)> $g5k.site_uids()
|
432
|
+
# => ["grenoble", "lille", "luxembourg", "lyon",...]
|
433
|
+
#
|
434
|
+
# We can get the status of nodes in a given site by using:
|
435
|
+
#
|
436
|
+
# [8] pry(main)> $g5k.nodes_status("lyon")
|
437
|
+
# => {"taurus-2.lyon.grid5000.fr"=>"besteffort", "taurus-16.lyon.grid5000.fr"=>"besteffort", "taurus-15.lyon.grid5000.fr"=>"besteffort", ...}
|
438
|
+
#
|
439
|
+
# We can get information about our submitted jobs by using:
|
440
|
+
#
|
441
|
+
# [11] pry(main)> $g5k.get_my_jobs("grenoble")
|
442
|
+
# => [{"uid"=>1679094,
|
443
|
+
# "user_uid"=>"cruizsanabria",
|
444
|
+
# "user"=>"cruizsanabria",
|
445
|
+
# "walltime"=>3600,
|
446
|
+
# "queue"=>"default",
|
447
|
+
# "state"=>"running", ...}, ...]
|
448
|
+
#
|
449
|
+
# If we are done with our experiment, we can release the submitted job or all jobs in a given site as follows:
|
450
|
+
#
|
451
|
+
# [12] pry(main)> $g5k.release(job)
|
452
|
+
# [13] pry(main)> $g5k.release_all("grenoble")
|
453
|
+
class API
|
454
|
+
|
455
|
+
# Assigns a logger
|
456
|
+
#
|
457
|
+
# = Examples
|
458
|
+
# You can use this attribute to control how to log all messages produced by {Cute::G5K::API G5K::API}.
|
459
|
+
# For example, below we use the logger available in Ruby standard library.
|
460
|
+
#
|
461
|
+
# require 'cute'
|
462
|
+
# require 'logger'
|
463
|
+
#
|
464
|
+
# g5k = Cute::G5K::API.new()
|
465
|
+
#
|
466
|
+
# g5k.logger = Logger.new('experiment_1.log')
|
467
|
+
attr_accessor :logger
|
468
|
+
# Initializes a REST connection for Grid'5000 API
|
469
|
+
#
|
470
|
+
# = Example
|
471
|
+
# You can specify another configuration file using the option *:conf_file*, for example:
|
472
|
+
#
|
473
|
+
# g5k = Cute::G5K::API.new(:conf_file =>"config file path")
|
474
|
+
#
|
475
|
+
# You can specify other parameter to use:
|
476
|
+
#
|
477
|
+
# g5k = Cute::G5K::API.new(:uri => "https://api.grid5000.fr", :version => "sid")
|
478
|
+
#
|
479
|
+
# If you want to ignore {Cute::G5K::RequestFailed RequestFailed} exceptions you can use:
|
480
|
+
#
|
481
|
+
# g5k = Cute::G5K::API.new(:on_error => :ignore)
|
482
|
+
#
|
483
|
+
# @param [Hash] params Contains initialization parameters.
|
484
|
+
# @option params [String] :conf_file Path for configuration file
|
485
|
+
# @option params [String] :uri REST API URI to contact
|
486
|
+
# @option params [String] :version Version of the REST API to use
|
487
|
+
# @option params [String] :user Username to access the REST API
|
488
|
+
# @option params [String] :pass Password to access the REST API
|
489
|
+
# @option params [Symbol] :on_error Set to :ignore if you want to ignore {Cute::G5K::RequestFailed RequestFailed} exceptions.
|
490
|
+
# Reads the connection settings and opens the REST connection.
# Values given in +params+ take precedence over the configuration file.
# The file ~/.grid5000_api.yml is used when it exists and no :conf_file is given.
#
# @param params [Hash] initialization parameters (:conf_file, :uri, :version,
#   :user, :pass, :on_error)
# @raise [RuntimeError] if the connection to the API cannot be established
def initialize(params={})
  default_file = "#{ENV['HOME']}/.grid5000_api.yml"

  # Use a local variable so the caller's hash is left untouched.
  conf_file = params[:conf_file]
  conf_file = default_file if conf_file.nil? && File.exist?(default_file)

  # YAML.load_file opens and closes the file itself; an empty file yields a
  # falsy value, hence the fallback to an empty configuration.
  config = conf_file.nil? ? {} : (YAML.load_file(conf_file) || {})

  @user = params[:user] || config["username"]
  @pass = params[:pass] || config["password"]
  @uri = params[:uri] || config["uri"]
  @api_version = params[:version] || config["version"] || "sid"
  @logger = nil

  begin
    @g5k_connection = G5KRest.new(@uri,@api_version,@user,@pass,params[:on_error])
  rescue => e
    msg_create_file = ""
    if !File.exist?(default_file) && params[:conf_file].nil?
      msg_create_file = "Please create the file: ~/.grid5000_api.yml and " \
                        "put the necessary credentials or use the option " \
                        ":conf_file to indicate another file for the credentials"
    end
    # Keep the underlying cause visible: it is not always an authentication problem.
    raise "Unable to authorize against the Grid'5000 API (#{e.class}: #{e.message}).\n#{msg_create_file}"
  end
end
|
519
|
+
|
520
|
+
# It returns the site name. Example:
|
521
|
+
# site #=> "rennes"
|
522
|
+
# This will only work when {Cute::G5K::API G5K::API} is used within Grid'5000.
|
523
|
+
# In the other cases it will return *nil*
|
524
|
+
# @return [String] the site name where the method is called on
|
525
|
+
# Extracts the site name from the local hostname, which is expected to look
# like <host>.<site>.grid5000.fr.
#
# @return [String, nil] the site name, or nil when the hostname does not
#   belong to the grid5000.fr domain
def site
  # strip (not chop) so that a hostname without trailing newline stays intact
  host = `hostname`.strip
  res = /\.([^.]+)\.grid5000\.fr/.match(host)
  res[1] unless res.nil?
end
|
530
|
+
|
531
|
+
# @api private
|
532
|
+
# @return the rest point for performing low level REST requests
|
533
|
+
def rest
|
534
|
+
@g5k_connection
|
535
|
+
end
|
536
|
+
|
537
|
+
# @return [String] Grid'5000 user
|
538
|
+
def g5k_user
|
539
|
+
return @user.nil? ? ENV['USER'] : @user
|
540
|
+
end
|
541
|
+
|
542
|
+
# Returns all sites identifiers
|
543
|
+
#
|
544
|
+
# = Example:
|
545
|
+
# site_uids #=> ["grenoble", "lille", "luxembourg", "lyon",...]
|
546
|
+
#
|
547
|
+
# @return [Array] all site identifiers
|
548
|
+
def site_uids
|
549
|
+
return sites.uids
|
550
|
+
end
|
551
|
+
|
552
|
+
# Returns all cluster identifiers
|
553
|
+
#
|
554
|
+
# = Example:
|
555
|
+
# cluster_uids("grenoble") #=> ["adonis", "edel", "genepi"]
|
556
|
+
#
|
557
|
+
# @return [Array] cluster identifiers
|
558
|
+
def cluster_uids(site)
|
559
|
+
return clusters(site).uids
|
560
|
+
end
|
561
|
+
|
562
|
+
# Returns the name of the environments deployable in a given site.
|
563
|
+
# These can be used with {Cute::G5K::API#reserve reserve} and {Cute::G5K::API#deploy deploy} methods
|
564
|
+
#
|
565
|
+
# = Example:
|
566
|
+
# environment_uids("nancy") #=> ["squeeze-x64-base", "squeeze-x64-big", "squeeze-x64-nfs", ...]
|
567
|
+
#
|
568
|
+
# @return [Array] environment identifiers
|
569
|
+
# Lists the environment names of a site with their version suffix removed.
#
# @param site [String] a valid Grid'5000 site name
# @return [Array] environment identifiers, without duplicates
def environment_uids(site)
  # environments are returned by the API following the format squeeze-x64-big-1.8;
  # the greedy first group keeps everything before the last dash, i.e. the
  # name without the version
  names = environments(site).uids.map do |uid|
    versioned = /(.*)-(.*)/.match(uid)
    # an identifier without any dash has no version to strip: keep it as is
    versioned.nil? ? uid : versioned[1]
  end

  return names.uniq
end
|
579
|
+
|
580
|
+
# @return [Hash] all the status information of a given Grid'5000 site
|
581
|
+
# @param site [String] a valid Grid'5000 site name
|
582
|
+
def site_status(site)
|
583
|
+
@g5k_connection.get_json(api_uri("sites/#{site}/status"))
|
584
|
+
end
|
585
|
+
|
586
|
+
# @return [Hash] the nodes state (e.g, free, busy, etc) that belong to a given Grid'5000 site
|
587
|
+
# @param site [String] a valid Grid'5000 site name
|
588
|
+
# Builds a map from node name to the "soft" state reported in the site status.
def nodes_status(site)
  site_status(site).nodes.each_with_object({}) do |(name, info), state_by_node|
    state_by_node[name] = info["soft"]
  end
end
|
597
|
+
|
598
|
+
# @return [Array] the description of all Grid'5000 sites
|
599
|
+
def sites
|
600
|
+
@g5k_connection.get_json(api_uri("sites")).items
|
601
|
+
end
|
602
|
+
|
603
|
+
# @return [Array] the description of clusters that belong to a given Grid'5000 site
|
604
|
+
# @param site [String] a valid Grid'5000 site name
|
605
|
+
def clusters(site)
|
606
|
+
@g5k_connection.get_json(api_uri("sites/#{site}/clusters")).items
|
607
|
+
end
|
608
|
+
|
609
|
+
# @return [Array] the description of all environments registered in a Grid'5000 site
|
610
|
+
def environments(site)
|
611
|
+
@g5k_connection.get_json(api_uri("sites/#{site}/environments")).items
|
612
|
+
end
|
613
|
+
|
614
|
+
# @return [Array] all the jobs submitted in a given Grid'5000 site,
|
615
|
+
# if a uid is provided only the jobs owned by the user are shown.
|
616
|
+
# @param site [String] a valid Grid'5000 site name
|
617
|
+
# @param uid [String] user name in Grid'5000
|
618
|
+
# @param state [String] jobs state: running, waiting
|
619
|
+
# Lists the jobs of a site, optionally filtered by owner and state.
#
# @param site [String] a valid Grid'5000 site name
# @param uid [String] user name in Grid'5000
# @param state [String] jobs state: running, waiting
# @return [Array] the description of every matching job
def get_jobs(site, uid = nil, state = nil)
  query = {}
  query["state"] = state unless state.nil?
  query["user"] = uid unless uid.nil?
  # Requesting all jobs can take a long time (about 23 seconds were observed),
  # so an unfiltered listing is limited to 25 jobs.
  query["limit"] = 25 if query.empty?

  # encode_www_form joins the pairs with '&' and escapes the values
  filter = "?#{URI.encode_www_form(query)}"
  jobs = @g5k_connection.get_json(api_uri("/sites/#{site}/jobs/#{filter}")).items
  # every listed job is fetched again through its 'self' link
  jobs.map { |j| @g5k_connection.get_json(j.rel_self) }
end
|
629
|
+
|
630
|
+
# @return [Array] the last 50 deployments performed in a Grid'5000 site
|
631
|
+
# @param site [String] a valid Grid'5000 site name
|
632
|
+
# @param uid [String] user name in Grid'5000
|
633
|
+
# Lists the deployments of a site, optionally restricted to one user.
#
# @param site [String] a valid Grid'5000 site name
# @param uid [String] user name in Grid'5000; no user filter is sent when nil
# @return [Array] the description of the deployments
def get_deployments(site, uid = nil)
  path = "sites/#{site}/deployments/"
  # only filter by user when one is given, and escape the value
  path += "?#{URI.encode_www_form('user' => uid)}" unless uid.nil?
  @g5k_connection.get_json(api_uri(path)).items
end
|
636
|
+
|
637
|
+
# @return [Hash] information concerning a given job submitted in a Grid'5000 site
|
638
|
+
# @param site [String] a valid Grid'5000 site name
|
639
|
+
# @param jid [Fixnum] a valid job identifier
|
640
|
+
def get_job(site, jid)
|
641
|
+
@g5k_connection.get_json(api_uri("/sites/#{site}/jobs/#{jid}"))
|
642
|
+
end
|
643
|
+
|
644
|
+
# @return [Hash] switches information available in a given Grid'5000 site.
|
645
|
+
# @param site [String] a valid Grid'5000 site name
|
646
|
+
# Lists the switches of a site, adding to each one the full names of the
# nodes plugged into it. Switches without a 'node' linecard are left out.
def get_switches(site)
  equipments = @g5k_connection.get_json(api_uri("/sites/#{site}/network_equipments")).items
  switches = equipments.select { |x| x['kind'] == 'switch' }

  # extract nodes connected to those switches
  switches.each do |switch|
    node_card = switch['linecards'].detect { |c| c['kind'] == 'node' }
    next if node_card.nil? # IB switches for example

    used_ports = node_card['ports'].select { |x| x != {} }
    switch['nodes'] = used_ports.map { |x| "#{x['uid']}.#{site}.grid5000.fr" }
  end

  return switches.select { |it| it.key?('nodes') }
end
|
661
|
+
|
662
|
+
# @return [Hash] information of a specific switch available in a given Grid'5000 site.
|
663
|
+
# @param site [String] a valid Grid'5000 site name
|
664
|
+
# @param name [String] a valid switch name
|
665
|
+
# Finds one switch of a site by its uid.
#
# @param site [String] a valid Grid'5000 site name
# @param name [String] a valid switch name
# @return the matching entry of get_switches(site)
# @raise [RuntimeError] when no switch of the site has that uid
def get_switch(site, name)
  found = get_switches(site).find { |switch| switch.uid == name }
  return found unless found.nil?

  raise "Unknown switch '#{name}'"
end
|
670
|
+
|
671
|
+
# Returns information of all my jobs submitted in a given site.
|
672
|
+
# By default it only shows the jobs in state *running*.
|
673
|
+
# You can specify another state like this:
|
674
|
+
#
|
675
|
+
# = Example
|
676
|
+
# get_my_jobs("nancy", state="waiting")
|
677
|
+
# Valid states are specified in {https://api.grid5000.fr/doc/4.0/reference/spec.html Grid'5000 API spec}
|
678
|
+
# @return [Array] all my submitted jobs to a given site and their associated deployments.
|
679
|
+
# @param site [String] a valid Grid'5000 site name
|
680
|
+
# Returns the jobs of the current user (g5k_user) in a site, attaching to every
# running job the deployments created after the job started.
#
# @param site [String] a valid Grid'5000 site name
# @param state [String] job state passed on to get_jobs (default "running")
# @return [Array] the jobs returned by get_jobs; running ones carry a "deploy" entry
def get_my_jobs(site, state = "running")
  jobs = get_jobs(site, g5k_user, state)
  deployments = get_deployments(site, g5k_user)
  # Plain iteration: the jobs are updated in place, no mapped array is needed.
  jobs.each do |job|
    # attaching deployments only makes sense for jobs in state running
    next unless job["state"] == "running"

    job["deploy"] = deployments.select { |d| d["created_at"] > job["started_at"] }
  end
  jobs
end
|
687
|
+
|
688
|
+
# Returns an Array with all subnets reserved by a given job.
|
689
|
+
# Each element of the Array is a {https://github.com/bluemonk/ipaddress IPAddress::IPv4} object which we can interact with to obtain
|
690
|
+
# the details of our reserved subnets:
|
691
|
+
#
|
692
|
+
# = Example
|
693
|
+
# require 'cute'
|
694
|
+
#
|
695
|
+
# g5k = Cute::G5K::API.new()
|
696
|
+
#
|
697
|
+
# job = g5k.reserve(:site => "lyon", :resources => "/slash_22=1+{virtual!='none'}/nodes=1")
|
698
|
+
#
|
699
|
+
# subnet = g5k.get_subnets(job).first #=> we use 'first' because it is an array and we only reserved one subnet.
|
700
|
+
#
|
701
|
+
# ips = subnet.map{ |ip| ip.to_s }
|
702
|
+
#
|
703
|
+
# @return [Array] all the subnets defined in a given job
|
704
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
705
|
+
# Builds an IPAddress::IPv4 object for every subnet reserved by a job.
#
# @param job [G5KJSON] a job whose +resources+ may hold a "subnets" entry
# @return [Array, nil] one IPAddress::IPv4 per subnet, or nil when the job
#   has no "subnets" entry (same convention as get_vlan_nodes)
def get_subnets(job)
  subnets = job.resources["subnets"]
  # a job reserved without subnets has no such entry; do not call map on nil
  return nil if subnets.nil?

  subnets.map { |subnet| IPAddress::IPv4.new(subnet) }
end
|
709
|
+
|
710
|
+
# @return [Array] all the nodes in the VLAN
|
711
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
712
|
+
# Derives the in-VLAN names of the nodes assigned to a job by inserting
# "-kavlan-<id>" between the host part and the domain of each node name.
#
# @param job [G5KJSON] a job whose +resources+ may hold a "vlans" entry
# @return [Array, nil] one name per assigned node (nil for names that do not
#   match "<word>-<number>.<rest>"), or nil when the job has no VLAN
def get_vlan_nodes(job)
  vlans = job.resources["vlans"]
  return nil if vlans.nil?

  vlan_id = vlans.first
  node_pattern = /^(\w+-\d+)(\..*)$/
  job["assigned_nodes"].map do |name|
    # match once and reuse the captures instead of re-running the regex
    match = node_pattern.match(name)
    match && "#{match[1]}-kavlan-#{vlan_id}#{match[2]}"
  end
end
|
724
|
+
|
725
|
+
# Releases all jobs on a site
|
726
|
+
# @param site [String] a valid Grid'5000 site name
|
727
|
+
# Releases every running and waiting job of the current user in a site.
# The whole operation runs under a 20 second timeout.
#
# @param site [String] a valid Grid'5000 site name
# @return [true]
def release_all(site)
  Timeout.timeout(20) do
    running = get_my_jobs(site, "running")
    waiting = get_my_jobs(site, "waiting")
    pending = running + waiting

    unless pending.empty?
      begin
        pending.each { |job| release(job) }
      rescue Cute::G5K::RequestFailed => failure
        # a job that was killed in the meantime is not an error
        raise unless failure.response.include?('already killed')
      end
    end
  end
  true
end
|
739
|
+
|
740
|
+
# Releases a resource, it can be a job or a deploy.
|
741
|
+
# Releases a resource (a job or a deploy) by issuing a DELETE on its own URI.
#
# @param r a resource responding to +rel_self+
# @return the response of the DELETE request, or nil when the request failed
#   because the resource was already killed
# @raise [Cute::G5K::RequestFailed] for any other request failure
def release(r)
  @g5k_connection.delete_json(r.rel_self)
rescue Cute::G5K::RequestFailed => failure
  raise unless failure.response.include?('already killed')
end
|
748
|
+
|
749
|
+
# Performs a reservation in Grid'5000.
|
750
|
+
#
|
751
|
+
# = Examples
|
752
|
+
#
|
753
|
+
# By default this method blocks until the reservation is ready,
|
754
|
+
# if we want this method to return after creating the reservation we set the option *:wait* to *false*.
|
755
|
+
# Then, you can use the method {Cute::G5K::API#wait_for_job wait_for_job} to wait for the reservation.
|
756
|
+
#
|
757
|
+
# job = g5k.reserve(:nodes => 25, :site => 'luxembourg', :walltime => '01:00:00', :wait => false)
|
758
|
+
#
|
759
|
+
# job = g5k.wait_for_job(job, :wait_time => 100)
|
760
|
+
#
|
761
|
+
# == Reserving with properties
|
762
|
+
#
|
763
|
+
# job = g5k.reserve(:site => 'lyon', :nodes => 2, :properties => "wattmeter='YES'")
|
764
|
+
#
|
765
|
+
# job = g5k.reserve(:site => 'nancy', :nodes => 1, :properties => "switch='sgraphene1'")
|
766
|
+
#
|
767
|
+
# job = g5k.reserve(:site => 'nancy', :nodes => 1, :properties => "cputype='Intel Xeon E5-2650'")
|
768
|
+
#
|
769
|
+
# == Subnet reservation
|
770
|
+
#
|
771
|
+
# The example below reserves 2 nodes in the cluster *chirloute* located in Lille for 1 hour as well as 2 /22 subnets.
|
772
|
+
# We will get 2048 IP addresses that can be used, for example, in virtual machines.
|
773
|
+
# If walltime is not specified, 1 hour walltime will be assigned to the reservation.
|
774
|
+
#
|
775
|
+
# job = g5k.reserve(:site => 'lille', :cluster => 'chirloute', :nodes => 2,
|
776
|
+
# :env => 'wheezy-x64-xen', :keys => "~/my_ssh_jobkey",
|
777
|
+
# :subnets => [22,2])
|
778
|
+
#
|
779
|
+
# == Before using OAR hierarchy
|
780
|
+
# All non-deploy reservations are submitted by default with the OAR option "-allow_classic_ssh"
|
781
|
+
# which does not take advantage of the CPU/core management level.
|
782
|
+
# Therefore, in order to take advantage of this capability, SSH keys have to be specified at the moment of reserving resources.
|
783
|
+
# This has to be used whenever we perform a reservation with cpu and core hierarchy.
|
784
|
+
# Users are encouraged to create a pair of SSH keys for managing jobs, for instance the following command can be used:
|
785
|
+
#
|
786
|
+
# ssh-keygen -N "" -t rsa -f ~/my_ssh_jobkey
|
787
|
+
#
|
788
|
+
# The reserved nodes can be accessed using "oarsh" or by configuring the SSH connection as shown in {https://www.grid5000.fr/mediawiki/index.php/OAR2 OAR2}.
|
789
|
+
# You have to specify different keys per reservation if you want several jobs running at the same time in the same site.
|
790
|
+
# Example using the OAR hierarchy:
|
791
|
+
#
|
792
|
+
# job = g5k.reserve(:site => "grenoble", :switches => 3, :nodes => 1, :cpus => 1, :cores => 1, :keys => "~/my_ssh_jobkey")
|
793
|
+
#
|
794
|
+
# == Using OAR syntax
|
795
|
+
#
|
796
|
+
# The parameter *:resources* can be used instead of parameters such as: *:cluster*, *:nodes*, *:cpus*, *:walltime*, *:vlan*, *:subnets*, *:properties*, etc,
|
797
|
+
# which are shortcuts for OAR syntax. These shortcuts are ignored if the parameter *:resources* is used.
|
798
|
+
# Using the parameter *:resources* allows to express more flexible and complex reservations by using directly the OAR syntax.
|
799
|
+
# Therefore, the two examples shown below are equivalent:
|
800
|
+
#
|
801
|
+
# job = g5k.reserve(:site => "grenoble", :switches => 3, :nodes => 1, :cpus => 1, :cores => 1, :keys => "~/my_ssh_jobkey")
|
802
|
+
# job = g5k.reserve(:site => "grenoble", :resources => "/switch=3/nodes=1/cpu=1/core=1", :keys => "~/my_ssh_jobkey")
|
803
|
+
#
|
804
|
+
# Combining OAR hierarchy with properties:
|
805
|
+
#
|
806
|
+
# job = g5k.reserve(:site => "grenoble", :resources => "{ib10g='YES' and memnode=24160}/cluster=1/nodes=2/core=1", :keys => "~/my_ssh_jobkey")
|
807
|
+
#
|
808
|
+
# If we want 2 nodes with the following constraints:
|
809
|
+
# 1) nodes on 2 different clusters of the same site, 2) nodes with virtualization capability enabled
|
810
|
+
# 3) 1 /22 subnet. The reservation will be like:
|
811
|
+
#
|
812
|
+
# job = g5k.reserve(:site => "rennes", :resources => "/slash_22=1+{virtual!='none'}/cluster=2/nodes=1")
|
813
|
+
#
|
814
|
+
# Another reservation for two clusters:
|
815
|
+
#
|
816
|
+
# job = g5k.reserve(:site => "nancy", :resources => "{cluster='graphene'}/nodes=2+{cluster='griffon'}/nodes=3")
|
817
|
+
#
|
818
|
+
# Reservation using a local VLAN
|
819
|
+
#
|
820
|
+
# job = g5k.reserve(:site => 'nancy', :resources => "{type='kavlan-local'}/vlan=1,nodes=1", :env => 'wheezy-x64-xen')
|
821
|
+
#
|
822
|
+
# @return [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
823
|
+
# @param [Hash] opts Options for reservation in Grid'5000
|
824
|
+
# @option opts [Numeric] :nodes Number of nodes to reserve
|
825
|
+
# @option opts [String] :walltime Walltime of the reservation
|
826
|
+
# @option opts [String] :site Grid'5000 site
|
827
|
+
# @option opts [Symbol] :type Type of reservation: :deploy, :allow_classic
|
828
|
+
# @option opts [String] :name Reservation name
|
829
|
+
# @option opts [String] :cmd The command to execute when the job starts (e.g. ./my-script.sh).
|
830
|
+
# @option opts [String] :cluster Valid Grid'5000 cluster
|
831
|
+
# @option opts [Array] :subnets 1) prefix_size, 2) number of subnets
|
832
|
+
# @option opts [String] :env Environment name for {http://kadeploy3.gforge.inria.fr/ Kadeploy}
|
833
|
+
# @option opts [Symbol] :vlan Vlan type: :routed, :local, :global
|
834
|
+
# @option opts [String] :properties OAR properties defined in the cluster
|
835
|
+
# @option opts [String] :resources OAR syntax for complex submissions
|
836
|
+
# @option opts [String] :reservation Request a job to be scheduled a specific date.
|
837
|
+
# The date format is "YYYY-MM-DD HH:MM:SS".
|
838
|
+
# @option opts [Boolean] :wait Whether or not to wait until the job is running (default is true)
|
839
|
+
# Submits a reservation (OAR job) to a Grid'5000 site and returns the job.
#
# The options are validated against a fixed list of keys, translated into an
# OAR resource string (unless :resources is given) and posted through the API
# connection. The caller's Hash is never modified.
#
# @param opts [Hash] reservation options; :nodes defaults to 1, :walltime to
#   '01:00:00', :name to 'rubyCute job' and :wait to true
# @return the job fetched from the API, refreshed by wait_for_job when :wait is true
# @raise [ArgumentError] on an unrecognized option or an unknown :vlan value
# @raise [RuntimeError] when nodes, walltime or site is nil, when :nodes is not
#   an Integer, or when the submitted job cannot be located afterwards
def reserve(opts)
  # Work on a shallow copy: the :wait default written below and the removal of
  # :nodes before deploying must not leak into the caller's Hash.
  opts = opts.dup

  # checking valid options
  valid_opts = [:site, :cluster, :switches, :cpus, :cores, :nodes, :walltime, :cmd,
                :type, :name, :subnets, :env, :vlan, :properties, :resources, :reservation, :wait, :keys]
  unre_opts = opts.keys - valid_opts
  raise ArgumentError, "Unrecognized option #{unre_opts}" unless unre_opts.empty?

  nodes = opts.fetch(:nodes, 1)
  walltime = opts.fetch(:walltime, '01:00:00')
  site = opts[:site]
  type = opts[:type]
  name = opts.fetch(:name, 'rubyCute job')
  command = opts[:cmd]
  opts[:wait] = true if opts[:wait].nil?
  cluster = opts[:cluster]
  switches = opts[:switches]
  cpus = opts[:cpus]
  cores = opts[:cores]
  subnets = opts[:subnets]
  properties = opts[:properties]
  reservation = opts[:reservation]
  resources = opts.fetch(:resources, "")
  type = :deploy if opts[:env] # giving an environment forces a deploy job
  keys = opts[:keys]

  vlan_opts = {:routed => "kavlan", :global => "kavlan-global", :local => "kavlan-local"}
  vlan = nil
  unless opts[:vlan].nil?
    raise ArgumentError, 'Option for vlan not recognized' unless vlan_opts.include?(opts[:vlan])

    vlan = vlan_opts.fetch(opts[:vlan])
  end

  raise 'At least nodes, time and site must be given' if [nodes, walltime, site].any? { |x| x.nil? }

  secs = walltime.to_secs
  walltime = walltime.to_time

  raise 'Nodes must be an integer.' unless nodes.is_a?(Integer)

  # without an explicit command the job simply sleeps for the whole walltime
  command = "sleep #{secs}" if command.nil?
  type = type.to_sym unless type.nil?

  # The shortcut options are only honoured when no OAR expression was given.
  if resources == ""
    resources = "/switch=#{switches}" unless switches.nil?
    resources += "/nodes=#{nodes}"
    resources += "/cpu=#{cpus}" unless cpus.nil?
    resources += "/core=#{cores}" unless cores.nil?
    resources = "{cluster='#{cluster}'}" + resources unless cluster.nil?
    resources = "{type='#{vlan}'}/vlan=1+" + resources unless vlan.nil?
    resources = "slash_#{subnets[0]}=#{subnets[1]}+" + resources unless subnets.nil?
  end

  resources += ",walltime=#{walltime}" unless resources.include?("walltime")

  payload = {
    'resources' => resources,
    'name' => name,
    'command' => command
  }

  info "Reserving resources: #{resources} (type: #{type}) (in #{site})"

  payload['properties'] = properties unless properties.nil?
  payload['types'] = [ type.to_s ] unless type.nil?

  # Non-deploy jobs either import the given job key or fall back to the
  # 'allow_classic_ssh' type (which replaces any type set just above).
  if not type == :deploy
    if opts[:keys]
      payload['import-job-key-from-file'] = [ File.expand_path(keys) ]
    else
      payload['types'] = [ 'allow_classic_ssh' ]
    end
  end

  if reservation
    payload['reservation'] = reservation
    info "Starting this reservation at #{reservation}"
  end

  begin
    # Support for the option "import-job-key-from-file"
    # The request has to be redirected to the OAR API given that Grid'5000 API
    # does not support some OAR options.
    if payload['import-job-key-from-file'] then
      # Adding double quotes otherwise we have a syntax error from OAR API
      payload["resources"] = "\"#{payload["resources"]}\""
      temp = @g5k_connection.post_json(api_uri("sites/#{site}/internal/oarapi/jobs"), payload)
      sleep 1 # This is for being sure that our job appears on the list
      r = get_my_jobs(site, nil).find { |j| j["uid"] == temp["id"] }
    else
      r = @g5k_connection.post_json(api_uri("sites/#{site}/jobs"), payload) # This makes reference to the same class
    end
  rescue Error => e
    info "Fail to submit job"
    info e.message
    e.http_body.split("\\n").each { |line| info line }
    raise
  end

  # fail with a readable message instead of a NoMethodError on nil below
  raise "Unable to find the submitted job in site #{site}" if r.nil?

  job = @g5k_connection.get_json(r.rel_self)
  job = wait_for_job(job) if opts[:wait] == true
  opts.delete(:nodes) # to not collapse with deploy options
  deploy(job, opts) unless opts[:env].nil? # type == :deploy
  return job
end
|
948
|
+
|
949
|
+
# Blocks until job is in *running* state
|
950
|
+
#
|
951
|
+
# = Example
|
952
|
+
# You can pass the parameter *:wait_time* that allows you to timeout the submission (by default is 10h).
|
953
|
+
# The method will throw a {Cute::G5K::EventTimeout Timeout} exception
|
954
|
+
# that you can catch and react upon.
|
955
|
+
# The following example shows how it can be used, let's suppose we want to find 5 nodes available for
|
956
|
+
# 3 hours. We can try in each site using the script below.
|
957
|
+
#
|
958
|
+
# require 'cute'
|
959
|
+
#
|
960
|
+
# g5k = Cute::G5K::API.new()
|
961
|
+
#
|
962
|
+
# sites = g5k.site_uids
|
963
|
+
#
|
964
|
+
# sites.each{ |site|
|
965
|
+
# job = g5k.reserve(:site => site, :nodes => 5, :wait => false, :walltime => "03:00:00")
|
966
|
+
# begin
|
967
|
+
# job = g5k.wait_for_job(job, :wait_time => 60)
|
968
|
+
# puts "Nodes assigned #{job['assigned_nodes']}"
|
969
|
+
# break
|
970
|
+
# rescue Cute::G5K::EventTimeout
|
971
|
+
# puts "We waited too long in site #{site} let's release the job and try in another site"
|
972
|
+
# g5k.release(job)
|
973
|
+
# end
|
974
|
+
# }
|
975
|
+
#
|
976
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
977
|
+
# @param opts [Hash] options
|
978
|
+
# Blocks until the given job reaches the *running* state, polling every 5 seconds.
#
# @param job [G5KJSON] a job responding to +refresh+
# @param opts [Hash] options; :wait_time is the timeout in seconds
#   (36000 when absent or nil). The Hash is only read, never modified.
# @return the refreshed job, in state 'running'
# @raise [EventTimeout] when the job is not running before the timeout
# @raise [RuntimeError] when the job is found in state 'finishing'
def wait_for_job(job, opts = {})
  # Read the timeout without writing the default back into the caller's Hash.
  wait_time = opts[:wait_time].nil? ? 36000 : opts[:wait_time]
  jid = job['uid']
  info "Waiting for reservation #{jid}"
  begin
    Timeout.timeout(wait_time) do
      while true
        job = job.refresh(@g5k_connection)
        t = job['scheduled_at']
        if !t.nil?
          t = Time.at(t)
          # never report a negative delay when the scheduled time is in the past
          secs = [ t - Time.now, 0 ].max.to_i
          info "Reservation #{jid} should be available at #{t} (#{secs} s)"
        end
        break if job['state'] == 'running'
        raise "Job is finishing." if job['state'] == 'finishing'
        Kernel.sleep(5)
      end
    end
  rescue Timeout::Error
    raise EventTimeout.new("Event timeout")
  end

  info "Reservation #{jid} ready"
  return job
end
|
1004
|
+
|
1005
|
+
# Deploys an environment in a set of reserved nodes using {http://kadeploy3.gforge.inria.fr/ Kadeploy}.
|
1006
|
+
# A job structure returned by {Cute::G5K::API#reserve reserve} or {Cute::G5K::API#get_my_jobs get_my_jobs} methods
|
1007
|
+
# is mandatory as a parameter as well as the environment to deploy.
|
1008
|
+
# By default this method does not block, for that you have to set the option *:wait* to *true*.
|
1009
|
+
#
|
1010
|
+
# = Examples
|
1011
|
+
# Deploying the production environment *wheezy-x64-base* on all the reserved nodes and wait until the deployment is done:
|
1012
|
+
#
|
1013
|
+
# deploy(job, :env => "wheezy-x64-base", :wait => true)
|
1014
|
+
#
|
1015
|
+
# Other parameters you can specify are *:nodes* [Array] for deploying on specific nodes within a job and
|
1016
|
+
# *:keys* [String] to specify the public key to use during the deployment.
|
1017
|
+
#
|
1018
|
+
# deploy(job, :nodes => ["genepi-2.grid5000.fr"], :env => "wheezy-x64-xen", :keys => "~/my_key")
|
1019
|
+
#
|
1020
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
1021
|
+
# @param [Hash] opts Deploy options
|
1022
|
+
# @option opts [String] :env {http://kadeploy3.gforge.inria.fr/ Kadeploy} environment to deploy
|
1023
|
+
# @option opts [Array] :nodes Specifies the nodes to deploy on
|
1024
|
+
# @option opts [String] :keys Specifies the SSH keys to copy for the deployment
|
1025
|
+
# @option opts [Boolean] :wait Whether or not to wait until the deployment is done (default is false)
|
1026
|
+
# @return [G5KJSON] a job with deploy information as described in {Cute::G5K::G5KJSON job}
|
1027
|
+
# Submits a Kadeploy deployment for the nodes of a job and records the
# API response under job["deploy"].
#
# @param job [G5KJSON] the job whose nodes are deployed
# @param opts [Hash] deploy options; :env is mandatory, :nodes defaults to the
#   job's assigned nodes, :keys is the path of a key whose ".pub" file is sent,
#   :wait set to true makes the call block in wait_for_deploy
# @return the job, with the new deployment appended to job["deploy"]
# @raise [ArgumentError] on an unknown option, a job that is not a G5KJSON
#   or a missing environment
# @raise [RuntimeError] when the nodes are not given as an Array
def deploy(job, opts = {})
  # the accepted keys mirror those of reserve, even though several are meaningless here
  accepted = [:site, :cluster, :switches, :cpus, :cores, :nodes, :walltime, :cmd,
              :type, :name, :subnets, :env, :vlan, :properties, :resources, :reservation, :wait, :keys]
  unknown = opts.keys - accepted
  raise ArgumentError, "Unrecognized option #{unknown}" unless unknown.empty?
  raise ArgumentError, "Unrecognized job format" unless job.is_a?(G5KJSON)

  environment = opts[:env]
  raise ArgumentError, "Environment must be given" if environment.nil?

  targets = opts[:nodes].nil? ? job['assigned_nodes'] : opts[:nodes]
  raise "Unrecognized nodes format, use an Array" unless targets.is_a?(Array)

  site = @g5k_connection.follow_parent(job).uid

  # Without :keys the default public key is sent when it exists (empty string
  # otherwise); with :keys the matching ".pub" file must be readable.
  public_key =
    if opts[:keys].nil?
      default_key = File.expand_path("~/.ssh/id_rsa.pub")
      File.exist?(default_key) ? File.read(default_key) : ""
    else
      File.read("#{File.expand_path(opts[:keys])}.pub")
    end

  payload = { 'nodes' => targets, 'environment' => environment, 'key' => public_key }

  vlans = job.resources["vlans"]
  unless vlans.nil?
    vlan = vlans.first
    payload['vlan'] = vlan
    info "Found VLAN with uid = #{vlan}"
  end

  info "Creating deployment"

  begin
    deployment = @g5k_connection.post_json(api_uri("sites/#{site}/deployments"), payload)
  rescue Error => e
    info "Fail to deploy"
    info e.message
    e.http_body.split("\\n").each { |line| info line }
    raise
  end

  job["deploy"] = [] if job["deploy"].nil?
  job["deploy"].push(deployment)

  job = wait_for_deploy(job) if opts[:wait] == true
  job
end
|
1085
|
+
|
1086
|
+
# Returns the status of all deployments performed within a job.
|
1087
|
+
# The results can be filtered using a Hash with valid deployment properties
|
1088
|
+
# described in {https://api.grid5000.fr/doc/4.0/reference/spec.html Grid'5000 API spec}.
|
1089
|
+
#
|
1090
|
+
# = Example
|
1091
|
+
#
|
1092
|
+
# deploy_status(job, :nodes => ["adonis-10.grenoble.grid5000.fr"], :status => "terminated")
|
1093
|
+
#
|
1094
|
+
# @return [Array] status of deploys within a job
|
1095
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
1096
|
+
# @param filter [Hash] filter the deployments to be returned.
|
1097
|
+
# Refreshes every deployment of a job and returns their statuses, optionally
# keeping only the deployments whose properties equal the given filter values.
#
# @param job [G5KJSON] a job with a "deploy" entry; its deployments are
#   replaced in place by their refreshed versions
# @param filter [Hash] property => expected value; entries with a nil or false
#   value are ignored. The Hash is only read, never modified.
# @return [Array] the "status" of each (matching) deployment, nils removed
def deploy_status(job, filter = {})
  job["deploy"].map! { |d| d.refresh(@g5k_connection) }

  # Build a new Hash holding only the criteria that are set instead of
  # pruning the caller's Hash in place.
  criteria = filter.select { |_key, value| value }

  statuses =
    if criteria.empty?
      job["deploy"].map { |d| d["status"] }
    else
      job["deploy"].map do |d|
        # keep the status only when no criterion disagrees with the deployment
        d["status"] if criteria.none? { |key, value| d[key.to_s] != value }
      end
    end

  statuses.compact
end
|
1110
|
+
|
1111
|
+
# Blocks until deployments have *terminated* status
|
1112
|
+
#
|
1113
|
+
# = Examples
|
1114
|
+
# This method requires a job as a parameter and it will block by default until all deployments
|
1115
|
+
# within the job pass from *processing* status to *terminated* status.
|
1116
|
+
#
|
1117
|
+
# wait_for_deploy(job)
|
1118
|
+
#
|
1119
|
+
# You can wait for specific deployments using the option *:nodes*. This can be useful when performing different deployments on the reserved resources.
|
1120
|
+
#
|
1121
|
+
# wait_for_deploy(job, :nodes => ["adonis-10.grenoble.grid5000.fr"])
|
1122
|
+
#
|
1123
|
+
# Another parameter you can specify is *:wait_time* that allows you to timeout the deployment (by default is 10h).
|
1124
|
+
# The method will throw a {Cute::G5K::EventTimeout Timeout} exception
|
1125
|
+
# that you can catch and react upon. This example illustrates how this can be used.
|
1126
|
+
#
|
1127
|
+
# require 'cute'
|
1128
|
+
#
|
1129
|
+
# g5k = Cute::G5K::API.new()
|
1130
|
+
#
|
1131
|
+
# job = g5k.reserve(:nodes => 1, :site => 'lyon', :env => 'wheezy-x64-base')
|
1132
|
+
#
|
1133
|
+
# begin
|
1134
|
+
# g5k.wait_for_deploy(job,:wait_time => 100)
|
1135
|
+
# rescue Cute::G5K::EventTimeout
|
1136
|
+
# puts "We waited too long let's release the job"
|
1137
|
+
# g5k.release(job)
|
1138
|
+
# end
|
1139
|
+
#
|
1140
|
+
# @param job [G5KJSON] as described in {Cute::G5K::G5KJSON job}
|
1141
|
+
# @param opts [Hash] options
|
1142
|
+
# Blocks until no deployment of the job (optionally restricted to :nodes) is
# in *processing* status any more, polling every 4 seconds.
#
# @param job [G5KJSON] a job carrying a "deploy" entry
# @param opts [Hash] options; :nodes restricts the deployments waited for,
#   :wait_time is the timeout in seconds (36000 when absent or nil).
#   The Hash is only read, never modified.
# @return the job
# @raise [RuntimeError] when the job has no "deploy" entry
# @raise [EventTimeout] when deployments are still processing at the timeout
def wait_for_deploy(job, opts = {})
  raise "Deploy information not present in the given job" if job["deploy"].nil?

  # Read the timeout without merging the default into the caller's Hash.
  wait_time = opts[:wait_time].nil? ? 36000 : opts[:wait_time]
  nodes = opts[:nodes]

  begin
    Timeout.timeout(wait_time) do
      # it will ask just for processing status
      status = deploy_status(job, {:nodes => nodes, :status => "processing"})
      until status.empty? do
        info "Waiting for #{status.length} deployment"
        sleep 4
        status = deploy_status(job, {:nodes => nodes, :status => "processing"})
      end
      info "Deployment finished"
      return job
    end
  rescue Timeout::Error
    raise EventTimeout.new("Timeout triggered")
  end
end
|
1166
|
+
|
1167
|
+
private
|
1168
|
+
# Handles the output of messages within the module
|
1169
|
+
# @param msg [String] message to show
|
1170
|
+
# Emits a message: through @logger when one is set, otherwise on standard
# output prefixed with the current time (millisecond precision).
#
# @param msg [String] message to show
def info(msg)
  return @logger.info(msg) unless @logger.nil?

  stamp = Time.now.strftime('%Y-%m-%d %H:%M:%S.%L')
  puts "#{stamp} => #{msg}"
end
|
1179
|
+
|
1180
|
+
# @return a valid Grid'5000 resource
|
1181
|
+
# it avoids "//"
|
1182
|
+
# Prefixes a resource path with @api_version, dropping one leading '/' from
# the path so that the result does not contain "//" at the junction.
#
# @param path [String] resource path, with or without a leading slash
# @return [String] "<api version>/<path>"
def api_uri(path)
  relative = path.start_with?('/') ? path[1..-1] : path
  "#{@api_version}/#{relative}"
end
|
1186
|
+
|
1187
|
+
end
|
1188
|
+
|
1189
|
+
end
|
1190
|
+
end
|