ruby-cute 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -1
- data/README.md +8 -7
- data/Rakefile +1 -0
- data/debian/changelog +2 -2
- data/examples/distem-bootstrap +113 -42
- data/examples/g5k-tutorial.md +999 -0
- data/lib/cute/extensions.rb +16 -0
- data/lib/cute/g5k_api.rb +82 -15
- data/lib/cute/synchronization.rb +1 -1
- data/lib/cute/version.rb +1 -1
- data/spec/g5k_api_spec.rb +5 -2
- data/spec/spec_helper.rb +6 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07edfbc07f26d33908a645a9caf1b8fa786830e8
|
4
|
+
data.tar.gz: 95168f3f5d0b06290c526e713f8d2d3f9033e5e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26efbf1b1a42f09c73ef2626fd7bf1420872c6efc04d0a389635dc865eec7edf6a0e6f0911d792ff1e26af7f6377d81d9402dd706ad50432c1ad027b6c066af9
|
7
|
+
data.tar.gz: e9210765ad54ff59a42f81e20c6232499b992cb2591daee8899cfbfdcb1a4fa7d78d741bdbe46f7f6d97fb71b6a08de14dec2bdd5169f5b440e3e1d977ef4478
|
data/.yardopts
CHANGED
data/README.md
CHANGED
@@ -8,19 +8,18 @@ Grid'5000.
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
+
To install latest release from [RubyGems](https://rubygems.org/gems/ruby-cute):
|
12
|
+
|
13
|
+
```bash
|
14
|
+
$ gem install --user-install ruby-cute
|
15
|
+
```
|
11
16
|
From sources:
|
12
17
|
|
13
18
|
```bash
|
14
19
|
$ git clone https://github.com/ruby-cute/ruby-cute
|
15
20
|
$ cd ruby-cute
|
16
21
|
$ gem build ruby-cute.gemspec
|
17
|
-
$ gem install ruby-cute-*.gem
|
18
|
-
```
|
19
|
-
|
20
|
-
In Grid'5000 the installation procedure goes as follows:
|
21
|
-
|
22
|
-
```bash
|
23
|
-
$ gem install --user-install ruby-cute
|
22
|
+
$ gem install --user-install ruby-cute-*.gem
|
24
23
|
```
|
25
24
|
|
26
25
|
Then, type the following for having ruby cute in your path (this is only necessary if you want to use interactive mode).
|
@@ -72,6 +71,8 @@ $ cute
|
|
72
71
|
The variable *$g5k* is available which can be used to access the Grid'5000 API through the
|
73
72
|
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API). For example,
|
74
73
|
let's request the name of the sites available in Grid'5000.
|
74
|
+
(Before starting, be sure to set up a configuration file for the module; please refer to
|
75
|
+
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API))
|
75
76
|
|
76
77
|
```bash
|
77
78
|
[2] pry(main)> $g5k.site_uids()
|
data/Rakefile
CHANGED
@@ -27,6 +27,7 @@ Rake::PackageTask::new("ruby-cute",get_version) do |p|
|
|
27
27
|
p.package_files.include('ext/**/*')
|
28
28
|
p.package_files.include('bin/**/*')
|
29
29
|
p.package_files.include('test/**/*')
|
30
|
+
p.package_files.include('examples/**/*')
|
30
31
|
p.package_files.include('Rakefile', 'COPYING','README', 'README.md')
|
31
32
|
end
|
32
33
|
|
data/debian/changelog
CHANGED
data/examples/distem-bootstrap
CHANGED
@@ -99,7 +99,10 @@ options[:network_mode] = 'classical'
|
|
99
99
|
options[:network_interface] = nil
|
100
100
|
options[:num_nodes] = 2
|
101
101
|
options[:walltime] = "2:00:00"
|
102
|
-
|
102
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb'
|
103
|
+
options[:cluster] = "nancy"
|
104
|
+
options[:env] = 'wheezy-x64-nfs'
|
105
|
+
options[:branch] = nil
|
103
106
|
args = ARGV.dup
|
104
107
|
|
105
108
|
optparse = OptionParser.new do |opts|
|
@@ -113,9 +116,16 @@ optparse = OptionParser.new do |opts|
|
|
113
116
|
opts.separator ""
|
114
117
|
opts.separator "Options:"
|
115
118
|
|
119
|
+
opts.on( '-f', '--node-list <nodelist_file>', 'File containing the list of nodes to install. If not specified, a job will be submitted using RubyCute') do |n|
|
120
|
+
options[:node_list] = n
|
121
|
+
end
|
122
|
+
|
116
123
|
opts.on( '--nodes <number of nodes>', Integer,'Number of nodes to reserve on Grid5000') do |n|
|
117
124
|
options[:num_nodes] = n.to_i
|
118
125
|
end
|
126
|
+
opts.on( '--cluster <cluster name>', String,'Cluster name in Grid\'5000') do |n|
|
127
|
+
options[:cluster] = n
|
128
|
+
end
|
119
129
|
opts.on( '-t', '--time <walltime>', 'Walltime for the reservation on Grid5000') do |n|
|
120
130
|
options[:walltime] = n
|
121
131
|
end
|
@@ -134,6 +144,12 @@ optparse = OptionParser.new do |opts|
|
|
134
144
|
opts.on( '-D', '--distem-debug', 'Show distem output when executing distem commands (script, node init)' ) do
|
135
145
|
options[:debug_distem] = true
|
136
146
|
end
|
147
|
+
opts.on( '--env <environment>', String, 'Kadeploy environment to be used as a base' ) do |c|
|
148
|
+
options[:env] = c
|
149
|
+
end
|
150
|
+
opts.on( '--deploy', 'Redeploy the base image used for distem' ) do
|
151
|
+
options[:deploy] = true
|
152
|
+
end
|
137
153
|
opts.on( '-k', '--ssh-key [<ssh_key_file>]', 'Path of the ssh private key to use' ) do |k|
|
138
154
|
options[:ssh_key] = k
|
139
155
|
end
|
@@ -155,6 +171,9 @@ optparse = OptionParser.new do |opts|
|
|
155
171
|
opts.on( '-G', '--gerrit <gerrit_ref>', "Checkout a gerrit ref (e.g refs/changes/94/94/1) and rebuild a Debian package" ) do |n|
|
156
172
|
options[:gerrit] = n
|
157
173
|
end
|
174
|
+
opts.on( '-B', '--branch <branch_name>', "Checkout a specific branch and rebuild a Debian package" ) do |n|
|
175
|
+
options[:branch] = n
|
176
|
+
end
|
158
177
|
opts.on( '-S', '--stealth-mode', 'Do not report usage statistics (Grid\'5000 only)' ) do |c|
|
159
178
|
options[:stats] = false
|
160
179
|
end
|
@@ -177,6 +196,19 @@ optparse = OptionParser.new do |opts|
|
|
177
196
|
opts.on( '-i', '--network-interface <iface>', 'Define the root network interface for inter-pnode communication (use only with vxlan network mode)') do |iface|
|
178
197
|
options[:network_interface] = iface
|
179
198
|
end
|
199
|
+
opts.on( '--debian-version <version>', 'Choose the Debian version (squeeze, wheezy or jessie)') do |v|
|
200
|
+
case v
|
201
|
+
when 'squeeze'
|
202
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb'
|
203
|
+
when 'wheezy'
|
204
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb'
|
205
|
+
when 'jessie'
|
206
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb-jessie'
|
207
|
+
else
|
208
|
+
puts 'Unsupported Debian version'
|
209
|
+
exit 1
|
210
|
+
end
|
211
|
+
end
|
180
212
|
opts.on( '--verbose', 'Activate the verbose mode on Distem servers' ) do
|
181
213
|
options[:verbose] = true
|
182
214
|
end
|
@@ -200,6 +232,7 @@ begin
|
|
200
232
|
optparse.parse!
|
201
233
|
rescue OptionParser::InvalidOption => e
|
202
234
|
logger.error(e.to_s)
|
235
|
+
exit 1
|
203
236
|
end
|
204
237
|
|
205
238
|
logger.level = Logger::INFO
|
@@ -209,23 +242,37 @@ g5k = Cute::G5K::API.new()
|
|
209
242
|
g5k.logger = logger
|
210
243
|
## performing reservation with ruby-cute
|
211
244
|
|
212
|
-
reserv_param = {:site => g5k.site, :nodes => options[:num_nodes], :name => 'distem',
|
213
|
-
:walltime => options[:walltime], :env =>
|
245
|
+
reserv_param = {:site => g5k.site, :cluster => options[:cluster], :nodes => options[:num_nodes], :name => 'distem',
|
246
|
+
:walltime => options[:walltime], :env => options[:env], :subnets => [22,1]}
|
247
|
+
|
248
|
+
reserv_param[:keys] = options[:ssh_key] if options[:ssh_key]
|
214
249
|
|
215
|
-
|
250
|
+
nodelist = []
|
216
251
|
|
252
|
+
if options[:node_list]
|
253
|
+
File.open(options[:node_list], "r") do |f|
|
254
|
+
f.each_line do |line|
|
255
|
+
nodelist.push(line.chop)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
217
259
|
|
218
|
-
#
|
219
|
-
|
260
|
+
# if we pass a list of nodes
|
261
|
+
if nodelist.empty?
|
220
262
|
|
221
|
-
|
263
|
+
# we verify if we have already submit a job
|
264
|
+
old_jobs = g5k.get_my_jobs(g5k.site).select{ |j| j["name"] == "distem"}
|
222
265
|
|
223
|
-
|
224
|
-
## wait for the deploy
|
266
|
+
job = old_jobs.empty? ? g5k.reserve(reserv_param) : old_jobs.first
|
225
267
|
|
226
|
-
|
268
|
+
nodelist = job.resources["cores"].uniq
|
269
|
+
## wait for the deploy
|
270
|
+
reserv_param.delete(:nodes)
|
271
|
+
g5k.deploy(job,reserv_param) if options[:deploy]
|
227
272
|
|
228
|
-
|
273
|
+
g5k.wait_for_deploy(job)
|
274
|
+
end
|
275
|
+
allnodes = nodelist.dup
|
229
276
|
|
230
277
|
# Generating ssh keys"
|
231
278
|
|
@@ -233,16 +280,28 @@ key_dir = Dir.mktmpdir("keys")
|
|
233
280
|
system "ssh-keygen -P \'\' -f #{key_dir}/keys"
|
234
281
|
logger.info "Keys generated in #{key_dir}"
|
235
282
|
|
283
|
+
# Generating ssh config"
|
284
|
+
|
285
|
+
ssh_conf = Tempfile.new('config')
|
286
|
+
File.open(ssh_conf.path,'w+') do |f|
|
287
|
+
f.puts "Host *"
|
288
|
+
f.puts "StrictHostKeyChecking no"
|
289
|
+
f.puts "UserKnownHostsFile=/dev/null "
|
290
|
+
end
|
291
|
+
|
292
|
+
|
236
293
|
nodelist.each do |node|
|
237
294
|
|
238
295
|
Net::SCP.start(node, "root") do |scp|
|
239
296
|
logger.debug "Transfering key to #{node}"
|
240
|
-
scp.upload
|
241
|
-
scp.upload
|
297
|
+
scp.upload "#{key_dir}/keys.pub", "/root/.ssh/id_rsa.pub"
|
298
|
+
scp.upload "#{key_dir}/keys", "/root/.ssh/id_rsa"
|
299
|
+
scp.upload ssh_conf.path, "/root/.ssh/config"
|
242
300
|
end
|
243
301
|
|
244
302
|
end
|
245
303
|
|
304
|
+
ssh_conf.unlink
|
246
305
|
# Deletating keys
|
247
306
|
FileUtils.rm_rf(key_dir)
|
248
307
|
|
@@ -314,8 +373,7 @@ Net::SSH::Multi.start do |session|
|
|
314
373
|
session.exec! "grep -q '#{rule}' /etc/security/limits.conf; true || echo '#{rule} #{ULIMIT_OPEN_FILES}' >> /etc/security/limits.conf"
|
315
374
|
|
316
375
|
# setup Debian repo
|
317
|
-
session.exec! "grep -q '#{
|
318
|
-
|
376
|
+
session.exec! "grep -q '#{options[:deb_repo]}' /etc/apt/sources.list || echo 'deb #{options[:deb_repo]} ./\ndeb-src #{options[:deb_repo]} ./' >> /etc/apt/sources.list"
|
319
377
|
logger.info "Updating debian packages list"
|
320
378
|
session.exec! "apt-get update -q"
|
321
379
|
|
@@ -348,25 +406,39 @@ Net::SSH::Multi.start do |session|
|
|
348
406
|
session.with(:corrd).exec! "git --git-dir=#{gitdir}/.git reset --hard #{options[:git]}"
|
349
407
|
end
|
350
408
|
end
|
351
|
-
|
409
|
+
if options[:branch]
|
410
|
+
logger.info("Setting up git repository from a specific branch:#{options[:branch]} on #{coordinator}")
|
411
|
+
session.with(:coord).exec! "cd #{gitdir} && git fetch origin #{options[:branch]}"
|
412
|
+
session.with(:coord).exec! "cd #{gitdir} && git checkout #{options[:branch]}"
|
413
|
+
end
|
414
|
+
else option[:gerrit] # gerrit
|
352
415
|
logger.info("Setting up git repository from gerrit ref:#{options[:gerrit]} on #{coordinator}")
|
353
416
|
session.with(:coord).exec! "git clone #{GERRIT_REPOSITORY} #{gitdir}"
|
354
417
|
session.with(:coord).exec! "cd #{gitdir} && git fetch #{GERRIT_REPOSITORY} #{options[:gerrit]} && git checkout FETCH_HEAD"
|
355
418
|
end
|
419
|
+
# Checking out a specific commit from Distem repository
|
420
|
+
|
421
|
+
|
356
422
|
logger.info("Building debian package of distem on #{coordinator}")
|
357
423
|
session.with(:coord).exec! "rm -f #{TMP_DIR}/*.deb #{TMP_DIR}/*.changes #{TMP_DIR}/*.dsc #{TMP_DIR}/*.tar.gz"
|
358
424
|
session.with(:coord).exec! "cd #{gitdir}; rake snapshot"
|
359
425
|
|
360
|
-
|
426
|
+
res = session.with(:coord).exec! "find #{TMP_DIR} -maxdepth 1 -name distem*.deb"
|
361
427
|
|
362
|
-
|
428
|
+
debarchivefile = res[coordinator][:stdout]
|
429
|
+
|
430
|
+
logger.info("Copying generated debian package #{File.basename(debarchivefile)}")
|
363
431
|
|
364
432
|
#exec(session,"cp #{debarchivefile} #{TMP_DIR}",:coord,true)
|
365
433
|
nodelist.each {|node| session.with(:coord).exec! "scp -o StrictHostKeyChecking=no #{debarchivefile} root@#{node}:#{TMP_DIR}"}
|
366
434
|
logger.info("Installing generated debian package #{File.basename(debarchivefile)}")
|
367
|
-
|
435
|
+
res = session.with(:coord).exec! "dpkg -I #{debarchivefile} | grep 'Depends:'"
|
436
|
+
|
437
|
+
raw_deps = res[coordinator][:stdout].sub('Depends:','').split(",")
|
438
|
+
|
439
|
+
depends = ""
|
440
|
+
raw_deps.each{ |str| depends+=str.split(" ")[0]; depends+="," unless str==raw_deps.last}
|
368
441
|
|
369
|
-
depends = depends.split("|").last.gsub!(' ','')
|
370
442
|
session.exec! "dpkg --ignore-depends #{depends} -i #{TMP_DIR}/#{File.basename(debarchivefile)}"
|
371
443
|
|
372
444
|
session.exec! "DEBIAN_FRONTEND=noninteractive apt-get install -q -y --force-yes -f"
|
@@ -388,7 +460,7 @@ Net::SSH::Multi.start do |session|
|
|
388
460
|
end
|
389
461
|
options[:gempackages].each do |gempkg|
|
390
462
|
logger.info "Installing additional gem package '#{gempkg}'"
|
391
|
-
session.
|
463
|
+
session.exec! "export http_proxy=#{HTTP_PROXY}; gem install #{gempkg}"
|
392
464
|
end
|
393
465
|
|
394
466
|
if options[:init_pnodes] or ARGV[0]
|
@@ -426,8 +498,7 @@ Net::SSH::Multi.start do |session|
|
|
426
498
|
begin
|
427
499
|
Timeout.timeout(10) do
|
428
500
|
root_iface_opt = options[:network_interface] ? "--network-interface #{options[:network_interface]}" : ''
|
429
|
-
distem_cmd = "LANG=C distemd #{options[:verbose] ? '--verbose' : ''}
|
430
|
-
"#{options[:network_mode]} #{root_iface_opt} -d &>#{File.join(PATH_DISTEMD_LOGS,'distemd.log')}&"
|
501
|
+
distem_cmd = "LANG=C distemd #{options[:verbose] ? '--verbose' : ''} -d &>#{File.join(PATH_DISTEMD_LOGS,'distemd.log')}&"
|
431
502
|
|
432
503
|
session.with(:coord).exec! distem_cmd
|
433
504
|
|
@@ -475,7 +546,7 @@ Net::SSH::Multi.start do |session|
|
|
475
546
|
if ARGV[0]
|
476
547
|
begin
|
477
548
|
File.open(ARGV[0], 'r') do |f|
|
478
|
-
filename = session.with(:coord).exec!('tempfile')[coordinator][
|
549
|
+
filename = session.with(:coord).exec!('tempfile')[coordinator][:stdout]
|
479
550
|
logger.info("Copying script file in '#{filename}' on #{coordinator}")
|
480
551
|
`scp #{ARGV[0]} root@#{coordinator}:#{filename}`
|
481
552
|
session.with(:coord).exec! "chmod +x #{filename}"
|
@@ -484,7 +555,7 @@ Net::SSH::Multi.start do |session|
|
|
484
555
|
argv_dup = ARGV.dup
|
485
556
|
argv_dup.shift
|
486
557
|
script_args = (argv_dup.length > 0 ? argv_dup.join(" ") : "")
|
487
|
-
session.with(:coord).exec! "export #{VAR_DISTEM_NODES}='#{(nodelist + [coordinator]) * "\n"}'; export #{VAR_DISTEM_COORD}='#{coordinator}';#{filename} #{script_args}"
|
558
|
+
#session.with(:coord).exec! "export #{VAR_DISTEM_NODES}='#{(nodelist + [coordinator]) * "\n"}'; export #{VAR_DISTEM_COORD}='#{coordinator}';#{filename} #{script_args}"
|
488
559
|
session.with(:coord).exec! "rm #{filename}"
|
489
560
|
end
|
490
561
|
logger.info 'Script execution done'
|
@@ -496,21 +567,21 @@ Net::SSH::Multi.start do |session|
|
|
496
567
|
logger.info("Coordinator: #{coordinator}") if options[:init_pnodes] or ARGV[0]
|
497
568
|
|
498
569
|
# Send stats record to stats server
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
570
|
+
if g5k? and options[:stats]
|
571
|
+
begin
|
572
|
+
stats = {
|
573
|
+
:time => Time.now.to_i,
|
574
|
+
:site => Socket.gethostname,
|
575
|
+
:user => g5k.g5k_user,
|
576
|
+
:oar => job['uid'],
|
577
|
+
:nodes => allnodes,
|
578
|
+
:params => args * ' ',
|
579
|
+
:length => (Time.now - $startt).to_i
|
580
|
+
}
|
581
|
+
sock = TCPSocket.open(STATS_SERV, STATS_PORT)
|
582
|
+
sock.send(stats.to_yaml,0)
|
583
|
+
sock.close
|
584
|
+
rescue SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EHOSTUNREACH
|
585
|
+
end
|
586
|
+
end
|
516
587
|
end
|
@@ -0,0 +1,999 @@
|
|
1
|
+
# @title Grid'5000 tutorial
|
2
|
+
# Grid'5000 tutorial
|
3
|
+
|
4
|
+
|
5
|
+
This tutorial aims at showing how **Ruby-Cute** can be used to
|
6
|
+
help the scripting of an experiment in the context of the Grid'5000 testbed.
|
7
|
+
The programming language used, as you would expect, is {https://www.ruby-lang.org/en/ Ruby}.
|
8
|
+
We will use a powerful console debugger called {http://pryrepl.org/ Pry} which offers
|
9
|
+
several functionalities that can be used for the step-by-step scripting of complex experiments.
|
10
|
+
|
11
|
+
In this tutorial the first two sections are dedicated to the installation and basic use of **Ruby-Cute**.
|
12
|
+
You can skip those sections if you are already acquainted with **Ruby-Cute**.
|
13
|
+
The other sections show how to use **Ruby-cute** for scripting complex experiments.
|
14
|
+
This is shown through three examples:
|
15
|
+
|
16
|
+
1. **Infiniband performance test**: the experiment will illustrate how to perform a reservation,
|
17
|
+
the execution of commands in a reserved node and it explains several `pry` commands that will help you with the writing of your experiment.
|
18
|
+
2. **Running NAS benchmarks in Grid’5000**: you will get acquainted with parallel execution using simple SSH or {http://taktuk.gforge.inria.fr/ TakTuk}.
|
19
|
+
3. **Performing network measures within a reserved VLAN**: you will learn how to reserve a routed VLAN
|
20
|
+
and to query the G5K metrology {https://www.grid5000.fr/mediawiki/index.php/API API}.
|
21
|
+
For this particular experiment we will query the {https://www.grid5000.fr/mediawiki/index.php/Monitoring Kwapi} service.
|
22
|
+
|
23
|
+
The aforementioned experiments are independent, you can perform them in any order.
|
24
|
+
However, you may need some concepts that are explained only in specific sections.
|
25
|
+
|
26
|
+
## Installing and preparing Ruby cute
|
27
|
+
|
28
|
+
The installation procedure is shown in {file:README.md Ruby-Cute install}.
|
29
|
+
After this step you will normally have `ruby-cute` and `pry` gems installed.
|
30
|
+
|
31
|
+
Before using **Ruby-Cute** you have to create the following file:
|
32
|
+
|
33
|
+
$ cat > ~/.grid5000_api.yml << EOF
|
34
|
+
$ uri: https://api.grid5000.fr/
|
35
|
+
$ version: 3.0
|
36
|
+
$ EOF
|
37
|
+
|
38
|
+
## Getting acquainted with the pry console
|
39
|
+
|
40
|
+
After installing `ruby-cute` and `pry` gems you can launch a pry console
|
41
|
+
with **ruby-cute** loaded by typing:
|
42
|
+
|
43
|
+
$ cute
|
44
|
+
|
45
|
+
Which will open a `pry` console:
|
46
|
+
|
47
|
+
[1] pry(main)>
|
48
|
+
|
49
|
+
In this console, we can evaluate Ruby code, execute shell commands, consult
|
50
|
+
documentation, explore classes and more.
|
51
|
+
The variable *$g5k* is available which can be used to access the Grid'5000 API through the
|
52
|
+
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API). For example,
|
53
|
+
let's request the name of the sites available in Grid'5000.
|
54
|
+
|
55
|
+
[2] pry(main)> $g5k.site_uids()
|
56
|
+
=> ["grenoble", "lille", "luxembourg", "lyon", "nancy", "nantes", "reims", "rennes", "sophia", "toulouse"]
|
57
|
+
|
58
|
+
We can consult the name of the clusters available in a specific site.
|
59
|
+
|
60
|
+
[3] pry(main)> $g5k.cluster_uids("grenoble")
|
61
|
+
=> ["adonis", "edel", "genepi"]
|
62
|
+
|
63
|
+
It is possible to execute shell commands, however all commands have to be prefixed with a dot ".". For example we could generate a pair of SSH keys using:
|
64
|
+
|
65
|
+
[7] pry(main)> .ssh-keygen -b 1024 -N "" -t rsa -f ~/my_ssh_jobkey
|
66
|
+
|
67
|
+
Another advantage is the possibility of exploring the loaded Ruby modules.
|
68
|
+
Let's explore the [Cute](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute) module.
|
69
|
+
|
70
|
+
[8] pry(main)> cd Cute
|
71
|
+
[9] pry(Cute):1> ls
|
72
|
+
constants: Bash Execute G5K TakTuk VERSION
|
73
|
+
locals: _ __ _dir_ _ex_ _file_ _in_ _out_ _pry_
|
74
|
+
|
75
|
+
We can see that the [Cute](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute) module
|
76
|
+
is composed of other helpful modules such as:
|
77
|
+
[G5K](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API),
|
78
|
+
[TakTuk](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/TakTuk), etc.
|
79
|
+
To quit the Cute namespace type:
|
80
|
+
|
81
|
+
[10] pry(main)> cd
|
82
|
+
|
83
|
+
Let's explore the methods defined in the
|
84
|
+
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API),
|
85
|
+
so you can observe which methods can be used with `$g5k` variable.
|
86
|
+
|
87
|
+
[11] pry(main)> ls Cute::G5K::API
|
88
|
+
Object.methods: yaml_tag
|
89
|
+
Cute::G5K::API#methods:
|
90
|
+
check_deployment deploy environments get_job get_subnets get_vlan_nodes nodes_status reserve site_status wait_for_deploy
|
91
|
+
cluster_uids deploy_status g5k_user get_jobs get_switch logger release rest site_uids wait_for_job
|
92
|
+
clusters environment_uids get_deployments get_my_jobs get_switches logger= release_all site sites
|
93
|
+
|
94
|
+
We can access the respective YARD documentation of a given method by typing:
|
95
|
+
|
96
|
+
[12] pry(main)> show-doc Cute::G5K::API#deploy
|
97
|
+
|
98
|
+
In the following section we will see how
|
99
|
+
`pry` can be used to set up an experiment step by step using **Ruby-Cute**.
|
100
|
+
|
101
|
+
Pry can be customized by creating the file `.pryrc`. We will create this
|
102
|
+
file with the following content in order to choose our preferred editor:
|
103
|
+
|
104
|
+
$ cat > ~/.pryrc << EOF
|
105
|
+
Pry.config.editor = "emacs"
|
106
|
+
EOF
|
107
|
+
|
108
|
+
## First experiment: Infiniband performance test
|
109
|
+
|
110
|
+
Here, we will use **Ruby-cute** to carry out an experiment.
|
111
|
+
In this experiment, we will ask for two nodes equipped with infiniband and
|
112
|
+
then, we will perform some performance test using a network benchmark called *NETPIPE*.
|
113
|
+
For this particular experiment we have the following requirements:
|
114
|
+
|
115
|
+
- A pair of SSH keys
|
116
|
+
- Use of standard environment (no deploy)
|
117
|
+
- Two nodes connected with infiniband (10G or 20G)
|
118
|
+
- MPI benchmark NETPIPE
|
119
|
+
- A MPI runtime (OpenMPI or MPICH)
|
120
|
+
|
121
|
+
We will do it interactively using `pry`.
|
122
|
+
Let's create a directory for keeping all the scripts that we will write throughout the tutorial.
|
123
|
+
|
124
|
+
$ mkdir ruby-cute-tutorial
|
125
|
+
|
126
|
+
Then, we execute the `pry` console from this directory:
|
127
|
+
|
128
|
+
$ cd ruby-cute-tutorial
|
129
|
+
$ cute
|
130
|
+
|
131
|
+
First, let's find the sites that offer Infiniband interconnection.
|
132
|
+
For that we will write a small script from `pry` console using the command edit.
|
133
|
+
|
134
|
+
|
135
|
+
[13] pry(main)> edit -n find_infiniband.rb
|
136
|
+
|
137
|
+
This will open a new file with your preferred editor. Here we will put the following
|
138
|
+
ruby script:
|
139
|
+
|
140
|
+
sites = $g5k.site_uids
|
141
|
+
|
142
|
+
sites_infiniband = []
|
143
|
+
|
144
|
+
sites.each do |site|
|
145
|
+
sites_infiniband.push(site) unless $g5k.get_switches(site).select{ |t| t["model"] == "Infiniband" }.empty?
|
146
|
+
end
|
147
|
+
|
148
|
+
Then, we execute it using the `play` command, which will execute this script line by line in the context of a Pry session.
|
149
|
+
|
150
|
+
[21] pry(main)> play find_infiniband.rb
|
151
|
+
=> ["grenoble", "lille", "luxembourg", "lyon", "nancy", "nantes", "reims", "rennes", "sophia"]
|
152
|
+
|
153
|
+
We can observe that the variable `sites_infiniband` is now defined, telling us that Grenoble and Nancy sites offer Infiniband interconnection.
|
154
|
+
|
155
|
+
[22] pry(main)> sites_infiniband
|
156
|
+
=> ["grenoble", "nancy"]
|
157
|
+
|
158
|
+
Then, create a pair of SSH keys (Necessary for OARSSH):
|
159
|
+
|
160
|
+
$ ssh-keygen -b 1024 -N "" -t rsa -f ~/my_ssh_jobkey
|
161
|
+
|
162
|
+
We send the generated keys to the chosen site:
|
163
|
+
|
164
|
+
[22] pry(main)> .scp ~/my_ssh* nancy:~/
|
165
|
+
|
166
|
+
Now that we have found the sites, let's submit a job. You can choose between the Grenoble and Nancy sites. If you
|
167
|
+
take a look at {https://www.grid5000.fr/mediawiki/index.php/Status Monika} you will see that in Nancy we should use the OAR property 'ib20g' and in Grenoble we should use 'ib10g'.
|
168
|
+
Given that the MPI bench uses just one MPI process, we will in reality need just one core of a given machine.
|
169
|
+
We will use OAR syntax to ask for two cores in two different nodes with ib10g in Grenoble.
|
170
|
+
|
171
|
+
[23] pry(main)> job = $g5k.reserve(:site => "grenoble", :resources => "{ib10g='YES'}/nodes=2/core=1",:walltime => '01:00:00', :keys => "~/my_ssh_jobkey" )
|
172
|
+
2015-12-04 14:07:31.370 => Reserving resources: {ib20g='YES'}/nodes=2/core=1,walltime=01:00 (type: ) (in grenoble)
|
173
|
+
2015-12-04 14:07:41.358 => Waiting for reservation 692665
|
174
|
+
2015-12-04 14:07:41.444 => Reservation 692665 should be available at 2015-12-04 14:07:34 +0100 (0 s)
|
175
|
+
2015-12-04 14:07:41.444 => Reservation 692665 ready
|
176
|
+
|
177
|
+
A hash is returned containing all the information about the job that we have just submitted.
|
178
|
+
|
179
|
+
[58] pry(main)> job
|
180
|
+
=> {"uid"=>692665,
|
181
|
+
"user_uid"=>"cruizsanabria",
|
182
|
+
"user"=>"cruizsanabria",
|
183
|
+
"walltime"=>3600,
|
184
|
+
"queue"=>"default",
|
185
|
+
"state"=>"running",
|
186
|
+
"project"=>"default",
|
187
|
+
"name"=>"rubyCute job",
|
188
|
+
"types"=>[],
|
189
|
+
"mode"=>"PASSIVE",
|
190
|
+
"command"=>"sleep 3600",
|
191
|
+
"submitted_at"=>1449234452,
|
192
|
+
"scheduled_at"=>1449234454,
|
193
|
+
"started_at"=>1449234454,
|
194
|
+
"message"=>"FIFO scheduling OK",
|
195
|
+
"properties"=>"(maintenance = 'NO') AND production = 'NO'",
|
196
|
+
"directory"=>"/home/cruizsanabria",
|
197
|
+
"events"=>[],
|
198
|
+
"links"=>
|
199
|
+
[{"rel"=>"self", "href"=>"/sid/sites/nancy/jobs/692665", "type"=>"application/vnd.grid5000.item+json"},
|
200
|
+
{"rel"=>"parent", "href"=>"/sid/sites/nancy", "type"=>"application/vnd.grid5000.item+json"}],
|
201
|
+
"resources_by_type"=>{"cores"=>["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]},
|
202
|
+
"assigned_nodes"=>["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]}
|
203
|
+
|
204
|
+
An important piece of information is the list of nodes that have been assigned; let's put this information in another variable:
|
205
|
+
|
206
|
+
[60] pry(main)> nodes = job["assigned_nodes"]
|
207
|
+
=> ["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]
|
208
|
+
|
209
|
+
Then, we create a file with the name of the reserved machines:
|
210
|
+
|
211
|
+
[62] pry(main)> machine_file = Tempfile.open('machine_file')
|
212
|
+
=> #<File:/tmp/machine_file20151204-28888-1ll3brs>
|
213
|
+
|
214
|
+
[64] pry(main)> nodes.each{ |node| machine_file.puts node }
|
215
|
+
=> ["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]
|
216
|
+
|
217
|
+
[66] pry(main)> machine_file.close
|
218
|
+
|
219
|
+
|
220
|
+
We will need to set up SSH options for OAR; we can do it with the {Cute::OARSSHopts OARSSHopts} class helper provided by ruby-cute:
|
221
|
+
|
222
|
+
[6] pry(main)> grid5000_opt = Cute::OARSSHopts.new(:keys => "~/my_ssh_jobkey")
|
223
|
+
=> {:user=>"oar", :keys=>"~/my_ssh_jobkey", :port=>6667}
|
224
|
+
|
225
|
+
Now, we can communicate using SSH with our nodes. Let's send the machinefile using SCP.
|
226
|
+
From a `pry` console let's load the SCP module to transfer files:
|
227
|
+
|
228
|
+
[12] pry(main)> require 'net/scp'
|
229
|
+
|
230
|
+
Then, copy-paste the following code in pry console:
|
231
|
+
|
232
|
+
Net::SCP.start(nodes.first, "oar", grid5000_opt) do |scp|
|
233
|
+
scp.upload! machine_file.path, "/tmp/machine_file"
|
234
|
+
end
|
235
|
+
|
236
|
+
The previous code will send the machine file to the first node.
|
237
|
+
We can check this by performing an SSH connection into the node.
|
238
|
+
Here to illustrate the use of temporary files, let's type the following:
|
239
|
+
|
240
|
+
[6] pry(main)> edit -t
|
241
|
+
|
242
|
+
and copy-paste the following code:
|
243
|
+
|
244
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
245
|
+
puts ssh.exec("cat /tmp/machine_file")
|
246
|
+
end
|
247
|
+
|
248
|
+
If we save and quit the editor, the code will be evaluated in Pry context.
|
249
|
+
Which will generate the following output:
|
250
|
+
|
251
|
+
[12] pry(main)> edit -t
|
252
|
+
#<Net::SSH::Connection::Channel:0x00000001247150>
|
253
|
+
graphene-80.nancy.grid5000.fr
|
254
|
+
graphene-81.nancy.grid5000.fr
|
255
|
+
=> nil
|
256
|
+
|
257
|
+
We confirmed the existence of the file in the first reserved node.
|
258
|
+
Now let's download, compile and execute
|
259
|
+
the benchmark. Create a Ruby file called netpipe:
|
260
|
+
|
261
|
+
[12] pry(main)> edit -n netpipe.rb
|
262
|
+
|
263
|
+
With the following content:
|
264
|
+
|
265
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
266
|
+
netpipe_url = "http://pkgs.fedoraproject.org/repo/pkgs/NetPIPE/NetPIPE-3.7.1.tar.gz/5f720541387be065afdefc81d438b712/NetPIPE-3.7.1.tar.gz"
|
267
|
+
ssh.exec!("mkdir -p netpipe_exp")
|
268
|
+
ssh.exec!("wget -O ~/netpipe_exp/NetPIPE.tar.gz #{netpipe_url}")
|
269
|
+
ssh.exec!("cd netpipe_exp && tar -zvxf NetPIPE.tar.gz")
|
270
|
+
ssh.exec!("cd netpipe_exp/NetPIPE-3.7.1 && make mpi")
|
271
|
+
ssh.exec("mpirun --mca plm_rsh_agent \"oarsh\" -machinefile /tmp/machine_file ~/netpipe_exp/NetPIPE-3.7.1/NPmpi")
|
272
|
+
end
|
273
|
+
|
274
|
+
Then, execute the created script:
|
275
|
+
|
276
|
+
[16] pry(main)> play netpipe.rb
|
277
|
+
#<Net::SSH::Connection::Channel:0x000000021679f0>
|
278
|
+
Permission denied (publickey,keyboard-interactive).
|
279
|
+
--------------------------------------------------------------------------
|
280
|
+
A daemon (pid 4615) died unexpectedly with status 255 while attempting
|
281
|
+
to launch so we are aborting.
|
282
|
+
|
283
|
+
There may be more information reported by the environment (see above).
|
284
|
+
|
285
|
+
This may be because the daemon was unable to find all the needed shared
|
286
|
+
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
|
287
|
+
location of the shared libraries on the remote nodes and this will
|
288
|
+
automatically be forwarded to the remote nodes.
|
289
|
+
--------------------------------------------------------------------------
|
290
|
+
--------------------------------------------------------------------------
|
291
|
+
mpirun noticed that the job aborted, but has no info as to the process
|
292
|
+
that caused that situation.
|
293
|
+
--------------------------------------------------------------------------
|
294
|
+
mpirun: clean termination accomplished
|
295
|
+
|
296
|
+
=> nil
|
297
|
+
|
298
|
+
We got an error related to the SSH keys and it is due to the fact that `oarsh` cannot find the appropriate key files.
|
299
|
+
We can fix this problem by prefixing the `mpirun` command with `export OAR_JOB_KEY_FILE=~/my_ssh_jobkey`.
|
300
|
+
Now the code will look like this:
|
301
|
+
|
302
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
303
|
+
netpipe_url = "http://pkgs.fedoraproject.org/repo/pkgs/NetPIPE/NetPIPE-3.7.1.tar.gz/5f720541387be065afdefc81d438b712/NetPIPE-3.7.1.tar.gz"
|
304
|
+
ssh.exec!("mkdir -p netpipe_exp")
|
305
|
+
ssh.exec!("wget -O ~/netpipe_exp/NetPIPE.tar.gz #{netpipe_url}")
|
306
|
+
ssh.exec!("cd netpipe_exp && tar -zvxf NetPIPE.tar.gz")
|
307
|
+
ssh.exec!("cd netpipe_exp/NetPIPE-3.7.1 && make mpi")
|
308
|
+
ssh.exec("export OAR_JOB_KEY_FILE=~/my_ssh_jobkey;mpirun --mca plm_rsh_agent \"oarsh\" -machinefile /tmp/machine_file ~/netpipe_exp/NetPIPE-3.7.1/NPmpi")
|
309
|
+
end
|
310
|
+
|
311
|
+
After running the script, it will show the output of the benchmark in the `pry` console:
|
312
|
+
|
313
|
+
[34] pry(main)> play netpipe.rb
|
314
|
+
#<Net::SSH::Connection::Channel:0x00000002edc6d0>
|
315
|
+
0: adonis-9
|
316
|
+
1: adonis-10
|
317
|
+
Now starting the main loop
|
318
|
+
0: 1 bytes 32103 times --> 4.58 Mbps in 1.67 usec
|
319
|
+
1: 2 bytes 59994 times --> 9.22 Mbps in 1.65 usec
|
320
|
+
2: 3 bytes 60440 times --> 13.79 Mbps in 1.66 usec
|
321
|
+
3: 4 bytes 40180 times --> 18.34 Mbps in 1.66 usec
|
322
|
+
4: 6 bytes 45076 times --> 27.07 Mbps in 1.69 usec
|
323
|
+
5: 8 bytes 29563 times --> 36.16 Mbps in 1.69 usec
|
324
|
+
6: 12 bytes 37023 times --> 53.84 Mbps in 1.70 usec
|
325
|
+
7: 13 bytes 24500 times --> 57.97 Mbps in 1.71 usec
|
326
|
+
8: 16 bytes 26977 times --> 71.61 Mbps in 1.70 usec
|
327
|
+
9: 19 bytes 32995 times --> 84.65 Mbps in 1.71 usec
|
328
|
+
10: 21 bytes 36882 times --> 93.27 Mbps in 1.72 usec
|
329
|
+
11: 24 bytes 38808 times --> 106.69 Mbps in 1.72 usec
|
330
|
+
12: 27 bytes 41271 times --> 119.77 Mbps in 1.72 usec
|
331
|
+
|
332
|
+
We can modify slightly the previous script to write the result into a file.
|
333
|
+
We need to use `ssh.exec!` to capture the output of the commands.
|
334
|
+
|
335
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
336
|
+
netpipe_url = "http://pkgs.fedoraproject.org/repo/pkgs/NetPIPE/NetPIPE-3.7.1.tar.gz/5f720541387be065afdefc81d438b712/NetPIPE-3.7.1.tar.gz"
|
337
|
+
ssh.exec!("mkdir -p netpipe_exp")
|
338
|
+
ssh.exec!("wget -O ~/netpipe_exp/NetPIPE.tar.gz #{netpipe_url}")
|
339
|
+
ssh.exec!("cd netpipe_exp && tar -zvxf NetPIPE.tar.gz")
|
340
|
+
ssh.exec!("cd netpipe_exp/NetPIPE-3.7.1 && make mpi")
|
341
|
+
|
342
|
+
File.open("output_netpipe.txt", 'w') do |f|
|
343
|
+
f.puts ssh.exec!("export OAR_JOB_KEY_FILE=~/my_ssh_jobkey; mpirun --mca plm_rsh_agent \"oarsh\" -machinefile /tmp/machine_file ~/netpipe_exp/NetPIPE-3.7.1/NPmpi")
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
We can check the results by doing:
|
348
|
+
|
349
|
+
[16] pry(main)> .cat output_netpipe.txt
|
350
|
+
0: adonis-9
|
351
|
+
1: adonis-10
|
352
|
+
Now starting the main loop
|
353
|
+
0: 1 bytes 31441 times --> 4.62 Mbps in 1.65 usec
|
354
|
+
1: 2 bytes 60550 times --> 9.24 Mbps in 1.65 usec
|
355
|
+
2: 3 bytes 60580 times --> 13.87 Mbps in 1.65 usec
|
356
|
+
3: 4 bytes 40404 times --> 18.39 Mbps in 1.66 usec
|
357
|
+
4: 6 bytes 45183 times --> 27.22 Mbps in 1.68 usec
|
358
|
+
5: 8 bytes 29729 times --> 36.17 Mbps in 1.69 usec
|
359
|
+
6: 12 bytes 37039 times --> 54.01 Mbps in 1.70 usec
|
360
|
+
7: 13 bytes 24578 times --> 58.40 Mbps in 1.70 usec
|
361
|
+
8: 16 bytes 27177 times --> 71.87 Mbps in 1.70 usec
|
362
|
+
9: 19 bytes 33116 times --> 85.00 Mbps in 1.71 usec
|
363
|
+
|
364
|
+
At the end of the experiment you can use the command `hist` to see what you have done so far.
|
365
|
+
This can help you to assemble everything together in a whole script.
|
366
|
+
|
367
|
+
[22] pry(main)> hist
|
368
|
+
1: edit -n find_infiniband.rb
|
369
|
+
3: play find_infiniband.rb
|
370
|
+
4: sites_infiniband
|
371
|
+
5: .ls ~/my_ssh*
|
372
|
+
6: .scp ~/my_ssh* nancy:~/
|
373
|
+
7: job = $g5k.reserve(:site => "nancy", :resources => "{ib20g='YES'}/nodes=2/core=1",:walltime => '01:00:00', :keys => "~/my_ssh_jobkey" )
|
374
|
+
8: nodes = job["assigned_nodes"]
|
375
|
+
9: machine_file = Tempfile.open('machine_file')
|
376
|
+
10: nodes.each{ |node| machine_file.puts node }
|
377
|
+
11: machine_file.close
|
378
|
+
12: grid5000_opt = OARSSHopts.new(:keys => "~/my_ssh_jobkey")
|
379
|
+
13: require 'net/scp'
|
380
|
+
14: Net::SCP.start(nodes.first, "oar", grid5000_opt) do |scp|
|
381
|
+
15: scp.upload! machine_file.path, "/tmp/machine_file"
|
382
|
+
16: end
|
383
|
+
17: edit -n netpipe.rb
|
384
|
+
18: play netpipe.rb
|
385
|
+
19: edit -n netpipe.rb
|
386
|
+
20: play netpipe.rb
|
387
|
+
|
388
|
+
## Running NAS benchmarks in Grid'5000: getting acquainted with parallel command execution
|
389
|
+
|
390
|
+
In this experiment, we will run the NAS benchmark in Grid'5000 and we will script a scalability test for one of the benchmarks.
|
391
|
+
This experiment has the following requirements:
|
392
|
+
|
393
|
+
- 4 or 2 nodes from any Grid'5000 sites
|
394
|
+
- Use of standard environment (no deploy)
|
395
|
+
- NAS MPI benchmark
|
396
|
+
- A MPI runtime (OpenMPI or MPICH)
|
397
|
+
|
398
|
+
If you have not created a directory for the tutorial, create it and execute the `pry` console from there:
|
399
|
+
|
400
|
+
$ mkdir ruby-cute-tutorial
|
401
|
+
$ cd ruby-cute-tutorial
|
402
|
+
$ cute
|
403
|
+
|
404
|
+
First, let's find the necessary nodes for our experiment. As resources in Grid'5000 could be very busy, we are going
|
405
|
+
to script a loop that will explore all Grid'5000 sites and find the first site that can provide us with the required nodes.
|
406
|
+
Open an editor from the `pry` console:
|
407
|
+
|
408
|
+
[5] pry(main)> edit -n find_nodes.rb
|
409
|
+
|
410
|
+
and type the following code:
|
411
|
+
|
412
|
+
sites = $g5k.site_uids
|
413
|
+
job = {}
|
414
|
+
|
415
|
+
sites.each do |site|
|
416
|
+
job = $g5k.reserve(:site => site, :nodes => 4, :wait => false, :walltime => "01:00:00")
|
417
|
+
begin
|
418
|
+
job = $g5k.wait_for_job(job, :wait_time => 60)
|
419
|
+
puts "Nodes assigned #{job['assigned_nodes']}"
|
420
|
+
break
|
421
|
+
rescue Cute::G5K::EventTimeout
|
422
|
+
puts "We waited too long in site #{site} let's release the job and try in another site"
|
423
|
+
$g5k.release(job)
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
Here, we use the method {Cute::G5K::API#site_uids site_uids} for getting all available sites.
|
428
|
+
Then, a job is submitted using the method {Cute::G5K::API#reserve reserve}.
|
429
|
+
We ask for 4 nodes in a given site and we set the parameter `wait` to false, which makes the method return immediately.
|
430
|
+
Then, we use {Cute::G5K::API#wait_for_job wait_for_job} to set a timeout. If the timeout is reached a {Cute::G5K::EventTimeout Timeout} exception will be triggered
|
431
|
+
that we catch in order to consequently release the submitted job and try to submit it in another site.
|
432
|
+
Let's execute the script using `play` command:
|
433
|
+
|
434
|
+
[8] pry(main)> play find_nodes.rb
|
435
|
+
2015-12-08 16:50:35.582 => Reserving resources: /nodes=4,walltime=01:00 (type: ) (in grenoble)
|
436
|
+
2015-12-08 16:50:36.465 => Waiting for reservation 1702197
|
437
|
+
2015-12-08 16:50:41.587 => Reservation 1702197 should be available at 2015-12-08 16:50:37 +0100 (0 s)
|
438
|
+
2015-12-08 16:50:41.587 => Reservation 1702197 ready
|
439
|
+
Nodes assigned ["edel-10.grenoble.grid5000.fr", "edel-11.grenoble.grid5000.fr", "edel-12.grenoble.grid5000.fr", "edel-13.grenoble.grid5000.fr"]
|
440
|
+
=> nil
|
441
|
+
|
442
|
+
The variable `job` is updated in Pry context. When no keys are specified, the option *-allow_classic_ssh* is activated
|
443
|
+
which enables the access via default SSH to the reserved machines. You can verify it by doing:
|
444
|
+
|
445
|
+
[11] pry(main)> .ssh edel-11.grenoble.grid5000.fr "hostname"
|
446
|
+
Warning: Permanently added 'edel-11.grenoble.grid5000.fr,172.16.17.11' (RSA) to the list of known hosts.
|
447
|
+
edel-11.grenoble.grid5000.fr
|
448
|
+
|
449
|
+
Let's explore the available modules for the parallel execution of commands in several remote machines.
|
450
|
+
The following example shows how to use the {Cute::TakTuk TakTuk} module.
|
451
|
+
|
452
|
+
nodes = job["assigned_nodes"]
|
453
|
+
Cute::TakTuk.start(nodes) do |tak|
|
454
|
+
tak.exec("hostname")
|
455
|
+
end
|
456
|
+
|
457
|
+
Which generates as output:
|
458
|
+
|
459
|
+
edel-10.grenoble.grid5000.fr/output/0:edel-10.grenoble.grid5000.fr
|
460
|
+
edel-12.grenoble.grid5000.fr/output/0:edel-12.grenoble.grid5000.fr
|
461
|
+
edel-13.grenoble.grid5000.fr/output/0:edel-13.grenoble.grid5000.fr
|
462
|
+
edel-10.grenoble.grid5000.fr/status/0:0
|
463
|
+
edel-11.grenoble.grid5000.fr/output/0:edel-11.grenoble.grid5000.fr
|
464
|
+
edel-12.grenoble.grid5000.fr/status/0:0
|
465
|
+
edel-13.grenoble.grid5000.fr/status/0:0
|
466
|
+
edel-11.grenoble.grid5000.fr/status/0:0
|
467
|
+
|
468
|
+
The following example shows how to use the {Net::SSH::Multi Net::SSH::Multi} module.
|
469
|
+
|
470
|
+
Net::SSH::Multi.start do |session|
|
471
|
+
nodes.each{ |node| session.use node }
|
472
|
+
session.exec("hostname")
|
473
|
+
end
|
474
|
+
|
475
|
+
If we type that into pry console we will get:
|
476
|
+
|
477
|
+
[edel-10.grenoble.grid5000.fr] edel-10.grenoble.grid5000.fr
|
478
|
+
[edel-11.grenoble.grid5000.fr] edel-11.grenoble.grid5000.fr
|
479
|
+
[edel-12.grenoble.grid5000.fr] edel-12.grenoble.grid5000.fr
|
480
|
+
[edel-13.grenoble.grid5000.fr] edel-13.grenoble.grid5000.fr
|
481
|
+
|
482
|
+
It is possible to capture the output of the executed command by adding **!** to exec method.
|
483
|
+
For example, let's find the number of cores available in the reserved machines:
|
484
|
+
|
485
|
+
results = {}
|
486
|
+
Net::SSH::Multi.start do |session|
|
487
|
+
nodes.each{ |node| session.use node }
|
488
|
+
results = session.exec!("nproc")
|
489
|
+
end
|
490
|
+
|
491
|
+
The {Net::SSH::Multi::SessionActions#exec! exec!} method will return a Hash that looks like this:
|
492
|
+
|
493
|
+
[27] pry(main)> results
|
494
|
+
=> {"edel-10.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0},
|
495
|
+
"edel-11.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0},
|
496
|
+
"edel-12.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0},
|
497
|
+
"edel-13.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0}}
|
498
|
+
|
499
|
+
Where the Hash keys are the names of the machines and the values correspond to the output of the commands.
|
500
|
+
Then, we can easily get the total number of cores by typing:
|
501
|
+
|
502
|
+
[11] pry(main)> num_cores = results.values.inject(0){ |sum, item| sum+item[:stdout].to_i}
|
503
|
+
=> 32
|
504
|
+
|
505
|
+
Another way to do that is to use the information given by the G5K API regarding the submitted job:
|
506
|
+
|
507
|
+
[36] pry(main)> job["resources_by_type"]["cores"].length
|
508
|
+
=> 32
|
509
|
+
|
510
|
+
Let's create a machine file that we will need later on for our experiments:
|
511
|
+
|
512
|
+
machine_file = Tempfile.open('machine_file')
|
513
|
+
nodes.each{ |node| machine_file.puts node }
|
514
|
+
machine_file.close
|
515
|
+
|
516
|
+
After creating the machine file, we need to send it to the other machines.
|
517
|
+
Additionally, we need to download and compile the benchmark.
|
518
|
+
Let's write a small script that helps us perform the aforementioned tasks.
|
519
|
+
Open the editor in pry console:
|
520
|
+
|
521
|
+
[17] pry(main)> edit -n NAS-expe.rb
|
522
|
+
|
523
|
+
Then, type:
|
524
|
+
|
525
|
+
SOURCE_NAS = "http://public.rennes.grid5000.fr/~cruizsanabria/NPB3.3.tar"
|
526
|
+
|
527
|
+
`wget #{SOURCE_NAS} -O /tmp/NAS.tar`
|
528
|
+
|
529
|
+
Cute::TakTuk.start(nodes) do |tak|
|
530
|
+
|
531
|
+
tak.put(machine_file.path, "machine_file")
|
532
|
+
tak.put("/tmp/NAS.tar", "/tmp/NAS.tar")
|
533
|
+
|
534
|
+
tak.exec!("cd /tmp/; tar -xvf NAS.tar")
|
535
|
+
puts tak.exec!("make lu NPROCS=#{num_cores} CLASS=A MPIF77=mpif77 -C /tmp/NPB3.3/NPB3.3-MPI/")
|
536
|
+
|
537
|
+
end
|
538
|
+
|
539
|
+
We can observe in the previous snippet of code that {Cute::TakTuk TakTuk} module can be used to
|
540
|
+
transfer files to several remote nodes. {Cute::TakTuk::TakTuk#put put} and {Cute::TakTuk::TakTuk#exec exec} methods
|
541
|
+
can be used in the same block. Finally, execute the script:
|
542
|
+
|
543
|
+
[102] pry(main)> play NAS-expe.rb
|
544
|
+
|
545
|
+
We can check if each node has the generated binary and the machine file:
|
546
|
+
|
547
|
+
Net::SSH::Multi.start do |session|
|
548
|
+
nodes.each{ |node| session.use node }
|
549
|
+
session.exec("ls /tmp/NPB3.3/NPB3.3-MPI/bin/")
|
550
|
+
session.exec("ls ~/machine*")
|
551
|
+
end
|
552
|
+
|
553
|
+
After typing it into `pry` console we will get something like:
|
554
|
+
|
555
|
+
[genepi-27.grenoble.grid5000.fr] lu.A.32
|
556
|
+
[genepi-29.grenoble.grid5000.fr] lu.A.32
|
557
|
+
[genepi-29.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
558
|
+
[genepi-19.grenoble.grid5000.fr] lu.A.32
|
559
|
+
[genepi-19.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
560
|
+
[genepi-2.grenoble.grid5000.fr] lu.A.32
|
561
|
+
[genepi-2.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
562
|
+
[genepi-27.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
563
|
+
|
564
|
+
Which confirms the presence of both files on the nodes.
|
565
|
+
We can get the path of the binary by typing the following into `pry` console.
|
566
|
+
|
567
|
+
Net::SSH::Multi.start do |session|
|
568
|
+
nodes.each{ |node| session.use node }
|
569
|
+
results = session.exec!("find /tmp/ -name lu.A.32")
|
570
|
+
end
|
571
|
+
|
572
|
+
We will get some errors caused by the `find` command:
|
573
|
+
|
574
|
+
[32] pry(main)> results
|
575
|
+
=> {"genepi-27.grenoble.grid5000.fr"=>{:stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :stderr=>": Permission denied", :status=>1},
|
576
|
+
"genepi-29.grenoble.grid5000.fr"=>{:stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :stderr=>": Permission denied", :status=>1},
|
577
|
+
"genepi-19.grenoble.grid5000.fr"=>{:stderr=>": Permission denied", :stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :status=>1},
|
578
|
+
"genepi-2.grenoble.grid5000.fr"=>{:stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :stderr=>": Permission denied", :status=>1}}
|
579
|
+
|
580
|
+
|
581
|
+
Then, we can assign this to a new variable:
|
582
|
+
|
583
|
+
[33] pry(main)> lu_path = results.values.first[:stdout]
|
584
|
+
=> "/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32"
|
585
|
+
|
586
|
+
The setup of the experiment is done. It is time to execute the benchmark by typing the following into `pry` console.
|
587
|
+
|
588
|
+
Net::SSH.start(nodes.first,"cruizsanabria") do |ssh|
|
589
|
+
results = ssh.exec!("mpirun --mca btl self,sm,tcp -np 32 --machinefile machine_file #{lu_path}")
|
590
|
+
end
|
591
|
+
|
592
|
+
Let's now perform a scalability test of the LU application for 2, 4, 8, 16, 32 processes. Open the editor:
|
593
|
+
|
594
|
+
[100] pry(main)> edit -n scalability_NAS.rb
|
595
|
+
|
596
|
+
And copy-paste the following script:
|
597
|
+
|
598
|
+
num_cores = [2,4,8,16,32]
|
599
|
+
|
600
|
+
Cute::TakTuk.start(nodes) do |tak|
|
601
|
+
|
602
|
+
num_cores.each do |cores|
|
603
|
+
puts tak.exec!("make lu NPROCS=#{cores} CLASS=A MPIF77=mpif77 -C /tmp/NPB3.3/NPB3.3-MPI/")
|
604
|
+
end
|
605
|
+
|
606
|
+
results = tak.exec!("find /tmp/ -name lu.A.*")
|
607
|
+
end
|
608
|
+
|
609
|
+
binaries = results.values.first[:output].split("\n")
|
610
|
+
|
611
|
+
expe_res = {}
|
612
|
+
|
613
|
+
Net::SSH.start(nodes.first,"cruizsanabria") do |ssh|
|
614
|
+
binaries.each do |binary|
|
615
|
+
processes = /A\.(\d*)/.match(binary)[1]
|
616
|
+
expe_res[processes]= {}
|
617
|
+
result = ssh.exec!("mpirun --mca btl self,sm,tcp -np #{processes} --machinefile machine_file #{binary}")
|
618
|
+
expe_res[processes][:output]= result
|
619
|
+
expe_res[processes][:time] =result.split("\n").select{ |t| t["Time in"]}.first
|
620
|
+
end
|
621
|
+
end
|
622
|
+
|
623
|
+
Then, we execute it:
|
624
|
+
|
625
|
+
[102] pry(main)> play scalability_NAS.rb
|
626
|
+
|
627
|
+
It will take approximately 2 ~ 3 minutes to run. After finishing a new Hash will
|
628
|
+
be defined called *expe_res* that we can use to print the results:
|
629
|
+
|
630
|
+
num_cores.each{ |cores| puts "#{cores} cores: #{expe_res[cores.to_s][:time]}"}
|
631
|
+
|
632
|
+
It will generate:
|
633
|
+
|
634
|
+
[107] pry(main)> num_cores.each{ |cores| puts "#{cores} cores: #{expe_res[cores.to_s][:time]}"}
|
635
|
+
2 cores: Time in seconds = 42.93
|
636
|
+
4 cores: Time in seconds = 26.50
|
637
|
+
8 cores: Time in seconds = 12.39
|
638
|
+
16 cores: Time in seconds = 7.01
|
639
|
+
32 cores: Time in seconds = 6.00
|
640
|
+
|
641
|
+
Finally, we can use the command `hist` to try to assemble all we have done so far into a script.
|
642
|
+
Once finished, we could release the job:
|
643
|
+
|
644
|
+
[34] pry(main)> $g5k.release(job)
|
645
|
+
=> ""
|
646
|
+
|
647
|
+
## Performing network measurements within a reserved VLAN
|
648
|
+
|
649
|
+
In this experiment, we will perform network measurements between two nodes located in different Grid'5000 sites.
|
650
|
+
The network measurements will be carried out in an isolated VLAN.
|
651
|
+
We will first reserve two nodes located in two different Grid'5000 sites in deploy mode and we will ask for two routed VLANs.
|
652
|
+
Once the nodes are ready, an environment will be deployed and the application iperf will be installed on all nodes.
|
653
|
+
Then, we will perform some network measurements among the nodes.
|
654
|
+
Finally, we will query the KWAPI using the G5K metrology API to get the network traffic generated during our experiment.
|
655
|
+
|
656
|
+
This experiment has the following requirements:
|
657
|
+
|
658
|
+
- Two nodes in two different G5K sites
|
659
|
+
- Environment deployment
|
660
|
+
- VLAN reservation
|
661
|
+
- Iperf application
|
662
|
+
- Access to Network traffic data.
|
663
|
+
|
664
|
+
If you have not created a directory for the tutorial, create it and execute the `pry` console from there:
|
665
|
+
|
666
|
+
$ mkdir ruby-cute-tutorial
|
667
|
+
$ cd ruby-cute-tutorial
|
668
|
+
$ cute
|
669
|
+
|
670
|
+
Let's create a small script that will help us with the reservation of nodes.
|
671
|
+
Open the `pry` editor:
|
672
|
+
|
673
|
+
[35] pry(main)> edit -n multisite.rb
|
674
|
+
|
675
|
+
and type:
|
676
|
+
|
677
|
+
jobs = {}
|
678
|
+
threads = []
|
679
|
+
["nancy","rennes"].each do |site|
|
680
|
+
|
681
|
+
threads.push<< Thread.new do
|
682
|
+
|
683
|
+
jobs[site] = job = $g5k.reserve(:site => site, :nodes => 1,
|
684
|
+
:env => 'jessie-x64-min',
|
685
|
+
:vlan => :routed)
|
686
|
+
end
|
687
|
+
end
|
688
|
+
|
689
|
+
threads.each{ |t| t.join}
|
690
|
+
|
691
|
+
|
692
|
+
In the script, we have chosen Nancy and Rennes sites. You are encouraged to try other sites as the number of routed VLANs is limited in each site.
|
693
|
+
For the purpose of this tutorial you have to choose a site where Kwapi is available: Grenoble, Nancy, Rennes, Lyon, Nantes.
|
694
|
+
We use the method {Cute::G5K::API#reserve reserve} with parameter *env* for specifying the environment we want to deploy.
|
695
|
+
This will automatically submit a deploy job and it will deploy the specified environment.
|
696
|
+
The parameter *vlan* will additionally reserve a VLAN and pass it to Kadeploy to setup the VLAN.
|
697
|
+
After executing this small script we got:
|
698
|
+
|
699
|
+
[36] pry(main)> play multisite.rb
|
700
|
+
2016-01-20 12:48:15.010 => Reserving resources: {type='kavlan'}/vlan=1+/nodes=1,walltime=01:00 (type: deploy) (in nancy)
|
701
|
+
2016-01-20 12:48:15.010 => Reserving resources: {type='kavlan'}/vlan=1+/nodes=1,walltime=01:00 (type: deploy) (in rennes)
|
702
|
+
2016-01-20 12:48:16.145 => Waiting for reservation 740698
|
703
|
+
2016-01-20 12:48:16.246 => Waiting for reservation 802917
|
704
|
+
2016-01-20 12:48:21.270 => Reservation 740698 should be available at 2016-01-20 12:48:17 +0100 (0 s)
|
705
|
+
2016-01-20 12:48:26.344 => Reservation 740698 should be available at 2016-01-20 12:48:17 +0100 (0 s)
|
706
|
+
2016-01-20 12:48:26.404 => Reservation 802917 should be available at 2016-01-20 12:48:13 +0100 (0 s)
|
707
|
+
2016-01-20 12:48:26.404 => Reservation 802917 ready
|
708
|
+
2016-01-20 12:48:26.541 => Found VLAN with uid = 4
|
709
|
+
2016-01-20 12:48:26.541 => Creating deployment
|
710
|
+
2016-01-20 12:48:27.256 => Waiting for 1 deployment
|
711
|
+
2016-01-20 12:48:31.296 => Waiting for 1 deployment
|
712
|
+
2016-01-20 12:48:31.406 => Reservation 740698 should be available at 2016-01-20 12:48:17 +0100 (0 s)
|
713
|
+
2016-01-20 12:48:31.406 => Reservation 740698 ready
|
714
|
+
2016-01-20 12:48:31.469 => Found VLAN with uid = 4
|
715
|
+
2016-01-20 12:48:31.469 => Creating deployment
|
716
|
+
2016-01-20 12:48:31.869 => Waiting for 1 deployment
|
717
|
+
2016-01-20 12:48:35.414 => Waiting for 1 deployment
|
718
|
+
|
719
|
+
At the end of the process the variable `jobs` will be defined and it will contain the jobs' information in each site.
|
720
|
+
In this variable, we can find information related with the deployment.
|
721
|
+
|
722
|
+
[44] pry(main)> jobs["nancy"]["deploy"]
|
723
|
+
=> [{"created_at"=>1450439620,
|
724
|
+
"environment"=>"jessie-x64-min",
|
725
|
+
"key"=>"https://api.grid5000.fr/sid/sites/nancy/files/cruizsanabria-key-84f3f1dbb1279bc1bddcd618e26c960307d653c5",
|
726
|
+
"nodes"=>["graphite-4.nancy.grid5000.fr"],
|
727
|
+
"result"=>{"graphite-4.nancy.grid5000.fr"=>{"macro"=>nil, "micro"=>nil, "state"=>"OK"}},
|
728
|
+
"site_uid"=>"nancy",
|
729
|
+
"status"=>"terminated",
|
730
|
+
"uid"=>"D-b026879e-b185-4e20-8bc5-ea0842a6954b",
|
731
|
+
"updated_at"=>1450439860,
|
732
|
+
"user_uid"=>"cruizsanabria",
|
733
|
+
"vlan"=>14,
|
734
|
+
"links"=>
|
735
|
+
[{"rel"=>"self", "href"=>"/sid/sites/nancy/deployments/D-b026879e-b185-4e20-8bc5-ea0842a6954b", "type"=>"application/vnd.grid5000.item+json"},
|
736
|
+
{"rel"=>"parent", "href"=>"/sid/sites/nancy", "type"=>"application/vnd.grid5000.item+json"}]}]
|
737
|
+
|
738
|
+
Some important pieces of information are the status of the whole process and the state per node.
|
739
|
+
We can use this information to check if the deployment has finished successfully on all nodes.
|
740
|
+
This data structure is used by the method {Cute::G5K::API#check_deployment check_deployment}.
|
741
|
+
Let's check the documentation of this method:
|
742
|
+
|
743
|
+
[16] pry(main)> show-doc Cute::G5K::API#check_deployment
|
744
|
+
|
745
|
+
From: /home/cruizsanabria/Repositories/ruby-cute/lib/cute/g5k_api.rb @ line 1198:
|
746
|
+
Owner: Cute::G5K::API
|
747
|
+
Visibility: public
|
748
|
+
Signature: check_deployment(deploy_info)
|
749
|
+
Number of lines: 10
|
750
|
+
|
751
|
+
It returns an array of machines that did not deploy successfully
|
752
|
+
= Example
|
753
|
+
It can be used to try a new deploy:
|
754
|
+
|
755
|
+
badnodes = g5k.check_deployment(job["deploy"].last)
|
756
|
+
g5k.deploy(job,:nodes => badnodes, :env => 'wheezy-x64-base')
|
757
|
+
g5k.wait_for_deploy(job)
|
758
|
+
|
759
|
+
return [Array] machines that did not deploy successfully
|
760
|
+
param deploy_info [Hash] deployment structure information
|
761
|
+
|
762
|
+
We can use this method with the jobs we have just submitted
|
763
|
+
(The output will probably be long, so you will need to scroll up to see what is shown here):
|
764
|
+
|
765
|
+
[47] pry(main)> jobs.each{ |site,job| puts "all nodes OK in site: #{site}" if $g5k.check_deployment(job["deploy"].last).empty?}
|
766
|
+
all nodes OK in site: rennes
|
767
|
+
all nodes OK in site: nancy
|
768
|
+
|
769
|
+
Now, the reserved nodes are in a VLAN; within this VLAN a DHCP server will assign new IP addresses to the nodes.
|
770
|
+
You can configure your own if you want (please refer to {https://www.grid5000.fr/mediawiki/index.php/Network_isolation_on_Grid%275000 KVLAN tutorial}
|
771
|
+
if you want to know more). We can get the new assigned names by doing:
|
772
|
+
|
773
|
+
nodes = []
|
774
|
+
jobs.each{ |site,job| nodes.push($g5k.get_vlan_nodes(job))}
|
775
|
+
|
776
|
+
After putting that into `pry` we will get something like this:
|
777
|
+
|
778
|
+
[50] pry(main)> nodes
|
779
|
+
=> [["paranoia-6-kavlan-16.rennes.grid5000.fr"], ["graphite-4-kavlan-14.nancy.grid5000.fr"]]
|
780
|
+
|
781
|
+
[51] pry(main)> nodes.flatten
|
782
|
+
=> ["paranoia-6-kavlan-16.rennes.grid5000.fr", "graphite-4-kavlan-14.nancy.grid5000.fr"]
|
783
|
+
|
784
|
+
Now, let's install `iperf` application in order to perform our network measurements.
|
785
|
+
Copy-paste the following code into `pry`:
|
786
|
+
|
787
|
+
nodes = nodes.flatten
|
788
|
+
|
789
|
+
Net::SSH::Multi.start do |session|
|
790
|
+
nodes.each{ |node| session.use("root@#{node}") }
|
791
|
+
session.exec!("apt-get update")
|
792
|
+
session.exec("DEBIAN_FRONTEND=noninteractive apt-get install -q -y iperf")
|
793
|
+
end
|
794
|
+
|
795
|
+
You should get something like this:
|
796
|
+
|
797
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Reading package lists...
|
798
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Building dependency tree...
|
799
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] Reading package lists...
|
800
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] Building dependency tree...
|
801
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr]
|
802
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Reading state information...
|
803
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] The following NEW packages will be installed:
|
804
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] iperf
|
805
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] 0 upgraded, 1 newly installed, 0 to remove and 8 not upgraded.
|
806
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Need to get 51.4 kB of archives.
|
807
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] After this operation, 179 kB of additional disk space will be used.
|
808
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Get:1 http://ftp.debian.org/debian/ jessie/main iperf amd64 2.0.5+dfsg1-2 [51.4 kB]
|
809
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr]
|
810
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] Reading state information...
|
811
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] The following NEW packages will be installed:
|
812
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] iperf
|
813
|
+
|
814
|
+
|
815
|
+
You can check if the application has been successfully installed,
|
816
|
+
by typing the following into the `pry` console:
|
817
|
+
|
818
|
+
Net::SSH::Multi.start do |session|
|
819
|
+
nodes.each{ |node| session.use("root@#{node}") }
|
820
|
+
session.exec("iperf --version")
|
821
|
+
end
|
822
|
+
|
823
|
+
Which will generate:
|
824
|
+
|
825
|
+
[65] pry(main)* end
|
826
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] iperf version 2.0.5 (08 Jul 2010) pthreads
|
827
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] iperf version 2.0.5 (08 Jul 2010) pthreads
|
828
|
+
=> nil
|
829
|
+
|
830
|
+
Let's perform some iperf tests, let's write a small script.
|
831
|
+
Open the editor:
|
832
|
+
|
833
|
+
[76] pry(main)> edit -n iperf_test.rb
|
834
|
+
|
835
|
+
and type:
|
836
|
+
|
837
|
+
results = {}
|
838
|
+
|
839
|
+
Net::SSH::Multi.start do |session|
|
840
|
+
|
841
|
+
session.group :server do
|
842
|
+
session.use("root@#{nodes[0]}")
|
843
|
+
end
|
844
|
+
|
845
|
+
session.group :client do
|
846
|
+
session.use("root@#{nodes[1]}")
|
847
|
+
end
|
848
|
+
|
849
|
+
session.with(:server).exec("iperf -s &")
|
850
|
+
|
851
|
+
# bandwidth
|
852
|
+
|
853
|
+
results[:bandwidth]= session.with(:client).exec!("iperf -c #{nodes[0]}")
|
854
|
+
|
855
|
+
# bi-directional bandwidth measurement
|
856
|
+
|
857
|
+
results[:bidi]= session.with(:client).exec!("iperf -c #{nodes[0]} -r")
|
858
|
+
|
859
|
+
# TCP window size
|
860
|
+
results[:window]= session.with(:client).exec!("iperf -c #{nodes[0]} -w 2000")
|
861
|
+
|
862
|
+
# shutdown server
|
863
|
+
|
864
|
+
session.with(:server).exec("skill iperf")
|
865
|
+
end
|
866
|
+
|
867
|
+
Then, we execute it with the `play` command:
|
868
|
+
|
869
|
+
|
870
|
+
[77] pry(main)> play iperf_test.rb
|
871
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
872
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Server listening on TCP port 5001
|
873
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] TCP window size: 85.3 KByte (default)
|
874
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
875
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] local 10.27.204.71 port 5001 connected with 10.19.200.240 port 32769
|
876
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ ID] Interval Transfer Bandwidth
|
877
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] 0.0-10.0 sec 1.12 GBytes 957 Mbits/sec
|
878
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] local 10.27.204.71 port 5001 connected with 10.19.200.240 port 32770
|
879
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] 0.0-10.0 sec 1.10 GBytes 947 Mbits/sec
|
880
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
881
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Client connecting to 10.19.200.240, TCP port 5001
|
882
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] TCP window size: 85.0 KByte (default)
|
883
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
884
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] local 10.27.204.71 port 47604 connected with 10.19.200.240 port 5001
|
885
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] 0.0-10.0 sec 1.12 GBytes 958 Mbits/sec
|
886
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] local 10.27.204.71 port 5001 connected with 10.19.200.240 port 32771
|
887
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] 0.0-10.7 sec 2.25 MBytes 1.77 Mbits/sec
|
888
|
+
|
889
|
+
|
890
|
+
The variable `results` is now defined and contains the results of each test.
|
891
|
+
Let's print the results. Type the following into the `pry` console:
|
892
|
+
|
893
|
+
results.each do |test, res|
|
894
|
+
puts "Results of test: #{test}"
|
895
|
+
res.each { |node,r| puts r[:stdout]}
|
896
|
+
end
|
897
|
+
|
898
|
+
Which will give us:
|
899
|
+
|
900
|
+
Results of test: bandwidth
|
901
|
+
[ ID] Interval Transfer Bandwidth
|
902
|
+
[ 3] 0.0-10.0 sec 1.12 GBytes 958 Mbits/sec
|
903
|
+
Results of test: bidi
|
904
|
+
[ 4] 0.0-10.0 sec 1.12 GBytes 957 Mbits/sec
|
905
|
+
Results of test: window
|
906
|
+
[ ID] Interval Transfer Bandwidth
|
907
|
+
[ 3] 0.0-10.7 sec 2.25 MBytes 1.77 Mbits/sec
|
908
|
+
|
909
|
+
|
910
|
+
Now let's look at the network traffic that we have generated during our experiment using KWAPI.
|
911
|
+
**Ruby-cute** offers the {Cute::G5K::API#get_metric get_metric} method to consult the G5K Metrology API.
|
912
|
+
In order to carry out a query and get the values of a specific probe,
|
913
|
+
we have to know the time interval of the values and the name of the probe.
|
914
|
+
Let's get the values for the metric `network_in`.
|
915
|
+
We could get all the names of the probes specific to this metric by typing:
|
916
|
+
|
917
|
+
probes = $g5k.get_metric("rennes",:metric => "network_in").uids
|
918
|
+
|
919
|
+
If you type that in `pry` you will get:
|
920
|
+
|
921
|
+
[13] pry(main)> probes
|
922
|
+
=> ["parasilo-11-eth0",
|
923
|
+
"parasilo-11-eth1",
|
924
|
+
"paravance-48-eth1",
|
925
|
+
"paravance-48-eth0",
|
926
|
+
"paravance-2-eth0",
|
927
|
+
"paravance-2-eth1",
|
928
|
+
"paranoia-4",
|
929
|
+
"paranoia-5",
|
930
|
+
"paranoia-6",
|
931
|
+
"paranoia-7",
|
932
|
+
"paravance-72-eth0",
|
933
|
+
|
934
|
+
In order to choose the right probes, we need to get the real names of the machines
|
935
|
+
and not the ones assigned by the VLAN. We can consult the job information:
|
936
|
+
|
937
|
+
nodes_normal = []
|
938
|
+
jobs.each{ |site,job| nodes_normal.push(job["assigned_nodes"])}
|
939
|
+
|
940
|
+
Which will give us an Array of Arrays that we can flatten by doing:
|
941
|
+
|
942
|
+
[97] pry(main)> nodes_normal.flatten!
|
943
|
+
=> ["paranoia-6.rennes.grid5000.fr", "graphite-4.nancy.grid5000.fr"]
|
944
|
+
|
945
|
+
As we are going to fetch the data for Rennes (the first node), we could do:
|
946
|
+
|
947
|
+
[68] pry(main)> probe_expe = probes.select{ |p| p[nodes_normal[0].split(".")[0]]}
|
948
|
+
|
949
|
+
So, at this point we already have the probe we want to request.
|
950
|
+
The next step is to get the start time of the interval. We can choose, for example, the time at which the deployments finished:
|
951
|
+
|
952
|
+
deploy_end = []
|
953
|
+
jobs.each{ |site,job| deploy_end.push(job["deploy"].last["updated_at"])}
|
954
|
+
|
955
|
+
Therefore, we could choose the maximum timestamp from the ones returned:
|
956
|
+
|
957
|
+
start = deploy_end.max
|
958
|
+
|
959
|
+
Now, we can proceed by performing the query:
|
960
|
+
|
961
|
+
$g5k.get_metric("rennes",:metric => "network_in",:query => {:from => start, :to => start+3600, :only => probe_expe.first})
|
962
|
+
|
963
|
+
An Array is returned. We can then open an editor and write a small script that will write these values into a file:
|
964
|
+
|
965
|
+
[33] pry(main)> edit -n get_results.rb
|
966
|
+
|
967
|
+
and type:
|
968
|
+
|
969
|
+
raw_data = $g5k.get_metric("rennes",:metric => "network_in",
|
970
|
+
:query => {:from => start, :to => start+3600, :only => probe_expe.first})
|
971
|
+
|
972
|
+
network_in = raw_data.map{ |r| r["values"]}.flatten
|
973
|
+
time = raw_data.map{ |r| r["timestamps"]}.flatten
|
974
|
+
|
975
|
+
values = Hash[time.zip(network_in)]
|
976
|
+
|
977
|
+
File.open("network_in-values.txt",'w+') do |f|
|
978
|
+
f.puts("time\t bytes")
|
979
|
+
values.each{ |k,v| f.puts("#{k}\t#{v}")}
|
980
|
+
end
|
981
|
+
|
982
|
+
and execute it with:
|
983
|
+
|
984
|
+
pry(main)> play get_results.rb
|
985
|
+
=> {1453293153.732378=>4498405298908,
|
986
|
+
1453293155.183099=>4498405298908,
|
987
|
+
1453293156.582115=>4498405298908,
|
988
|
+
1453293157.924968=>4498405298908,
|
989
|
+
1453293159.28666=>4498405298908,
|
990
|
+
1453293160.655534=>4498405298908,
|
991
|
+
1453293161.998718=>4498405299219,
|
992
|
+
|
993
|
+
## Conclusions
|
994
|
+
|
995
|
+
This tutorial has shown how the scripting of complex experiments can be done using the Ruby scripting language.
|
996
|
+
We saw that in the context of Grid'5000,
|
997
|
+
**Ruby-Cute** offers useful methods for accessing the platform's services and executing commands in parallel.
|
998
|
+
The aim of this tutorial was to give you some ideas for coding your experiments using **Ruby-Cute**
|
999
|
+
and we hope it will be useful for your experiments.
|