ruby-cute 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -1
- data/README.md +8 -7
- data/Rakefile +1 -0
- data/debian/changelog +2 -2
- data/examples/distem-bootstrap +113 -42
- data/examples/g5k-tutorial.md +999 -0
- data/lib/cute/extensions.rb +16 -0
- data/lib/cute/g5k_api.rb +82 -15
- data/lib/cute/synchronization.rb +1 -1
- data/lib/cute/version.rb +1 -1
- data/spec/g5k_api_spec.rb +5 -2
- data/spec/spec_helper.rb +6 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07edfbc07f26d33908a645a9caf1b8fa786830e8
|
4
|
+
data.tar.gz: 95168f3f5d0b06290c526e713f8d2d3f9033e5e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26efbf1b1a42f09c73ef2626fd7bf1420872c6efc04d0a389635dc865eec7edf6a0e6f0911d792ff1e26af7f6377d81d9402dd706ad50432c1ad027b6c066af9
|
7
|
+
data.tar.gz: e9210765ad54ff59a42f81e20c6232499b992cb2591daee8899cfbfdcb1a4fa7d78d741bdbe46f7f6d97fb71b6a08de14dec2bdd5169f5b440e3e1d977ef4478
|
data/.yardopts
CHANGED
data/README.md
CHANGED
@@ -8,19 +8,18 @@ Grid'5000.
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
+
To install latest release from [RubyGems](https://rubygems.org/gems/ruby-cute):
|
12
|
+
|
13
|
+
```bash
|
14
|
+
$ gem install --user-install ruby-cute
|
15
|
+
```
|
11
16
|
From sources:
|
12
17
|
|
13
18
|
```bash
|
14
19
|
$ git clone https://github.com/ruby-cute/ruby-cute
|
15
20
|
$ cd ruby-cute
|
16
21
|
$ gem build ruby-cute.gemspec
|
17
|
-
$ gem install ruby-cute-*.gem
|
18
|
-
```
|
19
|
-
|
20
|
-
In Grid'5000 the installation procedure goes as follows:
|
21
|
-
|
22
|
-
```bash
|
23
|
-
$ gem install --user-install ruby-cute
|
22
|
+
$ gem install --user-install ruby-cute-*.gem
|
24
23
|
```
|
25
24
|
|
26
25
|
Then, type the following for having ruby cute in your path (this is only necessary if you want to use interactive mode).
|
@@ -72,6 +71,8 @@ $ cute
|
|
72
71
|
The variable *$g5k* is available which can be used to access the Grid'5000 API through the
|
73
72
|
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API). For example,
|
74
73
|
let's request the name of the sites available in Grid'5000.
|
74
|
+
(Before starting be sure to set up a configuration file for the module, please refer to
|
75
|
+
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API))
|
75
76
|
|
76
77
|
```bash
|
77
78
|
[2] pry(main)> $g5k.site_uids()
|
data/Rakefile
CHANGED
@@ -27,6 +27,7 @@ Rake::PackageTask::new("ruby-cute",get_version) do |p|
|
|
27
27
|
p.package_files.include('ext/**/*')
|
28
28
|
p.package_files.include('bin/**/*')
|
29
29
|
p.package_files.include('test/**/*')
|
30
|
+
p.package_files.include('examples/**/*')
|
30
31
|
p.package_files.include('Rakefile', 'COPYING','README', 'README.md')
|
31
32
|
end
|
32
33
|
|
data/debian/changelog
CHANGED
data/examples/distem-bootstrap
CHANGED
@@ -99,7 +99,10 @@ options[:network_mode] = 'classical'
|
|
99
99
|
options[:network_interface] = nil
|
100
100
|
options[:num_nodes] = 2
|
101
101
|
options[:walltime] = "2:00:00"
|
102
|
-
|
102
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb'
|
103
|
+
options[:cluster] = "nancy"
|
104
|
+
options[:env] = 'wheezy-x64-nfs'
|
105
|
+
options[:branch] = nil
|
103
106
|
args = ARGV.dup
|
104
107
|
|
105
108
|
optparse = OptionParser.new do |opts|
|
@@ -113,9 +116,16 @@ optparse = OptionParser.new do |opts|
|
|
113
116
|
opts.separator ""
|
114
117
|
opts.separator "Options:"
|
115
118
|
|
119
|
+
opts.on( '-f', '--node-list <nodelist_file>', 'File containing the list of nodes to install. If not specified, a job will be submitted using RubyCute') do |n|
|
120
|
+
options[:node_list] = n
|
121
|
+
end
|
122
|
+
|
116
123
|
opts.on( '--nodes <number of nodes>', Integer,'Number of nodes to reserve on Grid5000') do |n|
|
117
124
|
options[:num_nodes] = n.to_i
|
118
125
|
end
|
126
|
+
opts.on( '--cluster <cluster name>', String,'Cluster name in Grid\'5000') do |n|
|
127
|
+
options[:cluster] = n
|
128
|
+
end
|
119
129
|
opts.on( '-t', '--time <walltime>', 'Walltime for the reservation on Grid5000') do |n|
|
120
130
|
options[:walltime] = n
|
121
131
|
end
|
@@ -134,6 +144,12 @@ optparse = OptionParser.new do |opts|
|
|
134
144
|
opts.on( '-D', '--distem-debug', 'Show distem output when executing distem commands (script, node init)' ) do
|
135
145
|
options[:debug_distem] = true
|
136
146
|
end
|
147
|
+
opts.on( '--env <environment>', String, 'Kadeploy environment to be used as a base' ) do |c|
|
148
|
+
options[:env] = c
|
149
|
+
end
|
150
|
+
opts.on( '--deploy', 'Redeploy the base image used for distem' ) do
|
151
|
+
options[:deploy] = true
|
152
|
+
end
|
137
153
|
opts.on( '-k', '--ssh-key [<ssh_key_file>]', 'Path of the ssh private key to use' ) do |k|
|
138
154
|
options[:ssh_key] = k
|
139
155
|
end
|
@@ -155,6 +171,9 @@ optparse = OptionParser.new do |opts|
|
|
155
171
|
opts.on( '-G', '--gerrit <gerrit_ref>', "Checkout a gerrit ref (e.g refs/changes/94/94/1) and rebuild a Debian package" ) do |n|
|
156
172
|
options[:gerrit] = n
|
157
173
|
end
|
174
|
+
opts.on( '-B', '--branch <branch_name>', "Checkout a specific branch and rebuild a Debian package" ) do |n|
|
175
|
+
options[:branch] = n
|
176
|
+
end
|
158
177
|
opts.on( '-S', '--stealth-mode', 'Do not report usage statistics (Grid\'5000 only)' ) do |c|
|
159
178
|
options[:stats] = false
|
160
179
|
end
|
@@ -177,6 +196,19 @@ optparse = OptionParser.new do |opts|
|
|
177
196
|
opts.on( '-i', '--network-interface <iface>', 'Define the root network interface for inter-pnode communication (use only with vxlan network mode)') do |iface|
|
178
197
|
options[:network_interface] = iface
|
179
198
|
end
|
199
|
+
opts.on( '--debian-version <version>', 'Choose the Debian version (squeeze, wheezy or jessie)') do |v|
|
200
|
+
case v
|
201
|
+
when 'squeeze'
|
202
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb'
|
203
|
+
when 'wheezy'
|
204
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb'
|
205
|
+
when 'jessie'
|
206
|
+
options[:deb_repo] = 'http://distem.gforge.inria.fr/deb-jessie'
|
207
|
+
else
|
208
|
+
puts 'Unsupported Debian version'
|
209
|
+
exit 1
|
210
|
+
end
|
211
|
+
end
|
180
212
|
opts.on( '--verbose', 'Activate the verbose mode on Distem servers' ) do
|
181
213
|
options[:verbose] = true
|
182
214
|
end
|
@@ -200,6 +232,7 @@ begin
|
|
200
232
|
optparse.parse!
|
201
233
|
rescue OptionParser::InvalidOption => e
|
202
234
|
logger.error(e.to_s)
|
235
|
+
exit 1
|
203
236
|
end
|
204
237
|
|
205
238
|
logger.level = Logger::INFO
|
@@ -209,23 +242,37 @@ g5k = Cute::G5K::API.new()
|
|
209
242
|
g5k.logger = logger
|
210
243
|
## performing reservation with ruby-cute
|
211
244
|
|
212
|
-
reserv_param = {:site => g5k.site, :nodes => options[:num_nodes], :name => 'distem',
|
213
|
-
:walltime => options[:walltime], :env =>
|
245
|
+
reserv_param = {:site => g5k.site, :cluster => options[:cluster], :nodes => options[:num_nodes], :name => 'distem',
|
246
|
+
:walltime => options[:walltime], :env => options[:env], :subnets => [22,1]}
|
247
|
+
|
248
|
+
reserv_param[:keys] = options[:ssh_key] if options[:ssh_key]
|
214
249
|
|
215
|
-
|
250
|
+
nodelist = []
|
216
251
|
|
252
|
+
if options[:node_list]
|
253
|
+
File.open(options[:node_list], "r") do |f|
|
254
|
+
f.each_line do |line|
|
255
|
+
nodelist.push(line.chop)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
217
259
|
|
218
|
-
#
|
219
|
-
|
260
|
+
# if no list of nodes was passed, we reserve (or reuse) a job to obtain one
|
261
|
+
if nodelist.empty?
|
220
262
|
|
221
|
-
|
263
|
+
# we verify whether we have already submitted a job
|
264
|
+
old_jobs = g5k.get_my_jobs(g5k.site).select{ |j| j["name"] == "distem"}
|
222
265
|
|
223
|
-
|
224
|
-
## wait for the deploy
|
266
|
+
job = old_jobs.empty? ? g5k.reserve(reserv_param) : old_jobs.first
|
225
267
|
|
226
|
-
|
268
|
+
nodelist = job.resources["cores"].uniq
|
269
|
+
## wait for the deploy
|
270
|
+
reserv_param.delete(:nodes)
|
271
|
+
g5k.deploy(job,reserv_param) if options[:deploy]
|
227
272
|
|
228
|
-
|
273
|
+
g5k.wait_for_deploy(job)
|
274
|
+
end
|
275
|
+
allnodes = nodelist.dup
|
229
276
|
|
230
277
|
# Generating ssh keys"
|
231
278
|
|
@@ -233,16 +280,28 @@ key_dir = Dir.mktmpdir("keys")
|
|
233
280
|
system "ssh-keygen -P \'\' -f #{key_dir}/keys"
|
234
281
|
logger.info "Keys generated in #{key_dir}"
|
235
282
|
|
283
|
+
# Generating ssh config"
|
284
|
+
|
285
|
+
ssh_conf = Tempfile.new('config')
|
286
|
+
File.open(ssh_conf.path,'w+') do |f|
|
287
|
+
f.puts "Host *"
|
288
|
+
f.puts "StrictHostKeyChecking no"
|
289
|
+
f.puts "UserKnownHostsFile=/dev/null "
|
290
|
+
end
|
291
|
+
|
292
|
+
|
236
293
|
nodelist.each do |node|
|
237
294
|
|
238
295
|
Net::SCP.start(node, "root") do |scp|
|
239
296
|
logger.debug "Transfering key to #{node}"
|
240
|
-
scp.upload
|
241
|
-
scp.upload
|
297
|
+
scp.upload "#{key_dir}/keys.pub", "/root/.ssh/id_rsa.pub"
|
298
|
+
scp.upload "#{key_dir}/keys", "/root/.ssh/id_rsa"
|
299
|
+
scp.upload ssh_conf.path, "/root/.ssh/config"
|
242
300
|
end
|
243
301
|
|
244
302
|
end
|
245
303
|
|
304
|
+
ssh_conf.unlink
|
246
305
|
# Deleting keys
|
247
306
|
FileUtils.rm_rf(key_dir)
|
248
307
|
|
@@ -314,8 +373,7 @@ Net::SSH::Multi.start do |session|
|
|
314
373
|
session.exec! "grep -q '#{rule}' /etc/security/limits.conf; true || echo '#{rule} #{ULIMIT_OPEN_FILES}' >> /etc/security/limits.conf"
|
315
374
|
|
316
375
|
# setup Debian repo
|
317
|
-
session.exec! "grep -q '#{
|
318
|
-
|
376
|
+
session.exec! "grep -q '#{options[:deb_repo]}' /etc/apt/sources.list || echo 'deb #{options[:deb_repo]} ./\ndeb-src #{options[:deb_repo]} ./' >> /etc/apt/sources.list"
|
319
377
|
logger.info "Updating debian packages list"
|
320
378
|
session.exec! "apt-get update -q"
|
321
379
|
|
@@ -348,25 +406,39 @@ Net::SSH::Multi.start do |session|
|
|
348
406
|
session.with(:corrd).exec! "git --git-dir=#{gitdir}/.git reset --hard #{options[:git]}"
|
349
407
|
end
|
350
408
|
end
|
351
|
-
|
409
|
+
if options[:branch]
|
410
|
+
logger.info("Setting up git repository from a specific branch:#{options[:branch]} on #{coordinator}")
|
411
|
+
session.with(:coord).exec! "cd #{gitdir} && git fetch origin #{options[:branch]}"
|
412
|
+
session.with(:coord).exec! "cd #{gitdir} && git checkout #{options[:branch]}"
|
413
|
+
end
|
414
|
+
else option[:gerrit] # gerrit
|
352
415
|
logger.info("Setting up git repository from gerrit ref:#{options[:gerrit]} on #{coordinator}")
|
353
416
|
session.with(:coord).exec! "git clone #{GERRIT_REPOSITORY} #{gitdir}"
|
354
417
|
session.with(:coord).exec! "cd #{gitdir} && git fetch #{GERRIT_REPOSITORY} #{options[:gerrit]} && git checkout FETCH_HEAD"
|
355
418
|
end
|
419
|
+
# Checking out a specific commit from Distem repository
|
420
|
+
|
421
|
+
|
356
422
|
logger.info("Building debian package of distem on #{coordinator}")
|
357
423
|
session.with(:coord).exec! "rm -f #{TMP_DIR}/*.deb #{TMP_DIR}/*.changes #{TMP_DIR}/*.dsc #{TMP_DIR}/*.tar.gz"
|
358
424
|
session.with(:coord).exec! "cd #{gitdir}; rake snapshot"
|
359
425
|
|
360
|
-
|
426
|
+
res = session.with(:coord).exec! "find #{TMP_DIR} -maxdepth 1 -name distem*.deb"
|
361
427
|
|
362
|
-
|
428
|
+
debarchivefile = res[coordinator][:stdout]
|
429
|
+
|
430
|
+
logger.info("Copying generated debian package #{File.basename(debarchivefile)}")
|
363
431
|
|
364
432
|
#exec(session,"cp #{debarchivefile} #{TMP_DIR}",:coord,true)
|
365
433
|
nodelist.each {|node| session.with(:coord).exec! "scp -o StrictHostKeyChecking=no #{debarchivefile} root@#{node}:#{TMP_DIR}"}
|
366
434
|
logger.info("Installing generated debian package #{File.basename(debarchivefile)}")
|
367
|
-
|
435
|
+
res = session.with(:coord).exec! "dpkg -I #{debarchivefile} | grep 'Depends:'"
|
436
|
+
|
437
|
+
raw_deps = res[coordinator][:stdout].sub('Depends:','').split(",")
|
438
|
+
|
439
|
+
depends = ""
|
440
|
+
raw_deps.each{ |str| depends+=str.split(" ")[0]; depends+="," unless str==raw_deps.last}
|
368
441
|
|
369
|
-
depends = depends.split("|").last.gsub!(' ','')
|
370
442
|
session.exec! "dpkg --ignore-depends #{depends} -i #{TMP_DIR}/#{File.basename(debarchivefile)}"
|
371
443
|
|
372
444
|
session.exec! "DEBIAN_FRONTEND=noninteractive apt-get install -q -y --force-yes -f"
|
@@ -388,7 +460,7 @@ Net::SSH::Multi.start do |session|
|
|
388
460
|
end
|
389
461
|
options[:gempackages].each do |gempkg|
|
390
462
|
logger.info "Installing additional gem package '#{gempkg}'"
|
391
|
-
session.
|
463
|
+
session.exec! "export http_proxy=#{HTTP_PROXY}; gem install #{gempkg}"
|
392
464
|
end
|
393
465
|
|
394
466
|
if options[:init_pnodes] or ARGV[0]
|
@@ -426,8 +498,7 @@ Net::SSH::Multi.start do |session|
|
|
426
498
|
begin
|
427
499
|
Timeout.timeout(10) do
|
428
500
|
root_iface_opt = options[:network_interface] ? "--network-interface #{options[:network_interface]}" : ''
|
429
|
-
distem_cmd = "LANG=C distemd #{options[:verbose] ? '--verbose' : ''}
|
430
|
-
"#{options[:network_mode]} #{root_iface_opt} -d &>#{File.join(PATH_DISTEMD_LOGS,'distemd.log')}&"
|
501
|
+
distem_cmd = "LANG=C distemd #{options[:verbose] ? '--verbose' : ''} -d &>#{File.join(PATH_DISTEMD_LOGS,'distemd.log')}&"
|
431
502
|
|
432
503
|
session.with(:coord).exec! distem_cmd
|
433
504
|
|
@@ -475,7 +546,7 @@ Net::SSH::Multi.start do |session|
|
|
475
546
|
if ARGV[0]
|
476
547
|
begin
|
477
548
|
File.open(ARGV[0], 'r') do |f|
|
478
|
-
filename = session.with(:coord).exec!('tempfile')[coordinator][
|
549
|
+
filename = session.with(:coord).exec!('tempfile')[coordinator][:stdout]
|
479
550
|
logger.info("Copying script file in '#{filename}' on #{coordinator}")
|
480
551
|
`scp #{ARGV[0]} root@#{coordinator}:#{filename}`
|
481
552
|
session.with(:coord).exec! "chmod +x #{filename}"
|
@@ -484,7 +555,7 @@ Net::SSH::Multi.start do |session|
|
|
484
555
|
argv_dup = ARGV.dup
|
485
556
|
argv_dup.shift
|
486
557
|
script_args = (argv_dup.length > 0 ? argv_dup.join(" ") : "")
|
487
|
-
session.with(:coord).exec! "export #{VAR_DISTEM_NODES}='#{(nodelist + [coordinator]) * "\n"}'; export #{VAR_DISTEM_COORD}='#{coordinator}';#{filename} #{script_args}"
|
558
|
+
#session.with(:coord).exec! "export #{VAR_DISTEM_NODES}='#{(nodelist + [coordinator]) * "\n"}'; export #{VAR_DISTEM_COORD}='#{coordinator}';#{filename} #{script_args}"
|
488
559
|
session.with(:coord).exec! "rm #{filename}"
|
489
560
|
end
|
490
561
|
logger.info 'Script execution done'
|
@@ -496,21 +567,21 @@ Net::SSH::Multi.start do |session|
|
|
496
567
|
logger.info("Coordinator: #{coordinator}") if options[:init_pnodes] or ARGV[0]
|
497
568
|
|
498
569
|
# Send stats record to stats server
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
570
|
+
if g5k? and options[:stats]
|
571
|
+
begin
|
572
|
+
stats = {
|
573
|
+
:time => Time.now.to_i,
|
574
|
+
:site => Socket.gethostname,
|
575
|
+
:user => g5k.g5k_user,
|
576
|
+
:oar => job['uid'],
|
577
|
+
:nodes => allnodes,
|
578
|
+
:params => args * ' ',
|
579
|
+
:length => (Time.now - $startt).to_i
|
580
|
+
}
|
581
|
+
sock = TCPSocket.open(STATS_SERV, STATS_PORT)
|
582
|
+
sock.send(stats.to_yaml,0)
|
583
|
+
sock.close
|
584
|
+
rescue SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EHOSTUNREACH
|
585
|
+
end
|
586
|
+
end
|
516
587
|
end
|
@@ -0,0 +1,999 @@
|
|
1
|
+
# @title Grid'5000 tutorial
|
2
|
+
# Grid'5000 tutorial
|
3
|
+
|
4
|
+
|
5
|
+
This tutorial aims at showing how **Ruby-Cute** can be used to
|
6
|
+
help the scripting of an experiment in the context of the Grid'5000 testbed.
|
7
|
+
The programming language used, as you would expect, is {https://www.ruby-lang.org/en/ Ruby}.
|
8
|
+
We will use a powerful console debugger called {http://pryrepl.org/ Pry} which offers
|
9
|
+
several functionalities that can be used for the step-by-step scripting of complex experiments.
|
10
|
+
|
11
|
+
In this tutorial the first two sections are dedicated to the installation and basic use of **Ruby-Cute**.
|
12
|
+
You can skip those sections if you are already acquainted with **Ruby-Cute**.
|
13
|
+
The other sections show how to use **Ruby-cute** for scripting complex experiments.
|
14
|
+
This is shown through three examples:
|
15
|
+
|
16
|
+
1. **Infiniband performance test**: the experiment will illustrate how to perform a reservation,
|
17
|
+
the execution of commands in a reserved node and it explains several `pry` commands that will help you with the writing of your experiment.
|
18
|
+
2. **Running NAS benchmarks in Grid’5000**: you will get acquainted with parallel execution using simple SSH or {http://taktuk.gforge.inria.fr/ TakTuk}.
|
19
|
+
3. **Performing network measures within a reserved VLAN**: you will learn how to reserve a routed VLAN
|
20
|
+
and to query the G5K metrology {https://www.grid5000.fr/mediawiki/index.php/API API}.
|
21
|
+
For this particular experiment we will query the {https://www.grid5000.fr/mediawiki/index.php/Monitoring Kwapi} service.
|
22
|
+
|
23
|
+
The aforementioned experiments are independent, you can perform them in any order.
|
24
|
+
However, you may need some concepts that are explained only in specific sections.
|
25
|
+
|
26
|
+
## Installing and preparing Ruby cute
|
27
|
+
|
28
|
+
The installation procedure is shown in {file:README.md Ruby-Cute install}.
|
29
|
+
After this step you will normally have `ruby-cute` and `pry` gems installed.
|
30
|
+
|
31
|
+
Before using **Ruby-Cute** you have to create the following file:
|
32
|
+
|
33
|
+
$ cat > ~/.grid5000_api.yml << EOF
|
34
|
+
$ uri: https://api.grid5000.fr/
|
35
|
+
$ version: 3.0
|
36
|
+
$ EOF
|
37
|
+
|
38
|
+
## Getting acquainted with the pry console
|
39
|
+
|
40
|
+
After installing the `ruby-cute` and `pry` gems you can launch a pry console
|
41
|
+
with **ruby-cute** loaded by typing:
|
42
|
+
|
43
|
+
$ cute
|
44
|
+
|
45
|
+
Which will open a `pry` console:
|
46
|
+
|
47
|
+
[1] pry(main)>
|
48
|
+
|
49
|
+
In this console, we can evaluate Ruby code, execute shell commands, consult
|
50
|
+
documentation, explore classes and more.
|
51
|
+
The variable *$g5k* is available which can be used to access the Grid'5000 API through the
|
52
|
+
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API). For example,
|
53
|
+
let's request the name of the sites available in Grid'5000.
|
54
|
+
|
55
|
+
[2] pry(main)> $g5k.site_uids()
|
56
|
+
=> ["grenoble", "lille", "luxembourg", "lyon", "nancy", "nantes", "reims", "rennes", "sophia", "toulouse"]
|
57
|
+
|
58
|
+
We can consult the name of the clusters available in a specific site.
|
59
|
+
|
60
|
+
[3] pry(main)> $g5k.cluster_uids("grenoble")
|
61
|
+
=> ["adonis", "edel", "genepi"]
|
62
|
+
|
63
|
+
It is possible to execute shell commands, however all commands have to be prefixed with a dot ".". For example we could generate a pair of SSH keys using:
|
64
|
+
|
65
|
+
[7] pry(main)> .ssh-keygen -b 1024 -N "" -t rsa -f ~/my_ssh_jobkey
|
66
|
+
|
67
|
+
Another advantage is the possibility of exploring the loaded Ruby modules.
|
68
|
+
Let's explore the [Cute](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute) module.
|
69
|
+
|
70
|
+
[8] pry(main)> cd Cute
|
71
|
+
[9] pry(Cute):1> ls
|
72
|
+
constants: Bash Execute G5K TakTuk VERSION
|
73
|
+
locals: _ __ _dir_ _ex_ _file_ _in_ _out_ _pry_
|
74
|
+
|
75
|
+
We can see that the [Cute](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute) module
|
76
|
+
is composed of other helpful modules such as:
|
77
|
+
[G5K](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API),
|
78
|
+
[TakTuk](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/TakTuk), etc.
|
79
|
+
To quit the Cute namespace type:
|
80
|
+
|
81
|
+
[10] pry(main)> cd
|
82
|
+
|
83
|
+
Let's explore the methods defined in the
|
84
|
+
[G5K Module](http://www.rubydoc.info/github/ruby-cute/ruby-cute/master/Cute/G5K/API),
|
85
|
+
so you can observe which methods can be used with `$g5k` variable.
|
86
|
+
|
87
|
+
[11] pry(main)> ls Cute::G5K::API
|
88
|
+
Object.methods: yaml_tag
|
89
|
+
Cute::G5K::API#methods:
|
90
|
+
check_deployment deploy environments get_job get_subnets get_vlan_nodes nodes_status reserve site_status wait_for_deploy
|
91
|
+
cluster_uids deploy_status g5k_user get_jobs get_switch logger release rest site_uids wait_for_job
|
92
|
+
clusters environment_uids get_deployments get_my_jobs get_switches logger= release_all site sites
|
93
|
+
|
94
|
+
We can access the respective YARD documentation of a given method by typing:
|
95
|
+
|
96
|
+
[12] pry(main)> show-doc Cute::G5K::API#deploy
|
97
|
+
|
98
|
+
In the following section we will see how
|
99
|
+
`pry` can be used to setup an experiment step by step using **Ruby-Cute**.
|
100
|
+
|
101
|
+
Pry can be customized by creating the file `.pryrc`. We will create this
|
102
|
+
file with the following content in order to choose our preferred editor:
|
103
|
+
|
104
|
+
$ cat > ~/.pryrc << EOF
|
105
|
+
Pry.config.editor = "emacs"
|
106
|
+
EOF
|
107
|
+
|
108
|
+
## First experiment: Infiniband performance test
|
109
|
+
|
110
|
+
Here, we will use **Ruby-cute** to carry out an experiment.
|
111
|
+
In this experiment, we will ask for two nodes equipped with infiniband and
|
112
|
+
then, we will perform some performance test using a network benchmark called *NETPIPE*.
|
113
|
+
For this particular experiment we have the following requirements:
|
114
|
+
|
115
|
+
- A pair of SSH keys
|
116
|
+
- Use of standard environment (no deploy)
|
117
|
+
- Two nodes connected with infiniband (10G or 20G)
|
118
|
+
- MPI benchmark NETPIPE
|
119
|
+
- A MPI runtime (OpenMPI or MPICH)
|
120
|
+
|
121
|
+
We will do it interactively using `pry`.
|
122
|
+
Let's create a directory for keeping all the scripts that we will write throughout the tutorial.
|
123
|
+
|
124
|
+
$ mkdir ruby-cute-tutorial
|
125
|
+
|
126
|
+
Then, we execute the `pry` console from this directory:
|
127
|
+
|
128
|
+
$ cd ruby-cute-tutorial
|
129
|
+
$ cute
|
130
|
+
|
131
|
+
First, let's find the sites that offer Infiniband interconnection.
|
132
|
+
For that we will write a small script from `pry` console using the command edit.
|
133
|
+
|
134
|
+
|
135
|
+
[13] pry(main)> edit -n find_infiniband.rb
|
136
|
+
|
137
|
+
This will open a new file with your preferred editor. Here we will put the following
|
138
|
+
ruby script:
|
139
|
+
|
140
|
+
sites = $g5k.site_uids
|
141
|
+
|
142
|
+
sites_infiniband = []
|
143
|
+
|
144
|
+
sites.each do |site|
|
145
|
+
sites_infiniband.push(site) unless $g5k.get_switches(site).select{ |t| t["model"] == "Infiniband" }.empty?
|
146
|
+
end
|
147
|
+
|
148
|
+
Then, we execute it using the `play` command, which will execute this script line by line in the context of a Pry session.
|
149
|
+
|
150
|
+
[21] pry(main)> play find_infiniband.rb
|
151
|
+
=> ["grenoble", "lille", "luxembourg", "lyon", "nancy", "nantes", "reims", "rennes", "sophia"]
|
152
|
+
|
153
|
+
We can observe that the variable `sites_infiniband` is now defined, telling us that Grenoble and Nancy sites offer Infiniband interconnection.
|
154
|
+
|
155
|
+
[22] pry(main)> sites_infiniband
|
156
|
+
=> ["grenoble", "nancy"]
|
157
|
+
|
158
|
+
Then, create a pair of SSH keys (Necessary for OARSSH):
|
159
|
+
|
160
|
+
$ ssh-keygen -b 1024 -N "" -t rsa -f ~/my_ssh_jobkey
|
161
|
+
|
162
|
+
We send the generated keys to the chosen site:
|
163
|
+
|
164
|
+
[22] pry(main)> .scp ~/my_ssh* nancy:~/
|
165
|
+
|
166
|
+
Now that we have found the sites, let's submit a job. You can choose between the Grenoble and Nancy sites. If you
|
167
|
+
take a look at {https://www.grid5000.fr/mediawiki/index.php/Status Monika} you will see that in Nancy we should use the OAR property 'ib20g' and in Grenoble we should use 'ib10g'.
|
168
|
+
Given that the MPI bench uses just one MPI process, we will in reality need just one core of a given machine.
|
169
|
+
We will use OAR syntax to ask for two cores in two different nodes with ib10g in Grenoble.
|
170
|
+
|
171
|
+
[23] pry(main)> job = $g5k.reserve(:site => "grenoble", :resources => "{ib10g='YES'}/nodes=2/core=1",:walltime => '01:00:00', :keys => "~/my_ssh_jobkey" )
|
172
|
+
2015-12-04 14:07:31.370 => Reserving resources: {ib20g='YES'}/nodes=2/core=1,walltime=01:00 (type: ) (in grenoble)
|
173
|
+
2015-12-04 14:07:41.358 => Waiting for reservation 692665
|
174
|
+
2015-12-04 14:07:41.444 => Reservation 692665 should be available at 2015-12-04 14:07:34 +0100 (0 s)
|
175
|
+
2015-12-04 14:07:41.444 => Reservation 692665 ready
|
176
|
+
|
177
|
+
A hash is returned containing all the information about the job that we have just submitted.
|
178
|
+
|
179
|
+
[58] pry(main)> job
|
180
|
+
=> {"uid"=>692665,
|
181
|
+
"user_uid"=>"cruizsanabria",
|
182
|
+
"user"=>"cruizsanabria",
|
183
|
+
"walltime"=>3600,
|
184
|
+
"queue"=>"default",
|
185
|
+
"state"=>"running",
|
186
|
+
"project"=>"default",
|
187
|
+
"name"=>"rubyCute job",
|
188
|
+
"types"=>[],
|
189
|
+
"mode"=>"PASSIVE",
|
190
|
+
"command"=>"sleep 3600",
|
191
|
+
"submitted_at"=>1449234452,
|
192
|
+
"scheduled_at"=>1449234454,
|
193
|
+
"started_at"=>1449234454,
|
194
|
+
"message"=>"FIFO scheduling OK",
|
195
|
+
"properties"=>"(maintenance = 'NO') AND production = 'NO'",
|
196
|
+
"directory"=>"/home/cruizsanabria",
|
197
|
+
"events"=>[],
|
198
|
+
"links"=>
|
199
|
+
[{"rel"=>"self", "href"=>"/sid/sites/nancy/jobs/692665", "type"=>"application/vnd.grid5000.item+json"},
|
200
|
+
{"rel"=>"parent", "href"=>"/sid/sites/nancy", "type"=>"application/vnd.grid5000.item+json"}],
|
201
|
+
"resources_by_type"=>{"cores"=>["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]},
|
202
|
+
"assigned_nodes"=>["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]}
|
203
|
+
|
204
|
+
An important piece of information is the list of nodes that have been assigned; let's put it in another variable:
|
205
|
+
|
206
|
+
[60] pry(main)> nodes = job["assigned_nodes"]
|
207
|
+
=> ["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]
|
208
|
+
|
209
|
+
Then, we create a file with the name of the reserved machines:
|
210
|
+
|
211
|
+
[62] pry(main)> machine_file = Tempfile.open('machine_file')
|
212
|
+
=> #<File:/tmp/machine_file20151204-28888-1ll3brs>
|
213
|
+
|
214
|
+
[64] pry(main)> nodes.each{ |node| machine_file.puts node }
|
215
|
+
=> ["graphene-67.nancy.grid5000.fr", "graphene-45.nancy.grid5000.fr"]
|
216
|
+
|
217
|
+
[66] pry(main)> machine_file.close
|
218
|
+
|
219
|
+
|
220
|
+
We will need to setup SSH options for OAR, we can do it with the {Cute::OARSSHopts OARSSHopts} class helper provided by ruby-cute:
|
221
|
+
|
222
|
+
[6] pry(main)> grid5000_opt = Cute::OARSSHopts.new(:keys => "~/my_ssh_jobkey")
|
223
|
+
=> {:user=>"oar", :keys=>"~/my_ssh_jobkey", :port=>6667}
|
224
|
+
|
225
|
+
Now, we can communicate using SSH with our nodes. Let's send the machinefile using SCP.
|
226
|
+
From a `pry` console let's load the SCP module to transfer files:
|
227
|
+
|
228
|
+
[12] pry(main)> require 'net/scp'
|
229
|
+
|
230
|
+
Then, copy-paste the following code in pry console:
|
231
|
+
|
232
|
+
Net::SCP.start(nodes.first, "oar", grid5000_opt) do |scp|
|
233
|
+
scp.upload! machine_file.path, "/tmp/machine_file"
|
234
|
+
end
|
235
|
+
|
236
|
+
The previous code will send the machine file to the first node.
|
237
|
+
We can check this by performing an SSH connection into the node.
|
238
|
+
Here to illustrate the use of temporary files, let's type the following:
|
239
|
+
|
240
|
+
[6] pry(main)> edit -t
|
241
|
+
|
242
|
+
and copy-paste the following code:
|
243
|
+
|
244
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
245
|
+
puts ssh.exec("cat /tmp/machine_file")
|
246
|
+
end
|
247
|
+
|
248
|
+
If we save and quit the editor, the code will be evaluated in Pry context.
|
249
|
+
Which will generate the following output:
|
250
|
+
|
251
|
+
[12] pry(main)> edit -t
|
252
|
+
#<Net::SSH::Connection::Channel:0x00000001247150>
|
253
|
+
graphene-80.nancy.grid5000.fr
|
254
|
+
graphene-81.nancy.grid5000.fr
|
255
|
+
=> nil
|
256
|
+
|
257
|
+
We confirmed the existence of the file in the first reserved node.
|
258
|
+
Now let's download, compile and execute
|
259
|
+
the benchmark. Create a Ruby file called netpipe:
|
260
|
+
|
261
|
+
[12] pry(main)> edit -n netpipe.rb
|
262
|
+
|
263
|
+
With the following content:
|
264
|
+
|
265
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
266
|
+
netpipe_url = "http://pkgs.fedoraproject.org/repo/pkgs/NetPIPE/NetPIPE-3.7.1.tar.gz/5f720541387be065afdefc81d438b712/NetPIPE-3.7.1.tar.gz"
|
267
|
+
ssh.exec!("mkdir -p netpipe_exp")
|
268
|
+
ssh.exec!("wget -O ~/netpipe_exp/NetPIPE.tar.gz #{netpipe_url}")
|
269
|
+
ssh.exec!("cd netpipe_exp && tar -zvxf NetPIPE.tar.gz")
|
270
|
+
ssh.exec!("cd netpipe_exp/NetPIPE-3.7.1 && make mpi")
|
271
|
+
ssh.exec("mpirun --mca plm_rsh_agent \"oarsh\" -machinefile /tmp/machine_file ~/netpipe_exp/NetPIPE-3.7.1/NPmpi")
|
272
|
+
end
|
273
|
+
|
274
|
+
Then, execute the created script:
|
275
|
+
|
276
|
+
[16] pry(main)> play netpipe.rb
|
277
|
+
#<Net::SSH::Connection::Channel:0x000000021679f0>
|
278
|
+
Permission denied (publickey,keyboard-interactive).
|
279
|
+
--------------------------------------------------------------------------
|
280
|
+
A daemon (pid 4615) died unexpectedly with status 255 while attempting
|
281
|
+
to launch so we are aborting.
|
282
|
+
|
283
|
+
There may be more information reported by the environment (see above).
|
284
|
+
|
285
|
+
This may be because the daemon was unable to find all the needed shared
|
286
|
+
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
|
287
|
+
location of the shared libraries on the remote nodes and this will
|
288
|
+
automatically be forwarded to the remote nodes.
|
289
|
+
--------------------------------------------------------------------------
|
290
|
+
--------------------------------------------------------------------------
|
291
|
+
mpirun noticed that the job aborted, but has no info as to the process
|
292
|
+
that caused that situation.
|
293
|
+
--------------------------------------------------------------------------
|
294
|
+
mpirun: clean termination accomplished
|
295
|
+
|
296
|
+
=> nil
|
297
|
+
|
298
|
+
We got an error related to the SSH keys and it is due to the fact that `oarsh` cannot find the appropriate key files.
|
299
|
+
We can fix this problem by prefixing the `mpirun` command with `export OAR_JOB_KEY_FILE=~/my_ssh_jobkey`.
|
300
|
+
Now the code will look like this:
|
301
|
+
|
302
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
303
|
+
netpipe_url = "http://pkgs.fedoraproject.org/repo/pkgs/NetPIPE/NetPIPE-3.7.1.tar.gz/5f720541387be065afdefc81d438b712/NetPIPE-3.7.1.tar.gz"
|
304
|
+
ssh.exec!("mkdir -p netpipe_exp")
|
305
|
+
ssh.exec!("wget -O ~/netpipe_exp/NetPIPE.tar.gz #{netpipe_url}")
|
306
|
+
ssh.exec!("cd netpipe_exp && tar -zvxf NetPIPE.tar.gz")
|
307
|
+
ssh.exec!("cd netpipe_exp/NetPIPE-3.7.1 && make mpi")
|
308
|
+
ssh.exec("export OAR_JOB_KEY_FILE=~/my_ssh_jobkey;mpirun --mca plm_rsh_agent \"oarsh\" -machinefile /tmp/machine_file ~/netpipe_exp/NetPIPE-3.7.1/NPmpi")
|
309
|
+
end
|
310
|
+
|
311
|
+
After running the script, it will show the output of the benchmark in the `pry` console:
|
312
|
+
|
313
|
+
[34] pry(main)> play netpipe.rb
|
314
|
+
#<Net::SSH::Connection::Channel:0x00000002edc6d0>
|
315
|
+
0: adonis-9
|
316
|
+
1: adonis-10
|
317
|
+
Now starting the main loop
|
318
|
+
0: 1 bytes 32103 times --> 4.58 Mbps in 1.67 usec
|
319
|
+
1: 2 bytes 59994 times --> 9.22 Mbps in 1.65 usec
|
320
|
+
2: 3 bytes 60440 times --> 13.79 Mbps in 1.66 usec
|
321
|
+
3: 4 bytes 40180 times --> 18.34 Mbps in 1.66 usec
|
322
|
+
4: 6 bytes 45076 times --> 27.07 Mbps in 1.69 usec
|
323
|
+
5: 8 bytes 29563 times --> 36.16 Mbps in 1.69 usec
|
324
|
+
6: 12 bytes 37023 times --> 53.84 Mbps in 1.70 usec
|
325
|
+
7: 13 bytes 24500 times --> 57.97 Mbps in 1.71 usec
|
326
|
+
8: 16 bytes 26977 times --> 71.61 Mbps in 1.70 usec
|
327
|
+
9: 19 bytes 32995 times --> 84.65 Mbps in 1.71 usec
|
328
|
+
10: 21 bytes 36882 times --> 93.27 Mbps in 1.72 usec
|
329
|
+
11: 24 bytes 38808 times --> 106.69 Mbps in 1.72 usec
|
330
|
+
12: 27 bytes 41271 times --> 119.77 Mbps in 1.72 usec
|
331
|
+
|
332
|
+
We can slightly modify the previous script to write the result into a file.
|
333
|
+
We need to use `ssh.exec!` to capture the output of the commands.
|
334
|
+
|
335
|
+
Net::SSH.start(nodes.first, "oar", grid5000_opt) do |ssh|
|
336
|
+
netpipe_url = "http://pkgs.fedoraproject.org/repo/pkgs/NetPIPE/NetPIPE-3.7.1.tar.gz/5f720541387be065afdefc81d438b712/NetPIPE-3.7.1.tar.gz"
|
337
|
+
ssh.exec!("mkdir -p netpipe_exp")
|
338
|
+
ssh.exec!("wget -O ~/netpipe_exp/NetPIPE.tar.gz #{netpipe_url}")
|
339
|
+
ssh.exec!("cd netpipe_exp && tar -zvxf NetPIPE.tar.gz")
|
340
|
+
ssh.exec!("cd netpipe_exp/NetPIPE-3.7.1 && make mpi")
|
341
|
+
|
342
|
+
File.open("output_netpipe.txt", 'w') do |f|
|
343
|
+
f.puts ssh.exec!("export OAR_JOB_KEY_FILE=~/my_ssh_jobkey; mpirun --mca plm_rsh_agent \"oarsh\" -machinefile /tmp/machine_file ~/netpipe_exp/NetPIPE-3.7.1/NPmpi")
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
We can check the results by doing:
|
348
|
+
|
349
|
+
[16] pry(main)> .cat output_netpipe.txt
|
350
|
+
0: adonis-9
|
351
|
+
1: adonis-10
|
352
|
+
Now starting the main loop
|
353
|
+
0: 1 bytes 31441 times --> 4.62 Mbps in 1.65 usec
|
354
|
+
1: 2 bytes 60550 times --> 9.24 Mbps in 1.65 usec
|
355
|
+
2: 3 bytes 60580 times --> 13.87 Mbps in 1.65 usec
|
356
|
+
3: 4 bytes 40404 times --> 18.39 Mbps in 1.66 usec
|
357
|
+
4: 6 bytes 45183 times --> 27.22 Mbps in 1.68 usec
|
358
|
+
5: 8 bytes 29729 times --> 36.17 Mbps in 1.69 usec
|
359
|
+
6: 12 bytes 37039 times --> 54.01 Mbps in 1.70 usec
|
360
|
+
7: 13 bytes 24578 times --> 58.40 Mbps in 1.70 usec
|
361
|
+
8: 16 bytes 27177 times --> 71.87 Mbps in 1.70 usec
|
362
|
+
9: 19 bytes 33116 times --> 85.00 Mbps in 1.71 usec
|
363
|
+
|
364
|
+
At the end of the experiment you can use the command `hist` to see what you have done so far.
|
365
|
+
This can help you to assemble everything together in a whole script.
|
366
|
+
|
367
|
+
[22] pry(main)> hist
|
368
|
+
1: edit -n find_infiniband.rb
|
369
|
+
3: play find_infiniband.rb
|
370
|
+
4: sites_infiniband
|
371
|
+
5: .ls ~/my_ssh*
|
372
|
+
6: .scp ~/my_ssh* nancy:~/
|
373
|
+
7: job = $g5k.reserve(:site => "nancy", :resources => "{ib20g='YES'}/nodes=2/core=1",:walltime => '01:00:00', :keys => "~/my_ssh_jobkey" )
|
374
|
+
8: nodes = job["assigned_nodes"]
|
375
|
+
9: machine_file = Tempfile.open('machine_file')
|
376
|
+
10: nodes.each{ |node| machine_file.puts node }
|
377
|
+
11: machine_file.close
|
378
|
+
12: grid5000_opt = OARSSHopts.new(:keys => "~/my_ssh_jobkey")
|
379
|
+
13: require 'net/scp'
|
380
|
+
14: Net::SCP.start(nodes.first, "oar", grid5000_opt) do |scp|
|
381
|
+
15: scp.upload! machine_file.path, "/tmp/machine_file"
|
382
|
+
16: end
|
383
|
+
17: edit -n netpipe.rb
|
384
|
+
18: play netpipe.rb
|
385
|
+
19: edit -n netpipe.rb
|
386
|
+
20: play netpipe.rb
|
387
|
+
|
388
|
+
## Running NAS benchmarks in Grid'5000: getting acquainted with parallel command execution
|
389
|
+
|
390
|
+
In this experiment, we will run the NAS benchmark in Grid'5000 and we will script a scalability test for one of the benchmarks.
|
391
|
+
This experiment has the following requirements:
|
392
|
+
|
393
|
+
- 4 or 2 nodes from any Grid'5000 site
|
394
|
+
- Use of standard environment (no deploy)
|
395
|
+
- NAS MPI benchmark
|
396
|
+
- A MPI runtime (OpenMPI or MPICH)
|
397
|
+
|
398
|
+
If you have not created a directory for the tutorial, create it and execute the `pry` console from there:
|
399
|
+
|
400
|
+
$ mkdir ruby-cute-tutorial
|
401
|
+
$ cd ruby-cute-tutorial
|
402
|
+
$ cute
|
403
|
+
|
404
|
+
First, let's find the necessary nodes for our experiment. As resources in Grid'5000 could be very busy, we are going
|
405
|
+
to script a loop that will explore all Grid'5000 sites and find the first site that can provide us with the required nodes.
|
406
|
+
Open an editor from the `pry` console:
|
407
|
+
|
408
|
+
[5] pry(main)> edit -n find_nodes.rb
|
409
|
+
|
410
|
+
and type the following code:
|
411
|
+
|
412
|
+
sites = $g5k.site_uids
|
413
|
+
job = {}
|
414
|
+
|
415
|
+
sites.each do |site|
|
416
|
+
job = $g5k.reserve(:site => site, :nodes => 4, :wait => false, :walltime => "01:00:00")
|
417
|
+
begin
|
418
|
+
job = $g5k.wait_for_job(job, :wait_time => 60)
|
419
|
+
puts "Nodes assigned #{job['assigned_nodes']}"
|
420
|
+
break
|
421
|
+
rescue Cute::G5K::EventTimeout
|
422
|
+
puts "We waited too long in site #{site} let's release the job and try in another site"
|
423
|
+
$g5k.release(job)
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
Here, we use the method {Cute::G5K::API#site_uids site_uids} for getting all available sites.
|
428
|
+
Then, a job is submitted using the method {Cute::G5K::API#reserve reserve}.
|
429
|
+
We ask for 4 nodes in a given site and we set the parameter `wait` to false, which makes the method return immediately.
|
430
|
+
Then, we use {Cute::G5K::API#wait_for_job wait_for_job} to set a timeout. If the timeout is reached a {Cute::G5K::EventTimeout Timeout} exception will be triggered
|
431
|
+
that we catch in order to consequently release the submitted job and try to submit it in another site.
|
432
|
+
Let's execute the script using the `play` command:
|
433
|
+
|
434
|
+
[8] pry(main)> play find_nodes.rb
|
435
|
+
2015-12-08 16:50:35.582 => Reserving resources: /nodes=4,walltime=01:00 (type: ) (in grenoble)
|
436
|
+
2015-12-08 16:50:36.465 => Waiting for reservation 1702197
|
437
|
+
2015-12-08 16:50:41.587 => Reservation 1702197 should be available at 2015-12-08 16:50:37 +0100 (0 s)
|
438
|
+
2015-12-08 16:50:41.587 => Reservation 1702197 ready
|
439
|
+
Nodes assigned ["edel-10.grenoble.grid5000.fr", "edel-11.grenoble.grid5000.fr", "edel-12.grenoble.grid5000.fr", "edel-13.grenoble.grid5000.fr"]
|
440
|
+
=> nil
|
441
|
+
|
442
|
+
The variable `job` is updated in Pry context. When no keys are specified, the option *-allow_classic_ssh* is activated
|
443
|
+
which enables the access via default SSH to the reserved machines. You can verify it by doing:
|
444
|
+
|
445
|
+
[11] pry(main)> .ssh edel-11.grenoble.grid5000.fr "hostname"
|
446
|
+
Warning: Permanently added 'edel-11.grenoble.grid5000.fr,172.16.17.11' (RSA) to the list of known hosts.
|
447
|
+
edel-11.grenoble.grid5000.fr
|
448
|
+
|
449
|
+
Let's explore the available modules for the parallel execution of commands in several remote machines.
|
450
|
+
The following example shows how to use the {Cute::TakTuk TakTuk} module.
|
451
|
+
|
452
|
+
nodes = job["assigned_nodes"]
|
453
|
+
Cute::TakTuk.start(nodes) do |tak|
|
454
|
+
tak.exec("hostname")
|
455
|
+
end
|
456
|
+
|
457
|
+
Which generates as output:
|
458
|
+
|
459
|
+
edel-10.grenoble.grid5000.fr/output/0:edel-10.grenoble.grid5000.fr
|
460
|
+
edel-12.grenoble.grid5000.fr/output/0:edel-12.grenoble.grid5000.fr
|
461
|
+
edel-13.grenoble.grid5000.fr/output/0:edel-13.grenoble.grid5000.fr
|
462
|
+
edel-10.grenoble.grid5000.fr/status/0:0
|
463
|
+
edel-11.grenoble.grid5000.fr/output/0:edel-11.grenoble.grid5000.fr
|
464
|
+
edel-12.grenoble.grid5000.fr/status/0:0
|
465
|
+
edel-13.grenoble.grid5000.fr/status/0:0
|
466
|
+
edel-11.grenoble.grid5000.fr/status/0:0
|
467
|
+
|
468
|
+
The following example shows how to use the {Net::SSH::Multi Net::SSH::Multi} module.
|
469
|
+
|
470
|
+
Net::SSH::Multi.start do |session|
|
471
|
+
nodes.each{ |node| session.use node }
|
472
|
+
session.exec("hostname")
|
473
|
+
end
|
474
|
+
|
475
|
+
If we type that into pry console we will get:
|
476
|
+
|
477
|
+
[edel-10.grenoble.grid5000.fr] edel-10.grenoble.grid5000.fr
|
478
|
+
[edel-11.grenoble.grid5000.fr] edel-11.grenoble.grid5000.fr
|
479
|
+
[edel-12.grenoble.grid5000.fr] edel-12.grenoble.grid5000.fr
|
480
|
+
[edel-13.grenoble.grid5000.fr] edel-13.grenoble.grid5000.fr
|
481
|
+
|
482
|
+
It is possible to capture the output of the executed command by adding **!** to the exec method.
|
483
|
+
For example, let's find the number of cores available in the reserved machines:
|
484
|
+
|
485
|
+
results = {}
|
486
|
+
Net::SSH::Multi.start do |session|
|
487
|
+
nodes.each{ |node| session.use node }
|
488
|
+
results = session.exec!("nproc")
|
489
|
+
end
|
490
|
+
|
491
|
+
The {Net::SSH::Multi::SessionActions#exec! exec!} method will return a Hash that looks like this:
|
492
|
+
|
493
|
+
[27] pry(main)> results
|
494
|
+
=> {"edel-10.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0},
|
495
|
+
"edel-11.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0},
|
496
|
+
"edel-12.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0},
|
497
|
+
"edel-13.grenoble.grid5000.fr"=>{:stdout=>"8", :status=>0}}
|
498
|
+
|
499
|
+
Where the Hash keys are the names of the machines and the values correspond to the output of the commands.
|
500
|
+
Then, we can easily get the total number of cores by typing:
|
501
|
+
|
502
|
+
[11] pry(main)> num_cores = results.values.inject(0){ |sum, item| sum+item[:stdout].to_i}
|
503
|
+
=> 32
|
504
|
+
|
505
|
+
Another way to do that is to use the information given by the G5K API regarding the submitted job:
|
506
|
+
|
507
|
+
[36] pry(main)> job["resources_by_type"]["cores"].length
|
508
|
+
=> 32
|
509
|
+
|
510
|
+
Let's create a machine file that we will need later on for our experiments:
|
511
|
+
|
512
|
+
machine_file = Tempfile.open('machine_file')
|
513
|
+
nodes.each{ |node| machine_file.puts node }
|
514
|
+
machine_file.close
|
515
|
+
|
516
|
+
After creating the machine file, we need to send it to the other machines.
|
517
|
+
Additionally, we need to download and compile the benchmark.
|
518
|
+
Let's write a small script that helps us to perform the aforementioned tasks.
|
519
|
+
Open the editor in pry console:
|
520
|
+
|
521
|
+
[17] pry(main)> edit -n NAS-expe.rb
|
522
|
+
|
523
|
+
Then, type:
|
524
|
+
|
525
|
+
SOURCE_NAS = "http://public.rennes.grid5000.fr/~cruizsanabria/NPB3.3.tar"
|
526
|
+
|
527
|
+
`wget #{SOURCE_NAS} -O /tmp/NAS.tar`
|
528
|
+
|
529
|
+
Cute::TakTuk.start(nodes) do |tak|
|
530
|
+
|
531
|
+
tak.put(machine_file.path, "machine_file")
|
532
|
+
tak.put("/tmp/NAS.tar", "/tmp/NAS.tar")
|
533
|
+
|
534
|
+
tak.exec!("cd /tmp/; tar -xvf NAS.tar")
|
535
|
+
puts tak.exec!("make lu NPROCS=#{num_cores} CLASS=A MPIF77=mpif77 -C /tmp/NPB3.3/NPB3.3-MPI/")
|
536
|
+
|
537
|
+
end
|
538
|
+
|
539
|
+
We can observe in the previous snippet of code that {Cute::TakTuk TakTuk} module can be used to
|
540
|
+
transfer files to several remote nodes. {Cute::TakTuk::TakTuk#put put} and {Cute::TakTuk::TakTuk#exec exec} methods
|
541
|
+
can be used in the same block. Finally, execute the script:
|
542
|
+
|
543
|
+
[102] pry(main)> play NAS-expe.rb
|
544
|
+
|
545
|
+
We can check if each node has the generated binary and the machine file:
|
546
|
+
|
547
|
+
Net::SSH::Multi.start do |session|
|
548
|
+
nodes.each{ |node| session.use node }
|
549
|
+
session.exec("ls /tmp/NPB3.3/NPB3.3-MPI/bin/")
|
550
|
+
session.exec("ls ~/machine*")
|
551
|
+
end
|
552
|
+
|
553
|
+
After typing it into `pry` console we will get something like:
|
554
|
+
|
555
|
+
[genepi-27.grenoble.grid5000.fr] lu.A.32
|
556
|
+
[genepi-29.grenoble.grid5000.fr] lu.A.32
|
557
|
+
[genepi-29.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
558
|
+
[genepi-19.grenoble.grid5000.fr] lu.A.32
|
559
|
+
[genepi-19.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
560
|
+
[genepi-2.grenoble.grid5000.fr] lu.A.32
|
561
|
+
[genepi-2.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
562
|
+
[genepi-27.grenoble.grid5000.fr] /home/cruizsanabria/machine_file
|
563
|
+
|
564
|
+
Which confirms the presence of both files on the nodes.
|
565
|
+
We can get the path of the binary by typing the following into `pry` console.
|
566
|
+
|
567
|
+
Net::SSH::Multi.start do |session|
|
568
|
+
nodes.each{ |node| session.use node }
|
569
|
+
results = session.exec!("find /tmp/ -name lu.A.32")
|
570
|
+
end
|
571
|
+
|
572
|
+
We will get some errors caused by the `find` command:
|
573
|
+
|
574
|
+
[32] pry(main)> results
|
575
|
+
=> {"genepi-27.grenoble.grid5000.fr"=>{:stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :stderr=>": Permission denied", :status=>1},
|
576
|
+
"genepi-29.grenoble.grid5000.fr"=>{:stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :stderr=>": Permission denied", :status=>1},
|
577
|
+
"genepi-19.grenoble.grid5000.fr"=>{:stderr=>": Permission denied", :stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :status=>1},
|
578
|
+
"genepi-2.grenoble.grid5000.fr"=>{:stdout=>"/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32", :stderr=>": Permission denied", :status=>1}}
|
579
|
+
|
580
|
+
|
581
|
+
Then, we can assign this to a new variable:
|
582
|
+
|
583
|
+
[33] pry(main)> lu_path = results.values.first[:stdout]
|
584
|
+
=> "/tmp/NPB3.3/NPB3.3-MPI/bin/lu.A.32"
|
585
|
+
|
586
|
+
The setup of the experiment is done. It is time to execute the benchmark by typing the following into `pry` console.
|
587
|
+
|
588
|
+
Net::SSH.start(nodes.first,"cruizsanabria") do |ssh|
|
589
|
+
results = ssh.exec!("mpirun --mca btl self,sm,tcp -np 32 --machinefile machine_file #{lu_path}")
|
590
|
+
end
|
591
|
+
|
592
|
+
Let's now perform a scalability test of the LU application for 2, 4, 8, 16, 32 processes. Open the editor:
|
593
|
+
|
594
|
+
[100] pry(main)> edit -n scalability_NAS.rb
|
595
|
+
|
596
|
+
And copy-paste the following script:
|
597
|
+
|
598
|
+
num_cores = [2,4,8,16,32]
|
599
|
+
|
600
|
+
Cute::TakTuk.start(nodes) do |tak|
|
601
|
+
|
602
|
+
num_cores.each do |cores|
|
603
|
+
puts tak.exec!("make lu NPROCS=#{cores} CLASS=A MPIF77=mpif77 -C /tmp/NPB3.3/NPB3.3-MPI/")
|
604
|
+
end
|
605
|
+
|
606
|
+
results = tak.exec!("find /tmp/ -name lu.A.*")
|
607
|
+
end
|
608
|
+
|
609
|
+
binaries = results.values.first[:output].split("\n")
|
610
|
+
|
611
|
+
expe_res = {}
|
612
|
+
|
613
|
+
Net::SSH.start(nodes.first,"cruizsanabria") do |ssh|
|
614
|
+
binaries.each do |binary|
|
615
|
+
processes = /A\.(\d*)/.match(binary)[1]
|
616
|
+
expe_res[processes]= {}
|
617
|
+
result = ssh.exec!("mpirun --mca btl self,sm,tcp -np #{processes} --machinefile machine_file #{binary}")
|
618
|
+
expe_res[processes][:output]= result
|
619
|
+
expe_res[processes][:time] =result.split("\n").select{ |t| t["Time in"]}.first
|
620
|
+
end
|
621
|
+
end
|
622
|
+
|
623
|
+
Then, we execute it:
|
624
|
+
|
625
|
+
[102] pry(main)> play scalability_NAS.rb
|
626
|
+
|
627
|
+
It will take approximately 2 to 3 minutes to run. After it finishes, a new Hash will
|
628
|
+
be defined called *expe_res* that we can use to print the results:
|
629
|
+
|
630
|
+
num_cores.each{ |cores| puts "#{cores} cores: #{expe_res[cores.to_s][:time]}"}
|
631
|
+
|
632
|
+
It will generate:
|
633
|
+
|
634
|
+
[107] pry(main)> num_cores.each{ |cores| puts "#{cores} cores: #{expe_res[cores.to_s][:time]}"}
|
635
|
+
2 cores: Time in seconds = 42.93
|
636
|
+
4 cores: Time in seconds = 26.50
|
637
|
+
8 cores: Time in seconds = 12.39
|
638
|
+
16 cores: Time in seconds = 7.01
|
639
|
+
32 cores: Time in seconds = 6.00
|
640
|
+
|
641
|
+
Finally, we can use the command `hist` to try to assemble all we have done so far into a script.
|
642
|
+
Once finished, we could release the job:
|
643
|
+
|
644
|
+
[34] pry(main)> $g5k.release(job)
|
645
|
+
=> ""
|
646
|
+
|
647
|
+
## Performing network measurements within a reserved VLAN
|
648
|
+
|
649
|
+
In this experiment, we will perform network measurements between two nodes located in different Grid'5000 sites.
|
650
|
+
The network measurements will be carried out in an isolated VLAN.
|
651
|
+
We will first reserve two nodes located in two different Grid'5000 sites in deploy mode and we will ask for two routed VLANs.
|
652
|
+
Once the nodes are ready, an environment will be deployed and the application iperf will be installed on all nodes.
|
653
|
+
Then, we will perform some network measurements among the nodes.
|
654
|
+
Finally, we will query the KWAPI using the G5K metrology API to get the network traffic generated during our experiment.
|
655
|
+
|
656
|
+
This experiment has the following requirements:
|
657
|
+
|
658
|
+
- Two nodes in two different G5K sites
|
659
|
+
- Environment deployment
|
660
|
+
- VLAN reservation
|
661
|
+
- Iperf application
|
662
|
+
- Access to Network traffic data.
|
663
|
+
|
664
|
+
If you have not created a directory for the tutorial, create it and execute the `pry` console from there:
|
665
|
+
|
666
|
+
$ mkdir ruby-cute-tutorial
|
667
|
+
$ cd ruby-cute-tutorial
|
668
|
+
$ cute
|
669
|
+
|
670
|
+
Let's create a small script that will help us with the reservation of nodes.
|
671
|
+
Open the `pry` editor:
|
672
|
+
|
673
|
+
[35] pry(main)> edit -n multisite.rb
|
674
|
+
|
675
|
+
and type:
|
676
|
+
|
677
|
+
jobs = {}
|
678
|
+
threads = []
|
679
|
+
["nancy","rennes"].each do |site|
|
680
|
+
|
681
|
+
threads.push<< Thread.new do
|
682
|
+
|
683
|
+
jobs[site] = job = $g5k.reserve(:site => site, :nodes => 1,
|
684
|
+
:env => 'jessie-x64-min',
|
685
|
+
:vlan => :routed)
|
686
|
+
end
|
687
|
+
end
|
688
|
+
|
689
|
+
threads.each{ |t| t.join}
|
690
|
+
|
691
|
+
|
692
|
+
In the script, we have chosen Nancy and Rennes sites. You are encouraged to try other sites as the number of routed VLANs is limited in each site.
|
693
|
+
For the purpose of this tutorial you have to choose a site where Kwapi is available: Grenoble, Nancy, Rennes, Lyon, Nantes.
|
694
|
+
We use the method {Cute::G5K::API#reserve reserve} with parameter *env* for specifying the environment we want to deploy.
|
695
|
+
This will automatically submit a deploy job and it will deploy the specified environment.
|
696
|
+
The parameter *vlan* will additionally reserve a VLAN and pass it to Kadeploy to set up the VLAN.
|
697
|
+
After executing this small script we got:
|
698
|
+
|
699
|
+
[36] pry(main)> play multisite.rb
|
700
|
+
2016-01-20 12:48:15.010 => Reserving resources: {type='kavlan'}/vlan=1+/nodes=1,walltime=01:00 (type: deploy) (in nancy)
|
701
|
+
2016-01-20 12:48:15.010 => Reserving resources: {type='kavlan'}/vlan=1+/nodes=1,walltime=01:00 (type: deploy) (in rennes)
|
702
|
+
2016-01-20 12:48:16.145 => Waiting for reservation 740698
|
703
|
+
2016-01-20 12:48:16.246 => Waiting for reservation 802917
|
704
|
+
2016-01-20 12:48:21.270 => Reservation 740698 should be available at 2016-01-20 12:48:17 +0100 (0 s)
|
705
|
+
2016-01-20 12:48:26.344 => Reservation 740698 should be available at 2016-01-20 12:48:17 +0100 (0 s)
|
706
|
+
2016-01-20 12:48:26.404 => Reservation 802917 should be available at 2016-01-20 12:48:13 +0100 (0 s)
|
707
|
+
2016-01-20 12:48:26.404 => Reservation 802917 ready
|
708
|
+
2016-01-20 12:48:26.541 => Found VLAN with uid = 4
|
709
|
+
2016-01-20 12:48:26.541 => Creating deployment
|
710
|
+
2016-01-20 12:48:27.256 => Waiting for 1 deployment
|
711
|
+
2016-01-20 12:48:31.296 => Waiting for 1 deployment
|
712
|
+
2016-01-20 12:48:31.406 => Reservation 740698 should be available at 2016-01-20 12:48:17 +0100 (0 s)
|
713
|
+
2016-01-20 12:48:31.406 => Reservation 740698 ready
|
714
|
+
2016-01-20 12:48:31.469 => Found VLAN with uid = 4
|
715
|
+
2016-01-20 12:48:31.469 => Creating deployment
|
716
|
+
2016-01-20 12:48:31.869 => Waiting for 1 deployment
|
717
|
+
2016-01-20 12:48:35.414 => Waiting for 1 deployment
|
718
|
+
|
719
|
+
At the end of the process the variable `jobs` will be defined and it will contain the jobs' information in each site.
|
720
|
+
In this variable, we can find information related to the deployment.
|
721
|
+
|
722
|
+
[44] pry(main)> jobs["nancy"]["deploy"]
|
723
|
+
=> [{"created_at"=>1450439620,
|
724
|
+
"environment"=>"jessie-x64-min",
|
725
|
+
"key"=>"https://api.grid5000.fr/sid/sites/nancy/files/cruizsanabria-key-84f3f1dbb1279bc1bddcd618e26c960307d653c5",
|
726
|
+
"nodes"=>["graphite-4.nancy.grid5000.fr"],
|
727
|
+
"result"=>{"graphite-4.nancy.grid5000.fr"=>{"macro"=>nil, "micro"=>nil, "state"=>"OK"}},
|
728
|
+
"site_uid"=>"nancy",
|
729
|
+
"status"=>"terminated",
|
730
|
+
"uid"=>"D-b026879e-b185-4e20-8bc5-ea0842a6954b",
|
731
|
+
"updated_at"=>1450439860,
|
732
|
+
"user_uid"=>"cruizsanabria",
|
733
|
+
"vlan"=>14,
|
734
|
+
"links"=>
|
735
|
+
[{"rel"=>"self", "href"=>"/sid/sites/nancy/deployments/D-b026879e-b185-4e20-8bc5-ea0842a6954b", "type"=>"application/vnd.grid5000.item+json"},
|
736
|
+
{"rel"=>"parent", "href"=>"/sid/sites/nancy", "type"=>"application/vnd.grid5000.item+json"}]}]
|
737
|
+
|
738
|
+
Some important pieces of information are: the status of the whole process and the state per node.
|
739
|
+
We can use this information to check if the deployment has finished successfully on all nodes.
|
740
|
+
This data structure is used by the method {Cute::G5K::API#check_deployment check_deployment}.
|
741
|
+
Let's check the documentation of this method:
|
742
|
+
|
743
|
+
[16] pry(main)> show-doc Cute::G5K::API#check_deployment
|
744
|
+
|
745
|
+
From: /home/cruizsanabria/Repositories/ruby-cute/lib/cute/g5k_api.rb @ line 1198:
|
746
|
+
Owner: Cute::G5K::API
|
747
|
+
Visibility: public
|
748
|
+
Signature: check_deployment(deploy_info)
|
749
|
+
Number of lines: 10
|
750
|
+
|
751
|
+
It returns an array of machines that did not deploy successfully
|
752
|
+
= Example
|
753
|
+
It can be used to try a new deploy:
|
754
|
+
|
755
|
+
badnodes = g5k.check_deployment(job["deploy"].last)
|
756
|
+
g5k.deploy(job,:nodes => badnodes, :env => 'wheezy-x64-base')
|
757
|
+
g5k.wait_for_deploy(job)
|
758
|
+
|
759
|
+
return [Array] machines that did not deploy successfully
|
760
|
+
param deploy_info [Hash] deployment structure information
|
761
|
+
|
762
|
+
We can use this method with the jobs we have just submitted
|
763
|
+
(The output will probably be long, so you will need to scroll up to see what is shown here):
|
764
|
+
|
765
|
+
[47] pry(main)> jobs.each{ |site,job| puts "all nodes OK in site: #{site}" if $g5k.check_deployment(job["deploy"].last).empty?}
|
766
|
+
all nodes OK in site: rennes
|
767
|
+
all nodes OK in site: nancy
|
768
|
+
|
769
|
+
Now, the reserved nodes are in a VLAN; within this VLAN a DHCP server will assign new IP addresses to the nodes.
|
770
|
+
You can configure your own if you want (please refer to the {https://www.grid5000.fr/mediawiki/index.php/Network_isolation_on_Grid%275000 KaVLAN tutorial}
|
771
|
+
if you want to know more). We can get the new assigned names by doing:
|
772
|
+
|
773
|
+
nodes = []
|
774
|
+
jobs.each{ |site,job| nodes.push($g5k.get_vlan_nodes(job))}
|
775
|
+
|
776
|
+
After putting that into `pry` we will get something like this:
|
777
|
+
|
778
|
+
[50] pry(main)> nodes
|
779
|
+
=> [["paranoia-6-kavlan-16.rennes.grid5000.fr"], ["graphite-4-kavlan-14.nancy.grid5000.fr"]]
|
780
|
+
|
781
|
+
[51] pry(main)> nodes.flatten
|
782
|
+
=> ["paranoia-6-kavlan-16.rennes.grid5000.fr", "graphite-4-kavlan-14.nancy.grid5000.fr"]
|
783
|
+
|
784
|
+
Now, let's install the `iperf` application in order to perform our network measurements.
|
785
|
+
Copy-paste the following code into `pry`:
|
786
|
+
|
787
|
+
nodes = nodes.flatten
|
788
|
+
|
789
|
+
Net::SSH::Multi.start do |session|
|
790
|
+
nodes.each{ |node| session.use("root@#{node}") }
|
791
|
+
session.exec!("apt-get update")
|
792
|
+
session.exec("DEBIAN_FRONTEND=noninteractive apt-get install -q -y iperf")
|
793
|
+
end
|
794
|
+
|
795
|
+
You should get something like this:
|
796
|
+
|
797
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Reading package lists...
|
798
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Building dependency tree...
|
799
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] Reading package lists...
|
800
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] Building dependency tree...
|
801
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr]
|
802
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Reading state information...
|
803
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] The following NEW packages will be installed:
|
804
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] iperf
|
805
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] 0 upgraded, 1 newly installed, 0 to remove and 8 not upgraded.
|
806
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Need to get 51.4 kB of archives.
|
807
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] After this operation, 179 kB of additional disk space will be used.
|
808
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Get:1 http://ftp.debian.org/debian/ jessie/main iperf amd64 2.0.5+dfsg1-2 [51.4 kB]
|
809
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr]
|
810
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] Reading state information...
|
811
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] The following NEW packages will be installed:
|
812
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] iperf
|
813
|
+
|
814
|
+
|
815
|
+
You can check if the application has been successfully installed,
|
816
|
+
by typing the following into the `pry` console:
|
817
|
+
|
818
|
+
Net::SSH::Multi.start do |session|
|
819
|
+
nodes.each{ |node| session.use("root@#{node}") }
|
820
|
+
session.exec("iperf --version")
|
821
|
+
end
|
822
|
+
|
823
|
+
Which will generate:
|
824
|
+
|
825
|
+
[65] pry(main)* end
|
826
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] iperf version 2.0.5 (08 Jul 2010) pthreads
|
827
|
+
[graphite-4-kavlan-14.nancy.grid5000.fr] iperf version 2.0.5 (08 Jul 2010) pthreads
|
828
|
+
=> nil
|
829
|
+
|
830
|
+
Let's perform some iperf tests by writing a small script.
|
831
|
+
Open the editor:
|
832
|
+
|
833
|
+
[76] pry(main)> edit -n iperf_test.rb
|
834
|
+
|
835
|
+
and type:
|
836
|
+
|
837
|
+
results = {}
|
838
|
+
|
839
|
+
Net::SSH::Multi.start do |session|
|
840
|
+
|
841
|
+
session.group :server do
|
842
|
+
session.use("root@#{nodes[0]}")
|
843
|
+
end
|
844
|
+
|
845
|
+
session.group :client do
|
846
|
+
session.use("root@#{nodes[1]}")
|
847
|
+
end
|
848
|
+
|
849
|
+
session.with(:server).exec("iperf -s &")
|
850
|
+
|
851
|
+
# bandwidth
|
852
|
+
|
853
|
+
results[:bandwidth]= session.with(:client).exec!("iperf -c #{nodes[0]}")
|
854
|
+
|
855
|
+
# bi-directional bandwidth measurement
|
856
|
+
|
857
|
+
results[:bidi]= session.with(:client).exec!("iperf -c #{nodes[0]} -r")
|
858
|
+
|
859
|
+
# TCP window size
|
860
|
+
results[:window]= session.with(:client).exec!("iperf -c #{nodes[0]} -w 2000")
|
861
|
+
|
862
|
+
# shutdown server
|
863
|
+
|
864
|
+
session.with(:server).exec("skill iperf")
|
865
|
+
end
|
866
|
+
|
867
|
+
Then, if we execute it with the `play` command:
|
868
|
+
|
869
|
+
|
870
|
+
[77] pry(main)> play iperf_test.rb
|
871
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
872
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Server listening on TCP port 5001
|
873
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] TCP window size: 85.3 KByte (default)
|
874
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
875
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] local 10.27.204.71 port 5001 connected with 10.19.200.240 port 32769
|
876
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ ID] Interval Transfer Bandwidth
|
877
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] 0.0-10.0 sec 1.12 GBytes 957 Mbits/sec
|
878
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] local 10.27.204.71 port 5001 connected with 10.19.200.240 port 32770
|
879
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] 0.0-10.0 sec 1.10 GBytes 947 Mbits/sec
|
880
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
881
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] Client connecting to 10.19.200.240, TCP port 5001
|
882
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] TCP window size: 85.0 KByte (default)
|
883
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] ------------------------------------------------------------
|
884
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] local 10.27.204.71 port 47604 connected with 10.19.200.240 port 5001
|
885
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 5] 0.0-10.0 sec 1.12 GBytes 958 Mbits/sec
|
886
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] local 10.27.204.71 port 5001 connected with 10.19.200.240 port 32771
|
887
|
+
[paranoia-6-kavlan-16.rennes.grid5000.fr] [ 4] 0.0-10.7 sec 2.25 MBytes 1.77 Mbits/sec
|
888
|
+
|
889
|
+
|
890
|
+
The variable `results` is now defined and contains the results of each test.
|
891
|
+
Let's print the results. Type the following into the `pry` console:
|
892
|
+
|
893
|
+
results.each do |test, res|
|
894
|
+
puts "Results of test: #{test}"
|
895
|
+
res.each { |node,r| puts r[:stdout]}
|
896
|
+
end
|
897
|
+
|
898
|
+
Which will give us:
|
899
|
+
|
900
|
+
Results of test: bandwidth
|
901
|
+
[ ID] Interval Transfer Bandwidth
|
902
|
+
[ 3] 0.0-10.0 sec 1.12 GBytes 958 Mbits/sec
|
903
|
+
Results of test: bidi
|
904
|
+
[ 4] 0.0-10.0 sec 1.12 GBytes 957 Mbits/sec
|
905
|
+
Results of test: window
|
906
|
+
[ ID] Interval Transfer Bandwidth
|
907
|
+
[ 3] 0.0-10.7 sec 2.25 MBytes 1.77 Mbits/sec
|
908
|
+
|
909
|
+
|
910
|
+
Now let's look at the network traffic that we have generated during our experiment using KWAPI.
|
911
|
+
**Ruby-cute** offers the {Cute::G5K::API#get_metric get_metric} method to consult the G5K Metrology API.
|
912
|
+
In order to carry out a query and get the values of a specific probe,
|
913
|
+
we have to know the time interval of the values and the name of the probe.
|
914
|
+
Let's get the values for the metric `network_in`.
|
915
|
+
We could get all the names of the probes specific to this metric by typing:
|
916
|
+
|
917
|
+
probes = $g5k.get_metric("rennes",:metric => "network_in").uids
|
918
|
+
|
919
|
+
If you type that in `pry` you will get:
|
920
|
+
|
921
|
+
[13] pry(main)> probes
|
922
|
+
=> ["parasilo-11-eth0",
|
923
|
+
"parasilo-11-eth1",
|
924
|
+
"paravance-48-eth1",
|
925
|
+
"paravance-48-eth0",
|
926
|
+
"paravance-2-eth0",
|
927
|
+
"paravance-2-eth1",
|
928
|
+
"paranoia-4",
|
929
|
+
"paranoia-5",
|
930
|
+
"paranoia-6",
|
931
|
+
"paranoia-7",
|
932
|
+
"paravance-72-eth0",
|
933
|
+
|
934
|
+
In order to choose the right probes, we need to get the real names of the machines
|
935
|
+
and not the ones assigned by the VLAN. We can consult the job information:
|
936
|
+
|
937
|
+
nodes_normal = []
|
938
|
+
jobs.each{ |site,job| nodes_normal.push(job["assigned_nodes"])}
|
939
|
+
|
940
|
+
Which will give us an Array of Arrays that we can flatten by doing:
|
941
|
+
|
942
|
+
[97] pry(main)> nodes_normal.flatten!
|
943
|
+
=> ["paranoia-6.rennes.grid5000.fr", "graphite-4.nancy.grid5000.fr"]
|
944
|
+
|
945
|
+
As we are going to fetch the data for Rennes (the first node), we could do:
|
946
|
+
|
947
|
+
[68] pry(main)> probe_expe = probes.select{ |p| p[nodes_normal[0].split(".")[0]]}
|
948
|
+
|
949
|
+
So, at this point we already have the probe we want to request.
|
950
|
+
The next step is to get the start time of the interval; we can choose, for example, the time at which the deployments finished:
|
951
|
+
|
952
|
+
deploy_end = []
|
953
|
+
jobs.each{ |site,job| deploy_end.push(job["deploy"].last["updated_at"])}
|
954
|
+
|
955
|
+
Therefore, we could choose the maximum timestamp from the ones returned:
|
956
|
+
|
957
|
+
start = deploy_end.max
|
958
|
+
|
959
|
+
Now, we can proceed by performing the query:
|
960
|
+
|
961
|
+
$g5k.get_metric("rennes",:metric => "network_in",:query => {:from => start, :to => start+3600, :only => probe_expe.first})
|
962
|
+
|
963
|
+
An Array is returned. We can then open an editor and write a small script that will write these values into a file:
|
964
|
+
|
965
|
+
[33] pry(main)> edit -n get_results.rb
|
966
|
+
|
967
|
+
and type:
|
968
|
+
|
969
|
+
raw_data = $g5k.get_metric("rennes",:metric => "network_in",
|
970
|
+
:query => {:from => start, :to => start+3600, :only => probe_expe.first})
|
971
|
+
|
972
|
+
network_in = raw_data.map{ |r| r["values"]}.flatten
|
973
|
+
time = raw_data.map{ |r| r["timestamps"]}.flatten
|
974
|
+
|
975
|
+
values = Hash[time.zip(network_in)]
|
976
|
+
|
977
|
+
File.open("network_in-values.txt",'w+') do |f|
|
978
|
+
f.puts("time\t bytes")
|
979
|
+
values.each{ |k,v| f.puts("#{k}\t#{v}")}
|
980
|
+
end
|
981
|
+
|
982
|
+
and execute it with:
|
983
|
+
|
984
|
+
pry(main)> play get_results.rb
|
985
|
+
=> {1453293153.732378=>4498405298908,
|
986
|
+
1453293155.183099=>4498405298908,
|
987
|
+
1453293156.582115=>4498405298908,
|
988
|
+
1453293157.924968=>4498405298908,
|
989
|
+
1453293159.28666=>4498405298908,
|
990
|
+
1453293160.655534=>4498405298908,
|
991
|
+
1453293161.998718=>4498405299219,
|
992
|
+
|
993
|
+
## Conclusions
|
994
|
+
|
995
|
+
This tutorial has shown how the scripting of complex experiments can be done using the Ruby scripting language.
|
996
|
+
We saw that in the context of Grid'5000,
|
997
|
+
**Ruby-Cute** offers useful methods for accessing the platform's services and executing commands in parallel.
|
998
|
+
The aim of this tutorial was to give you some ideas for coding your experiments using **Ruby-Cute**
|
999
|
+
and we hope it will be useful for your experiments.
|