nitos_testbed_rc 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,493 @@
1
+ #this resource is used to control chassis managers.
2
+ require 'rubygems'
3
+ require 'yaml'
4
+ require 'open-uri'
5
+ require 'nokogiri'
6
+ require 'net/ssh'
7
+
8
# Shell commands executed on a node over SSH to reboot it or to power it off.
# Frozen so the shared command strings cannot be mutated at runtime.
REBOOT_CMD = "reboot".freeze
SHUTDOWN_CMD = "shutdown -P now".freeze
10
+
11
+ module OmfRc::ResourceProxy::CM
12
+ include OmfRc::ResourceProxyDSL
13
+
14
# Proxy settings are read once, while the module body is evaluated.
@config = YAML.load_file('/etc/nitos_testbed_rc/cm_proxy_conf.yaml')
# Alternative location, relative to this source file (kept for reference):
# @config = YAML.load_file(File.join(File.dirname(File.expand_path(__FILE__)), '../etc/cm_proxy_conf.yaml'))

# Upper bound used by the ping-wait helpers; they compare it against a counter
# that grows by 2 after every 2-second sleep.
@@timeout = @config[:timeout]

# Register this module as the :cm proxy, creatable by a :cm_factory resource.
register_proxy(:cm, create_by: :cm_factory)

# Name of the node this resource refers to.
property(:node, default: "node000")
21
+
22
# Once the initial configuration has been applied, print the configured node
# name to stdout, framed by two separator lines.
hook :after_initial_configured do |resource|
  separator = "*******************************************"
  puts separator
  puts "Node: #{resource.property.node}"
  puts separator
end
27
+
28
# Handles a request to configure :state: switch a node on or off, reset it,
# start it with or without PXE, or query its status.
#
# value is read both method-style and hash-style (value.error_msg, value.node,
# value[:status], value[:wait], value[:last_action]) -- presumably a
# Hashie::Mash; TODO confirm against the sender.
# value.node is handed unchanged to the work blocks, which read the keys
# :node_name, :node_ip, :node_mac and :node_cm_ip from it.
configure :state do |res, value|
  debug "Received message '#{value.inspect}'"

  # A request that already carries an error message is answered with an AUTH
  # error and nothing else is done.
  if (error_msg = value.error_msg)
    res.inform(:error, {
      event_type: "AUTH",
      exit_code: "-1",
      node_name: value[:node],
      msg: error_msg
    }, :ALL)
    next
  end

  nod = value.node

  # Compare as a string so that a request without :status (nil) reaches the
  # warning below instead of raising NoMethodError on nil.to_sym.
  case value[:status].to_s
  when "on"                then res.start_node(nod, value[:wait])
  when "off"               then res.stop_node(nod, value[:wait])
  when "reset"             then res.reset_node(nod, value[:wait])
  when "start_on_pxe"      then res.start_node_pxe(nod)
  when "start_without_pxe" then res.start_node_pxe_off(nod, value[:last_action])
  when "get_status"        then res.status(nod)
  else
    res.log_inform_warn "Cannot switch node to unknown state '#{value[:status].to_s}'!"
  end
end
54
+
55
+ #this is used by the get status call
56
# Queries the chassis-manager card of a node for its power state and reports
# the answer with a :status inform message.
#
# node - hash-like object; :node_cm_ip and :node_name are read.
#
# On any failure to fetch the state URL an HTTP :error inform is sent and the
# block ends early.
work("status") do |res, node|
  state_url = "http://#{node[:node_cm_ip]}/state"
  debug "Status url: #{state_url}"

  begin
    # URI#open (from open-uri) is used instead of Kernel#open: Kernel#open no
    # longer opens URLs on Ruby 3.0+, which the bare rescue below would
    # silently report as an unreachable CM.
    resp = URI.parse(state_url).open
  rescue
    res.inform(:error, {
      event_type: "HTTP",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
    }, :ALL)
    next
  end

  ans = res.parse_responce(resp, "//Response//line//value")

  res.inform(:status, {
    current: ans.to_s,
    node_name: node[:node_name].to_s
  }, :ALL)
  sleep 1 #this solves the getting stuck problem.
end
77
+
78
# Powers a node on through its chassis-manager card.
#
# node - hash-like object; :node_mac, :node_cm_ip, :node_name and :node_ip are
#        read, and :node_mac is rewritten in place (lower case, '-' separated).
# wait - when truthy, block until the node answers ping or the wait times out,
#        then report the outcome.
#
# Any PXE symlink for the node's MAC is removed first.
work("start_node") do |res, node, wait|
  node[:node_mac] = node[:node_mac].downcase.tr(':', '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  File.delete(symlink_name) if File.exist?(symlink_name)

  on_url = "http://#{node[:node_cm_ip]}/on"
  debug "Start_node url: #{on_url}"
  begin
    # URI#open (open-uri) instead of Kernel#open, which no longer opens URLs
    # on Ruby 3.0+.
    resp = URI.parse(on_url).open
  rescue
    res.inform(:error, {
      event_type: "HTTP",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      # The node hash has no :name key; :node_name is the key used everywhere else.
      msg: "#{node[:node_name]} failed to reach cm, ip: #{node[:node_cm_ip]}."
    }, :ALL)
    next
  end

  # Any answer other than 'ok' / 'already on' is not reported.
  case res.parse_responce(resp, "//Response")
  when 'ok'
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :booting,
      desired: :running
    }, :ALL)
  when 'already on'
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :running,
      desired: :running
    }, :ALL)
  end

  if wait
    # wait_until_ping is declared as (res, ip), so the resource is passed explicitly.
    if res.wait_until_ping(res, node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :running,
        desired: :running
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while booting."
      }, :ALL)
    end
  end
  sleep 1
end
131
+
132
# Powers a node off: first by running SHUTDOWN_CMD over SSH as root, and if
# anything in that attempt raises, through the chassis-manager card.
#
# node - hash-like object; :node_mac, :node_ip, :node_cm_ip and :node_name are
#        read, and :node_mac is rewritten in place (lower case, '-' separated).
# wait - when truthy, block until the node stops answering ping or the wait
#        times out, then report the outcome.
#
# Any PXE symlink for the node's MAC is removed first.
work("stop_node") do |res, node, wait|
  node[:node_mac] = node[:node_mac].downcase.tr(':', '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  File.delete(symlink_name) if File.exist?(symlink_name)

  begin
    debug "Shutting down node '#{node[:node_name]}' through ssh."
    ssh = Net::SSH.start(node[:node_ip], 'root')#, :password => @password)
    begin
      ssh.exec!(SHUTDOWN_CMD)
    ensure
      # Close the session even when exec! raises, so it is not leaked.
      ssh.close
    end
    debug "shutting down completed with ssh."
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :running,
      desired: :stopped
    }, :ALL)
  rescue
    begin
      off_url = "http://#{node[:node_cm_ip]}/off"
      debug "ssh failed, using CM card instead."
      debug "Stop_node url: #{off_url}"

      begin
        # URI#open (open-uri) instead of Kernel#open, which no longer opens
        # URLs on Ruby 3.0+.
        resp = URI.parse(off_url).open
      rescue
        res.inform(:error, {
          event_type: "HTTP",
          exit_code: "-1",
          node_name: node[:node_name].to_s,
          # The node hash has no :name key; :node_name is the key used everywhere else.
          msg: "#{node[:node_name]} failed to reach cm, ip: #{node[:node_cm_ip]}."
        }, :ALL)
        next
      end

      # Any answer other than 'ok' / 'already off' is not reported.
      case res.parse_responce(resp, "//Response")
      when 'ok'
        res.inform(:status, {
          node_name: node[:node_name].to_s,
          current: :running,
          desired: :stopped
        }, :ALL)
      when 'already off'
        res.inform(:status, {
          node_name: node[:node_name].to_s,
          current: :stopped,
          desired: :stopped
        }, :ALL)
      end
    rescue
      # Reached when parsing or reporting the CM answer raises.
      res.inform(:error, {
        event_type: "HTTP",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
      }, :ALL)
      next
    end
  end

  if wait
    if res.wait_until_no_ping(node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :stopped,
        desired: :stopped
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while shutting down."
      }, :ALL)
    end
  end
  sleep 1
end
210
+
211
# Resets a node: first by running REBOOT_CMD over SSH as root, and if anything
# in that attempt raises, through the chassis-manager card.
#
# node - hash-like object; :node_mac, :node_ip, :node_cm_ip and :node_name are
#        read, and :node_mac is rewritten in place (lower case, '-' separated).
# wait - when truthy, block until the node answers ping again or the wait
#        times out, then report the outcome.
#
# Any PXE symlink for the node's MAC is removed first.
work("reset_node") do |res, node, wait|
  node[:node_mac] = node[:node_mac].downcase.tr(':', '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  File.delete(symlink_name) if File.exist?(symlink_name)

  begin
    debug "Rebooting node '#{node[:node_name]}' through ssh."
    ssh = Net::SSH.start(node[:node_ip], 'root')#, :password => @password)
    begin
      ssh.exec!(REBOOT_CMD)
    ensure
      # Close the session even when exec! raises, so it is not leaked.
      ssh.close
    end
    debug "Rebooting completed with ssh."
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :running,
      desired: :resetted
    }, :ALL)
  rescue
    begin
      reset_url = "http://#{node[:node_cm_ip]}/reset"
      debug "ssh failed, using CM card instead."
      debug "Reset_node url: #{reset_url}"

      begin
        # URI#open (open-uri) instead of Kernel#open, which no longer opens
        # URLs on Ruby 3.0+.
        resp = URI.parse(reset_url).open
      rescue
        res.inform(:error, {
          event_type: "HTTP",
          exit_code: "-1",
          node_name: node[:node_name].to_s,
          # The node hash has no :name key; :node_name is the key used everywhere else.
          msg: "#{node[:node_name]} failed to reach cm, ip: #{node[:node_cm_ip]}."
        }, :ALL)
        next
      end

      # Only an 'ok' answer is reported.
      if res.parse_responce(resp, "//Response") == 'ok'
        res.inform(:status, {
          node_name: node[:node_name].to_s,
          current: :resetted,
          desired: :resetted
        }, :ALL)
      end
    rescue
      # Reached when parsing or reporting the CM answer raises.
      res.inform(:error, {
        event_type: "HTTP",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
      }, :ALL)
      next
    end
  end

  if wait
    # wait_until_ping is declared as (res, ip), so the resource is passed explicitly.
    if res.wait_until_ping(res, node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :resetted,
        desired: :resetted
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while reseting."
      }, :ALL)
    end
  end
  sleep 1
end
281
+
282
# Boots a node from PXE: makes sure the PXE symlink for the node's MAC exists,
# then powers the node on (if it was off) or resets it (if it was on), and
# waits for it to answer ping.
#
# node - hash-like object; :node_mac, :node_cm_ip, :node_name and :node_ip are
#        read, and :node_mac is rewritten in place (lower case, '-' separated).
#
# If the node does not answer ping in time, the symlink is removed again and a
# TIME_OUT error is reported.
work("start_node_pxe") do |res, node|
  # Normalise the MAC before asking for the status: get_status builds the
  # symlink path from node[:node_mac], so it must see the same form that is
  # used for the link itself.
  node[:node_mac] = node[:node_mac].downcase.tr(':', '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"

  http_error = {
    event_type: "HTTP",
    exit_code: "-1",
    node_name: node[:node_name].to_s,
    msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
  }

  begin
    resp = res.get_status(node)
  rescue
    # get_status fetches the state URL without rescuing; report instead of
    # letting the exception escape.
    res.inform(:error, http_error, :ALL)
    next
  end

  # state => [debug label, CM action, whether the PXE symlink must be created]
  # get_status reports a PXE-booted node as :on_pxe; :started_on_pxe is kept
  # for backward compatibility.
  plans = {
    on:             ["RESET",   "reset", true],
    off:            ["ON",      "on",    true],
    on_pxe:         ["STARTED", "reset", false],
    started_on_pxe: ["STARTED", "reset", false]
  }

  # Any other state (including nil) sends no CM request and goes straight to
  # the ping wait.
  if (plan = plans[resp])
    label, cm_action, needs_link = plan
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if needs_link && !File.exist?(symlink_name)
      File.symlink("/tftpboot/pxelinux.cfg/#{@config[:pxeSymLinkConfFile]}", symlink_name)
    end

    cm_url = "http://#{node[:node_cm_ip]}/#{cm_action}"
    debug "Start_node_pxe #{label}: #{cm_url}"
    begin
      # URI#open (open-uri) instead of Kernel#open, which no longer opens URLs
      # on Ruby 3.0+.
      URI.parse(cm_url).open
    rescue
      res.inform(:error, http_error, :ALL)
      next
    end
  end

  if res.wait_until_ping(res, node[:node_ip])
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :pxe_on,
      desired: :pxe_on
    }, :ALL)
  else
    File.delete(symlink_name) if File.exist?(symlink_name)
    res.inform(:error, {
      event_type: "TIME_OUT",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      msg: "Node '#{node[:node_name]}' timed out while trying to boot on PXE."
    }, :ALL)
  end
  sleep 1
end
355
+
356
# Leaves PXE mode: removes the PXE symlink for the node's MAC and then either
# resets the node or powers it off through the chassis-manager card.
#
# node   - hash-like object; :node_mac, :node_cm_ip, :node_name and :node_ip
#          are read, and :node_mac is rewritten in place (lower case, '-'
#          separated).
# action - "reset" or "shutdown"; any other value only removes the symlink.
work("start_node_pxe_off") do |res, node, action|
  node[:node_mac] = node[:node_mac].downcase.tr(':', '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  File.delete(symlink_name) if File.exist?(symlink_name)

  http_error = {
    event_type: "HTTP",
    exit_code: "-1",
    node_name: node[:node_name].to_s,
    msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
  }
  pxe_off_status = {
    node_name: node[:node_name].to_s,
    current: :pxe_off,
    desired: :pxe_off
  }

  if action == "reset"
    reset_url = "http://#{node[:node_cm_ip]}/reset"
    debug "Start_node_pxe_off RESET: #{reset_url}"
    begin
      # URI#open (open-uri) instead of Kernel#open, which no longer opens URLs
      # on Ruby 3.0+.
      URI.parse(reset_url).open
    rescue
      res.inform(:error, http_error, :ALL)
      next
    end

    # wait_until_ping is declared as (res, ip), so the resource is passed explicitly.
    if res.wait_until_ping(res, node[:node_ip])
      res.inform(:status, pxe_off_status, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while booting."
      }, :ALL)
    end
  elsif action == "shutdown"
    off_url = "http://#{node[:node_cm_ip]}/off"
    debug "Start_node_pxe_off SHUTDOWN: #{off_url}"
    begin
      URI.parse(off_url).open
    rescue
      res.inform(:error, http_error, :ALL)
      next
    end

    if res.wait_until_no_ping(node[:node_ip])
      res.inform(:status, pxe_off_status, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while shutting down."
      }, :ALL)
    end
  end
  sleep 1
end
422
+
423
+ #this is used by other methods in this scope
424
# Polls a host with ping until it answers or the configured timeout is used up.
#
# Accepts both call forms: wait_until_ping(res, ip) and wait_until_ping(ip).
# In the one-argument form the single argument is the address.
#
# res - the resource (unused here), or the address in the one-argument form.
# ip  - address to ping.
#
# Each attempt sleeps 2 seconds and then runs `ping <ip> -c 2 -w 2`; the
# counter compared against @@timeout grows by 2 per attempt.
#
# Returns true as soon as a ping succeeds, false once the timeout is reached.
def wait_until_ping(res, ip = nil)
  ip ||= res
  t = 0
  while t < @@timeout
    sleep 2
    # Argument-list form of system: the address is never interpreted by a shell.
    return true if system("ping", ip.to_s, "-c", "2", "-w", "2")
    t += 2
  end
  false
end
443
+
444
+ #this is used by other methods in this scope
445
# Polls a host with ping until it stops answering or the configured timeout is
# used up.
#
# ip - address to ping.
#
# Each attempt sleeps 2 seconds and then runs `ping <ip> -c 2 -w 2`; the
# counter compared against @@timeout grows by 2 per attempt.
#
# Returns true as soon as a ping fails, false once the timeout is reached.
work("wait_until_no_ping") do |res, ip|
  t = 0
  unreachable = false
  while t < @@timeout
    sleep 2
    # Argument-list form of system: the address is never interpreted by a shell.
    # Only an explicit false counts; nil (ping could not be executed) does not
    # prove that the host is down.
    if system("ping", ip.to_s, "-c", "2", "-w", "2") == false
      unreachable = true
      break
    end
    t += 2
  end
  unreachable
end
464
+
465
+ #this is used by other methods in this scope
466
# Asks the chassis-manager card for the node's power state and maps it to a
# symbol.
#
# node - hash-like object; :node_cm_ip and :node_mac are read. The symlink
#        path is built from :node_mac exactly as given.
#
# Returns :on_pxe when the node is on and its PXE symlink exists, :on when it
# is on without the symlink, :off when it is off, and nil for any other answer.
# Errors from fetching the URL are not rescued here.
work("get_status") do |res, node|
  state_url = "http://#{node[:node_cm_ip]}/state"
  debug state_url
  # URI#open (open-uri) instead of Kernel#open, which no longer opens URLs on
  # Ruby 3.0+.
  resp = res.parse_responce(URI.parse(state_url).open, "//Response//line//value")
  debug "state response: #{resp}"

  case resp
  when 'on'
    symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    File.exist?(symlink_name) ? :on_pxe : :on
  when 'off'
    :off
  end
end
483
+
484
# Extracts the answer text from a chassis-manager response.
#
# input - a String, a StringIO, or any other object that responds to #read
#         (open-uri hands back a Tempfile instead of a StringIO for large
#         bodies).
# path  - XPath expression evaluated when the body looks like XML.
#
# Returns the stripped text of the nodes matched by path when the body starts
# with '<' (ignoring leading whitespace), otherwise the stripped body itself.
work("parse_responce") do |res, input, path|
  if input.kind_of?(StringIO)
    input = input.string
  elsif input.respond_to?(:read)
    input = input.read
  end
  # nil becomes "", so a missing body yields an empty answer instead of raising.
  input = input.to_s

  if input.lstrip.start_with?("<")
    Nokogiri::XML(input).xpath(path).text.strip
  else
    input.strip
  end
end
493
+ end