nitos_testbed_rc 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,493 @@
1
+ #this resource is used to control chassis managers.
2
+ require 'rubygems'
3
+ require 'yaml'
4
+ require 'open-uri'
5
+ require 'nokogiri'
6
+ require 'net/ssh'
7
+
8
# Shell commands run on a node over SSH by the reset_node and stop_node workers.
# Frozen so the shared command strings cannot be mutated by accident.
REBOOT_CMD = "reboot".freeze
SHUTDOWN_CMD = "shutdown -P now".freeze
10
+
11
+ module OmfRc::ResourceProxy::CM
12
+ include OmfRc::ResourceProxyDSL
13
+
14
+ @config = YAML.load_file('/etc/nitos_testbed_rc/cm_proxy_conf.yaml')
15
+ # @config = YAML.load_file(File.join(File.dirname(File.expand_path(__FILE__)), '../etc/cm_proxy_conf.yaml'))
16
+ @@timeout = @config[:timeout]
17
+
18
+ register_proxy :cm, :create_by => :cm_factory
19
+
20
+ property :node, default: "node000"
21
+
22
# Runs on the :after_initial_configured hook and prints a three-line banner to
# stdout naming the node this resource is configured for.
hook :after_initial_configured do |res|
  banner = "*******************************************"
  # puts writes each argument on its own line.
  puts banner, "Node: #{res.property.node}", banner
end
27
+
28
# Handles a request to switch a node to a new power state, or to report its
# current one, by dispatching to the matching worker.
#
# res   - the resource the request is dispatched on.
# value - the request; read through error_msg, node, :status, :wait and
#         :last_action.
configure :state do |res, value|
  debug "Received message '#{value.inspect}'"

  # A request that already carries an error is relayed as an AUTH error and
  # nothing else is done.
  error_msg = value.error_msg
  if error_msg
    res.inform(:error, {
      event_type: "AUTH",
      exit_code: "-1",
      node_name: value[:node],
      msg: error_msg
    }, :ALL)
    next
  end

  # The workers read :node_name, :node_ip, :node_mac and :node_cm_ip from it,
  # e.g. {node_name: "node120", node_ip: "10.0.1.120",
  #       node_mac: "00-03-1d-0d-4b-96", node_cm_ip: "10.1.0.120"}.
  nod = value.node

  # Dispatch on the string form so that a missing :status reaches the warning
  # below instead of raising NoMethodError on nil.to_sym.
  case value[:status].to_s
  when "on" then res.start_node(nod, value[:wait])
  when "off" then res.stop_node(nod, value[:wait])
  when "reset" then res.reset_node(nod, value[:wait])
  when "start_on_pxe" then res.start_node_pxe(nod)
  when "start_without_pxe" then res.start_node_pxe_off(nod, value[:last_action])
  when "get_status" then res.status(nod)
  else
    res.log_inform_warn "Cannot switch node to unknown state '#{value[:status].to_s}'!"
  end
end
54
+
55
# Used by the get_status request: asks the chassis manager of +node+ for its
# state and reports the answer with a :status inform message.
#
# res  - resource used to send inform messages and to parse the response.
# node - hash read through :node_name and :node_cm_ip.
#
# On any failure to fetch the state an HTTP :error inform is sent instead.
work("status") do |res, node|
  state_url = "http://#{node[:node_cm_ip]}/state"
  debug "Status url: #{state_url}"
  begin
    # URI#open (open-uri) rather than Kernel#open: Kernel#open stopped
    # accepting URLs in Ruby 3.0, which made this call fail unconditionally.
    resp = URI.parse(state_url).open
  rescue StandardError
    res.inform(:error, {
      event_type: "HTTP",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
    }, :ALL)
    next
  end
  ans = res.parse_responce(resp, "//Response//line//value")

  res.inform(:status, {
    current: ans.to_s,
    node_name: node[:node_name].to_s
  }, :ALL)
  sleep 1 #this solves the getting stuck problem.
end
77
+
78
# Powers a node on through its chassis manager.
#
# res  - resource used to send inform messages and to call helper workers.
# node - hash read through :node_name, :node_ip, :node_mac and :node_cm_ip;
#        :node_mac is rewritten in place to lower case with '-' separators.
# wait - when truthy, additionally wait until the node answers ping and
#        report :running, or a TIME_OUT error if it never does.
work("start_node") do |res, node, wait|
  node[:node_mac] = node[:node_mac].downcase.gsub(/:/, '-')
  # Remove a leftover PXE entry for this MAC before powering on.
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? instead of File.exists?, which newer Rubies no longer provide.
  File.delete(symlink_name) if File.exist?(symlink_name)

  on_url = "http://#{node[:node_cm_ip]}/on"
  debug "Start_node url: #{on_url}"
  begin
    # URI#open (open-uri) rather than Kernel#open, which stopped accepting
    # URLs in Ruby 3.0.
    resp = URI.parse(on_url).open
  rescue StandardError
    # The message used to be prefixed with node[:name], which is not a key of
    # the node hash and only produced a stray leading blank.
    res.inform(:error, {
      event_type: "HTTP",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
    }, :ALL)
    next
  end

  ans = res.parse_responce(resp, "//Response")

  if ans == 'ok'
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :booting,
      desired: :running
    }, :ALL)
  elsif ans == 'already on'
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :running,
      desired: :running
    }, :ALL)
  end

  if wait
    # wait_until_ping is a plain method declared as (res, ip), so the resource
    # has to be passed explicitly; the one-argument call raised ArgumentError.
    if res.wait_until_ping(res, node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :running,
        desired: :running
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while booting."
      }, :ALL)
    end
  end
  sleep 1
end
131
+
132
# Shuts a node down: first by running SHUTDOWN_CMD over SSH and, if that
# fails for any reason, by asking the chassis manager to switch it off.
#
# res  - resource used to send inform messages and to call helper workers.
# node - hash read through :node_name, :node_ip, :node_mac and :node_cm_ip;
#        :node_mac is rewritten in place to lower case with '-' separators.
# wait - when truthy, additionally wait until the node stops answering ping
#        and report :stopped, or a TIME_OUT error if it keeps answering.
work("stop_node") do |res, node, wait|
  node[:node_mac] = node[:node_mac].downcase.gsub(/:/, '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? instead of File.exists?, which newer Rubies no longer provide.
  File.delete(symlink_name) if File.exist?(symlink_name)

  begin
    debug "Shutting down node '#{node[:node_name]}' through ssh."
    # Block form so the session is closed even when exec! raises.
    Net::SSH.start(node[:node_ip], 'root') { |ssh| ssh.exec!(SHUTDOWN_CMD) }
    debug "shutting down completed with ssh."
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :running,
      desired: :stopped
    }, :ALL)
  rescue StandardError
    debug "ssh failed, using CM card instead."
    off_url = "http://#{node[:node_cm_ip]}/off"
    debug "Stop_node url: #{off_url}"

    begin
      # URI#open (open-uri) rather than Kernel#open, which stopped accepting
      # URLs in Ruby 3.0.
      resp = URI.parse(off_url).open
      ans = res.parse_responce(resp, "//Response")

      if ans == 'ok'
        res.inform(:status, {
          node_name: node[:node_name].to_s,
          current: :running,
          desired: :stopped
        }, :ALL)
      elsif ans == 'already off'
        res.inform(:status, {
          node_name: node[:node_name].to_s,
          current: :stopped,
          desired: :stopped
        }, :ALL)
      end
    rescue StandardError
      # One handler for the whole fallback. The former inner handler differed
      # only by a node[:name] prefix, which is not a key of the node hash.
      res.inform(:error, {
        event_type: "HTTP",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
      }, :ALL)
      next
    end
  end

  if wait
    if res.wait_until_no_ping(node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :stopped,
        desired: :stopped
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while shutting down."
      }, :ALL)
    end
  end
  sleep 1
end
210
+
211
# Reboots a node: first by running REBOOT_CMD over SSH and, if that fails for
# any reason, by asking the chassis manager to reset it.
#
# res  - resource used to send inform messages and to call helper workers.
# node - hash read through :node_name, :node_ip, :node_mac and :node_cm_ip;
#        :node_mac is rewritten in place to lower case with '-' separators.
# wait - when truthy, additionally wait until the node answers ping and
#        report :resetted, or a TIME_OUT error if it never does.
work("reset_node") do |res, node, wait|
  node[:node_mac] = node[:node_mac].downcase.gsub(/:/, '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? instead of File.exists?, which newer Rubies no longer provide.
  File.delete(symlink_name) if File.exist?(symlink_name)

  begin
    debug "Rebooting node '#{node[:node_name]}' through ssh."
    # Block form so the session is closed even when exec! raises.
    Net::SSH.start(node[:node_ip], 'root') { |ssh| ssh.exec!(REBOOT_CMD) }
    debug "Rebooting completed with ssh."
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :running,
      desired: :resetted
    }, :ALL)
  rescue StandardError
    debug "ssh failed, using CM card instead."
    reset_url = "http://#{node[:node_cm_ip]}/reset"
    debug "Reset_node url: #{reset_url}"

    begin
      # URI#open (open-uri) rather than Kernel#open, which stopped accepting
      # URLs in Ruby 3.0.
      resp = URI.parse(reset_url).open
      ans = res.parse_responce(resp, "//Response")

      if ans == 'ok'
        res.inform(:status, {
          node_name: node[:node_name].to_s,
          current: :resetted,
          desired: :resetted
        }, :ALL)
      end
    rescue StandardError
      # One handler for the whole fallback. The former inner handler differed
      # only by a node[:name] prefix, which is not a key of the node hash.
      res.inform(:error, {
        event_type: "HTTP",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
      }, :ALL)
      next
    end
  end

  if wait
    # wait_until_ping is a plain method declared as (res, ip), so the resource
    # has to be passed explicitly; the one-argument call raised ArgumentError.
    if res.wait_until_ping(res, node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :resetted,
        desired: :resetted
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while reseting."
      }, :ALL)
    end
  end
  sleep 1
end
281
+
282
# Boots a node over PXE: makes sure the per-MAC pxelinux symlink exists, then
# powers the node on (when it is off) or resets it (when it is on), and
# finally waits for it to answer ping.
#
# res  - resource used to send inform messages and to call helper workers.
# node - hash read through :node_name, :node_ip, :node_mac and :node_cm_ip;
#        :node_mac is rewritten in place to lower case with '-' separators.
#
# Reports :pxe_on on success. On a ping timeout the symlink is removed again
# and a TIME_OUT error is reported. Any chassis manager failure is reported
# as an HTTP error.
work("start_node_pxe") do |res, node|
  # Normalise the MAC before asking for the status: get_status builds the
  # symlink name from node[:node_mac] to tell :on from :on_pxe.
  node[:node_mac] = node[:node_mac].downcase.gsub(/:/, '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"

  report_http_error = lambda do
    res.inform(:error, {
      event_type: "HTTP",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
    }, :ALL)
  end

  begin
    # get_status does not rescue its own HTTP call.
    resp = res.get_status(node)
  rescue StandardError
    report_http_error.call
    next
  end

  # Debug label and chassis manager action for each known state. get_status
  # reports a PXE-booted node as :on_pxe; :started_on_pxe is kept for
  # compatibility with the name this worker used to test for.
  label, action =
    case resp
    when :on then ["RESET", "reset"]
    when :off then ["ON", "on"]
    when :on_pxe, :started_on_pxe then ["STARTED", "reset"]
    end

  if action
    # File.exist? instead of File.exists?, which newer Rubies no longer provide.
    unless File.exist?(symlink_name)
      File.symlink("/tftpboot/pxelinux.cfg/#{@config[:pxeSymLinkConfFile]}", symlink_name)
    end

    action_url = "http://#{node[:node_cm_ip]}/#{action}"
    debug "Start_node_pxe #{label}: #{action_url}"
    begin
      # URI#open (open-uri) rather than Kernel#open, which stopped accepting
      # URLs in Ruby 3.0.
      URI.parse(action_url).open
    rescue StandardError
      report_http_error.call
      next
    end
  end

  if res.wait_until_ping(res, node[:node_ip])
    res.inform(:status, {
      node_name: node[:node_name].to_s,
      current: :pxe_on,
      desired: :pxe_on
    }, :ALL)
  else
    File.delete(symlink_name) if File.exist?(symlink_name)
    res.inform(:error, {
      event_type: "TIME_OUT",
      exit_code: "-1",
      node_name: node[:node_name].to_s,
      msg: "Node '#{node[:node_name]}' timed out while trying to boot on PXE."
    }, :ALL)
  end
  sleep 1
end
355
+
356
# Leaves PXE mode: removes the per-MAC pxelinux symlink and then either resets
# the node or switches it off through the chassis manager.
#
# res    - resource used to send inform messages and to call helper workers.
# node   - hash read through :node_name, :node_ip, :node_mac and :node_cm_ip;
#          :node_mac is rewritten in place to lower case with '-' separators.
# action - "reset" to reboot the node and wait until it answers ping,
#          "shutdown" to power it off and wait until it stops answering;
#          any other value only removes the symlink.
#
# Reports :pxe_off on success, a TIME_OUT error when the wait expires and an
# HTTP error when the chassis manager cannot be reached.
work("start_node_pxe_off") do |res, node, action|
  node[:node_mac] = node[:node_mac].downcase.gsub(/:/, '-')
  symlink_name = "/tftpboot/pxelinux.cfg/01-#{node[:node_mac]}"
  # File.exist? instead of File.exists?, which newer Rubies no longer provide.
  File.delete(symlink_name) if File.exist?(symlink_name)

  if action == "reset"
    reset_url = "http://#{node[:node_cm_ip]}/reset"
    debug "Start_node_pxe_off RESET: #{reset_url}"
    begin
      # URI#open (open-uri) rather than Kernel#open, which stopped accepting
      # URLs in Ruby 3.0.
      URI.parse(reset_url).open
    rescue StandardError
      res.inform(:error, {
        event_type: "HTTP",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
      }, :ALL)
      next
    end

    # wait_until_ping is a plain method declared as (res, ip), so the resource
    # has to be passed explicitly; the one-argument call raised ArgumentError.
    if res.wait_until_ping(res, node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :pxe_off,
        desired: :pxe_off
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while booting."
      }, :ALL)
    end
  elsif action == "shutdown"
    off_url = "http://#{node[:node_cm_ip]}/off"
    debug "Start_node_pxe_off SHUTDOWN: #{off_url}"
    begin
      URI.parse(off_url).open
    rescue StandardError
      res.inform(:error, {
        event_type: "HTTP",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "failed to reach cm, ip: #{node[:node_cm_ip]}."
      }, :ALL)
      next
    end

    if res.wait_until_no_ping(node[:node_ip])
      res.inform(:status, {
        node_name: node[:node_name].to_s,
        current: :pxe_off,
        desired: :pxe_off
      }, :ALL)
    else
      res.inform(:error, {
        event_type: "TIME_OUT",
        exit_code: "-1",
        node_name: node[:node_name].to_s,
        msg: "Node '#{node[:node_name]}' timed out while shutting down."
      }, :ALL)
    end
  end
  sleep 1
end
422
+
423
# Used by the workers in this module: pings +ip+ every two seconds until it
# answers or @@timeout seconds worth of attempts have been made.
#
# Accepts both call shapes found in this file: wait_until_ping(res, ip) and
# wait_until_ping(ip). Unlike the work(...) helpers this is a plain method, so
# the resource is not passed in automatically and the one-argument calls used
# to raise ArgumentError.
#
# res - the resource (unused), or the address when called with one argument.
# ip  - address to ping.
#
# Returns true as soon as a ping succeeds, false once the timeout is reached.
def wait_until_ping(res, ip = nil)
  ip ||= res
  elapsed = 0
  while elapsed < @@timeout
    sleep 2
    # Argument-list form: the address is never interpreted by a shell.
    return true if system("ping", ip.to_s, "-c", "2", "-w", "2")
    elapsed += 2
  end
  false
end
443
+
444
# Used by the workers in this module: pings +ip+ every two seconds until it
# stops answering or @@timeout seconds worth of attempts have been made.
#
# res - the resource (unused).
# ip  - address to ping.
#
# Evaluates to true as soon as a ping fails, false once the timeout is reached.
work("wait_until_no_ping") do |res, ip|
  elapsed = 0
  went_silent = false
  while elapsed < @@timeout
    sleep 2
    # Argument-list form: the address is never interpreted by a shell.
    # Only an explicit false counts as "no answer"; nil means ping itself
    # could not be run.
    if system("ping", ip.to_s, "-c", "2", "-w", "2") == false
      went_silent = true
      break
    end
    elapsed += 2
  end
  went_silent
end
464
+
465
# Used by the workers in this module: asks the chassis manager of +node+ for
# its power state.
#
# res  - resource used to parse the response.
# node - hash read through :node_cm_ip and :node_mac.
#
# Evaluates to :off, :on, or :on_pxe (on, and a pxelinux symlink exists for the
# node's MAC); nil for any other answer. Errors from the HTTP request are not
# rescued here and propagate to the caller.
work("get_status") do |res, node|
  state_url = "http://#{node[:node_cm_ip]}/state"
  debug state_url
  # URI#open (open-uri) rather than Kernel#open, which stopped accepting URLs
  # in Ruby 3.0.
  resp = URI.parse(state_url).open
  resp = res.parse_responce(resp, "//Response//line//value")
  debug "state response: #{resp}"

  if resp == 'on'
    # Build the name from the normalised MAC (lower case, '-' separated) so the
    # check matches the symlinks the other workers create, even when the
    # caller has not normalised node[:node_mac] yet. node is left untouched.
    mac = node[:node_mac].to_s.downcase.gsub(/:/, '-')
    symlink_name = "/tftpboot/pxelinux.cfg/01-#{mac}"
    # File.exist? instead of File.exists?, which newer Rubies no longer provide.
    File.exist?(symlink_name) ? :on_pxe : :on
  elsif resp == 'off'
    :off
  end
end
483
+
484
# Extracts the answer from a chassis manager response.
#
# res   - the resource (unused).
# input - the response: a String, a StringIO, or another readable IO object.
# path  - XPath expression applied when the response is XML.
#
# Evaluates to the stripped text of the nodes matching +path+ when the
# response starts with "<", otherwise to the stripped response itself.
work("parse_responce") do |res, input, path|
  if input.kind_of?(StringIO)
    input = input.string
  elsif input.respond_to?(:read)
    # open-uri hands back a Tempfile instead of a StringIO for larger bodies;
    # read it so the String operations below work on it too.
    input = input.read
  end

  if input.start_with?("<")
    Nokogiri::XML(input).xpath(path).text.strip
  else
    input.strip
  end
end
493
+ end