rvc 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,19 +25,38 @@ VNC = ENV['VNC'] || search_path('tightvnc') || search_path('vncviewer') || searc
 opts :view do
   summary "Spawn a VNC client"
   arg :vm, nil, :lookup => VIM::VirtualMachine
+  opt :ws, "Enable VNC websocket proxy"
 end
 
 rvc_alias :view, :vnc
 rvc_alias :view, :V
 
-def view vm
+def view vm, opts
   ip = reachable_ip vm.collect('runtime.host')[0]
   extraConfig, = vm.collect('config.extraConfig')
   already_enabled = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.enabled' && x.value.downcase == 'true' }
+
+  if opts[:ws]
+    opt = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.webSocket.port' }
+    if opt.nil?
+      ws_port = unused_vnc_port ip
+      vm.ReconfigVM_Task(:spec => {
+        :extraConfig => [
+          { :key => 'RemoteDisplay.vnc.webSocket.port', :value => ws_port.to_s }
+        ]
+      }).wait_for_completion
+    else
+      ws_port = opt.value
+    end
+  end
   if already_enabled
     puts "VNC already enabled"
-    port = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.port' }.value
-    password = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.password' }.value
+    port = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.port' }
+    if !port
+      err "VNC enabled but no port assigned. Use vnc.off to reset config"
+    end
+    password = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.password' }
+    password = password ? password.value : ""
   else
     port = unused_vnc_port ip
     password = vnc_password
@@ -49,7 +68,11 @@ def view vm
       ]
     }).wait_for_completion
   end
-  vnc_client ip, port, password
+  if opts[:ws]
+    puts "open http://novnc.com?host=#{ip}&port=#{ws_port}&password=#{password}"
+  else
+    vnc_client ip, port, password
+  end
 end
 
 
@@ -63,7 +86,8 @@ def off vm
     :extraConfig => [
       { :key => 'RemoteDisplay.vnc.enabled', :value => 'false' },
       { :key => 'RemoteDisplay.vnc.password', :value => '' },
-      { :key => 'RemoteDisplay.vnc.port', :value => '' }
+      { :key => 'RemoteDisplay.vnc.port', :value => '' },
+      { :key => 'RemoteDisplay.vnc.webSocket.port', :value => '' }
     ]
   }).wait_for_completion
 end
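
The hunks above give vnc.view (aliased as vnc and V) a new --ws flag: instead of spawning a local VNC client, the command assigns a RemoteDisplay.vnc.webSocket.port on the VM and prints a noVNC URL, and vnc.off now clears that port again. As a rough sketch of a session (the VM name, address, port, and password below are invented for illustration, not taken from this diff):

  > vnc.view myvm --ws
  open http://novnc.com?host=192.0.2.10&port=5901&password=s3cret

The remainder of the diff adds a new file of VSAN commands.
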
@@ -0,0 +1,4105 @@
+# Copyright (c) 2013 VMware, Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+require 'rvc/vim'
+require 'json'
+require 'time'
+VIM::ClusterComputeResource
+
+# Patch in some last minute additions to the API
+db = VIM.loader.instance_variable_get(:@db)
+db['HostVsanInternalSystem']['methods']["QuerySyncingVsanObjects"] =
+  {"params"=>
+    [{"name"=>"uuids",
+      "is-array"=>true,
+      "is-optional"=>true,
+      "version-id-ref"=>nil,
+      "wsdl_type"=>"xsd:string"}],
+   "result"=>
+    {"is-array"=>false,
+     "is-optional"=>false,
+     "is-task"=>false,
+     "version-id-ref"=>nil,
+     "wsdl_type"=>"xsd:string"}}
+db['HostVsanInternalSystem']['methods']["GetVsanObjExtAttrs"] =
+  {"params"=>
+    [{"name"=>"uuids",
+      "is-array"=>true,
+      "is-optional"=>true,
+      "version-id-ref"=>nil,
+      "wsdl_type"=>"xsd:string"}],
+   "result"=>
+    {"is-array"=>false,
+     "is-optional"=>false,
+     "is-task"=>false,
+     "version-id-ref"=>nil,
+     "wsdl_type"=>"xsd:string"}}
+db = nil
+
+$vsanUseGzipApis = false
+
+def is_uuid str
+  str =~ /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/
+end
+
+opts :enable_vsan_on_cluster do
+  summary "Enable VSAN on a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+  opt :disable_storage_auto_claim, "Disable auto disk-claim", :type => :boolean
+end
+
+def enable_vsan_on_cluster cluster, opts
+  conn = cluster._connection
+  _run_with_rev(conn, "dev") do
+    spec = VIM::ClusterConfigSpecEx(
+      :vsanConfig => {
+        :enabled => true,
+        :defaultConfig => {
+          :autoClaimStorage => (!(opts[:disable_storage_auto_claim] || false)),
+        }
+      }
+    )
+    task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
+    progress([task])
+    childtasks = task.child_tasks
+    if childtasks && childtasks.length > 0
+      progress(childtasks)
+    end
+    childtasks = task.child_tasks
+    if childtasks && childtasks.length > 0
+      progress(childtasks)
+    end
+  end
+end
+
+opts :disable_vsan_on_cluster do
+  summary "Disable VSAN on a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+end
+
+def disable_vsan_on_cluster cluster
+  conn = cluster._connection
+  _run_with_rev(conn, "dev") do
+    spec = VIM::ClusterConfigSpecEx(
+      :vsanConfig => {
+        :enabled => false,
+      }
+    )
+    task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
+    progress([task])
+    childtasks = task.child_tasks
+    if childtasks && childtasks.length > 0
+      progress(childtasks)
+    end
+  end
+end
+
+VIM::HostSystem
+class VIM::HostSystem
+  def filtered_disks_for_vsan opts = {}
+    vsan = opts[:vsanSystem] || self.configManager.vsanSystem
+    stateFilter = opts[:state_filter] || /^eligible$/
+    disks = vsan.QueryDisksForVsan()
+
+    disks = disks.select do |disk|
+      disk.state =~ stateFilter
+    end
+
+    if opts[:filter_ssd_by_model]
+      disks = disks.select do |disk|
+        model = [
+          disk.disk.vendor,
+          disk.disk.model
+        ].compact.map{|x| x.strip}.join(" ")
+        model_match = (model =~ opts[:filter_ssd_by_model])
+        !disk.disk.ssd || model_match
+      end
+    end
+
+    disks = disks.map{|x| x.disk}
+
+    disks
+  end
+
+  def consume_disks_for_vsan opts = {}
+    vsan = opts[:vsanSystem] || self.configManager.vsanSystem
+    disks = filtered_disks_for_vsan(opts.merge(
+      :state_filter => /^eligible$/,
+      :vsanSystem => vsan
+    ))
+    if disks.length > 0
+      vsan.AddDisks_Task(:disk => disks)
+    end
+  end
+end
+
+opts :host_consume_disks do
+  summary "Consumes all eligible disks on a host"
+  arg :host_or_cluster, nil, :lookup => [VIM::ComputeResource, VIM::HostSystem], :multi => true
+  opt :filter_ssd_by_model, "Regex to apply as ssd model filter", :type => :string
+end
+
+def host_consume_disks hosts_or_clusters, opts
+  conn = hosts_or_clusters.first._connection
+  hosts = []
+  hosts_or_clusters.each do |host_or_cluster|
+    if host_or_cluster.is_a?(VIM::HostSystem)
+      hosts << host_or_cluster
+    else
+      hosts += host_or_cluster.host
+    end
+  end
+  if opts[:filter_ssd_by_model]
+    opts[:filter_ssd_by_model] = /#{opts[:filter_ssd_by_model]}/
+  end
+  tasks = []
+  results = {}
+  _run_with_rev(conn, "dev") do
+    tasks = hosts.map do |host|
+      host.consume_disks_for_vsan(opts)
+    end.compact
+    if tasks.length > 0
+      results = progress(tasks)
+      pp results.values.flatten.map{|x| x.error}.compact
+    else
+      puts "No disks were consumed."
+    end
+    $claimResults = results
+  end
+  $disksCache = {}
+end
+
+opts :host_wipe_vsan_disks do
+  summary "Wipes content of all VSAN disks on a host"
+  arg :host, nil, :lookup => VIM::HostSystem, :multi => true
+  opt :force, "Apply force", :type => :boolean
+end
+
+def host_wipe_vsan_disks hosts, opts
+  conn = hosts.first._connection
+  tasks = []
+  _run_with_rev(conn, "dev") do
+    tasks = hosts.map do |host|
+      hostname = host.name
+      disks = host.filtered_disks_for_vsan(:state_filter => /^inUse$/)
+      if disks.length == 0
+        next
+      end
+      if !opts[:force]
+        # Don't actually wipe, but show a warning.
+        disks.each do |disk|
+          model = [
+            disk.vendor,
+            disk.model
+          ].compact.map{|x| x.strip}.join(" ")
+          puts "Would wipe disk #{disk.displayName} (#{model}, ssd = #{disk.ssd})"
+        end
+      end
+
+      if opts[:force]
+        #disks = disks.select{|x| x.ssd}
+        #host.configManager.vsanSystem.RemoveDisk_Task(:disk => disks)
+        # See PR 1077658
+        vsan = host.configManager.vsanSystem
+        vsan.RemoveDiskMapping_Task(:mapping => vsan.config.storageInfo.diskMapping)
+      end
+    end.compact
+    if tasks.length > 0
+      results = progress(tasks)
+      pp results.values.flatten.map{|x| x.error}.compact
+      $wipeResults = results
+    end
+  end
+  if !opts[:force]
+    puts ""
+    puts "NO ACTION WAS TAKEN. Use --force to actually wipe."
+    puts "CAUTION: Wiping disks means all user data will be destroyed!"
+  end
+  $disksCache = {}
+end
+
+opts :host_info do
+  summary "Print VSAN info about a host"
+  arg :host, nil, :lookup => VIM::HostSystem
+end
+
+def host_info host
+  conn = host._connection
+  _run_with_rev(conn, "dev") do
+    _host_info host
+  end
+end
+
+opts :cluster_info do
+  summary "Print VSAN info about a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+end
+
+def cluster_info cluster
+  conn = cluster._connection
+  pc = conn.propertyCollector
+
+  hosts = cluster.host
+
+  hosts_props = pc.collectMultiple(hosts, 'name', 'runtime.connectionState')
+  connected_hosts = hosts_props.select do |k,v|
+    v['runtime.connectionState'] == 'connected'
+  end.keys
+  hosts = connected_hosts
+
+  _run_with_rev(conn, "dev") do
+    hosts.each do |host|
+      begin
+        puts "Host: #{hosts_props[host]['name']}"
+        _host_info host, "  "
+      rescue Exception => ex
+        puts "#{Time.now}: Got exception: #{ex.class}: #{ex.message}"
+      end
+      puts ""
+    end
+  end
+end
+
+opts :disks_info do
+  summary "Print physical disk info about a host"
+  arg :host, nil, :lookup => VIM::HostSystem, :multi => true
+end
+
+def disks_info hosts
+  conn = hosts.first._connection
+  pc = conn.propertyCollector
+  _run_with_rev(conn, "dev") do
+    hosts.each do |host|
+      if hosts.length > 0
+        puts "Disks on host #{host.name}:"
+      end
+
+      dsList = host.datastore
+      dsListProps = pc.collectMultiple(dsList, 'summary', 'name', 'info')
+      vmfsDsList = dsListProps.select do |ds, props|
+        props['summary'].type == "VMFS"
+      end.keys
+
+      vsan = host.configManager.vsanSystem
+      disks = vsan.QueryDisksForVsan()
+      partitions = host.esxcli.storage.core.device.partition.list
+
+      t = Terminal::Table.new()
+      t << ['DisplayName', 'isSSD', 'Size', 'State']
+      needSep = true
+      disks.each do |disk|
+        capacity = disk.disk.capacity
+        size = capacity.block * capacity.blockSize
+        sizeStr = "#{size / 1024**3} GB"
+        state = disk.state
+        # if needSep
+        t.add_separator
+        needSep = false
+        # end
+        if state != 'eligible' && disk.error
+          state += " (#{disk.error.localizedMessage})"
+          if disk.error.fault.is_a?(VIM::DiskHasPartitions)
+            state += "\n"
+            state += "\n"
+            state += "Partition table:\n"
+
+            partitions.select do |x|
+              x.Device == disk.disk.canonicalName && x.Type != 0
+            end.each do |x|
+              partSize = x.Size.to_f / 1024**3
+              types = {
+                0xfb => 'vmfs',
+                0xfc => 'coredump',
+                0xfa => 'vsan',
+                0x0 => 'unused',
+                0x6 => 'vfat',
+              }
+              type = types[x.Type] || x.Type
+              state += "#{x.Partition}: %.2f GB, type = #{type}" % partSize
+
+              if type == "vmfs"
+                vmfsStr = vmfsDsList.select do |vmfsDs|
+                  props = dsListProps[vmfsDs]
+                  props['info'].vmfs.extent.any? do |ext|
+                    ext.diskName == x.Device && x.Partition == ext.partition
+                  end
+                end.map do |vmfsDs|
+                  "'#{dsListProps[vmfsDs]['name']}'"
+                end.join(", ")
+                if vmfsStr
+                  state += " (#{vmfsStr})"
+                end
+              end
+
+              state += "\n"
+            end
+            needSep = true
+          end
+        end
+        t << [
+          [
+            disk.disk.displayName,
+            [
+              disk.disk.vendor,
+              disk.disk.model
+            ].compact.map{|x| x.strip}.join(" ")
+          ].join("\n"),
+          disk.disk.ssd ? "SSD" : "MD",
+          sizeStr,
+          state
+        ]
+      end
+      puts t
+      if hosts.length > 0
+        puts ""
+      end
+    end
+  end
+end
+
+def _host_info host, prefix = ''
+  configManager = host.configManager
+  netSys = configManager.networkSystem
+  vsan = configManager.vsanSystem
+  config = vsan.config
+  enabled = config.enabled
+  line = lambda{|x| puts "#{prefix}#{x}" }
+  line.call "VSAN enabled: %s" % (enabled ? "yes" : "no")
+  if !enabled
+    return
+  end
+  status = vsan.QueryHostStatus()
+  line.call "Cluster info:"
+  line.call "  Cluster role: #{status.nodeState.state}"
+  line.call "  Cluster UUID: #{config.clusterInfo.uuid}"
+  line.call "  Node UUID: #{config.clusterInfo.nodeUuid}"
+  line.call "  Member UUIDs: #{status.memberUuid} (#{status.memberUuid.length})"
+  line.call "Storage info:"
+  line.call "  Auto claim: %s" % (config.storageInfo.autoClaimStorage ? "yes" : "no")
+  line.call "  Disk Mappings:"
+  if config.storageInfo.diskMapping.length == 0
+    line.call "    None"
+  end
+  config.storageInfo.diskMapping.each do |mapping|
+    capacity = mapping.ssd.capacity
+    size = capacity.block * capacity.blockSize
+    line.call "    SSD: #{mapping.ssd.displayName} - #{size / 1024**3} GB"
+    mapping.nonSsd.map do |md|
+      capacity = md.capacity
+      size = capacity.block * capacity.blockSize
+      line.call "    MD: #{md.displayName} - #{size / 1024**3} GB"
+    end
+  end
+  line.call "NetworkInfo:"
+  if config.networkInfo.port.length == 0
+    line.call "  Not configured"
+  end
+  vmknics, = netSys.collect 'networkConfig.vnic'
+  config.networkInfo.port.each do |port|
+    dev = port.device
+    vmknic = vmknics.find{|x| x.device == dev}
+    ip = "IP unknown"
+    if vmknic
+      ip = vmknic.spec.ip.ipAddress
+    end
+    line.call "  Adapter: #{dev} (#{ip})"
+  end
+end
+
+def _run_with_rev conn, rev
+  old_rev = conn.rev
+  begin
+    conn.rev = rev
+    yield
+  ensure
+    conn.rev = old_rev
+  end
+end
+
+
+opts :cluster_set_default_policy do
+  summary "Set default policy on a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+  arg :policy, nil, :type => :string
+end
+
+def cluster_set_default_policy cluster, policy
+  hosts = cluster.host
+  conn = cluster._connection
+  pc = conn.propertyCollector
+  _run_with_rev(conn, "dev") do
+    vsan, = hosts.first.collect 'configManager.vsanSystem'
+    cluster_uuid, = vsan.collect 'config.clusterInfo.uuid'
+
+    hosts.each do |host|
+      policy_node = host.esxcli.vsan.policy
+      ['cluster', 'vdisk', 'vmnamespace', 'vmswap'].each do |policy_class|
+        policy_node.setdefault(
+          :clusteruuid => cluster_uuid,
+          :policy => policy,
+          :policyclass => policy_class,
+        )
+      end
+    end
+  end
+end
+
+def _components_in_dom_config dom_config
+  out = []
+  if ['Component', 'Witness'].member?(dom_config['type'])
+    out << dom_config
+  else
+    dom_config.select{|k,v| k =~ /child-\d+/}.each do |k, v|
+      out += _components_in_dom_config v
+    end
+  end
+  out
+end
+
+def _normalize_uuid uuid
+  uuid = uuid.gsub("-", "")
+  uuid = "%s-%s-%s-%s-%s" % [
+    uuid[0..7], uuid[8..11], uuid[12..15],
+    uuid[16..19], uuid[20..31]
+  ]
+  uuid
+end
+
+def _print_dom_config_tree_int dom_config, dom_components_str, indent = 0
+  pre = "  " * indent
+  type = dom_config['type']
+  children = dom_config.select{|k,v| k =~ /child-\d+/}.values
+  if ['RAID_0', 'RAID_1', 'Concatenation'].member?(type)
+    puts "#{pre}#{type}"
+    children.each do |child|
+      _print_dom_config_tree_int child, dom_components_str, indent + 1
+    end
+  elsif ['Configuration'].member?(type)
+    # puts "#{pre}#{type}"
+    children.each do |child|
+      _print_dom_config_tree_int child, dom_components_str, indent + 1
+    end
+  elsif ['Witness', 'Component'].member?(type)
+    comp_uuid = dom_config['componentUuid']
+    info = dom_components_str[comp_uuid]
+    line = "#{pre}#{type}: #{info[0]}"
+    if info[2].length > 0
+      puts "#{line} (#{info[1]},"
+      puts "#{' ' * line.length} #{info[2]})"
+    else
+      puts "#{line} (#{info[1]})"
+    end
+  end
+end
+
+def _print_dom_config_tree dom_obj_uuid, obj_infos, indent = 0, opts = {}
+  pre = "  " * indent
+  dom_obj_infos = obj_infos['dom_objects'][dom_obj_uuid]
+  if !dom_obj_infos
+    puts "#{pre}Couldn't find info about DOM object '#{dom_obj_uuid}'"
+    return
+  end
+  dom_obj = dom_obj_infos['config']
+  policy = dom_obj_infos['policy']
+
+  dom_components = _components_in_dom_config(dom_obj['content'])
+  csn = nil
+  begin
+    csn = dom_obj['content']['attributes']['CSN']
+  rescue
+  end
+
+  dom_components_str = Hash[dom_components.map do |dom_comp|
+    attr = dom_comp['attributes']
+    state = attr['componentState']
+    comp_uuid = dom_comp['componentUuid']
+    state_names = {
+      '0' => 'FIRST',
+      '1' => 'NONE',
+      '2' => 'NEED_CONFIG',
+      '3' => 'INITIALIZE',
+      '4' => 'INITIALIZED',
+      '5' => 'ACTIVE',
+      '6' => 'ABSENT',
+      '7' => 'STALE',
+      '8' => 'RESYNCHING',
+      '9' => 'DEGRADED',
+      '10' => 'RECONFIGURING',
+      '11' => 'CLEANUP',
+      '12' => 'TRANSIENT',
+      '13' => 'LAST',
+    }
+    state_name = state.to_s
+    if state_names[state.to_s]
+      state_name = "#{state_names[state.to_s]} (#{state})"
+    end
+    props = {
+      'state' => state_name,
+    }
+
+    if state.to_s.to_i == 6 && attr['staleCsn']
+      if attr['staleCsn'] != csn
+        props['csn'] = "STALE (#{attr['staleCsn']}!=#{csn})"
+      end
+    end
+
+    comp_policy = {}
+    ['readOPS', 'writeOPS'].select{|x| attr[x]}.each do |x|
+      comp_policy[x] = attr[x]
+    end
+    if attr['readCacheReservation'] && attr['readCacheHitRate']
+      comp_policy['rc size/hitrate'] = "%.2fGB/%d%%" % [
+        attr['readCacheReservation'].to_f / 1024**3,
+        attr['readCacheHitRate'],
+      ]
+    end
+    if attr['bytesToSync']
+      comp_policy['dataToSync'] = "%.2f GB" % [
+        attr['bytesToSync'].to_f / 1024**3
+      ]
+    end
+
+    lsom_object = obj_infos['lsom_objects'][comp_uuid]
+    if lsom_object
+      host = obj_infos['host_vsan_uuids'][lsom_object['owner']]
+      if host
+        hostName = obj_infos['host_props'][host]['name']
+      else
+        hostName = "unknown"
+      end
+      md_uuid = dom_comp['diskUuid']
+      md = obj_infos['vsan_disk_uuids'][md_uuid]
+      ssd_uuid = obj_infos['disk_objects'][md_uuid]['content']['ssdUuid']
+      #pp ssd_uuid
+      ssd = obj_infos['vsan_disk_uuids'][ssd_uuid]
+      #pp ssd
+      props.merge!({
+        'host' => hostName,
+        'md' => md ? md.DisplayName : "unknown",
+        'ssd' => ssd ? ssd.DisplayName : "unknown",
+      })
+      if opts[:highlight_disk] && md_uuid == opts[:highlight_disk]
+        props['md'] = "**#{props['md']}**"
+      elsif opts[:highlight_disk] && ssd_uuid == opts[:highlight_disk]
+        props['ssd'] = "**#{props['ssd']}**"
+      end
+    else
+      props.merge!({
+        'host' => "LSOM object not found"
+      })
+    end
+    propsStr = props.map{|k,v| "#{k}: #{v}"}.join(", ")
+    comp_policy_str = comp_policy.map{|k,v| "#{k}: #{v}"}.join(", ")
+    [comp_uuid, [comp_uuid, propsStr, comp_policy_str]]
+  end]
+
+  if policy
+    policy = policy.map{|k,v| "#{k} = #{v}"}.join(", ")
+  else
+    policy = "No POLICY entry found in CMMDS"
+  end
+  owner = obj_infos['host_vsan_uuids'][dom_obj['owner']]
+  if owner
+    owner = obj_infos['host_props'][owner]['name']
+  else
+    owner = "unknown"
+  end
+
+  puts "#{pre}DOM Object: #{dom_obj['uuid']} (owner: #{owner}, policy: #{policy})"
+  if opts[:context]
+    puts "#{pre}  Context: #{opts[:context]}"
+  end
+  _print_dom_config_tree_int dom_obj['content'], dom_components_str, indent
+end
+
+# hosts is a hash: host => hostname
+def _vsan_host_disks_info hosts
+  hosts.each do |k,v|
+    if !v
+      hosts[k] = k.name
+    end
+  end
+
+  conn = hosts.keys.first._connection
+  vsanDiskUuids = {}
+  $disksCache ||= {}
+  if !hosts.keys.all?{|x| $disksCache[x]}
+    lock = Mutex.new
+    hosts.map do |host, hostname|
+      Thread.new do
+        if !$disksCache[host]
+          c1 = conn.spawn_additional_connection
+          host2 = host.dup_on_conn(c1)
+          $disksCache[host] = []
+          lock.synchronize do
+            puts "#{Time.now}: Fetching VSAN disk info from #{hostname} (may take a moment) ..."
+          end
+          begin
+            timeout(45) do
+              list = host2.esxcli.vsan.storage.list
+              list.each{|x| x._set_property :host, host}
+              $disksCache[host] = list
+            end
+          rescue Exception => ex
+            lock.synchronize do
+              puts "#{Time.now}: Failed to gather from #{hostname}: #{ex.class}: #{ex.message}"
+            end
+          end
+        end
+      end
+    end.each{|t| t.join}
+    puts "#{Time.now}: Done fetching VSAN disk infos"
+  end
+
+  hosts.map do |host, hostname|
+    disks = $disksCache[host]
+    disks.each do |disk|
+      vsanDiskUuids[disk.VSANUUID] = disk
+    end
+  end
+
+  vsanDiskUuids
+end
+
+def _vsan_cluster_disks_info cluster, opts = {}
+  pc = cluster._connection.propertyCollector
+  if cluster.is_a?(VIM::HostSystem)
+    hosts = [cluster]
+  else
+    hosts = cluster.host
+  end
+  if opts[:hosts_props]
+    hosts_props = opts[:hosts_props]
+  else
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem',
+    )
+  end
+  hosts_props = hosts_props.select do |k,v|
+    v['runtime.connectionState'] == 'connected'
+  end
+  vsan_systems = hosts_props.map{|h,p| p['configManager.vsanSystem']}
+  vsan_props = pc.collectMultiple(vsan_systems, 'config.clusterInfo')
+  host_vsan_uuids = Hash[hosts_props.map do |host, props|
+    vsan_system = props['configManager.vsanSystem']
+    vsan_info = vsan_props[vsan_system]['config.clusterInfo']
+    [vsan_info.nodeUuid, host]
+  end]
+  vsan_disk_uuids = {}
+  vsan_disk_uuids.merge!(
+    _vsan_host_disks_info(Hash[hosts_props.map{|h, p| [h, p['name']]}])
+  )
+
+  [host_vsan_uuids, hosts_props, vsan_disk_uuids]
+end
+
+opts :object_info do
+  summary "Fetch information about a VSAN object"
+  arg :cluster, "Cluster on which to fetch the object info", :lookup => [VIM::HostSystem, VIM::ClusterComputeResource]
+  arg :obj_uuid, nil, :type => :string, :multi => true
+end
+
+def object_info cluster, obj_uuids, opts = {}
+  opts[:cluster] = cluster
+  objs = _object_info obj_uuids, opts
+  indent = 0
+  obj_uuids.each do |obj_uuid|
+    _print_dom_config_tree(obj_uuid, objs, indent)
+    puts ""
+  end
+end
+
+opts :disk_object_info do
+  summary "Fetch information about all VSAN objects on a given physical disk"
+  arg :cluster_or_host, "Cluster or host on which to fetch the object info", :lookup => VIM::ClusterComputeResource
+  arg :disk_uuid, nil, :type => :string, :multi => true
+end
+
+def disk_object_info cluster_or_host, disk_uuids, opts = {}
+  conn = cluster_or_host._connection
+  pc = conn.propertyCollector
+
+  if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+    cluster = cluster_or_host
+    hosts = cluster.host
+  else
+    hosts = [cluster_or_host]
+  end
+
+  _run_with_rev(conn, "dev") do
+    # XXX: This doesn't yet work when no cluster is given
+    host_vsan_uuids, hosts_props, vsan_disk_uuids = _vsan_cluster_disks_info(cluster)
+
+    input_disk_uuids = []
+    m_disk_uuids = []
+    disk_uuids.each do |disk_uuid|
+      disk = vsan_disk_uuids.find {|k,v| v.DisplayName == disk_uuid}
+      if disk
+        input_disk_uuids << disk
+        if disk[1].IsSSD
+          disks = vsan_disk_uuids.find_all do |k,v|
+            v.VSANDiskGroupName == disk_uuid unless v.IsSSD
+          end
+          m_disk_uuids += disks
+        else
+          m_disk_uuids << disk
+        end
+      else
+        input_disk_uuids << [disk_uuid]
+        m_disk_uuids << [disk_uuid]
+      end
+    end
+    input_disk_uuids.map! {|x| x[0]}
+    m_disk_uuids.map! {|x| x[0]}
+
+    connected_hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    hosts = connected_hosts
+
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    dslist = hosts.first.datastore
+    dslist_props = pc.collectMultiple(dslist, 'name', 'summary.type')
+    vsandslist = dslist_props.select{|k, v| v['summary.type'] == 'vsan'}.keys
+    vsands = vsandslist.first
+    if !vsands
+      err "Couldn't find VSAN datastore"
+    end
+    vms = vsands.vm
+    vms_props = pc.collectMultiple(vms,
+      'name', 'config.hardware.device',
+      'summary.config'
+    )
+    objects = {}
+    vms.each do |vm|
+      disks = vms_props[vm]['disks'] =
+        vms_props[vm]['config.hardware.device'].select{|x| x.is_a?(VIM::VirtualDisk)}
+      namespaceUuid = vms_props[vm]['namespaceUuid'] =
+        vms_props[vm]['summary.config'].vmPathName.split("] ")[1].split("/")[0]
+
+      objects[namespaceUuid] = [vm, :namespace]
+      disks.each do |disk|
+        backing = disk.backing
+        while backing
+          objects[backing.backingObjectId] = [vm, backing.fileName]
+          backing = backing.parent
+        end
+      end
+    end
+
+    vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
+    json = vsanIntSys.QueryObjectsOnPhysicalVsanDisk(:disks => m_disk_uuids)
+    if json == "BAD"
+      err "Server rejected VSAN object-on-disk query"
+    end
+    result = nil
+    begin
+      result = JSON.load(json)
+    rescue
+      err "Server failed to query VSAN objects-on-disk: #{json}"
+    end
+
+    result.merge!({
+      'host_vsan_uuids' => host_vsan_uuids,
+      'host_props' => hosts_props,
+      'vsan_disk_uuids' => vsan_disk_uuids,
+    })
+
+    input_disk_uuids.each do |disk_uuid|
+      dom_obj_uuids = []
+      disk_info = vsan_disk_uuids[disk_uuid]
+      if disk_info
+        name = "#{disk_info.DisplayName} (#{disk_uuid})"
+        if disk_info.IsSSD
+          m_disks = vsan_disk_uuids.find_all do
+            |k, v| v.VSANDiskGroupUUID == disk_uuid unless v.IsSSD
+          end
+          m_disks ? m_disks.map!{|x| x[0]} : disk_uuid
+          m_disks.each {|m_disk| dom_obj_uuids += result['objects_on_disks'][m_disk]}
+        else
+          dom_obj_uuids = result['objects_on_disks'][disk_uuid]
+        end
+      else
+        name = disk_uuid
+      end
+      puts "Physical disk #{name}:"
+      indent = 1
+      dom_obj_uuids.each do |obj_uuid|
+        object = objects[obj_uuid]
+        if object && object[1] == :namespace
+          vm_name = vms_props[object[0]]['name']
+          context = "Part of VM #{vm_name}: Namespace directory"
+        elsif object
+          vm_name = vms_props[object[0]]['name']
+          context = "Part of VM #{vm_name}: Disk: #{object[1]}"
+        else
+          context = "Can't attribute object to any VM, may be swap?"
+        end
+        _print_dom_config_tree(
+          obj_uuid, result, indent,
+          :highlight_disk => disk_uuid,
+          :context => context
+        )
+      end
+      puts ""
+    end
+  end
+end
+
+
+opts :cmmds_find do
+  summary "CMMDS Find"
+  arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+  opt :type, "CMMDS type, e.g. DOM_OBJECT, LSOM_OBJECT, POLICY, DISK etc.", :type => :string, :short => 't'
+  opt :uuid, "UUID of the entry.", :type => :string, :short => 'u'
+  opt :owner, "UUID of the owning node.", :type => :string, :short => 'o'
+end
+
+def cmmds_find cluster_or_host, opts
+  conn = cluster_or_host._connection
+  pc = conn.propertyCollector
+  host = cluster_or_host
+  entries = []
+  hostUuidMap = {}
+  _run_with_rev(conn, "dev") do
+    vsanIntSys = nil
+    if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+      cluster = cluster_or_host
+      hosts = cluster.host
+    else
+      hosts = [host]
+    end
+
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+    connected_hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    host = connected_hosts.first
+    if !host
+      err "Couldn't find any connected hosts"
+    end
+    vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+    vsanSysList = Hash[hosts_props.map do |host, props|
+      [props['name'], props['configManager.vsanSystem']]
+    end]
+    clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                      'config.clusterInfo')
+    hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+      [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+    end]
+    entries = vsanIntSys.query_cmmds([{
+      :owner => opts[:owner],
+      :uuid => opts[:uuid],
+      :type => opts[:type],
+    }], :gzip => true)
+  end
+
+  t = Terminal::Table.new()
+  t << ['#', 'Type', 'UUID', 'Owner', 'Health', 'Content']
+  t.add_separator
+  entries.each_with_index do |entry, i|
+    t << [
+      i + 1,
+      entry['type'],
+      entry['uuid'],
+      hostUuidMap[entry['owner']] || entry['owner'],
+      entry['health'],
+      PP.pp(entry['content'], ''),
+    ]
+  end
+
+  puts t
+end
+
+def _get_vm_obj_uuids vm, vmsProps
+  obj_uuids = {}
+  disks = vmsProps[vm]['disks'] =
+    vmsProps[vm]['config.hardware.device'].select{|x| x.is_a?(VIM::VirtualDisk)}
+  pathName = vmsProps[vm]['summary.config'].vmPathName
+  namespaceUuid = vmsProps[vm]['namespaceUuid'] =
+    pathName.split("] ")[1].split("/")[0]
+  obj_uuids[namespaceUuid] = pathName
+  disks.each do |disk|
+    backing = disk.backing
+    while backing
+      obj_uuids[backing.backingObjectId] = backing.fileName
+      backing = backing.parent
+    end
+  end
+  obj_uuids
+end
+
+def convert_uuids uuids
+  nUuids = {}
+  uuids.each do |uuid|
+    begin
+      oUuid = uuid.split(' ').join()
+      nUuids[oUuid[0..7] + '-' + oUuid[8..11] + '-' +
+             oUuid[12..20] + '-' + oUuid[21..-1]] = true
+    rescue Exception => ex
+      puts "Ignoring malformed uuid #{uuid}: #{ex.class}: #{ex.message}"
+    end
+  end
+
+  return nUuids
+end
+
+# It is possible for the management stack (hostd and vc) to lose the handle of
+# a VM which is powered on (has a running vmx instance). No further operations
+# can be performed on the VM because the running vmx holds locks on the VM.
+# This API is intended to find such VMs. We look for VMs whose power state
+# is not poweredOn (poweredOff, unknown, etc) for which there is a running vmx
+# instance on any host in the cluster.
+
+def find_inconsistent_vms cluster_or_host
+  if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+    hosts = cluster_or_host.host
+  else
+    hosts = [cluster_or_host]
+  end
+
+  # Find all non-poweredon vms.
+  conn = hosts.first._connection
+  pc = conn.propertyCollector
+  vms = pc.collectMultiple(hosts, 'vm').values.map{|x| x['vm']}.flatten
+  vmProps = pc.collectMultiple(vms, 'name', 'runtime.powerState',
+                               'summary.config.uuid')
+  notOnVMs = vmProps.select{|vm, p| p['runtime.powerState'] !=
+                                    'poweredOn'}.keys
+
+  # Get list of all running vms on all hosts in parallel.
+  threads = []
+  processList = {}
+  hosts.each do |host|
+    threads << Thread.new do
+      begin
+        processList[host] = host.esxcli.vm.process.list
+      rescue Exception => ex
+        puts "Error getting vm process list on #{host.name}: " \
+             "#{ex.class}: #{ex.message}"
+      end
+    end
+  end
+  threads.each{|t| t.join}
+  uuids = convert_uuids(processList.values.flatten.map{|x| x.UUID})
+
+  inconsistentVMs = notOnVMs.select{|vm|
+    uuids.has_key?(vmProps[vm]['summary.config.uuid'])}
+  if not inconsistentVMs.empty?
+    puts "Found VMs for which VC/hostd/vmx are out of sync:"
+    inconsistentVMs.each do |vm|
+      puts "#{vmProps[vm]['name']}"
+    end
+  else
+    puts "Did not find VMs for which VC/hostd/vmx are out of sync"
+  end
+
+  return inconsistentVMs
+end
+
+def fix_inconsistent_vms vms
+  begin
+    tasks = []
+    vms.each do |vm|
+      begin
+        path = vm.summary.config.vmPathName
+        rp = vm.resourcePool
+        folder = vm.parent
+        name = vm.name
+        host = vm.summary.runtime.host
+        puts("Unregistering VM #{name}")
+        vm.UnregisterVM()
+        puts("Registering VM #{name}")
+        tasks << folder.RegisterVM_Task(:path => path,
+                                        :name => name,
+                                        :asTemplate => false,
+                                        :pool => rp,
+                                        :host => host)
+      rescue Exception => ex
+        puts "Skipping VM #{name} due to exception: " \
+             "#{ex.class}: #{ex.message}"
+      end
+    end
+    progress(tasks)
+  end
+end
+
+opts :fix_renamed_vms do
+  summary "This command can be used to rename some VMs which get renamed " \
+          "by the VC in case of storage inaccessibility. It is " \
+          "possible for some VMs to get renamed to vmx file path. " \
+          "eg. \"/vmfs/volumes/vsanDatastore/foo/foo.vmx\". This command " \
+          "will rename this VM to \"foo\". This is the best we can do. " \
+          "This VM may have been named something else but we have no way " \
+          "to know. In this best effort command, we simply rename it to " \
+          "the name of its config file (without the full path and .vmx " \
+          "extension, of course!)."
+  arg :vms, nil, :lookup => VIM::VirtualMachine, :multi => true
+end
+
+def fix_renamed_vms vms
+  begin
+    conn = vms.first._connection
+    pc = conn.propertyCollector
+    vmProps = pc.collectMultiple(vms, 'name', 'summary.config.vmPathName')
+
+    rename = {}
+    puts "Continuing this command will rename the following VMs:"
+    begin
+      vmProps.each do |k,v|
+        name = v['name']
+        cfgPath = v['summary.config.vmPathName']
+        if /.*vmfs.*volumes.*/.match(name)
+          m = /.+\/(.+)\.vmx/.match(cfgPath)
+          if name != m[1]
+            # Save it in a hash so we don't have to do it again if
+            # the user chooses Y.
+            rename[k] = m[1]
+            puts "#{name} -> #{m[1]}"
+          end
+        end
+      end
+    rescue Exception => ex
+      # Swallow the exception. No need to stop other vms.
+      puts "Skipping VM due to exception: #{ex.class}: #{ex.message}"
+    end
+
+    if rename.length == 0
+      puts "Nothing to do"
+      return
+    end
+
+    puts "Do you want to continue [y/N]?"
+    opt = $stdin.gets.chomp
+    if opt == 'y' || opt == 'Y'
+      puts "Renaming..."
+      tasks = rename.keys.map do |vm|
+        vm.Rename_Task(:newName => rename[vm])
+      end
+      progress(tasks)
+    end
+  end
+end
+
+opts :vm_object_info do
+  summary "Fetch VSAN object information about a VM"
+  arg :vms, nil, :lookup => VIM::VirtualMachine, :multi => true
+  opt :cluster, "Cluster on which to fetch the object info", :lookup => VIM::ClusterComputeResource
+  opt :perspective_from_host, "Host to query object info from", :lookup => VIM::HostSystem
+end
+
+def vm_object_info vms, opts
+  begin
+    conn = vms.first._connection
+    pc = conn.propertyCollector
+    firstVm = vms.first
+    host = firstVm.runtime.host
+    if !host
+      err "VM #{firstVm.name} doesn't have an assigned host (yet?)"
+    end
+    opts[:cluster] ||= host.parent
+    _run_with_rev(conn, "dev") do
+      vmsProps = pc.collectMultiple(vms,
+        'name', 'config.hardware.device', 'summary.config',
+        'runtime.host',
+      )
+      obj_uuids = []
+      objToHostMap = {}
+      vms.each do |vm|
+        vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps).keys
+        vm_obj_uuids.each{|x| objToHostMap[x] = vmsProps[vm]['runtime.host']}
+        obj_uuids += vm_obj_uuids
+      end
+      opts[:objToHostMap] = objToHostMap
+
+      objs = _object_info(obj_uuids, opts)
+      hosts_props = objs['host_props']
+
+      vms.each do |vm|
+        vmProps = vmsProps[vm]
+        disks = vmProps['disks']
+        puts "VM #{vmProps['name']}:"
+        if objs['has_partitions']
+          vmHost = vmProps['runtime.host']
+          puts "  VM registered on host: #{hosts_props[vmHost]['name']}"
+        end
+
+        indent = 1
+        pre = "  " * indent
+        puts "#{pre}Namespace directory"
+        obj_uuid = vmsProps[vm]['namespaceUuid']
+        if objs['has_partitions'] && objs['obj_uuid_from_host'][obj_uuid]
+          objHost = objs['obj_uuid_from_host'][obj_uuid]
+          puts "#{pre}  Shown from perspective of host #{hosts_props[objHost]['name']}"
+        end
+        _print_dom_config_tree(obj_uuid, objs, indent + 1)
+
+        disks.each do |disk|
+          indent = 1
+          backing = disk.backing
+          while backing
+            pre = "  " * indent
+            puts "#{pre}Disk backing: #{backing.fileName}"
+            obj_uuid = backing.backingObjectId
+            if objs['has_partitions'] && objs['obj_uuid_from_host'][obj_uuid]
+              objHost = objs['obj_uuid_from_host'][obj_uuid]
+              puts "#{pre}  Shown from perspective of host #{hosts_props[objHost]['name']}"
+            end
+            _print_dom_config_tree(obj_uuid, objs, indent + 1)
+
+            backing = backing.parent
+            indent += 1
+          end
+        end
+      end
+    end
+  rescue Exception => ex
+    puts ex.message
+    puts ex.backtrace
+    raise
+  end
+end
+
+def _object_info obj_uuids, opts
+  if !opts[:cluster]
+    err "Must specify a VSAN Cluster"
+  end
+  host = opts[:host]
+  if opts[:cluster].is_a?(VIM::HostSystem)
+    host = opts[:cluster]
+  end
+  # XXX: Verify VSAN is enabled on the cluster
+  if host
+    hosts = [host]
+    conn = host._connection
+  else
+    hosts = opts[:cluster].host
+    conn = opts[:cluster]._connection
+  end
+
+  _run_with_rev(conn, "dev") do
+    pc = conn.propertyCollector
+
+    hosts_props = pc.collectMultiple(hosts,
+      'name', 'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+    connected_hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    hosts = connected_hosts
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    if opts[:perspective_from_host]
+      if !connected_hosts.member?(opts[:perspective_from_host])
+        err "Perspective-Host not connected, or not in considered group of hosts"
+      end
+    end
+
+    # Detect partitions:
+    # We need to ask every host which other hosts it believes to share a
+    # VSAN cluster (partition) with. This is a call down to ESX, so we spawn
+    # one connection and one thread per host to parallelize. We detect
+    # partitions by grouping VMs based on quoting the same cluster members.
+    hosts_props.map do |host, props|
+      if !connected_hosts.member?(host)
+        next
+      end
+      Thread.new do
+        begin
+          vsanSys = props['configManager.vsanSystem']
+          c1 = conn.spawn_additional_connection
+          vsanSys = vsanSys.dup_on_conn(c1)
+          res = vsanSys.QueryHostStatus()
+          hosts_props[host]['vsanCluster'] = res
+        rescue Exception => ex
+          puts "Failed to gather host status from #{props['name']}: #{ex.class}: #{ex.message}"
+        end
+      end
+    end.compact.each{|t| t.join}
+
+    partitions = hosts_props.select do |h, p|
+      connected_hosts.member?(h)
+    end.group_by{|h, p| p['vsanCluster'].memberUuid}
+    partition_exists = (partitions.length > 1)
+    if partition_exists
+      puts "#{Time.now}: WARNING: VSAN Cluster network partition detected."
+      puts "#{Time.now}: The individual partitions of the cluster will have "
+      puts "#{Time.now}: different views on object/component availability. An "
+      puts "#{Time.now}: attempt is made to show VM object accessibility from the "
+      puts "#{Time.now}: perspective of the host on which a VM is registered. "
+      puts "#{Time.now}: Please fix the network partition as soon as possible "
+      puts "#{Time.now}: as it will seriously impact the availability of your "
+      puts "#{Time.now}: VMs in your VSAN cluster. Check vsan.cluster_info for"
+      puts "#{Time.now}: more details."
+      puts "#{Time.now}: "
+      puts "#{Time.now}: The following partitions were detected:"
+      i = 1
+      partitions.values.map do |part|
+        part_hosts = part.map{|x| hosts_props[x[0]]}.compact.map{|x| x['name']}
+        puts "#{Time.now}: #{i}) #{part_hosts.join(", ")}"
+        i += 1
+      end
+      puts ""
+      if opts[:perspective_from_host]
+        name = hosts_props[opts[:perspective_from_host]]['name']
+        puts "Showing data from perspective of host #{name} as requested"
+        puts ""
+      end
+    end
+
+    host_vsan_uuids, host_props, vsan_disk_uuids = _vsan_cluster_disks_info(
+      opts[:cluster],
+      :hosts_props => hosts_props
+    )
+    extra_info = {
+      'host_vsan_uuids' => host_vsan_uuids,
+      'host_props' => host_props,
+      'vsan_disk_uuids' => vsan_disk_uuids,
+    }
+
+    obj_uuids = obj_uuids.compact.map{|x| _normalize_uuid(x)}
+    obj_uuids = obj_uuids.select{|x| is_uuid(x)}
+
+    objs = {'obj_uuid_from_host' => {}}
+    objs['has_partitions'] = partition_exists
+
+    # Dealing with partitions:
+    # In the non-partitioned case we can just select any host and ask it
+    # for the object info, given that CMMDS is (eventual) consistent
+    # across the cluster. But during a network partition it is most logical
+    # to ask the host on which a VM is registered about what it thinks about
+    # the objects in question. So in case of a network partition we fall
+    # back to a slower code path that asks each host individually about
+    # the objects it (hopefully) knows best about.
+    # Note: Upon power on DRS will pick a host to power the VM on. That other
+    # host may not be in the same partition and DRS doesn't know about it,
+    # so although we tried to show the object from the "right" hosts perspective
+    # it may still not be the right host when debugging a power on failure.
+    if opts[:objToHostMap] && partition_exists && !opts[:perspective_from_host]
+      obj_uuids_groups = obj_uuids.group_by{|x| opts[:objToHostMap][x]}
+      obj_uuids_groups.each do |host, group|
+        vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+        group_objs = vsanIntSys.query_vsan_objects(:uuids => group)
+
+        # Here we are merging and overriding potentially conflicting
+        # information about LSOM_OBJECT and DISK entries. No smarts are
+        # applied, as I am not aware of issues arising from those
+        # possible inconsistencies.
+        group_objs.each do |k,v|
+          objs[k] ||= {}
+          objs[k].merge!(v)
+        end
+        group.each do |uuid|
+          objs['obj_uuid_from_host'][uuid] = host
+        end
+      end
+    else
+      if opts[:perspective_from_host]
+        host = opts[:perspective_from_host]
+      else
+        host = hosts.first
+      end
+      vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+      objs = vsanIntSys.query_vsan_objects(:uuids => obj_uuids)
+    end
+
+    objs.merge!(extra_info)
+    objs
+  end
+end
+
+
+def _fetch_disk_stats obj, metrics, instances, opts = {}
+  conn = obj._connection
+  pm = conn.serviceContent.perfManager
+
+  metrics.each do |x|
+    err "no such metric #{x}" unless pm.perfcounter_hash.member? x
+  end
+
+  interval = pm.provider_summary(obj).refreshRate
+  start_time = nil
+  if interval == -1
+    # Object does not support real time stats
+    interval = 300
+    start_time = Time.now - 300 * 5
+  end
+  stat_opts = {
+    :interval => interval,
+    :startTime => start_time,
+    :instance => instances,
+    :multi_instance => true,
+  }
+  stat_opts[:max_samples] = opts[:samples] if opts[:samples]
+  res = pm.retrieve_stats [obj], metrics, stat_opts
+
+  out = {}
+  if res && res[obj]
+    res[obj][:metrics].each do |key, values|
+      metric, device = key
+      out[device] ||= {}
+      out[device][metric] = values
+    end
+  end
+  out
+end
+
+opts :disks_stats do
+  summary "Show stats on all disks in VSAN"
+  arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
+  opt :compute_number_of_components, "Deprecated", :type => :boolean
+  opt :show_iops, "Show deprecated fields", :type => :boolean
+end
+
+def disks_stats hosts_and_clusters, opts = {}
+  opts[:compute_number_of_components] = true
+  conn = hosts_and_clusters.first._connection
+  hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
+  clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
+  pc = conn.propertyCollector
+  cluster_hosts = pc.collectMultiple(clusters, 'host')
+  cluster_hosts.each do |cluster, props|
+    hosts += props['host']
+  end
+  hosts = hosts.uniq
+  _run_with_rev(conn, "dev") do
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+
+    hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
+    node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
+    node_uuids = Hash[node_uuids.map do |k, v|
+      [v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
+    end]
+
+    lock = Mutex.new
+    disks = {}
+    vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
+    disks = vsanIntSys.QueryPhysicalVsanDisks(:props => [
+      'lsom_objects_count',
+      'uuid',
+      'isSsd',
+      'capacity',
+      'capacityUsed',
+      'capacityReserved',
+      'iops',
+      'iopsReserved',
+      'disk_health',
+    ])
+    if disks == "BAD"
+      err "Server failed to gather VSAN disk info"
+    end
+    begin
+      disks = JSON.load(disks)
+    rescue
+      err "Server didn't provide VSAN disk info: #{disks}"
+    end
+    #pp disks
+
+    vsan_disks_info = {}
+    vsan_disks_info.merge!(
+      _vsan_host_disks_info(Hash[hosts.map{|h| [h, hosts_props[h]['name']]}])
+    )
+    disks.each do |k, v|
+      v['esxcli'] = vsan_disks_info[v['uuid']]
+      if v['esxcli']
+        v['host'] = v['esxcli']._get_property :host
+      end
+    end
+
+    #pp vsan_disks_info
+    #pp disks.values.map{|x| [x['host'], x['esxcli']]}
+    #pp disks.values.group_by{|x| x['host']}.keys
+
+    disks = disks.values.sort_by do |x|
+      host_props = hosts_props[x['host']]
+      host_props ? host_props['name'] : ''
+    end
+
+    # Stats are now better handled by observer
+    # disks.group_by{|x| x['host']}.each do |host, host_disks|
+    #   next if !host
+    #   devices = host_disks.map{|x| x['esxcli'].Device}
+    #   metrics = [
+    #     'disk.numberReadAveraged', 'disk.numberWriteAveraged',
+    #     'disk.deviceLatency', 'disk.maxTotalLatency',
+    #     'disk.queueLatency', 'disk.kernelLatency'
+    #   ]
+    #   stats = _fetch_disk_stats host, metrics, devices
+    #   disks.each do |v|
+    #     if v['esxcli'] && stats[v['esxcli'].Device]
+    #       v['stats'] = stats[v['esxcli'].Device]
+    #     else
+    #       v['stats'] ||= {}
+    #       metrics.each{|m| v['stats'][m] ||= [-1] }
+    #     end
+    #   end
+    # end
+
+    t = Terminal::Table.new()
+    if opts[:show_iops]
+      t << [nil, nil, nil, 'Num', 'Capacity', nil, nil, 'Iops', nil, nil, ]
+      t << ['DisplayName', 'Host', 'isSSD', 'Comp', 'Total', 'Used', 'Reserved', 'Total', 'Reserved', ]
+    else
+      t << [nil, nil, nil, 'Num', 'Capacity', nil, nil, 'Status']
+      t << ['DisplayName', 'Host', 'isSSD', 'Comp', 'Total', 'Used', 'Reserved', 'Health']
+    end
+    t.add_separator
+    # XXX: Would be nice to show displayName and host
+
+    groups = disks.group_by{|x| x['esxcli'] ? x['esxcli'].VSANDiskGroupUUID : nil}
+
+    groups.each do |group, disks|
+      disks.sort_by{|x| -x['isSsd']}.each do |x|
+        info = x['esxcli']
+        host_props = hosts_props[x['host']]
+        cols = [
+          info ? info.DisplayName : 'N/A',
+          host_props ? host_props['name'] : 'N/A',
+          #x['uuid'],
+          (x['isSsd'] == 1) ? 'SSD' : 'MD',
+          x['lsom_objects_count'] || 'N/A',
+          "%.2f GB" % [x['capacity'].to_f / 1024**3],
+          "%.0f %%" % [x['capacityUsed'].to_f * 100 / x['capacity'].to_f],
+          "%.0f %%" % [x['capacityReserved'].to_f * 100 / x['capacity'].to_f],
+        ]
+
+        if opts[:show_iops]
+          cols += [
+            "%d" % [x['iops']],
+            "%.0f %%" % [ x['iopsReserved'].to_f * 100 / x['iops'].to_f],
+          ]
+        end
+
+        # cols += [
+        #   "%dr/%dw" % [x['stats']['disk.numberReadAveraged'].first,
+        #                x['stats']['disk.numberWriteAveraged'].first],
+        #   "%dd/%dq/%dk" % [x['stats']['disk.deviceLatency'].first,
+        #                    x['stats']['disk.queueLatency'].first,
+        #                    x['stats']['disk.kernelLatency'].first,],
+        # ]
+
+        health = "N/A"
+        if x['disk_health'] && x['disk_health']['healthFlags']
+          flags = x['disk_health']['healthFlags']
+          health = []
+          {
+            4 => "FAILED",
+            5 => "OFFLINE",
+            6 => "DECOMMISSIONED",
+          }.each do |k, v|
+            if flags & (1 << k) != 0
+              health << v
+            end
+          end
+          if health.length == 0
+            health = "OK"
+          else
+            health = health.join(", ")
+          end
+
+        end
+        cols += [
+          health
+        ]
+
+        t << cols
+      end
+      if group != groups.keys.last
+        t.add_separator
+      end
+    end
+
+    puts t
+  end
+end
+
+
+opts :whatif_host_failures do
+  summary "Simulates how host failures impact VSAN resource usage"
+  banner <<-EOS
+
+The command shows current VSAN disk usage, but also simulates how
+disk usage would evolve under a host failure. Concretely the simulation
+assumes that all objects would be brought back to full policy
+compliance by bringing up new mirrors of existing data.
+The command makes some simplifying assumptions about disk space
+balance in the cluster. It is mostly intended to do a rough estimate
+if a host failure would drive the cluster to being close to full.
+
+EOS
+  arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
+  opt :num_host_failures_to_simulate, "Number of host failures to simulate", :default => 1
+  opt :show_current_usage_per_host, "Show current resources used per host"
+end
+
+def whatif_host_failures hosts_and_clusters, opts = {}
+  opts[:compute_number_of_components] = true
+  conn = hosts_and_clusters.first._connection
+  hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
+  clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
+  pc = conn.propertyCollector
+  cluster_hosts = pc.collectMultiple(clusters, 'host')
+  cluster_hosts.each do |cluster, props|
+    hosts += props['host']
+  end
+  hosts = hosts.uniq
+
+  if opts[:num_host_failures_to_simulate] != 1
+    err "Only simulation of 1 host failure has been implemented"
+  end
+
+  _run_with_rev(conn, "dev") do
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+
+    hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
+    node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
+    node_uuids = Hash[node_uuids.map do |k, v|
+      [v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
+    end]
+
+    lock = Mutex.new
+    disks = {}
+    vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
+    disks = vsanIntSys.QueryPhysicalVsanDisks(:props => [
+      'lsom_objects_count',
+      'uuid',
+      'isSsd',
+      'capacity',
+      'capacityUsed',
+      'capacityReserved',
+      'iops',
+      'iopsReserved',
+      'owner',
+    ])
+    if disks == "BAD"
+      err "Server failed to gather VSAN disk info"
+    end
+    begin
+      disks = JSON.load(disks)
+    rescue
+ err "Server didn't provide VSAN disk info: #{objs}"
1637
+ end
1638
+
1639
+ # XXX: Do this in threads
1640
+ hosts.map do |host|
1641
+ Thread.new do
1642
+ c1 = conn.spawn_additional_connection
1643
+ props = hosts_props[host]
1644
+ vsanIntSys2 = props['configManager.vsanInternalSystem']
1645
+ vsanIntSys3 = vsanIntSys2.dup_on_conn(c1)
1646
+ res = vsanIntSys3.query_vsan_statistics(:labels => ['lsom-node'])
1647
+ hosts_props[host]['lsom.node'] = res['lsom.node']
1648
+ end
1649
+ end.each{|t| t.join}
1650
+
1651
+ hosts_disks = Hash[disks.values.group_by{|x| x['owner']}.map do |owner, hostDisks|
1652
+ props = {}
1653
+ hdds = hostDisks.select{|disk| disk['isSsd'] == 0}
1654
+ ssds = hostDisks.select{|disk| disk['isSsd'] == 1}
1655
+ hdds.each do |disk|
1656
+ [
1657
+ 'capacityUsed', 'capacityReserved',
1658
+ 'capacity', 'lsom_objects_count'
1659
+ ].each do |x|
1660
+ props[x] ||= 0
1661
+ props[x] += disk[x]
1662
+ end
1663
+ end
1664
+ ssds.each do |disk|
1665
+ [
1666
+ 'capacityReserved', 'capacity',
1667
+ ].each do |x|
1668
+ props["ssd_#{x}"] ||= 0
1669
+ props["ssd_#{x}"] += disk[x]
1670
+ end
1671
+ end
1672
+ h = node_uuids[owner]
1673
+ props['host'] = h
1674
+ props['hostname'] = h ? hosts_props[h]['name'] : owner
1675
+ props['numHDDs'] = hdds.length
1676
+ props['maxComponents'] = 3000
1677
+ if h && hosts_props[h]['lsom.node']
1678
+ props['maxComponents'] = hosts_props[h]['lsom.node']['numMaxComponents']
1679
+ end
1680
+ [owner, props]
1681
+ end]
1682
+
1683
+ sorted_hosts = hosts_disks.values.sort_by{|x| -x['capacityUsed']}
1684
+
1685
+ if opts[:show_current_usage_per_host]
1686
+ puts "Current utilization of hosts:"
1687
+ t = Terminal::Table.new()
1688
+ t << [nil, nil, 'HDD Capacity', nil, nil, 'Components', 'SSD Capacity']
1689
+ t << ['Host', 'NumHDDs', 'Total', 'Used', 'Reserved', 'Used', 'Reserved']
1690
+ t.add_separator
1691
+
1692
+ hosts_disks.each do |owner, x|
1693
+ cols = [
1694
+ x['hostname'],
1695
+ x['numHDDs'],
1696
+ "%.2f GB" % [x['capacity'].to_f / 1024**3],
1697
+ "%.0f %%" % [x['capacityUsed'].to_f * 100 / x['capacity'].to_f],
1698
+ "%.0f %%" % [x['capacityReserved'].to_f * 100 / x['capacity'].to_f],
1699
+ "%4u/%u (%.0f %%)" % [
1700
+ x['lsom_objects_count'],
1701
+ x['maxComponents'],
1702
+ x['lsom_objects_count'].to_f * 100 / x['maxComponents'].to_f
1703
+ ],
1704
+ "%.0f %%" % [x['ssd_capacityReserved'].to_f * 100 / x['ssd_capacity'].to_f],
1705
+ ]
1706
+ t << cols
1707
+ end
1708
+ puts t
1709
+ puts ""
1710
+ end
1711
+
1712
+ puts "Simulating #{opts[:num_host_failures_to_simulate]} host failures:"
1713
+ puts ""
1714
+ worst_host = sorted_hosts[0]
1715
+
1716
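+ # Re-protecting an FTT=1 object takes at least three hosts (two data
+ # mirrors plus a witness), so a cluster this small cannot regain full
+ # compliance once a host is gone.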
+ if sorted_hosts.length < 3
1717
+ puts "Cluster unable to regain full policy compliance after host failure, "
1718
+ puts "not enough hosts remaining."
1719
+ return
1720
+ end
1721
+
1722
+ t = Terminal::Table.new()
1723
+ t << ["Resource", "Usage right now", "Usage after failure/re-protection"]
1724
+ t.add_separator
1725
+ capacityRow = ["HDD capacity"]
1726
+
1727
+ # Capacity before failure
1728
+ used = sorted_hosts.map{|x| x['capacityUsed']}.sum
1729
+ total = sorted_hosts.map{|x| x['capacity']}.sum
1730
+ free = total - used
1731
+ usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
1732
+ capacityRow << "%3.0f%% used (%.2f GB free)" % [
1733
+ usedPctOriginal,
1734
+ free.to_f / 1024**3,
1735
+ ]
1736
+
1737
+ # Capacity after rebuild
1738
+ used = sorted_hosts[1..-1].map{|x| x['capacityUsed']}.sum
1739
+ total = sorted_hosts[1..-1].map{|x| x['capacity']}.sum
1740
+ additional = worst_host['capacityUsed']
1741
+ free = total - used
1742
+ usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
1743
+ usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
1744
+ usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
1745
+ capacityRow << "%3.0f%% used (%.2f GB free)" % [
1746
+ usedPctAfterReMirror,
1747
+ (free - additional).to_f / 1024**3,
1748
+ ]
1749
+ t << capacityRow
1750
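+ # Worked example of the estimate above (illustrative numbers only):
+ # with 4 hosts of 1 TB HDD capacity each and 400 GB used per host,
+ # losing the fullest host leaves total = 3 TB and free = 1.8 TB, so
+ # usedPctAfterReMirror = 100 - (1800 - 400) * 100 / 3000 = ~53%.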
+
1751
+ # Components before failure
1752
+ sorted_hosts = hosts_disks.values.sort_by{|x| -x['lsom_objects_count']}
1753
+ worst_host = sorted_hosts[0]
1754
+ used = sorted_hosts.map{|x| x['lsom_objects_count']}.sum
1755
+ total = sorted_hosts.map{|x| x['maxComponents']}.sum
1756
+ free = total - used
1757
+ usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
1758
+ componentsRow = ["Components"]
1759
+ componentsRow << "%3.0f%% used (%u available)" % [
1760
+ usedPctOriginal,
1761
+ free,
1762
+ ]
1763
+
1764
+ # Components after rebuild
1765
+ used = sorted_hosts[1..-1].map{|x| x['lsom_objects_count']}.sum
1766
+ total = sorted_hosts[1..-1].map{|x| x['maxComponents']}.sum
1767
+ additional = worst_host['lsom_objects_count']
1768
+ free = total - used
1769
+ usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
1770
+ usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
1771
+ usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
1772
+ componentsRow << "%3.0f%% used (%u available)" % [
1773
+ usedPctAfterReMirror,
1774
+ (free - additional),
1775
+ ]
1776
+ t << componentsRow
1777
+
1778
+ # RC reservations before failure
1779
+ sorted_hosts = hosts_disks.values.sort_by{|x| -x['ssd_capacityReserved']}
1780
+ worst_host = sorted_hosts[0]
1781
+ used = sorted_hosts.map{|x| x['ssd_capacityReserved']}.sum
1782
+ total = sorted_hosts.map{|x| x['ssd_capacity']}.sum
1783
+ free = total - used
1784
+ usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
1785
+ rcReservationsRow = ["RC reservations"]
1786
+ rcReservationsRow << "%3.0f%% used (%.2f GB free)" % [
1787
+ usedPctOriginal,
1788
+ free.to_f / 1024**3,
1789
+ ]
1790
+
1791
+ # RC reservations after rebuild
1792
+ used = sorted_hosts[1..-1].map{|x| x['ssd_capacityReserved']}.sum
1793
+ total = sorted_hosts[1..-1].map{|x| x['ssd_capacity']}.sum
1794
+ additional = worst_host['ssd_capacityReserved']
1795
+ free = total - used
1796
+ usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
1797
+ usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
1798
+ usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
1799
+ rcReservationsRow << "%3.0f%% used (%.2f GB free)" % [
1800
+ usedPctAfterReMirror,
1801
+ (free - additional).to_f / 1024**3,
1802
+ ]
1803
+ t << rcReservationsRow
1804
+
1805
+ puts t
1806
+ end
1807
+ end
1808
+
1809
+
1810
+ def _observe_snapshot conn, host, hosts, vmView, pc, hosts_props, vsanIntSys
1811
+ startTime = Time.now
1812
+ observation = {
1813
+ 'cmmds' => {
1814
+ 'clusterInfos' => {},
1815
+ 'clusterDirs' => {},
1816
+ },
1817
+ 'vsi' => {},
1818
+ 'inventory' => {},
1819
+ }
1820
+ exceptions = []
1821
+ threads = []
1822
+ begin
1823
+ threads << Thread.new do
1824
+ begin
1825
+ t1 = Time.now
1826
+ vms = vmView.view
1827
+
1828
+ vmProperties = [
1829
+ 'name', 'runtime.powerState', 'datastore', 'config.annotation',
1830
+ 'parent', 'resourcePool', 'storage.perDatastoreUsage',
1831
+ 'summary.config.memorySizeMB', 'summary.config.numCpu',
1832
+ 'summary.config.vmPathName', 'config.hardware.device',
1833
+ 'runtime.connectionState',
1834
+ ]
1835
+ vmsProps = pc.collectMultiple(vms, *vmProperties)
1836
+ t2 = Time.now
1837
+ puts "Query VM properties: %.2f sec" % (t2 - t1)
1838
+ observation['inventory']['vms'] = {}
1839
+ vmsProps.each do |vm, vmProps|
1840
+ vmProps['vsan-obj-uuids'] = {}
1841
+ devices = vmProps['config.hardware.device'] || []
1842
+ disks = devices.select{|x| x.is_a?(VIM::VirtualDisk)}
1843
+ disks.each do |disk|
1844
+ newBacking = {}
1845
+ newDisk = {
1846
+ 'unitNumber' => disk.unitNumber,
1847
+ 'controllerKey' => disk.controllerKey,
1848
+ 'backing' => newBacking,
1849
+ }
1850
+ backing = disk.backing
1851
+ if !backing.is_a?(VIM::VirtualDiskFlatVer2BackingInfo)
1852
+ next
1853
+ end
1854
+ while backing
1855
+ uuid = backing.backingObjectId
1856
+ if uuid
1857
+ vmProps['vsan-obj-uuids'][uuid] = backing.fileName
1858
+ newBacking['uuid'] = uuid
1859
+ end
1860
+ newBacking['fileName'] = backing.fileName
1861
+ backing = backing.parent
1862
+
1863
+ if backing
1864
+ newBacking['parent'] = {}
1865
+ newBacking = newBacking['parent']
1866
+ end
1867
+ end
1868
+
1869
+ vmProps['disks'] ||= []
1870
+ vmProps['disks'] << newDisk
1871
+ end
1872
+ # Do not add devices to the snapshot as they are too big
1873
+ vmProps.delete('config.hardware.device')
1874
+
1875
+ begin
1876
+ vmPathName = vmProps['summary.config.vmPathName']
1877
+ uuid = vmPathName.split("] ")[1].split("/")[0]
1878
+ vmProps['vsan-obj-uuids'][uuid] = vmPathName
1879
+ rescue
1880
+ end
1881
+
1882
+ observation['inventory']['vms'][vm._ref] = vmProps
1883
+ end
1884
+ rescue Exception => ex
1885
+ exceptions << ex
1886
+ end
1887
+ end
1888
+ threads << Thread.new do
1889
+ begin
1890
+ sleep(20)
1891
+ hostname = hosts_props[host]['name']
1892
+ # XXX: Should pick one host per partition
1893
+ c1 = conn.spawn_additional_connection
1894
+ vsanIntSys1 = vsanIntSys.dup_on_conn(c1)
1895
+
1896
+ t1 = Time.now
1897
+ res = vsanIntSys1.query_cmmds(
1898
+ (1..30).map{|x| {:type => x}}
1899
+ )
1900
+ t2 = Time.now
1901
+ puts "Query CMMDS from #{hostname}: %.2f sec (json size: %dKB)" % [
1902
+ (t2 - t1), JSON.dump(res).length / 1024
1903
+ ]
1904
+ observation['cmmds']['clusterDirs'][hostname] = res
1905
+ rescue Exception => ex
1906
+ exceptions << ex
1907
+ end
1908
+ end
1909
+ hosts.each do |host|
1910
+ threads << Thread.new do
1911
+ begin
1912
+ hostname = hosts_props[host]['name']
1913
+ vsanIntSys1 = hosts_props[host]['configManager.vsanInternalSystem']
1914
+ c1 = conn.spawn_additional_connection
1915
+ vsanIntSys1 = vsanIntSys1.dup_on_conn(c1)
1916
+
1917
+ t1 = Time.now
1918
+ res = vsanIntSys1.QueryVsanStatistics(:labels =>
1919
+ [
1920
+ 'dom', 'lsom', 'worldlets', 'plog',
1921
+ 'dom-objects',
1922
+ 'mem', 'cpus', 'slabs',
1923
+ 'vscsi', 'cbrc',
1924
+ 'disks',
1925
+ #'rdtassocsets',
1926
+ 'system-mem', 'pnics',
1927
+ ]
1928
+ )
1929
+ t2 = Time.now
1930
+ res = JSON.load(res)
1931
+ puts "Query Stats on #{host.name}: %.2f sec (on ESX: %.2f, json size: %dKB)" % [
1932
+ (t2 - t1), res['on-esx-collect-duration'],
1933
+ JSON.dump(res).length / 1024
1934
+ ]
1935
+ observation['vsi'][hostname] = res
1936
+ rescue Exception => ex
1937
+ exceptions << ex
1938
+ end
1939
+ end
1940
+ end
1941
+ threads.each{|x| x.join}
1942
+ if exceptions.length > 0
1943
+ raise exceptions.first
1944
+ end
1945
+ rescue Interrupt
1946
+ threads.each{|t| t.terminate}
1947
+ end
1948
+
1949
+ {
1950
+ 'type' => 'inventory-snapshot',
1951
+ 'snapshot' => observation,
1952
+ 'starttime' => startTime.to_f,
1953
+ 'endtime' => Time.now.to_f,
1954
+ }
1955
+ end
1956
+
1957
+ class VsanObserver
1958
+ def generate_observer_html(tasksAnalyzer, inventoryAnalyzer,
1959
+ vcInfo, hosts_props)
1960
+ opts = {}
1961
+ refreshString = ""
1962
+ vcOS = vcInfo['about']['osType']
1963
+ vcFullName = vcInfo['about']['fullName']
1964
+ testTitleString = "VC #{vcInfo['hostname']} (#{vcFullName} - #{vcOS})"
1965
+ skipTasksTab = true
1966
+ graphUpdateMsg = "XXX"
1967
+ processed = 0
1968
+ puts "#{Time.now}: Generating HTML"
1969
+ inventoryAnalyzerTabs = inventoryAnalyzer.generateHtmlTabs(
1970
+ true,
1971
+ :skipLivenessTab => true,
1972
+ :skipLsomExpert => true,
1973
+ )
1974
+ puts "#{Time.now}: Generating HTML (fill in template)"
1975
+
1976
+ erbFilename = "#{analyser_lib_dirname}/stats.erb.html"
1977
+ @erbFileContent = open(erbFilename, 'r').read
1978
+
1979
+ template = ERB.new(@erbFileContent)
1980
+ html = template.result(binding)
1981
+ puts "#{Time.now}: HTML length: #{html.length}"
1982
+
1983
+ html
1984
+ end
1985
+
1986
+ def generate_observer_bundle(bundlePath, tasksAnalyzer, inventoryAnalyzer,
1987
+ vcInfo, hosts_props)
1988
+ require 'rubygems/package'
1989
+ tarFilename = File.join(
1990
+ bundlePath,
1991
+ "vsan-observer-#{Time.now.strftime('%Y-%m-%d.%H-%M-%S')}.tar"
1992
+ )
1993
+ gzFilename = "%s.gz" % tarFilename
1994
+
1995
+ puts "#{Time.now}: Writing out an HTML bundle to #{gzFilename} ..."
1996
+ tar = open(tarFilename, 'wb+')
1997
+ Gem::Package::TarWriter.new(tar) do |writer|
1998
+ inventoryAnalyzer.dump(:tar => writer)
1999
+
2000
+ writer.add_file('stats.html', 0644) do |io|
2001
+ io.write(self.generate_observer_html(
2002
+ tasksAnalyzer, inventoryAnalyzer, vcInfo,
2003
+ hosts_props
2004
+ )
2005
+ )
2006
+ end
2007
+
2008
+ [
2009
+ 'graphs.html', 'bg_pattern.png', 'vmw_logo_white.png',
2010
+ 'graphs.js', 'observer.css', 'vm-graph.svg'
2011
+ ].each do |filename|
2012
+ writer.add_file(filename, 0644) do |io|
2013
+ content = open("#{analyser_lib_dirname}/#{filename}", "r") do |src|
2014
+ src.read
2015
+ end
2016
+ io.write(content)
2017
+ end
2018
+ end
2019
+ end
2020
+ tar.seek(0)
2021
+
2022
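+ # Stream the tar through gzip in 10 KB chunks, then delete the
+ # uncompressed tar so only the .tar.gz remains on disk.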
+ gz = Zlib::GzipWriter.new(File.new(gzFilename, 'wb'))
2023
+ while (buffer = tar.read(10000))
2024
+ gz.write(buffer)
2025
+ end
2026
+ tar.close
2027
+ gz.close
2028
+ FileUtils.rm(tarFilename)
2029
+ puts "#{Time.now}: Done writing HTML bundle to #{gzFilename}"
2030
+ end
2031
+ end
2032
+
2033
+ require 'webrick'
2034
+ class SimpleGetForm < WEBrick::HTTPServlet::AbstractServlet
2035
+ def initialize(server, tasksAnalyzer, inventoryAnalyzer,
2036
+ erbFileContent, vcInfo, hosts_props)
2037
+ super server
2038
+ @tasksAnalyzer = tasksAnalyzer
2039
+ @inventoryAnalyzer = inventoryAnalyzer
2040
+ @erbFileContent = erbFileContent
2041
+ @vcInfo = vcInfo
2042
+ @hosts_props = hosts_props
2043
+ end
2044
+
2045
+ # Process the request, return response
2046
+ def do_GET(request, response)
2047
+ staticFiles = [
2048
+ "/graphs.js", "/graphs.html",
2049
+ "/observer.css",
2050
+ "/vmw_logo_white.png",
2051
+ "/bg_pattern.png",
2052
+ "/vm-graph.svg"
2053
+ ]
2054
+ if request.path == "/"
2055
+ status, content_type, body = mainpage(request)
2056
+ elsif staticFiles.member?(request.path)
2057
+ status, content_type, body = servefile(request)
2058
+ # elsif request.path =~ /^\/css\//
2059
+ # status, content_type, body = servefile(request)
2060
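+ # Stats URLs follow /jsonstats/<group>/<file>.json; a "_thumb" suffix
+ # (e.g. the hypothetical /jsonstats/dom/domobj-1234_thumb.json) requests
+ # a downsampled 60-point series for thumbnail graphs.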
+ elsif request.path =~ /^\/jsonstats\/(dom|pcpu|mem|lsom|vm|cmmds|misc)\/(.*).json$/
2061
+ group = $1
2062
+ file = $2
2063
+ opts = {}
2064
+ if file =~ /^(.*)_thumb$/
2065
+ file = $1
2066
+ opts[:points] = 60
2067
+ end
2068
+ status, content_type, body = servejson(group, file, opts)
2069
+ else
2070
+ super(request, response)
2071
+ end
2072
+
2073
+ response.status = status
2074
+ response['Content-Type'] = content_type
2075
+ response.body = body
2076
+ end
2077
+
2078
+ def servefile request
2079
+ filename = "#{analyser_lib_dirname}#{request.path}"
2080
+ content = open(filename, 'r').read
2081
+ if filename =~ /\.js$/
2082
+ return [200, "text/javascript", content]
2083
+ end
2084
+ if filename =~ /\.html$/
2085
+ return [200, "text/html", content]
2086
+ end
2087
+ if filename =~ /\.less$/
2088
+ return [200, "text/css", content]
2089
+ end
2090
+ if filename =~ /\.css$/
2091
+ return [200, "text/css", content]
2092
+ end
2093
+ if filename =~ /\.png$/
2094
+ return [200, "image/png", content]
2095
+ end
2096
+ if filename =~ /\.svg$/
2097
+ return [200, "image/svg+xml", content]
2098
+ end
2099
+
2100
+ [404, "text/html", "Not found"]
2101
+ end
2102
+
2103
+ def json_dump out
2104
+ @inventoryAnalyzer.json_dump out
2105
+ end
2106
+
2107
+ def servejson group, file, opts = {}
2108
+ points = opts[:points]
2109
+ if group == "misc"
2110
+ if file =~ /^distribution$/
2111
+ out = @inventoryAnalyzer.dumpDistribution(:points => points)
2112
+ return [200, "text/json", json_dump(out)]
2113
+ end
2114
+ if file =~ /^cbrc-(.*)$/
2115
+ hostname = $1
2116
+ out = @inventoryAnalyzer.dumpCbrc(hostname)
2117
+ return [200, "text/json", json_dump(out)]
2118
+ end
2119
+ if file =~ /^pnics-(.*)$/
2120
+ hostname = $1
2121
+ out = @inventoryAnalyzer.dumpPnics(hostname)
2122
+ return [200, "text/json", json_dump(out)]
2123
+ end
2124
+ end
2125
+ if group == "vm"
2126
+ if file =~ /^list$/
2127
+ out = @inventoryAnalyzer.dumpVmList()
2128
+ return [200, "text/json", json_dump(out)]
2129
+ end
2130
+ if file =~ /^vscsi-([^-]*)-(.*)$/
2131
+ disk = $1
2132
+ vm = $2
2133
+ out = @inventoryAnalyzer.dumpVscsi(vm, disk, nil, :points => points)
2134
+ return [200, "text/json", json_dump(out)]
2135
+ end
2136
+ end
2137
+ if group == "cmmds"
2138
+ if file =~ /^disks$/
2139
+ out = @inventoryAnalyzer.dumpCmmdsDisks()
2140
+ return [200, "text/json", json_dump(out)]
2141
+ end
2142
+ if file =~ /^cmmds-(.*)$/
2143
+ uuid = $1
2144
+ out = @inventoryAnalyzer.dumpCmmdsUuid(uuid)
2145
+ return [200, "text/json", json_dump(out)]
2146
+ end
2147
+ end
2148
+ if group == "dom"
2149
+ if file =~ /^domobj-(client|total|compmgr)-(.*)$/
2150
+ uuid = "#{$1}-#{$2}"
2151
+ out = @inventoryAnalyzer.dumpDom(uuid, nil, :points => points)
2152
+ return [200, "text/json", json_dump(out)]
2153
+ elsif file =~ /^domobj-(.*)$/
2154
+ uuid = $1
2155
+ out = @inventoryAnalyzer.dumpDom(uuid, nil, :points => points)
2156
+ return [200, "text/json", json_dump(out)]
2157
+ end
2158
+ end
2159
+ if group == "pcpu"
2160
+ if file =~ /^wdt-(.*)-([^-]*)$/
2161
+ hostname = $1
2162
+ wdt = $2
2163
+ out = @inventoryAnalyzer.dumpWdt(hostname, wdt, nil, :points => points)
2164
+ return [200, "text/json", json_dump(out)]
2165
+ end
2166
+ if file =~ /^pcpu-(.*)$/
2167
+ hostname = $1
2168
+ out = @inventoryAnalyzer.dumpPcpu(hostname, :points => points)
2169
+ return [200, "text/json", json_dump(out)]
2170
+ end
2171
+ end
2172
+ if group == "mem"
2173
+ if file =~ /^heaps-(.*)$/
2174
+ hostname = $1
2175
+ out = @inventoryAnalyzer.dumpHeaps(hostname, nil, :points => points)
2176
+ return [200, "text/json", json_dump(out)]
2177
+ end
2178
+ if file =~ /^slabs-(.*)$/
2179
+ hostname = $1
2180
+ out = @inventoryAnalyzer.dumpSlabs(hostname, nil, :points => points)
2181
+ return [200, "text/json", json_dump(out)]
2182
+ end
2183
+ if file =~ /^system-(.*)$/
2184
+ hostname = $1
2185
+ out = @inventoryAnalyzer.dumpSystemMem(hostname, nil, :points => points)
2186
+ return [200, "text/json", json_dump(out)]
2187
+ end
2188
+ end
2189
+ if group == "lsom"
2190
+ if file =~ /^lsomcomp-(.*)$/
2191
+ uuid = $1
2192
+ out = @inventoryAnalyzer.dumpLsomComp(uuid, nil, :points => points)
2193
+ return [200, "text/json", json_dump(out)]
2194
+ end
2195
+ if file =~ /^lsomhost-(.*)$/
2196
+ hostname = $1
2197
+ out = @inventoryAnalyzer.dumpLsomHost(hostname, nil, :points => points)
2198
+ return [200, "text/json", json_dump(out)]
2199
+ end
2200
+ if file =~ /^ssd-(.*)$/
2201
+ uuid = $1
2202
+ out = @inventoryAnalyzer.dumpSsd(uuid, nil, nil, :points => points)
2203
+ return [200, "text/json", json_dump(out)]
2204
+ end
2205
+ if file =~ /^plog-(.*)$/
2206
+ dev = $1
2207
+ out = @inventoryAnalyzer.dumpPlog(dev, nil, nil, nil, :points => points)
2208
+ return [200, "text/json", json_dump(out)]
2209
+ end
2210
+ if file =~ /^disk-(.*)$/
2211
+ dev = $1
2212
+ out = @inventoryAnalyzer.dumpDisk(dev, nil, nil, :points => points)
2213
+ return [200, "text/json", json_dump(out)]
2214
+ end
2215
+ if file =~ /^physdisk-(.*)-([^-]*)$/
2216
+ hostname = $1
2217
+ dev = $2
2218
+ out = @inventoryAnalyzer.dumpPhysDisk(hostname, dev, nil, :points => points)
2219
+ return [200, "text/json", json_dump(out)]
2220
+ end
2221
+ end
2222
+
2223
+ [404, "text/html", "Not found"]
2224
+ end
2225
+
2226
+ def mainpage request
2227
+ tasksAnalyzer = @tasksAnalyzer
2228
+ inventoryAnalyzer = @inventoryAnalyzer
2229
+
2230
+ html = VsanObserver.new.generate_observer_html(
2231
+ @tasksAnalyzer, @inventoryAnalyzer, @vcInfo, @hosts_props
2232
+ )
2233
+
2234
+ [200, "text/html", html]
2235
+ end
2236
+ end
2237
+
2238
+ opts :observer do
2239
+ summary "Run observer"
2240
+ arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
2241
+ opt :filename, "Output file path", :type => :string
2242
+ opt :port, "Port on which to run webserver", :type => :int, :default => 8010
2243
+ opt :run_webserver, "Run a webserver to view live stats", :type => :boolean
2244
+ opt :force, "Apply force", :type => :boolean
2245
+ opt :keep_observation_in_memory, "Keep observed stats in memory even after the command ends. Allows resuming later", :type => :boolean
2246
+ opt :generate_html_bundle, "Generate an HTML bundle after completion. Pass an output directory", :type => :string
2247
+ opt :interval, "Interval (in sec) in which to collect stats", :type => :int, :default => 60
2248
+ opt :max_runtime, "Maximum number of hours to collect stats. Caps memory usage.", :type => :int, :default => 2
2249
+ end
2250
+
2251
+ def observer cluster_or_host, opts
2252
+ conn = cluster_or_host._connection
2253
+ pc = conn.propertyCollector
2254
+ host = cluster_or_host
2255
+ entries = []
2256
+ hostUuidMap = {}
2257
+
2258
+ vcAbout = conn.serviceContent.about
2259
+ vcInfo = {
2260
+ 'hostname' => conn.host,
2261
+ 'about' => {
2262
+ 'fullName' => vcAbout.fullName,
2263
+ 'osType' => vcAbout.osType,
2264
+ 'apiVersion' => vcAbout.apiVersion,
2265
+ 'apiType' => vcAbout.apiType,
2266
+ 'build' => vcAbout.build,
2267
+ 'instanceUuid' => vcAbout.instanceUuid,
2268
+ 'version' => vcAbout.version,
2269
+ },
2270
+ }
2271
+
2272
+ if opts[:run_webserver] && !opts[:force]
2273
+ puts "Running a webserver with unencrypted HTTP on the vCenter machine "
2274
+ puts "could pose a security risk. This tool is an experimenal debugging "
2275
+ puts "tool, which has not been audited or tested for its security."
2276
+ puts "If in doubt, you may want to create a dummy vCenter machine to run"
2277
+ puts "just this tool, instead of running the tool on your production "
2278
+ puts "vCenter machine."
2279
+ puts "In order to run the webserver, please pass --force"
2280
+ err "Force needs to be applied to run the webserver"
2281
+ end
2282
+
2283
+ require 'rvc/observer/analyzer-lib'
2284
+ require 'rvc/observer/tasks-analyzer'
2285
+ require 'rvc/observer/inventory-analyzer'
2286
+
2287
+ inventoryAnalyzer = $inventoryAnalyzer
2288
+ tasksAnalyzer = $tasksAnalyzer
2289
+
2290
+ inventoryAnalyzer ||= InventoryAnalyzer.new
2291
+ tasksAnalyzer ||= TasksAnalyzer.new({})
2292
+
2293
+ file = nil
2294
+ if opts[:filename]
2295
+ file = open(opts[:filename], 'a')
2296
+ end
2297
+ server = nil
2298
+ webrickThread = nil
2299
+ hosts_props = nil
2300
+
2301
+ _run_with_rev(conn, "dev") do
2302
+ vsanIntSys = nil
2303
+ if cluster_or_host.is_a?(VIM::ClusterComputeResource)
2304
+ cluster = cluster_or_host
2305
+ hosts = cluster.host
2306
+ else
2307
+ hosts = [host]
2308
+ end
2309
+
2310
+ hosts_props = pc.collectMultiple(hosts,
2311
+ 'name',
2312
+ 'runtime.connectionState',
2313
+ 'configManager.vsanSystem',
2314
+ 'configManager.vsanInternalSystem',
2315
+ 'summary.config.product',
2316
+ 'summary.hardware'
2317
+ )
2318
+ connected_hosts = hosts_props.select do |k,v|
2319
+ v['runtime.connectionState'] == 'connected'
2320
+ end.keys
2321
+ host = connected_hosts.first
2322
+ if !host
2323
+ err "Couldn't find any connected hosts"
2324
+ end
2325
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
2326
+ vsanSysList = Hash[hosts_props.map do |host, props|
2327
+ [props['name'], props['configManager.vsanSystem']]
2328
+ end]
2329
+ clusterInfos = pc.collectMultiple(vsanSysList.values,
2330
+ 'config.clusterInfo')
2331
+ hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
2332
+ [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
2333
+ end]
2334
+
2335
+ viewMgr = conn.serviceContent.viewManager
2336
+ rootFolder = conn.serviceContent.rootFolder
2337
+
2338
+ vmView = viewMgr.CreateContainerView(
2339
+ :container => rootFolder,
2340
+ :type => ['VirtualMachine'],
2341
+ :recursive => true
2342
+ )
2343
+
2344
+ if opts[:run_webserver]
2345
+ erbFilename = "#{analyser_lib_dirname}/stats.erb.html"
2346
+ erbFileContent = open(erbFilename, 'r').read
2347
+
2348
+ server = WEBrick::HTTPServer.new(:Port => opts[:port])
2349
+ server.mount(
2350
+ "/", SimpleGetForm,
2351
+ tasksAnalyzer, inventoryAnalyzer, erbFileContent, vcInfo,
2352
+ JSON.load(JSON.dump(hosts_props))
2353
+ )
2354
+ webrickThread = Thread.new do
2355
+ server.start
2356
+ end
2357
+ end
2358
+
2359
+ puts "Press <Ctrl>+<C> to stop observing at any point ..."
2360
+ puts
2361
+
2362
+ startTime = Time.now
2363
+ begin
2364
+ while (Time.now - startTime) < opts[:max_runtime] * 3600
2365
+ puts "#{Time.now}: Collect one inventory snapshot"
2366
+ t1 = Time.now
2367
+ begin
2368
+ observation = _observe_snapshot(
2369
+ conn, host, connected_hosts, vmView, pc, hosts_props, vsanIntSys
2370
+ )
2371
+ observation['snapshot']['vcinfo'] = vcInfo
2372
+ observation['timestamp'] = Time.now.to_f
2373
+ if file
2374
+ file.write(JSON.dump(observation) + "\n")
2375
+ file.flush()
2376
+ else
2377
+ puts "#{Time.now}: Live-Processing inventory snapshot"
2378
+ tasksAnalyzer.processTrace(observation)
2379
+ inventoryAnalyzer.processInventorySnapshot(observation)
2380
+ end
2381
+ rescue Interrupt
2382
+ raise
2383
+ rescue Exception => ex
2384
+ puts "#{Time.now}: Got exception: #{ex.class}: #{ex.message}"
2385
+ end
2386
+ t2 = Time.now
2387
+
2388
+ intervalTime = opts[:interval]
2389
+ time = t2 - t1
2390
+ sleepTime = intervalTime - time
2391
+ if sleepTime <= 0.0
2392
+ puts "#{Time.now}: Collection took %.2fs (> %.2fs), no sleep ..." % [
2393
+ time, intervalTime
2394
+ ]
2395
+ else
2396
+ puts "#{Time.now}: Collection took %.2fs, sleeping for %.2fs" % [
2397
+ time, sleepTime
2398
+ ]
2399
+ puts "#{Time.now}: Press <Ctrl>+<C> to stop observing"
2400
+ sleep(sleepTime)
2401
+ end
2402
+ end
2403
+ rescue Interrupt
2404
+ puts "#{Time.now}: Execution interrupted, wrapping up ..."
2405
+ end
2406
+ #pp res
2407
+ vmView.DestroyView()
2408
+
2409
+ end
2410
+
2411
+ if file
2412
+ file.close()
2413
+ end
2414
+ if server
2415
+ server.shutdown
2416
+ webrickThread.join
2417
+ end
2418
+ if opts[:generate_html_bundle]
2419
+ begin
2420
+ VsanObserver.new.generate_observer_bundle(
2421
+ opts[:generate_html_bundle], tasksAnalyzer, inventoryAnalyzer,
2422
+ vcInfo, hosts_props
2423
+ )
2424
+ rescue Exception => ex
2425
+ puts "#{Time.now}: Failed to generate HTML bundle: #{ex.class}: #{ex.message}"
2426
+ end
2427
+ end
2428
+
2429
+ if opts[:keep_observation_in_memory]
2430
+ $inventoryAnalyzer = inventoryAnalyzer
2431
+ $tasksAnalyzer = tasksAnalyzer
2432
+ else
2433
+ $inventoryAnalyzer = nil
2434
+ $tasksAnalyzer = nil
2435
+ end
2436
+ end
2437
+
2438
+ class RbVmomi::VIM
2439
+ def initialize opts
2440
+ super opts
2441
+ end
2442
+
2443
+ def spawn_additional_connection
2444
+ c1 = RbVmomi::VIM.new(@opts)
2445
+ c1.cookie = self.cookie
2446
+ c1.rev = self.rev
2447
+ c1
2448
+ end
2449
+ end
2450
+
2451
+ RbVmomi::VIM::ManagedObject
2452
+ class RbVmomi::VIM::ManagedObject
2453
+ def dup_on_conn conn
2454
+ self.class.new(conn, self._ref)
2455
+ end
2456
+ end
2457
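+ # Together these two patches let worker threads talk to the same server
+ # concurrently: spawn_additional_connection clones the authenticated
+ # session (reusing its cookie and API rev), and dup_on_conn rebinds a
+ # managed object to the clone. For example:
+ #   c1 = conn.spawn_additional_connection
+ #   vsanIntSys2 = vsanIntSys.dup_on_conn(c1)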
+
2458
+
2459
+ opts :resync_dashboard do
2460
+ summary "Resyncing dashboard"
2461
+ arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
2462
+ opt :refresh_rate, "Refresh interval (in sec). Default is no refresh", :type => :int
2463
+ end
2464
+
2465
+ def resync_dashboard cluster_or_host, opts
2466
+ conn = cluster_or_host._connection
2467
+ pc = conn.propertyCollector
2468
+ if cluster_or_host.is_a?(VIM::ClusterComputeResource)
2469
+ cluster = cluster_or_host
2470
+ hosts = cluster.host
2471
+ else
2472
+ hosts = [cluster_or_host]
2473
+ end
2474
+
2475
+ _run_with_rev(conn, "dev") do
2476
+ hosts_props = pc.collectMultiple(hosts,
2477
+ 'name',
2478
+ 'runtime.connectionState',
2479
+ 'configManager.vsanSystem',
2480
+ 'configManager.vsanInternalSystem'
2481
+ )
2482
+ connected_hosts = hosts_props.select do |k,v|
2483
+ v['runtime.connectionState'] == 'connected'
2484
+ end.keys
2485
+ host = connected_hosts.first
2486
+ if !host
2487
+ err "Couldn't find any connected hosts"
2488
+ end
2489
+ hostname = hosts_props[host]['name']
2490
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
2491
+
2492
+ vsanSysList = Hash[hosts_props.map do |host, props|
2493
+ [props['name'], props['configManager.vsanSystem']]
2494
+ end]
2495
+ clusterInfos = pc.collectMultiple(vsanSysList.values,
2496
+ 'config.clusterInfo')
2497
+ hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
2498
+ [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
2499
+ end]
2500
+
2501
+ entries = nil
2502
+
2503
+ puts "#{Time.now}: Querying all VMs on VSAN ..."
2504
+ ds_list = host.datastore
2505
+ ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
2506
+ ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
2507
+ ds_name = ds_props[ds]['name']
2508
+
2509
+ vms = ds.vm
2510
+ vmsProps = pc.collectMultiple(vms,
2511
+ 'name', 'runtime.connectionState',
2512
+ 'config.hardware.device', 'summary.config'
2513
+ )
2514
+
2515
+ iter = 0
2516
+ while (iter == 0) || opts[:refresh_rate]
2517
+ puts "#{Time.now}: Querying all objects in the system from #{hostname} ..."
2518
+
2519
+ result = vsanIntSys.query_syncing_vsan_objects({})
2520
+ if !result
2521
+ err "Server failed to gather syncing objects"
2522
+ end
2523
+ objects = result['dom_objects']
2524
+
2525
+ puts "#{Time.now}: Got all the info, computing table ..."
2526
+ objects = objects.map do |uuid, objInfo|
2527
+ obj = objInfo['config']
2528
+ comps = _components_in_dom_config(obj['content'])
2529
+ bytesToSyncTotal = 0
2530
+ recoveryETATotal = 0
2531
+ comps = comps.select do |comp|
2532
+ state = comp['attributes']['componentState']
2533
+ bytesToSync = comp['attributes']['bytesToSync'] || 0
2534
+ recoveryETA = comp['attributes']['recoveryETA'] || 0
2535
+ resync = [10, 6].member?(state) && bytesToSync != 0
2536
+ if resync
2537
+ bytesToSyncTotal += bytesToSync
2538
+ recoveryETATotal = [recoveryETA, recoveryETATotal].max
2539
+ end
2540
+ resync
2541
+ end
2542
+ obj['bytesToSync'] = bytesToSyncTotal
2543
+ obj['recoveryETA'] = recoveryETATotal
2544
+ if comps.length > 0
2545
+ obj
2546
+ end
2547
+ end.compact
2548
+ obj_uuids = objects.map{|x| x['uuid']}
2549
+ objects = Hash[objects.map{|x| [x['uuid'], x]}]
2550
+
2551
+ all_obj_uuids = []
2552
+ vmToObjMap = {}
2553
+ vms.each do |vm|
2554
+ vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
2555
+ vm_obj_uuids = vm_obj_uuids.select{|x, v| obj_uuids.member?(x)}
2556
+ vm_obj_uuids = vm_obj_uuids.reject{|x, v| all_obj_uuids.member?(x)}
2557
+ all_obj_uuids += vm_obj_uuids.keys
2558
+ if vm_obj_uuids.length > 0
2559
+ vmToObjMap[vm] = vm_obj_uuids
2560
+ end
2561
+ end
2562
+
2563
+ t = Terminal::Table.new()
2564
+ t << [
2565
+ 'VM/Object',
2566
+ 'Syncing objects',
2567
+ 'Bytes to sync',
2568
+ #'ETA',
2569
+ ]
2570
+ t.add_separator
2571
+ bytesToSyncGrandTotal = 0
2572
+ objGrandTotal = 0
2573
+ vmToObjMap.each do |vm, vm_obj_uuids|
2574
+ vmProps = vmsProps[vm]
2575
+ objs = vm_obj_uuids.keys.map{|x| objects[x]}
2576
+ bytesToSyncTotal = objs.map{|obj| obj['bytesToSync']}.sum
2577
+ recoveryETATotal = objs.map{|obj| obj['recoveryETA']}.max
2578
+ t << [
2579
+ vmProps['name'],
2580
+ objs.length,
2581
+ "", #"%.2f GB" % (bytesToSyncTotal.to_f / 1024**3),
2582
+ #"%.2f min" % (recoveryETATotal.to_f / 60),
2583
+ ]
2584
+ objs.each do |obj|
2585
+ t << [
2586
+ " %s" % (vm_obj_uuids[obj['uuid']] || obj['uuid']),
2587
+ '',
2588
+ "%.2f GB" % (obj['bytesToSync'].to_f / 1024**3),
2589
+ #"%.2f min" % (obj['recoveryETA'].to_f / 60),
2590
+ ]
2591
+ end
2592
+ bytesToSyncGrandTotal += bytesToSyncTotal
2593
+ objGrandTotal += objs.length
2594
+ end
2595
+ t.add_separator
2596
+ t << [
2597
+ 'Total',
2598
+ objGrandTotal,
2599
+ "%.2f GB" % (bytesToSyncGrandTotal.to_f / 1024**3),
2600
+ #"%.2f min" % (recoveryETATotal.to_f / 60),
2601
+ ]
2602
+ puts t
2603
+ iter += 1
2604
+
2605
+ if opts[:refresh_rate]
2606
+ sleep opts[:refresh_rate]
2607
+ end
2608
+ end
2609
+ end
2610
+ end
2611
+
2612
+ opts :vm_perf_stats do
2613
+ summary "VM perf stats"
2614
+ arg :vms, nil, :lookup => [VIM::VirtualMachine], :multi => true
2615
+ opt :interval, "Time interval to compute average over", :type => :int, :default => 20
2616
+ opt :show_objects, "Show objects that are part of VM", :type => :boolean
2617
+ end
2618
+
2619
+ def vm_perf_stats vms, opts
2620
+ conn = vms.first._connection
2621
+ pc = conn.propertyCollector
2622
+ cluster = vms.first.runtime.host.parent
2623
+ hosts = cluster.host
2624
+
2625
+ _run_with_rev(conn, "dev") do
2626
+ hosts_props = pc.collectMultiple(hosts,
2627
+ 'name',
2628
+ 'runtime.connectionState',
2629
+ 'configManager.vsanSystem',
2630
+ 'configManager.vsanInternalSystem'
2631
+ )
2632
+ connected_hosts = hosts_props.select do |k,v|
2633
+ v['runtime.connectionState'] == 'connected'
2634
+ end.keys
2635
+ host = connected_hosts.first
2636
+ if !host
2637
+ err "Couldn't find any connected hosts"
2638
+ end
2639
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
2640
+
2641
+ vsanSysList = Hash[hosts_props.map do |host, props|
2642
+ [props['name'], props['configManager.vsanSystem']]
2643
+ end]
2644
+ clusterInfos = pc.collectMultiple(vsanSysList.values,
2645
+ 'config.clusterInfo')
2646
+ hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
2647
+ [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
2648
+ end]
2649
+ hostNameToMoMap = Hash[hosts_props.map do |host, props|
2650
+ [props['name'], host]
2651
+ end]
2652
+
2653
+ entries = nil
2654
+
2655
+ puts "#{Time.now}: Querying info about VMs ..."
2656
+ vmsProps = pc.collectMultiple(vms,
2657
+ 'name', 'runtime.connectionState',
2658
+ 'config.hardware.device', 'summary.config'
2659
+ )
2660
+
2661
+ obj_uuids = []
2662
+ vms.each do |vm|
2663
+ obj_uuids += _get_vm_obj_uuids(vm, vmsProps).keys
2664
+ end
2665
+
2666
+ puts "#{Time.now}: Querying VSAN objects used by the VMs ..."
2667
+
2668
+ objects = vsanIntSys.query_cmmds(obj_uuids.map do |uuid|
2669
+ {:type => 'CONFIG_STATUS', :uuid => uuid}
2670
+ end)
2671
+ if !objects
2672
+ err "Server failed to gather CONFIG_STATUS entries"
2673
+ end
2674
+
2675
+ objByHost = {}
2676
+ objects.each do |entry|
2677
+ host = hostUuidMap[entry['owner']]
2678
+ if !host
2679
+ next
2680
+ end
2681
+ host = hostNameToMoMap[host]
2682
+ if !host
2683
+ next
2684
+ end
2685
+ objByHost[host] ||= []
2686
+ objByHost[host] << entry['uuid']
2687
+ end
2688
+
2689
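+ # Sample the counters twice, opts[:interval] seconds apart; dividing each
+ # counter delta by the elapsed 'ts' delta below yields average IOPS,
+ # throughput and latency over the window.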
+ def fetchStats(objByHost, hosts_props)
2690
+ stats = {}
2691
+ objByHost.each do |host, obj_uuids|
2692
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
2693
+
2694
+ res = vsanIntSys.QueryVsanStatistics(:labels => obj_uuids.map do |uuid|
2695
+ "dom-object:#{uuid}"
2696
+ end)
2697
+ res = JSON.load(res)
2698
+
2699
+ obj_uuids.each do |uuid|
2700
+ stats[uuid] = res['dom.owners.selected.stats'][uuid]
2701
+ if stats[uuid]
2702
+ stats[uuid]['ts'] = res['dom.owners.selected.stats-taken']
2703
+ end
2704
+ end
2705
+ end
2706
+ stats
2707
+ end
2708
+
2709
+ puts "#{Time.now}: Fetching stats counters once ..."
2710
+ stats1 = fetchStats(objByHost, hosts_props)
2711
+ sleepTime = opts[:interval]
2712
+ puts "#{Time.now}: Sleeping for #{sleepTime} seconds ..."
2713
+ sleep(sleepTime)
2714
+ puts "#{Time.now}: Fetching stats counters again to compute averages ..."
2715
+ stats2 = fetchStats(objByHost, hosts_props)
2716
+
2717
+ puts "#{Time.now}: Got all data, computing table"
2718
+ stats = {}
2719
+ objects.each do |entry|
2720
+ uuid = entry['uuid']
2721
+ # Objects whose stats were missing in either sample would crash the
+ # delta computation below; skip them instead.
+ next if !stats1[uuid] || !stats2[uuid]
+ deltas = Hash[stats2[uuid].keys.map do |key|
2722
+ [key, stats2[uuid][key] - stats1[uuid][key]]
2723
+ end]
2724
+ deltaT = deltas['ts']
2725
+ stats[uuid] = deltas.merge({
2726
+ :readIops => deltas['readCount'] / deltaT,
2727
+ :writeIops => deltas['writeCount'] / deltaT,
2728
+ :readTput => deltas['readBytes'] / deltaT,
2729
+ :writeTput => deltas['writeBytes'] / deltaT,
2730
+ :readLatency => 0,
2731
+ :writeLatency => 0,
2732
+ })
2733
+ if deltas['readCount'] > 0
2734
+ stats[uuid][:readLatency] = deltas['readLatencySumUs'] / deltas['readCount']
2735
+ end
2736
+ if deltas['writeCount'] > 0
2737
+ stats[uuid][:writeLatency] = deltas['writeLatencySumUs'] / deltas['writeCount']
2738
+ end
2739
+ end
2740
+
2741
+ t = Terminal::Table.new()
2742
+ t << [
2743
+ 'VM/Object',
2744
+ 'IOPS',
2745
+ 'Tput (KB/s)',
2746
+ 'Latency (ms)'
2747
+ ]
2748
+ t.add_separator
2749
+ vms.each do |vm|
2750
+ vmProps = vmsProps[vm]
2751
+ vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
2752
+
2753
+ if !opts[:show_objects]
2754
+ vmStats = {}
2755
+ vmStats[:readLatency] ||= []
2756
+ vmStats[:writeLatency] ||= []
2757
+ [:readIops, :writeIops, :readTput, :writeTput].each do |key|
2758
+ vmStats[key] ||= 0.0
2759
+ end
2760
+
2761
+ vm_obj_uuids.each do |uuid, path|
2762
+ path = path.gsub(/^\[([^\]]*)\] /, "")
2763
+ objStats = stats[uuid]
2764
+ if !objStats
2765
+ next
2766
+ end
2767
+ [:readIops, :writeIops, :readTput, :writeTput].each do |key|
2768
+ vmStats[key] += (objStats[key] || 0.0)
2769
+ end
2770
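+ # Weight each object's latency by its IOPS so the per-VM figure computed
+ # below is an IOPS-weighted average rather than a plain mean.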
+ vmStats[:readLatency] << (objStats[:readLatency] * objStats[:readIops])
2771
+ vmStats[:writeLatency] << (objStats[:writeLatency] * objStats[:writeIops])
2772
+ end
2773
+ if vmStats[:readLatency].length > 0 && vmStats[:readIops] > 0.0
2774
+ vmStats[:readLatency] = vmStats[:readLatency].sum / vmStats[:readIops]
2775
+ else
2776
+ vmStats[:readLatency] = 0.0
2777
+ end
2778
+ if vmStats[:writeLatency].length > 0 && vmStats[:writeIops] > 0.0
2779
+ vmStats[:writeLatency] = vmStats[:writeLatency].sum / vmStats[:writeIops]
2780
+ else
2781
+ vmStats[:writeLatency] = 0.0
2782
+ end
2783
+
2784
+ t << [
2785
+ vmProps['name'],
2786
+ [
2787
+ "%.1fr" % [vmStats[:readIops]],
2788
+ "%.1fw" % [vmStats[:writeIops]],
2789
+ ].join("/"),
2790
+ [
2791
+ "%.1fr" % [vmStats[:readTput] / 1024.0],
2792
+ "%.1fw" % [vmStats[:writeTput] / 1024.0],
2793
+ ].join("/"),
2794
+ [
2795
+ "%.1fr" % [vmStats[:readLatency] / 1000.0],
2796
+ "%.1fw" % [vmStats[:writeLatency] / 1000.0],
2797
+ ].join("/"),
2798
+ ]
2799
+ else
2800
+ t << [
2801
+ vmProps['name'],
2802
+ "",
2803
+ "",
2804
+ "",
2805
+ ]
2806
+ vm_obj_uuids.each do |uuid, path|
2807
+ path = path.gsub(/^\[([^\]]*)\] /, "")
2808
+ objStats = stats[uuid]
2809
+ if !objStats
2810
+ t << [
2811
+ " %s" % (path || uuid),
2812
+ "N/A","N/A","N/A",
2813
+ ]
2814
+ next
2815
+ end
2816
+ t << [
2817
+ " %s" % (path || uuid),
2818
+ [
2819
+ "%.1fr" % [objStats[:readIops]],
2820
+ "%.1fw" % [objStats[:writeIops]],
2821
+ ].join("/"),
2822
+ [
2823
+ "%.1fr" % [objStats[:readTput] / 1024.0],
2824
+ "%.1fw" % [objStats[:writeTput] / 1024.0],
2825
+ ].join("/"),
2826
+ [
2827
+ "%.1fr" % [objStats[:readLatency] / 1000.0],
2828
+ "%.1fw" % [objStats[:writeLatency] / 1000.0],
2829
+ ].join("/"),
2830
+ ]
2831
+ end
2832
+ end
2833
+ end
2834
+ # t.add_separator
2835
+ # t << [
2836
+ # 'Total',
2837
+ # objGrandTotal,
2838
+ # "%.2f GB" % (bytesToSyncGrandTotal.to_f / 1024**3),
2839
+ # #"%.2f min" % (recoveryETATotal.to_f / 60),
2840
+ # ]
2841
+ puts t
2842
+ end
2843
+ end
2844
+
2845
+
2846
+ opts :enter_maintenance_mode do
2847
+ summary "Put hosts into maintenance mode"
2848
+ arg :host, nil, :lookup => VIM::HostSystem, :multi => true
2849
+ opt :timeout, "Timeout (in seconds; 0 means no timeout)", :default => 0
2850
+ opt :evacuate_powered_off_vms, "Evacuate powered-off VMs", :type => :boolean
2851
+ opt :no_wait, "Don't wait for Task to complete", :type => :boolean
2852
+ opt :vsan_mode, "Actions to take for VSAN backed storage", :type => :string, :default => "ensureObjectAccessibility"
2853
+ end
2854
+
2855
+ def enter_maintenance_mode hosts, opts
2856
+ vsanChoices = ['ensureObjectAccessibility', 'evacuateAllData', 'noAction']
2857
+ if !vsanChoices.member?(opts[:vsan_mode])
2858
+ err "VSAN mode can only be one of these: #{vsanChoices}"
2859
+ end
2860
+ tasks = []
2861
+ conn = hosts[0]._connection
2862
+ _run_with_rev(conn, "dev") do
2863
+ tasks = hosts.map do |host|
2864
+ host.EnterMaintenanceMode_Task(
2865
+ :timeout => opts[:timeout],
2866
+ :evacuatePoweredOffVms => opts[:evacuate_powered_off_vms],
2867
+ :maintenanceSpec => {
2868
+ :vsanMode => {
2869
+ :objectAction => opts[:vsan_mode],
2870
+ }
2871
+ }
2872
+ )
2873
+ end
2874
+ end
2875
+
2876
+ if opts[:no_wait]
2877
+ # Do nothing
2878
+ else
2879
+ results = progress(tasks)
2880
+
2881
+ results.each do |task, error|
2882
+ if error.is_a?(VIM::LocalizedMethodFault)
2883
+ state, entityName, name = task.collect('info.state',
2884
+ 'info.entityName',
2885
+ 'info.name')
2886
+ puts "#{name} #{entityName}: #{error.fault.class.wsdl_name}: #{error.localizedMessage}"
2887
+ error.fault.faultMessage.each do |msg|
2888
+ puts " #{msg.key}: #{msg.message}"
2889
+ end
2890
+
2891
+ end
2892
+ end
2893
+ end
2894
+ end
2895
+
2896
+ RbVmomi::VIM::HostVsanInternalSystem
2897
+ class RbVmomi::VIM::HostVsanInternalSystem
2898
+ def _parseJson json
2899
+ if json == "BAD"
2900
+ return nil
2901
+ end
2902
+ begin
2903
+ json = JSON.load(json)
2904
+ rescue
2905
+ nil
2906
+ end
2907
+ end
2908
+
2909
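+ # When opts[:gzip] and $vsanUseGzipApis are both set, a {:type => "GZIP"}
+ # marker query is appended and the server returns the result
+ # base64-encoded and gzip-compressed; it is decoded transparently below.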
+ def query_cmmds queries, opts = {}
2910
+ useGzip = (opts[:gzip]) && $vsanUseGzipApis
2911
+ if useGzip
2912
+ queries = queries + [{:type => "GZIP"}]
2913
+ end
2914
+ json = self.QueryCmmds(:queries => queries)
2915
+ if useGzip
2916
+ gzip = Base64.decode64(json)
2917
+ gz = Zlib::GzipReader.new(StringIO.new(gzip))
2918
+ json = gz.read
2919
+ end
2920
+ objects = _parseJson json
2921
+ if !objects
2922
+ raise "Server failed to gather CMMDS entries: JSON = '#{json}'"
2923
+ # raise "Server failed to gather CMMDS entries: JSON = #{json.length}"
2924
+ end
2925
+ objects = objects['result']
2926
+ objects
2927
+ end
2928
+
2929
+ def query_vsan_objects(opts)
2930
+ json = self.QueryVsanObjects(opts)
2931
+ objects = _parseJson json
2932
+ if !objects
2933
+ raise "Server failed to gather VSAN object info for #{obj_uuids}: JSON = '#{json}'"
2934
+ end
2935
+ objects
2936
+ end
2937
+
2938
+ def query_syncing_vsan_objects(opts = {})
2939
+ json = self.QuerySyncingVsanObjects(opts)
2940
+ objects = _parseJson json
2941
+ if !objects
2942
+ raise "Server failed to query syncing objects: JSON = '#{json}'"
2943
+ end
2944
+ objects
2945
+ end
2946
+
2947
+ def query_vsan_statistics(opts = {})
2948
+ json = self.QueryVsanStatistics(opts)
2949
+ objects = _parseJson json
2950
+ if !objects
2951
+ raise "Server failed to query vsan stats: JSON = '#{json}'"
2952
+ end
2953
+ objects
2954
+ end
2955
+
2956
+ def query_physical_vsan_disks(opts)
2957
+ json = self.QueryPhysicalVsanDisks(opts)
2958
+ objects = _parseJson json
2959
+ if !objects
2960
+ raise "Server failed to query vsan disks: JSON = '#{json}'"
2961
+ end
2962
+ objects
2963
+ end
2964
+
2965
+ def query_objects_on_physical_vsan_disk(opts)
2966
+ json = self.QueryObjectsOnPhysicalVsanDisk(opts)
2967
+ objects = _parseJson json
2968
+ if !objects
2969
+ raise "Server failed to query objects on vsan disks: JSON = '#{json}'"
2970
+ end
2971
+ objects
2972
+ end
2973
+
2974
+
2975
+ end
2976
+
2977
+ def _parseJson json
2978
+ if json == "BAD"
2979
+ return nil
2980
+ end
2981
+ begin
2982
+ json = JSON.load(json)
2983
+ rescue
2984
+ nil
2985
+ end
2986
+ end
2987
+
2988
+ def _assessAvailabilityByStatus state
2989
+ mask = {
2990
+ 'DATA_AVAILABLE' => (1 << 0),
2991
+ 'QUORUM' => (1 << 1),
2992
+ 'PERF_COMPLIANT' => (1 << 2),
2993
+ 'INCOMPLETE' => (1 << 3),
2994
+ }
2995
+ Hash[mask.map{|k,v| [k, (state & v) != 0]}]
2996
+ end
2997
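+ # For example, a state value of 3 (DATA_AVAILABLE | QUORUM) yields
+ #   {'DATA_AVAILABLE'=>true, 'QUORUM'=>true,
+ #    'PERF_COMPLIANT'=>false, 'INCOMPLETE'=>false}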
+
2998
+ opts :lldpnetmap do
2999
+ summary "Gather LLDP mapping information from a set of hosts"
3000
+ arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
3001
+ end
3002
+
3003
+ def lldpnetmap hosts_and_clusters, opts = {}
3004
+ conn = hosts_and_clusters.first._connection
3005
+ hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
3006
+ clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
3007
+ pc = conn.propertyCollector
3008
+ cluster_hosts = pc.collectMultiple(clusters, 'host')
3009
+ cluster_hosts.each do |cluster, props|
3010
+ hosts += props['host']
3011
+ end
3012
+ hosts = hosts.uniq
3013
+ _run_with_rev(conn, "dev") do
3014
+ hosts_props = pc.collectMultiple(hosts,
3015
+ 'name',
3016
+ 'runtime.connectionState',
3017
+ 'configManager.vsanSystem',
3018
+ 'configManager.vsanInternalSystem'
3019
+ )
3020
+
3021
+ hosts = hosts_props.select do |k,v|
3022
+ v['runtime.connectionState'] == 'connected'
3023
+ end.keys
3024
+ if hosts.length == 0
3025
+ err "Couldn't find any connected hosts"
3026
+ end
3027
+
3028
+ hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
3029
+ node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
3030
+ node_uuids = Hash[node_uuids.map do |k, v|
3031
+ [v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
3032
+ end]
3033
+
3034
+ puts "#{Time.now}: This operation will take 30-60 seconds ..."
3035
+ hosts_props.map do |host, props|
3036
+ Thread.new do
3037
+ begin
3038
+ vsanIntSys = props['configManager.vsanInternalSystem']
3039
+ c1 = conn.spawn_additional_connection
3040
+ vsanIntSys = vsanIntSys.dup_on_conn(c1)
3041
+ res = vsanIntSys.QueryVsanStatistics(:labels => ['lldpnetmap'])
3042
+ hosts_props[host]['lldpnetmap'] = JSON.parse(res)['lldpnetmap']
3043
+ rescue Exception => ex
3044
+ puts "Failed to gather lldpnetmap from #{props['name']}: #{ex.class}: #{ex.message}"
3045
+ end
3046
+ end
3047
+ end.each{|t| t.join}
3048
+
3049
+ t = Terminal::Table.new()
3050
+ t << ['Host', 'LLDP info']
3051
+ t.add_separator
3052
+ hosts_props.each do |host, props|
3053
+ t << [
3054
+ props['name'],
3055
+ props['lldpnetmap'].map do |switch, pnics|
3056
+ "#{switch}: #{pnics.join(',')}"
3057
+ end.join("\n")
3058
+ ]
3059
+ end
3060
+ puts t
3061
+ end
3062
+ end
3063
+
3064
+ opts :check_limits do
3065
+ summary "Gathers (and checks) counters against limits"
3066
+ arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
3067
+ end
3068
+
3069
+ def check_limits hosts_and_clusters, opts = {}
3070
+ conn = hosts_and_clusters.first._connection
3071
+ hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
3072
+ clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
3073
+ pc = conn.propertyCollector
3074
+ cluster_hosts = pc.collectMultiple(clusters, 'host')
3075
+ cluster_hosts.each do |cluster, props|
3076
+ hosts += props['host']
3077
+ end
3078
+ hosts = hosts.uniq
3079
+ _run_with_rev(conn, "dev") do
3080
+ hosts_props = pc.collectMultiple(hosts,
3081
+ 'name',
3082
+ 'runtime.connectionState',
3083
+ 'configManager.vsanSystem',
3084
+ 'configManager.vsanInternalSystem'
3085
+ )
3086
+
3087
+ hosts = hosts_props.select do |k,v|
3088
+ v['runtime.connectionState'] == 'connected'
3089
+ end.keys
3090
+ if hosts.length == 0
3091
+ err "Couldn't find any connected hosts"
3092
+ end
3093
+
3094
+ lock = Mutex.new
3095
+ all_disks = {}
3096
+ puts "#{Time.now}: Gathering stats from all hosts ..."
3097
+ hosts_props.map do |host, props|
3098
+ if props['runtime.connectionState'] != 'connected'
3099
+ next
3100
+ end
3101
+ hosts_props[host]['profiling'] = {}
3102
+ Thread.new do
3103
+ vsanIntSys = props['configManager.vsanInternalSystem']
3104
+ c1 = conn.spawn_additional_connection
3105
+ vsanIntSys2 = vsanIntSys.dup_on_conn(c1)
3106
+ begin
3107
+ timeout(45) do
3108
+ t1 = Time.now
3109
+ res = vsanIntSys2.query_vsan_statistics(
3110
+ :labels => ['rdtglobal', 'lsom-node']
3111
+ )
3112
+ t2 = Time.now
3113
+ hosts_props[host]['profiling']['rdtglobal'] = t2 - t1
3114
+ hosts_props[host]['rdtglobal'] = res['rdt.globalinfo']
3115
+ hosts_props[host]['lsom.node'] = res['lsom.node']
3116
+ end
3117
+ rescue Exception => ex
3118
+ puts "Failed to gather RDT info from #{props['name']}: #{ex.class}: #{ex.message}"
3119
+ end
3120
+
3121
+ begin
3122
+ timeout(60) do
3123
+ t1 = Time.now
3124
+ res = vsanIntSys2.QueryVsanStatistics(
3125
+ :labels => ['dom', 'dom-objects-counts']
3126
+ )
3127
+ res = JSON.parse(res)
3128
+ if res && !res['dom.owners.count']
3129
+ # XXX: Remove me later
3130
+ # This code is a fallback path in case we are dealing
3131
+ # with an old ESX host (before Nov13 2013). As we only
3132
+ # need to be compatible with VSAN GA, we can remove this
3133
+ # code once everyone is upgraded.
3134
+ res = vsanIntSys2.QueryVsanStatistics(
3135
+ :labels => ['dom', 'dom-objects']
3136
+ )
3137
+ res = JSON.parse(res)
3138
+ numOwners = res['dom.owners.stats'].keys.length
3139
+ else
3140
+ numOwners = res['dom.owners.count'].keys.length
3141
+ end
3142
+ t2 = Time.now
3143
+ hosts_props[host]['profiling']['domstats'] = t2 - t1
3144
+ hosts_props[host]['dom'] = {
3145
+ 'numClients'=> res['dom.clients'].keys.length,
3146
+ 'numOwners'=> numOwners,
3147
+ }
3148
+ end
3149
+ rescue Exception => ex
3150
+ puts "Failed to gather DOM info from #{props['name']}: #{ex.class}: #{ex.message}"
3151
+ end
3152
+
3153
+ begin
3154
+ timeout(45) do
3155
+ t1 = Time.now
3156
+ disks = vsanIntSys2.QueryPhysicalVsanDisks(:props => [
3157
+ 'lsom_objects_count',
3158
+ 'uuid',
3159
+ 'isSsd',
3160
+ 'capacity',
3161
+ 'capacityUsed',
3162
+ ])
3163
+ t2 = Time.now
3164
+ hosts_props[host]['profiling']['physdisk'] = t2 - t1
3165
+ disks = JSON.load(disks)
3166
+
3167
+ # Getting the data from all hosts is kind of overkill, but
3168
+ # this way we deal with partitions and get info on all disks
3169
+ # everywhere. But we have duplicates, so need to merge.
3170
+ lock.synchronize do
3171
+ all_disks.merge!(disks)
3172
+ end
3173
+ end
3174
+ rescue Exception => ex
3175
+ puts "Failed to gather disks info from #{props['name']}: #{ex.class}: #{ex.message}"
3176
+ end
3177
+ end
3178
+ end.compact.each{|t| t.join}
3179
+
3180
+ # hosts_props.each do |host, props|
3181
+ # puts "#{Time.now}: Host #{props['name']}: #{props['profiling']}"
3182
+ # end
3183
+
3184
+ puts "#{Time.now}: Gathering disks info ..."
3185
+ disks = all_disks
3186
+ vsan_disks_info = {}
3187
+ vsan_disks_info.merge!(
3188
+ _vsan_host_disks_info(Hash[hosts.map{|h| [h, hosts_props[h]['name']]}])
3189
+ )
3190
+ disks.each do |k, v|
3191
+ v['esxcli'] = vsan_disks_info[v['uuid']]
3192
+ if v['esxcli']
3193
+ v['host'] = v['esxcli']._get_property :host
3194
+
3195
+ hosts_props[v['host']]['components'] ||= 0
3196
+ hosts_props[v['host']]['components'] += v['lsom_objects_count']
3197
+ hosts_props[v['host']]['disks'] ||= []
3198
+ hosts_props[v['host']]['disks'] << v
3199
+ end
3200
+ end
3201
+
3202
+ t = Terminal::Table.new()
3203
+ t << ['Host', 'RDT', 'Disks']
3204
+ t.add_separator
3205
+ hosts_props.each do |host, props|
3206
+ rdt = props['rdtglobal'] || {}
3207
+ lsomnode = props['lsom.node'] || {}
3208
+ dom = props['dom'] || {}
3209
+ t << [
3210
+ props['name'],
3211
+ [
3212
+ "Assocs: #{rdt['assocCount']}/#{rdt['maxAssocCount']}",
3213
+ "Sockets: #{rdt['socketCount']}/#{rdt['maxSocketCount']}",
3214
+ "Clients: #{dom['numClients'] || 'N/A'}",
3215
+ "Owners: #{dom['numOwners'] || 'N/A'}",
3216
+ ].join("\n"),
3217
+ ([
3218
+ "Components: #{props['components']}/%s" % [
3219
+ lsomnode['numMaxComponents'] || 'N/A'
3220
+ ],
3221
+ ] + (props['disks'] || []).map do |disk|
3222
+ if disk['capacity'] > 0
3223
+ usage = disk['capacityUsed'] * 100 / disk['capacity']
3224
+ usage = "#{usage}%"
3225
+ else
3226
+ usage = "N/A"
3227
+ end
3228
+ "#{disk['esxcli'].DisplayName}: #{usage}"
3229
+ end).join("\n"),
3230
+ ]
3231
+ end
3232
+ puts t
3233
+ end
3234
+ end
3235
+
3236
+ opts :object_reconfigure do
3237
+ summary "Reconfigure a VSAN object"
3238
+ arg :cluster, "Cluster on which to execute the reconfig", :lookup => [VIM::HostSystem, VIM::ClusterComputeResource]
3239
+ arg :obj_uuid, "Object UUID", :type => :string, :multi => true
3240
+ opt :policy, "New policy", :type => :string, :required => true
3241
+ end
3242
+
3243
+ def object_reconfigure cluster_or_host, obj_uuids, opts
3244
+ conn = cluster_or_host._connection
3245
+ pc = conn.propertyCollector
3246
+ if cluster_or_host.is_a?(VIM::ClusterComputeResource)
3247
+ cluster = cluster_or_host
3248
+ hosts = cluster.host
3249
+ else
3250
+ hosts = [cluster_or_host]
3251
+ end
3252
+
3253
+ _run_with_rev(conn, "dev") do
3254
+ hosts_props = pc.collectMultiple(hosts,
3255
+ 'name',
3256
+ 'runtime.connectionState',
3257
+ 'configManager.vsanSystem',
3258
+ 'configManager.vsanInternalSystem'
3259
+ )
3260
+ connected_hosts = hosts_props.select do |k,v|
3261
+ v['runtime.connectionState'] == 'connected'
3262
+ end.keys
3263
+ host = connected_hosts.first
3264
+ if !host
3265
+ err "Couldn't find any connected hosts"
3266
+ end
3267
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
3268
+
3269
+ obj_uuids.each do |uuid|
3270
+ puts "Reconfiguring '#{uuid}' to #{opts[:policy]}"
3271
+ puts vsanIntSys.ReconfigureDomObject(
3272
+ :uuid => uuid,
3273
+ :policy => opts[:policy]
3274
+ )
3275
+ end
3276
+ end
3277
+ puts "All reconfigs initiated. Synching operation may be happening in the background"
3278
+ end
3279
+
3280
+
3281
+ opts :obj_status_report do
3282
+ summary "Print component status for objects in the cluster."
3283
+ arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
3284
+ opt :print_table, "Print a table of objects and their status, default all objects",
3285
+ :short => 't', :type => :boolean, :default => false
3286
+ opt :filter_table, "Filter the obj table based on status displayed in histogram, e.g. 2/3",
3287
+ :short => 'f', :type => :string, :default => nil
3288
+ opt :print_uuids, "In the table, print object UUIDs instead of vmdk and vm paths",
3289
+ :short => 'u', :type => :boolean, :default => false
3290
+ opt :ignore_node_uuid, "Estimate the status of objects if all comps on a given host were healthy.",
3291
+ :short => 'i', :type => :string, :default => nil
3292
+ end
3293
+
3294
+ def obj_status_report cluster_or_host, opts
3295
+ conn = cluster_or_host._connection
3296
+ pc = conn.propertyCollector
3297
+ if cluster_or_host.is_a?(VIM::ClusterComputeResource)
3298
+ cluster = cluster_or_host
3299
+ hosts = cluster.host
3300
+ else
3301
+ hosts = [cluster_or_host]
3302
+ end
3303
+
3304
+ _run_with_rev(conn, "dev") do
3305
+ hosts_props = pc.collectMultiple(hosts,
3306
+ 'name',
3307
+ 'runtime.connectionState',
3308
+ 'configManager.vsanSystem',
3309
+ 'configManager.vsanInternalSystem'
3310
+ )
3311
+ connected_hosts = hosts_props.select do |k,v|
3312
+ v['runtime.connectionState'] == 'connected'
3313
+ end.keys
3314
+ host = connected_hosts.first
3315
+ if !host
3316
+ err "Couldn't find any connected hosts"
3317
+ end
3318
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
3319
+
3320
+ vsanSysList = Hash[hosts_props.map do |host, props|
3321
+ [props['name'], props['configManager.vsanSystem']]
3322
+ end]
3323
+ clusterInfos = pc.collectMultiple(vsanSysList.values,
3324
+ 'config.clusterInfo')
3325
+ hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
3326
+ [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
3327
+ end]
3328
+
3329
+ entries = nil
3330
+
3331
+ puts "#{Time.now}: Querying all VMs on VSAN ..."
3332
+ ds_list = host.datastore
3333
+ ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
3334
+ ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
3335
+ ds_name = ds_props[ds]['name']
3336
+
3337
+ vms = ds.vm
3338
+ vmsProps = pc.collectMultiple(vms,
3339
+ 'name', 'runtime.connectionState',
3340
+ 'config.hardware.device', 'summary.config'
3341
+ )
3342
+
3343
+ hostname = hosts_props[host]['name']
3344
+ puts "#{Time.now}: Querying all objects in the system from #{hostname} ..."
3345
+
3346
+ objects = vsanIntSys.query_cmmds([
3347
+ {:type => 'DOM_OBJECT'}
3348
+ ], :gzip => true)
3349
+ if !objects
3350
+ err "Server failed to gather DOM_OBJECT entries"
3351
+ end
3352
+
3353
+ puts "#{Time.now}: Querying all disks in the system ..."
3354
+ # Need a list of live disk uuids to see if components are orphaned.
3355
+ liveDisks = vsanIntSys.query_cmmds([{:type => 'DISK'}])
3356
+ liveDisks = liveDisks.select do |disk|
3357
+ disk['health'] == "Healthy"
3358
+ end.map do |disk|
3359
+ disk['uuid']
3360
+ end
3361
+
3362
+ puts "#{Time.now}: Querying all components in the system ..."
3363
+ # Need a list of live comp uuids to see if components are orphaned.
3364
+ liveComps = vsanIntSys.query_cmmds(
3365
+ [{:type => 'LSOM_OBJECT'}],
3366
+ :gzip => true
3367
+ )
3368
+ liveComps = liveComps.select do |comp|
3369
+ comp['health'] == "Healthy"
3370
+ end
3371
+ liveComps = liveComps.map do |comp|
3372
+ comp['uuid']
3373
+ end
3374
+
3375
+ #pp liveDisks
3376
+ #puts "%d comps total" % liveComps.length
3377
+
3378
+ puts "#{Time.now}: Got all the info, computing table ..."
3379
+
3380
+ results = {}
3381
+ orphanRes = {}
3382
+ totalObjects = objects.length
3383
+ totalOrphans = 0
3384
+
3385
+ objects = objects.select do |obj|
3386
+ comps = _components_in_dom_config(obj['content'])
3387
+ numHealthy = 0
3388
+ numDeletedComps = 0
3389
+
3390
+ comps.each do |comp|
3391
+ state = comp['attributes']['componentState']
3392
+ bytesToSync = comp['attributes']['bytesToSync'] || 0
3393
+ resync = [10, 6].member?(state) && bytesToSync != 0
3394
+
3395
+ # Should we count resyncing as healthy? For now, lets do that.
3396
+ if resync || state == 5 ||
3397
+ (opts[:ignore_node_uuid] &&
3398
+ comp['attributes']['ownerId'] == opts[:ignore_node_uuid])
3399
+ numHealthy += 1
3400
+ elsif liveDisks.member?(comp['diskUuid']) &&
3401
+ !liveComps.member?(comp['componentUuid'])
3402
+ # A component is considered deleted if its disk is present
3403
+ # and the component is not present in CMMDS.
3404
+ numDeletedComps += 1
3405
+ end
3406
+ end
3407
+ obj['numHealthy'] = numHealthy
3408
+ obj['numComps'] = comps.length
3409
+ status = [numHealthy, comps.length]
3410
+
+       # An object can be orphaned if it is deleted while a minority of
+       # components are absent. To consider this an orphan, the total
+       # number of provably deleted components must be a quorum.
+       # If we have some deleted comps, but not a quorum, then mark it
+       # as an orphanCandidate instead of a full orphan. Orphan candidates
+       # still go into the normal results table.
+       isOrphan = numDeletedComps > 0 && numDeletedComps > comps.length / 2
+       if isOrphan
+         obj['isOrphan'] = true
+       elsif numDeletedComps > 0
+         obj['isOrphanCandidate'] = true
+       end
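+       # Worked example (hypothetical numbers): with 7 components of which 4
+       # are provably deleted, 4 > 7 / 2 holds (integer division yields 3),
+       # so the object counts as an orphan; 3 deleted out of 7 is not a
+       # quorum and only makes the object an orphan candidate.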
+
+       if isOrphan
+         # All absent components are orphaned. Consider the object orphaned.
+         totalOrphans += 1
+         orphanRes[status] ||= 0
+         orphanRes[status] += 1
+       else
+         results[status] ||= 0
+         results[status] += 1
+       end
+
+       if opts[:filter_table]
+         ("%d/%d" % [numHealthy, comps.length]) == opts[:filter_table]
+       else
+         true
+       end
+     end
+     obj_uuids = objects.map{|x| x['uuid']}
+     objectUuidMap = Hash[objects.map{|x| [x['uuid'], x]}]
+
+     all_obj_uuids = []
+     vmToObjMap = {}
+     vms.each do |vm|
+       vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
+       vm_obj_uuids = vm_obj_uuids.select{|x, v| obj_uuids.member?(x)}
+       vm_obj_uuids = vm_obj_uuids.reject{|x, v| all_obj_uuids.member?(x)}
+       all_obj_uuids += vm_obj_uuids.keys
+       if vm_obj_uuids.length > 0
+         vmToObjMap[vm] = vm_obj_uuids
+       end
+     end
+
+     def printObjStatusHist results
+       t = Terminal::Table.new()
+       t << [
+         'Num Healthy Comps / Total Num Comps',
+         'Num objects with such status',
+       ]
+       t.add_separator
+
+       results.each do |key,val|
+         t << [
+           "%d/%d" % [key[0], key[1]],
+           " %d" % val,
+         ]
+       end
+       puts t
+     end
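+     # For example, with a hypothetical input of {[3, 3] => 250, [2, 3] => 10},
+     # printObjStatusHist prints one row per status: "3/3" with count 250 and
+     # "2/3" with count 10.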
+
+     puts ""
+     puts "Histogram of component health for non-orphaned objects"
+     puts ""
+     printObjStatusHist(results)
+     puts "Total non-orphans: %d" % (totalObjects - totalOrphans)
+     puts ""
+     puts ""
+     puts "Histogram of component health for possibly orphaned objects"
+     puts ""
+     printObjStatusHist(orphanRes)
+     puts "Total orphans: %d" % totalOrphans
+     puts ""
+
+
+     if opts[:print_table] || opts[:filter_table]
+       t = Terminal::Table.new()
+       t << [
+         'VM/Object',
+         'objects',
+         'num healthy / total comps',
+       ]
+       t.add_separator
+       bytesToSyncGrandTotal = 0
+       objGrandTotal = 0
+       vmToObjMap.each do |vm, vm_obj_uuids|
+         vmProps = vmsProps[vm]
+         objs = vm_obj_uuids.keys.map{|x| objectUuidMap[x]}
+         t << [
+           vmProps['name'],
+           objs.length,
+           "",
+         ]
+         objs.each do |obj|
+           if opts[:print_uuids]
+             objName = obj['uuid']
+           else
+             objName = (vm_obj_uuids[obj['uuid']] || obj['uuid'])
+           end
+
+           if obj['isOrphan']
+             orphanStr = "*"
+           elsif obj['isOrphanCandidate']
+             orphanStr = "-"
+           else
+             orphanStr = ""
+           end
+
+           t << [
+             "  %s" % objName,
+             '',
+             "%d/%d%s" % [obj['numHealthy'], obj['numComps'], orphanStr],
+           ]
+           objects.delete(obj)
+         end
+       end
+
+       # Okay, now print the remaining UUIDs which didn't map to any VM.
+       if objects.length > 0
+         if vmToObjMap.length > 0
+           t.add_separator
+         end
+         t << [
+           "Unassociated objects",
+           '',
+           '',
+         ]
+       end
+       objects.each do |obj|
+         if obj['isOrphan']
+           orphanStr = "*"
+         elsif obj['isOrphanCandidate']
+           orphanStr = "-"
+         else
+           orphanStr = ""
+         end
+
+         t << [
+           "  %s" % obj['uuid'],
+           '',
+           "%d/%d%s" % [obj['numHealthy'], obj['numComps'], orphanStr],
+         ]
+       end
+       puts t
+       puts ""
+       puts "+------------------------------------------------------------------+"
+       puts "| Legend: * = all unhealthy comps were deleted (disks present)     |"
+       puts "|         - = some unhealthy comps deleted, some not or can't tell |"
+       puts "| no symbol = We cannot conclude any comps were deleted            |"
+       puts "+------------------------------------------------------------------+"
+       puts ""
+     end
+   end
+ end
+
+
+ opts :apply_license_to_cluster do
+   summary "Apply license to VSAN"
+   arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+   opt :license_key, "License key to be applied to the cluster", :short => 'k', :type => :string, :required => true
+   opt :null_reconfigure, "", :short => 'r', :type => :boolean, :default => true
+ end
+
+ def apply_license_to_cluster cluster, opts
+   conn = cluster._connection
+   puts "#{cluster.name}: Applying VSAN License on the cluster..."
+   licenseManager = conn.serviceContent.licenseManager
+   licenseAssignmentManager = licenseManager.licenseAssignmentManager
+   assignment = licenseAssignmentManager.UpdateAssignedLicense(
+     :entity => cluster._ref,
+     :licenseKey => opts[:license_key]
+   )
+   if opts[:null_reconfigure]
+     # Due to races in the cluster assignment mechanism in vSphere 5.5 GA,
+     # disks may or may not be auto-claimed as would normally be expected.
+     # Doing a Null-Reconfigure causes the license state to be synchronized
+     # correctly and allows auto-claim to work as expected.
+     puts "#{cluster.name}: Null-Reconfigure to force auto-claim..."
+     spec = VIM::ClusterConfigSpecEx()
+     task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
+     progress([task])
+     childtasks = task.child_tasks
+     if childtasks && childtasks.length > 0
+       progress(childtasks)
+     end
+   end
+ end
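+ # Example invocation from the RVC shell (cluster path and license key are
+ # placeholders):
+ #   vsan.apply_license_to_cluster ~/computers/mycluster -k AAAAA-BBBBB-CCCCC-DDDDD-EEEEE
+ # Since :null_reconfigure defaults to true, the Trollop-style
+ # --no-null-reconfigure flag should skip the forced reconfigure.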
+
+
+ opts :check_state do
+   summary "Checks state of VMs and VSAN objects"
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :refresh_state, "Not just check state, but also refresh", :type => :boolean
+   opt :reregister_vms,
+       "Not just check for VMs with VC/hostd/vmx out of sync but also " \
+       "fix them by un-registering and re-registering them",
+       :type => :boolean
+ end
+
+ def check_state cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+     cluster = cluster_or_host
+     hosts = cluster.host
+   else
+     hosts = [cluster_or_host]
+   end
+
+ _run_with_rev(conn, "dev") do
3621
+ hosts_props = pc.collectMultiple(hosts,
3622
+ 'name',
3623
+ 'runtime.connectionState',
3624
+ 'configManager.vsanSystem',
3625
+ 'configManager.vsanInternalSystem'
3626
+ )
3627
+ connected_hosts = hosts_props.select do |k,v|
3628
+ v['runtime.connectionState'] == 'connected'
3629
+ end.keys
3630
+ host = connected_hosts.first
3631
+ if !host
3632
+ err "Couldn't find any connected hosts"
3633
+ end
3634
+ vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
3635
+
3636
+ vsanSysList = Hash[hosts_props.map do |host, props|
3637
+ [props['name'], props['configManager.vsanSystem']]
3638
+ end]
3639
+ clusterInfos = pc.collectMultiple(vsanSysList.values,
3640
+ 'config.clusterInfo')
3641
+ hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
3642
+ [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
3643
+ end]
3644
+
3645
+ entries = nil
3646
+
3647
+ ds_list = host.datastore
3648
+ ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
3649
+ ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
3650
+ ds_name = ds_props[ds]['name']
3651
+
3652
+ vms = ds.vm
3653
+ vms_props = pc.collectMultiple(vms, 'name', 'runtime.connectionState')
3654
+
+     puts "#{Time.now}: Step 1: Check for inaccessible VSAN objects"
+
+     statuses = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS'}])
+     bad = statuses.select do |x|
+       state = _assessAvailabilityByStatus(x['content']['state'])
+       !state['DATA_AVAILABLE'] || !state['QUORUM']
+     end
+
+     if !opts[:refresh_state]
+       puts "Detected #{bad.length} objects to be inaccessible"
+       bad.each do |x|
+         uuid = x['uuid']
+         hostname = hostUuidMap[x['owner']]
+         puts "Detected #{uuid} on #{hostname} to be inaccessible"
+       end
+     else
+       bad.group_by{|x| hostUuidMap[x['owner']]}.each do |hostname, badOnHost|
+         owner = hosts_props.select{|k,v| v['name'] == hostname}.keys.first
+         owner_props = hosts_props[owner]
+         owner_vsanIntSys = owner_props['configManager.vsanInternalSystem']
+         badOnHost.each do |x|
+           uuid = x['uuid']
+           puts "Detected #{uuid} to be inaccessible, refreshing state"
+         end
+         if badOnHost.length > 0
+           badUuids = badOnHost.map{|x| x['uuid']}
+           owner_vsanIntSys.AbdicateDomOwnership(:uuids => badUuids)
+         end
+       end
+       puts ""
+
+       puts "#{Time.now}: Step 1b: Check for inaccessible VSAN objects, again"
+       statuses = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS'}])
+       bad = statuses.select do |x|
+         state = _assessAvailabilityByStatus(x['content']['state'])
+         !state['DATA_AVAILABLE'] || !state['QUORUM']
+       end
+       bad.each do |x|
+         puts "Detected #{x['uuid']} is still inaccessible"
+       end
+     end
+     puts ""
+
+     puts "#{Time.now}: Step 2: Check for invalid/inaccessible VMs"
+     invalid_vms = vms_props.select do |k,v|
+       ['invalid', 'inaccessible', 'orphaned'].member?(v['runtime.connectionState'])
+     end.keys
+     tasks = []
+     invalid_vms.each do |vm|
+       vm_props = vms_props[vm]
+       vm_state = vm_props['runtime.connectionState']
+       if !opts[:refresh_state]
+         puts "Detected VM '#{vm_props['name']}' as being '#{vm_state}'"
+       else
+         puts "Detected VM '#{vm_props['name']}' as being '#{vm_state}', reloading ..."
+         begin
+           if vm_state == 'orphaned'
+             path = vm.summary.config.vmPathName
+             tasks << vm.reloadVirtualMachineFromPath_Task(
+               :configurationPath => path
+             )
+           else
+             vm.Reload
+             vm.Reload
+           end
+         rescue Exception => ex
+           puts "#{ex.class}: #{ex.message}"
+         end
+       end
+     end
+     tasks = tasks.compact
+     if tasks.length > 0
+       progress(tasks)
+     end
+     puts ""
+
+     if opts[:refresh_state]
+       puts "#{Time.now}: Step 2b: Check for invalid/inaccessible VMs again"
+       vms_props = pc.collectMultiple(vms, 'name', 'runtime.connectionState')
+       invalid_vms = vms_props.select do |k,v|
+         ['invalid', 'inaccessible', 'orphaned'].member?(v['runtime.connectionState'])
+       end.keys
+       invalid_vms.each do |vm|
+         vm_props = vms_props[vm]
+         vm_state = vm_props['runtime.connectionState']
+         puts "Detected VM '#{vm_props['name']}' as still '#{vm_state}'"
+       end
+       puts ""
+     end
+
+     puts "#{Time.now}: Step 3: Check for VMs for which VC/hostd/vmx" \
+          " are out of sync"
+     inconsistent_vms = find_inconsistent_vms(cluster_or_host)
+     if opts[:reregister_vms] and not inconsistent_vms.empty?
+       puts "You have chosen to fix these VMs. This involves re-registering" \
+            " the VM, which will cause loss of some of the management state of" \
+            " this VM (e.g. storage policy, permissions, tags," \
+            " scheduled tasks, etc., but NO data loss). Do you want to" \
+            " continue [y/N] ?"
+       opt = $stdin.gets.chomp
+       if opt == 'y' || opt == 'Y'
+         puts "Attempting to fix these VMs..."
+         fix_inconsistent_vms(inconsistent_vms)
+       end
+     end
+     puts ""
+
+   end
+ end
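+ # Example invocation (cluster path is a placeholder):
+ #   vsan.check_state ~/computers/mycluster --refresh-state
+ # Without --refresh-state the command only reports problems; with it, owners
+ # of inaccessible objects are asked to abdicate and invalid VMs are reloaded.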
+
+
+ opts :reapply_vsan_vmknic_config do
+   summary "Unbinds and rebinds VSAN to its vmknics"
+   arg :host, nil, :lookup => [VIM::HostSystem], :multi => true
+   opt :vmknic, "Refresh a specific vmknic. Default is all vmknics", :type => :string
+   opt :dry_run, "Do a dry run: Show what changes would be made", :type => :boolean
+ end
+
+ def reapply_vsan_vmknic_config hosts, opts
+   hosts.each do |host|
+     hostname = host.name
+     net = host.esxcli.vsan.network
+     nics = net.list()
+     if opts[:vmknic]
+       nics = nics.select{|x| x.VmkNicName == opts[:vmknic]}
+     end
+     keys = {
+       :AgentGroupMulticastAddress => :agentmcaddr,
+       :AgentGroupMulticastPort => :agentmcport,
+       :IPProtocol => nil,
+       :InterfaceUUID => nil,
+       :MasterGroupMulticastAddress => :mastermcaddr,
+       :MasterGroupMulticastPort => :mastermcport,
+       :MulticastTTL => :multicastttl,
+     }
+     puts "Host: #{hostname}"
+     if opts[:dry_run]
+       nics.each do |nic|
+         puts "  Would reapply config of vmknic #{nic.VmkNicName}:"
+         keys.keys.each do |key|
+           puts "    #{key.to_s}: #{nic.send(key)}"
+         end
+       end
+     else
+       nics.each do |nic|
+         puts "  Reapplying config of #{nic.VmkNicName}:"
+         keys.keys.each do |key|
+           puts "    #{key.to_s}: #{nic.send(key)}"
+         end
+         puts "  Unbinding VSAN from vmknic #{nic.VmkNicName} ..."
+         net.ipv4.remove(:interfacename => nic.VmkNicName)
+         puts "  Rebinding VSAN to vmknic #{nic.VmkNicName} ..."
+         params = {
+           :agentmcaddr => nic.AgentGroupMulticastAddress,
+           :agentmcport => nic.AgentGroupMulticastPort,
+           :interfacename => nic.VmkNicName,
+           :mastermcaddr => nic.MasterGroupMulticastAddress,
+           :mastermcport => nic.MasterGroupMulticastPort,
+           :multicastttl => nic.MulticastTTL,
+         }
+         #pp params
+         net.ipv4.add(params)
+       end
+     end
+   end
+ end
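+ # Example invocation (host path is a placeholder):
+ #   vsan.reapply_vsan_vmknic_config ~/hosts/esx-01 --dry-run
+ # The dry run only prints the multicast settings that would be reapplied.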
+
+
+ opts :recover_spbm do
+   summary "SPBM Recovery"
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :show_details, "Show all the details", :type => :boolean
+ end
+
+ def recover_spbm cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   host = cluster_or_host
+   entries = []
+   hostUuidMap = {}
+   startTime = Time.now
+   _run_with_rev(conn, "dev") do
+     vsanIntSys = nil
+     puts "#{Time.now}: Fetching Host info"
+     if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+       cluster = cluster_or_host
+       hosts = cluster.host
+     else
+       hosts = [host]
+     end
+
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem',
+       'datastore'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+
+     puts "#{Time.now}: Fetching Datastore info"
+     datastores = hosts_props.values.map{|x| x['datastore']}.flatten
+     datastores_props = pc.collectMultiple(datastores, 'name', 'summary.type')
+     vsanDsList = datastores_props.select do |ds, props|
+       props['summary.type'] == "vsan"
+     end.keys
+     if vsanDsList.length > 1
+       err "Multiple VSAN datastores found, can't handle that"
+     end
+     vsanDs = vsanDsList[0]
+
+     puts "#{Time.now}: Fetching VM properties"
+     vms = vsanDs.vm
+     vms_props = pc.collectMultiple(vms, 'name', 'config.hardware.device')
+
+     puts "#{Time.now}: Fetching policies used on VSAN from CMMDS"
+     entries = vsanIntSys.query_cmmds([{
+       :type => "POLICY",
+     }], :gzip => true)
+
+     policies = entries.map{|x| x['content']}.uniq
+
+     puts "#{Time.now}: Fetching SPBM profiles"
+     pbm = conn.pbm
+     pm = pbm.serviceContent.profileManager
+     profileIds = pm.PbmQueryProfile(
+       :resourceType => {:resourceType => "STORAGE"},
+       :profileCategory => "REQUIREMENT"
+     )
+     if profileIds.length > 0
+       profiles = pm.PbmRetrieveContent(:profileIds => profileIds)
+     else
+       profiles = []
+     end
+     profilesMap = Hash[profiles.map do |x|
+       ["#{x.profileId.uniqueId}-gen#{x.generationId}", x]
+     end]
+
+     puts "#{Time.now}: Fetching VM <-> SPBM profile association"
+     vms_entities = vms.map do |vm|
+       vm.all_pbmobjref(:vms_props => vms_props)
+     end.flatten.map{|x| x.dynamicProperty = []; x}
+     associatedProfiles = pm.PbmQueryAssociatedProfiles(
+       :entities => vms_entities
+     )
+     associatedEntities = associatedProfiles.map{|x| x.object}.uniq
+     puts "#{Time.now}: Computing which VMs do not have a SPBM Profile ..."
+
+     nonAssociatedEntities = vms_entities - associatedEntities
+
+     vmsMap = Hash[vms.map{|x| [x._ref, x]}]
+     nonAssociatedVms = {}
+     nonAssociatedEntities.map do |entity|
+       vm = vmsMap[entity.key.split(":").first]
+       nonAssociatedVms[vm] ||= []
+       nonAssociatedVms[vm] << [entity.objectType, entity.key]
+     end
+     puts "#{Time.now}: Fetching additional info about some VMs"
+
+     vms_props2 = pc.collectMultiple(vms, 'summary.config.vmPathName')
+
+     puts "#{Time.now}: Got all info, computing after %.2f sec" % [
+       Time.now - startTime
+     ]
+
+     policies.each do |policy|
+       policy['spbmRecoveryCandidate'] = false
+       policy['spbmProfile'] = nil
+       if policy['spbmProfileId']
+         name = "%s-gen%s" % [
+           policy['spbmProfileId'],
+           policy['spbmProfileGenerationNumber'],
+         ]
+         policy['spbmName'] = name
+         policy['spbmProfile'] = profilesMap[name]
+         if policy['spbmProfile']
+           name = policy['spbmProfile'].name
+           policy['spbmName'] = name
+           name = "Existing SPBM Profile:\n#{name}"
+         else
+           policy['spbmRecoveryCandidate'] = true
+           profile = profiles.find do |profile|
+             profile.profileId.uniqueId == policy['spbmProfileId'] &&
+               profile.generationId > policy['spbmProfileGenerationNumber']
+           end
+           # XXX: We should check if there is a profile that matches
+           # one we recovered
+           if profile
+             name = profile.name
+             name = "Old generation of SPBM Profile:\n#{name}"
+           else
+             name = "Unknown SPBM Profile. UUID:\n#{name}"
+           end
+         end
+       else
+         name = "Not managed by SPBM"
+         policy['spbmName'] = name
+       end
+       propCap = policy['proportionalCapacity']
+       if propCap && propCap.is_a?(Array) && propCap.length == 2
+         policy['proportionalCapacity'] = policy['proportionalCapacity'][0]
+       end
+
+       policy['spbmDescr'] = name
+     end
+     entriesMap = Hash[entries.map{|x| [x['uuid'], x]}]
+
+     nonAssociatedEntities = []
+     nonAssociatedVms.each do |vm, entities|
+       if entities.any?{|x| x == ["virtualMachine", vm._ref]}
+         vmxPath = vms_props2[vm]['summary.config.vmPathName']
+         if vmxPath =~ /^\[([^\]]*)\] ([^\/]*)\//
+           nsUuid = $2
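+           # Hypothetical example: for a vmxPath of
+           # "[vsanDatastore] 52a1b2c3-d4e5-f6a7-b8c9-0123456789ab/myvm.vmx",
+           # nsUuid captures the VM namespace directory name
+           # "52a1b2c3-d4e5-f6a7-b8c9-0123456789ab".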
+           entry = entriesMap[nsUuid]
+           if entry && entry['content']['spbmProfileId']
+             # This is a candidate
+             nonAssociatedEntities << {
+               :objUuid => nsUuid,
+               :type => "virtualMachine",
+               :key => vm._ref,
+               :entry => entry,
+               :vm => vm,
+               :label => "VM Home",
+             }
+           end
+         end
+       end
+       devices = vms_props[vm]['config.hardware.device']
+       disks = devices.select{|x| x.is_a?(VIM::VirtualDisk)}
+       disks.each do |disk|
+         key = "#{vm._ref}:#{disk.key}"
+         if entities.any?{|x| x == ["virtualDiskId", key]}
+           objUuid = disk.backing.backingObjectId
+           if objUuid
+             entry = entriesMap[objUuid]
+             if entry && entry['content']['spbmProfileId']
+               # This is a candidate
+               nonAssociatedEntities << {
+                 :objUuid => objUuid,
+                 :type => "virtualDiskId",
+                 :key => key,
+                 :entry => entry,
+                 :vm => vm,
+                 :label => disk.deviceInfo.label,
+               }
+             end
+           end
+         end
+       end
+     end
+     nonAssociatedEntities.each do |entity|
+       policy = policies.find do |policy|
+         match = true
+         ['spbmProfileId', 'spbmProfileGenerationNumber'].each do |k|
+           match = match && policy[k] == entity[:entry]['content'][k]
+         end
+         match
+       end
+       entity[:policy] = policy
+     end
+
+     candidates = policies.select{|p| p['spbmRecoveryCandidate'] == true}
+
+     puts "#{Time.now}: Done computing"
+
+     if !opts[:show_details]
+       puts ""
+       puts "Found %d missing SPBM Profiles." % candidates.length
+       puts "Found %d entities not associated with their SPBM Profiles." % nonAssociatedEntities.length
+       puts ""
+       puts "You have a number of options (can be combined):"
+       puts "1) Run command with --show-details to see a full report about missing"
+       puts "SPBM Profiles and missing VM <-> SPBM Profile associations."
+       puts "2) Run command with --create-missing-profiles to automatically create"
+       puts "all missing SPBM profiles."
+       puts "3) Run command with --create-missing-associations to automatically"
+       puts "create all missing VM <-> SPBM Profile associations."
+     end
+
+     if opts[:show_details]
+       puts "SPBM Profiles used by VSAN:"
+       t = Terminal::Table.new()
+       t << ['SPBM ID', 'policy']
+       policies.each do |policy|
+         t.add_separator
+         t << [
+           policy['spbmDescr'],
+           policy.select{|k,v| k !~ /spbm/}.map{|k,v| "#{k}: #{v}"}.join("\n")
+         ]
+       end
+       puts t
+       puts ""
+
+       if candidates.length > 0
+         puts "Recreate missing SPBM Profiles using the following RVC commands:"
+         candidates.each do |policy|
+           rules = policy.select{|k,v| k !~ /spbm/}
+           s = rules.map{|k,v| "--rule VSAN.#{k}=#{v}"}.join(" ")
+           puts "spbm.profile_create #{s} #{policy['spbmName']}"
+         end
+         puts ""
+       end
+     end
+
+     if opts[:show_details] && nonAssociatedEntities.length > 0
+       puts "The following missing VM <-> SPBM Profile associations were found:"
+       t = Terminal::Table.new()
+       t << ['Entity', 'VM', 'Profile']
+       t.add_separator
+       nonAssociatedEntities.each do |entity|
+         #puts "'%s' of VM '%s' should be associated with profile '%s' but isn't." % [
+         t << [
+           entity[:label],
+           vms_props[entity[:vm]]['name'],
+           entity[:policy]['spbmName'],
+         ]
+
+         # Fix up the associations. Disabled for now until I can check
+         # with Sudarsan
+         # profile = entity[:policy]['spbmProfile']
+         # if profile
+         #   pm.PbmAssociate(
+         #     :entity => PBM::PbmServerObjectRef(
+         #       :objectType => entity[:type],
+         #       :key => entity[:key],
+         #       :serverUuid => conn.serviceContent.about.instanceUuid
+         #     ),
+         #     :profile => profile.profileId
+         #   )
+         # end
+       end
+       puts t
+     end
+   end
+
+ end
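+ # Example invocation (cluster path is a placeholder):
+ #   vsan.recover_spbm ~/computers/mycluster --show-details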