rvc 1.7.0 → 1.8.0

@@ -25,19 +25,38 @@ VNC = ENV['VNC'] || search_path('tightvnc') || search_path('vncviewer') || searc
 opts :view do
   summary "Spawn a VNC client"
   arg :vm, nil, :lookup => VIM::VirtualMachine
+  opt :ws, "Enable VNC websocket proxy"
 end

 rvc_alias :view, :vnc
 rvc_alias :view, :V

-def view vm
+def view vm, opts
   ip = reachable_ip vm.collect('runtime.host')[0]
   extraConfig, = vm.collect('config.extraConfig')
   already_enabled = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.enabled' && x.value.downcase == 'true' }
+
+  if opts[:ws]
+    opt = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.webSocket.port' }
+    if opt.nil?
+      ws_port = unused_vnc_port ip
+      vm.ReconfigVM_Task(:spec => {
+        :extraConfig => [
+          { :key => 'RemoteDisplay.vnc.webSocket.port', :value => ws_port.to_s }
+        ]
+      }).wait_for_completion
+    else
+      ws_port = opt.value
+    end
+  end
   if already_enabled
     puts "VNC already enabled"
-    port = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.port' }.value
-    password = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.password' }.value
+    port = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.port' }
+    if !port
+      err "VNC enabled but no port assigned. Use vnc.off to reset config"
+    end
+    password = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.password' }
+    password = password ? password.value : ""
   else
     port = unused_vnc_port ip
     password = vnc_password
@@ -49,7 +68,11 @@ def view vm
       ]
     }).wait_for_completion
   end
-  vnc_client ip, port, password
+  if opts[:ws]
+    puts "open http://novnc.com?host=#{ip}&port=#{ws_port}&password=#{password}"
+  else
+    vnc_client ip, port, password
+  end
 end


@@ -63,7 +86,8 @@ def off vm
     :extraConfig => [
       { :key => 'RemoteDisplay.vnc.enabled', :value => 'false' },
      { :key => 'RemoteDisplay.vnc.password', :value => '' },
-      { :key => 'RemoteDisplay.vnc.port', :value => '' }
+      { :key => 'RemoteDisplay.vnc.port', :value => '' },
+      { :key => 'RemoteDisplay.vnc.webSocket.port', :value => '' }
     ]
   }).wait_for_completion
 end
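The effect of the new --ws flag, sketched on a hypothetical VM (the VM name, IP, port, and password below are illustrative, not taken from a real setup): instead of spawning a local VNC client, the command configures RemoteDisplay.vnc.webSocket.port on the VM and prints a noVNC URL to open in a browser:

  > vnc.view myVM --ws
  open http://novnc.com?host=10.20.30.40&port=5901&password=s3cret

Without --ws the behavior is unchanged: vnc_client is spawned against the host IP and port.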
@@ -0,0 +1,4105 @@
+# Copyright (c) 2013 VMware, Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+require 'rvc/vim'
+require 'json'
+require 'time'
+VIM::ClusterComputeResource
+
+# Patch in some last-minute additions to the API
+db = VIM.loader.instance_variable_get(:@db)
+db['HostVsanInternalSystem']['methods']["QuerySyncingVsanObjects"] =
+  {"params"=>
+    [{"name"=>"uuids",
+      "is-array"=>true,
+      "is-optional"=>true,
+      "version-id-ref"=>nil,
+      "wsdl_type"=>"xsd:string"}],
+   "result"=>
+    {"is-array"=>false,
+     "is-optional"=>false,
+     "is-task"=>false,
+     "version-id-ref"=>nil,
+     "wsdl_type"=>"xsd:string"}}
+db['HostVsanInternalSystem']['methods']["GetVsanObjExtAttrs"] =
+  {"params"=>
+    [{"name"=>"uuids",
+      "is-array"=>true,
+      "is-optional"=>true,
+      "version-id-ref"=>nil,
+      "wsdl_type"=>"xsd:string"}],
+   "result"=>
+    {"is-array"=>false,
+     "is-optional"=>false,
+     "is-task"=>false,
+     "version-id-ref"=>nil,
+     "wsdl_type"=>"xsd:string"}}
+db = nil
+
+$vsanUseGzipApis = false
+
+def is_uuid str
+  str =~ /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/
+end
+
+opts :enable_vsan_on_cluster do
+  summary "Enable VSAN on a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+  opt :disable_storage_auto_claim, "Disable auto disk-claim", :type => :boolean
+end
+
+def enable_vsan_on_cluster cluster, opts
+  conn = cluster._connection
+  _run_with_rev(conn, "dev") do
+    spec = VIM::ClusterConfigSpecEx(
+      :vsanConfig => {
+        :enabled => true,
+        :defaultConfig => {
+          :autoClaimStorage => (!(opts[:disable_storage_auto_claim] || false)),
+        }
+      }
+    )
+    task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
+    progress([task])
+    childtasks = task.child_tasks
+    if childtasks && childtasks.length > 0
+      progress(childtasks)
+    end
+    childtasks = task.child_tasks
+    if childtasks && childtasks.length > 0
+      progress(childtasks)
+    end
+  end
+end
+
+opts :disable_vsan_on_cluster do
+  summary "Disable VSAN on a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+end
+
+def disable_vsan_on_cluster cluster
+  conn = cluster._connection
+  _run_with_rev(conn, "dev") do
+    spec = VIM::ClusterConfigSpecEx(
+      :vsanConfig => {
+        :enabled => false,
+      }
+    )
+    task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
+    progress([task])
+    childtasks = task.child_tasks
+    if childtasks && childtasks.length > 0
+      progress(childtasks)
+    end
+  end
+end
+
+VIM::HostSystem
+class VIM::HostSystem
+  def filtered_disks_for_vsan opts = {}
+    vsan = opts[:vsanSystem] || self.configManager.vsanSystem
+    stateFilter = opts[:state_filter] || /^eligible$/
+    disks = vsan.QueryDisksForVsan()
+
+    disks = disks.select do |disk|
+      disk.state =~ stateFilter
+    end
+
+    if opts[:filter_ssd_by_model]
+      disks = disks.select do |disk|
+        model = [
+          disk.disk.vendor,
+          disk.disk.model
+        ].compact.map{|x| x.strip}.join(" ")
+        model_match = (model =~ opts[:filter_ssd_by_model])
+        !disk.disk.ssd || model_match
+      end
+    end
+
+    disks = disks.map{|x| x.disk}
+
+    disks
+  end
+
+  def consume_disks_for_vsan opts = {}
+    vsan = opts[:vsanSystem] || self.configManager.vsanSystem
+    disks = filtered_disks_for_vsan(opts.merge(
+      :state_filter => /^eligible$/,
+      :vsanSystem => vsan
+    ))
+    if disks.length > 0
+      vsan.AddDisks_Task(:disk => disks)
+    end
+  end
+end
+
+opts :host_consume_disks do
+  summary "Consumes all eligible disks on a host"
+  arg :host_or_cluster, nil, :lookup => [VIM::ComputeResource, VIM::HostSystem], :multi => true
+  opt :filter_ssd_by_model, "Regex to apply as ssd model filter", :type => :string
+end
+
+def host_consume_disks hosts_or_clusters, opts
+  conn = hosts_or_clusters.first._connection
+  hosts = []
+  hosts_or_clusters.each do |host_or_cluster|
+    if host_or_cluster.is_a?(VIM::HostSystem)
+      hosts << host_or_cluster
+    else
+      hosts += host_or_cluster.host
+    end
+  end
+  if opts[:filter_ssd_by_model]
+    opts[:filter_ssd_by_model] = /#{opts[:filter_ssd_by_model]}/
+  end
+  tasks = []
+  results = {}
+  _run_with_rev(conn, "dev") do
+    tasks = hosts.map do |host|
+      host.consume_disks_for_vsan(opts)
+    end.compact
+    if tasks.length > 0
+      results = progress(tasks)
+      pp results.values.flatten.map{|x| x.error}.compact
+    else
+      puts "No disks were consumed."
+    end
+    $claimResults = results
+  end
+  $disksCache = {}
+end
+
+opts :host_wipe_vsan_disks do
+  summary "Wipes content of all VSAN disks on a host"
+  arg :host, nil, :lookup => VIM::HostSystem, :multi => true
+  opt :force, "Apply force", :type => :boolean
+end
+
+def host_wipe_vsan_disks hosts, opts
+  conn = hosts.first._connection
+  tasks = []
+  _run_with_rev(conn, "dev") do
+    tasks = hosts.map do |host|
+      hostname = host.name
+      disks = host.filtered_disks_for_vsan(:state_filter => /^inUse$/)
+      if disks.length == 0
+        next
+      end
+      if !opts[:force]
+        # Don't actually wipe, but show a warning.
+        disks.each do |disk|
+          model = [
+            disk.vendor,
+            disk.model
+          ].compact.map{|x| x.strip}.join(" ")
+          puts "Would wipe disk #{disk.displayName} (#{model}, ssd = #{disk.ssd})"
+        end
+      end
+
+      if opts[:force]
+        #disks = disks.select{|x| x.ssd}
+        #host.configManager.vsanSystem.RemoveDisk_Task(:disk => disks)
+        # See PR 1077658
+        vsan = host.configManager.vsanSystem
+        vsan.RemoveDiskMapping_Task(:mapping => vsan.config.storageInfo.diskMapping)
+      end
+    end.compact
+    if tasks.length > 0
+      results = progress(tasks)
+      pp results.values.flatten.map{|x| x.error}.compact
+      $wipeResults = results
+    end
+  end
+  if !opts[:force]
+    puts ""
+    puts "NO ACTION WAS TAKEN. Use --force to actually wipe."
+    puts "CAUTION: Wiping disks means all user data will be destroyed!"
+  end
+  $disksCache = {}
+end
+
+opts :host_info do
+  summary "Print VSAN info about a host"
+  arg :host, nil, :lookup => VIM::HostSystem
+end
+
+def host_info host
+  conn = host._connection
+  _run_with_rev(conn, "dev") do
+    _host_info host
+  end
+end
+
+opts :cluster_info do
+  summary "Print VSAN info about a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+end
+
+def cluster_info cluster
+  conn = cluster._connection
+  pc = conn.propertyCollector
+
+  hosts = cluster.host
+
+  hosts_props = pc.collectMultiple(hosts, 'name', 'runtime.connectionState')
+  connected_hosts = hosts_props.select do |k,v|
+    v['runtime.connectionState'] == 'connected'
+  end.keys
+  hosts = connected_hosts
+
+  _run_with_rev(conn, "dev") do
+    hosts.each do |host|
+      begin
+        puts "Host: #{hosts_props[host]['name']}"
+        _host_info host, "  "
+      rescue Exception => ex
+        puts "#{Time.now}: Got exception: #{ex.class}: #{ex.message}"
+      end
+      puts ""
+    end
+  end
+end
+
+opts :disks_info do
+  summary "Print physical disk info about a host"
+  arg :host, nil, :lookup => VIM::HostSystem, :multi => true
+end
+
+def disks_info hosts
+  conn = hosts.first._connection
+  pc = conn.propertyCollector
+  _run_with_rev(conn, "dev") do
+    hosts.each do |host|
+      if hosts.length > 0
+        puts "Disks on host #{host.name}:"
+      end
+
+      dsList = host.datastore
+      dsListProps = pc.collectMultiple(dsList, 'summary', 'name', 'info')
+      vmfsDsList = dsListProps.select do |ds, props|
+        props['summary'].type == "VMFS"
+      end.keys
+
+      vsan = host.configManager.vsanSystem
+      disks = vsan.QueryDisksForVsan()
+      partitions = host.esxcli.storage.core.device.partition.list
+
+      t = Terminal::Table.new()
+      t << ['DisplayName', 'isSSD', 'Size', 'State']
+      needSep = true
+      disks.each do |disk|
+        capacity = disk.disk.capacity
+        size = capacity.block * capacity.blockSize
+        sizeStr = "#{size / 1024**3} GB"
+        state = disk.state
+        # if needSep
+        t.add_separator
+        needSep = false
+        # end
+        if state != 'eligible' && disk.error
+          state += " (#{disk.error.localizedMessage})"
+          if disk.error.fault.is_a?(VIM::DiskHasPartitions)
+            state += "\n"
+            state += "\n"
+            state += "Partition table:\n"
+
+            partitions.select do |x|
+              x.Device == disk.disk.canonicalName && x.Type != 0
+            end.each do |x|
+              partSize = x.Size.to_f / 1024**3
+              types = {
+                0xfb => 'vmfs',
+                0xfc => 'coredump',
+                0xfa => 'vsan',
+                0x0 => 'unused',
+                0x6 => 'vfat',
+              }
+              type = types[x.Type] || x.Type
+              state += "#{x.Partition}: %.2f GB, type = #{type}" % partSize
+
+              if type == "vmfs"
+                vmfsStr = vmfsDsList.select do |vmfsDs|
+                  props = dsListProps[vmfsDs]
+                  props['info'].vmfs.extent.any? do |ext|
+                    ext.diskName == x.Device && x.Partition == ext.partition
+                  end
+                end.map do |vmfsDs|
+                  "'#{dsListProps[vmfsDs]['name']}'"
+                end.join(", ")
+                if !vmfsStr.empty?
+                  state += " (#{vmfsStr})"
+                end
+              end
+
+              state += "\n"
+            end
+            needSep = true
+          end
+        end
+        t << [
+          [
+            disk.disk.displayName,
+            [
+              disk.disk.vendor,
+              disk.disk.model
+            ].compact.map{|x| x.strip}.join(" ")
+          ].join("\n"),
+          disk.disk.ssd ? "SSD" : "MD",
+          sizeStr,
+          state
+        ]
+      end
+      puts t
+      if hosts.length > 0
+        puts ""
+      end
+    end
+  end
+end
+
+def _host_info host, prefix = ''
+  configManager = host.configManager
+  netSys = configManager.networkSystem
+  vsan = configManager.vsanSystem
+  config = vsan.config
+  enabled = config.enabled
+  line = lambda{|x| puts "#{prefix}#{x}" }
+  line.call "VSAN enabled: %s" % (enabled ? "yes" : "no")
+  if !enabled
+    return
+  end
+  status = vsan.QueryHostStatus()
+  line.call "Cluster info:"
+  line.call "  Cluster role: #{status.nodeState.state}"
+  line.call "  Cluster UUID: #{config.clusterInfo.uuid}"
+  line.call "  Node UUID: #{config.clusterInfo.nodeUuid}"
+  line.call "  Member UUIDs: #{status.memberUuid} (#{status.memberUuid.length})"
+  line.call "Storage info:"
+  line.call "  Auto claim: %s" % (config.storageInfo.autoClaimStorage ? "yes" : "no")
+  line.call "  Disk Mappings:"
+  if config.storageInfo.diskMapping.length == 0
+    line.call "    None"
+  end
+  config.storageInfo.diskMapping.each do |mapping|
+    capacity = mapping.ssd.capacity
+    size = capacity.block * capacity.blockSize
+    line.call "    SSD: #{mapping.ssd.displayName} - #{size / 1024**3} GB"
+    mapping.nonSsd.map do |md|
+      capacity = md.capacity
+      size = capacity.block * capacity.blockSize
+      line.call "    MD: #{md.displayName} - #{size / 1024**3} GB"
+    end
+  end
+  line.call "NetworkInfo:"
+  if config.networkInfo.port.length == 0
+    line.call "  Not configured"
+  end
+  vmknics, = netSys.collect 'networkConfig.vnic'
+  config.networkInfo.port.each do |port|
+    dev = port.device
+    vmknic = vmknics.find{|x| x.device == dev}
+    ip = "IP unknown"
+    if vmknic
+      ip = vmknic.spec.ip.ipAddress
+    end
+    line.call "  Adapter: #{dev} (#{ip})"
+  end
+end
+
+def _run_with_rev conn, rev
+  old_rev = conn.rev
+  begin
+    conn.rev = rev
+    yield
+  ensure
+    conn.rev = old_rev
+  end
+end
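# For illustration (not part of the shipped file): every command in this
# module wraps its API calls in _run_with_rev so they run against the
# unreleased "dev" vSphere API revision, and the connection's previous
# revision is restored even if the block raises. A minimal sketch, assuming
# `conn` is an RbVmomi connection and `host` a VIM::HostSystem:
_run_with_rev(conn, "dev") do
  host.configManager.vsanSystem.QueryHostStatus()
end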
+
+
+opts :cluster_set_default_policy do
+  summary "Set default policy on a cluster"
+  arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+  arg :policy, nil, :type => :string
+end
+
+def cluster_set_default_policy cluster, policy
+  hosts = cluster.host
+  conn = cluster._connection
+  pc = conn.propertyCollector
+  _run_with_rev(conn, "dev") do
+    vsan, = hosts.first.collect 'configManager.vsanSystem'
+    cluster_uuid, = vsan.collect 'config.clusterInfo.uuid'
+
+    hosts.each do |host|
+      policy_node = host.esxcli.vsan.policy
+      ['cluster', 'vdisk', 'vmnamespace', 'vmswap'].each do |policy_class|
+        policy_node.setdefault(
+          :clusteruuid => cluster_uuid,
+          :policy => policy,
+          :policyclass => policy_class,
+        )
+      end
+    end
+  end
+end
+
+def _components_in_dom_config dom_config
+  out = []
+  if ['Component', 'Witness'].member?(dom_config['type'])
+    out << dom_config
+  else
+    dom_config.select{|k,v| k =~ /child-\d+/}.each do |k, v|
+      out += _components_in_dom_config v
+    end
+  end
+  out
+end
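# For illustration (not part of the shipped file): the helper recursively
# walks a nested DOM config hash and flattens it to its leaf components.
# The hash below is hypothetical; the key names match what the code above
# inspects ('type', 'child-N', 'componentUuid').
dom_config = {
  'type' => 'RAID_1',
  'child-1' => {'type' => 'Component', 'componentUuid' => 'c0ffee00-0000-0000-0000-000000000001'},
  'child-2' => {'type' => 'Component', 'componentUuid' => 'c0ffee00-0000-0000-0000-000000000002'},
}
_components_in_dom_config(dom_config)
# => the two 'Component' hashes, i.e. both mirror legs of the RAID_1 object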
+
+def _normalize_uuid uuid
+  uuid = uuid.gsub("-", "")
+  uuid = "%s-%s-%s-%s-%s" % [
+    uuid[0..7], uuid[8..11], uuid[12..15],
+    uuid[16..19], uuid[20..31]
+  ]
+  uuid
+end
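# For illustration (not part of the shipped file): the input may carry
# dashes in any position or none at all; the output is always the
# canonical 8-4-4-4-12 form. The value is made up for the example.
_normalize_uuid("527cd58cd8f8e49fa543c5a8d54ad422")
# => "527cd58c-d8f8-e49f-a543-c5a8d54ad422"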
+
+def _print_dom_config_tree_int dom_config, dom_components_str, indent = 0
+  pre = "  " * indent
+  type = dom_config['type']
+  children = dom_config.select{|k,v| k =~ /child-\d+/}.values
+  if ['RAID_0', 'RAID_1', 'Concatenation'].member?(type)
+    puts "#{pre}#{type}"
+    children.each do |child|
+      _print_dom_config_tree_int child, dom_components_str, indent + 1
+    end
+  elsif ['Configuration'].member?(type)
+    # puts "#{pre}#{type}"
+    children.each do |child|
+      _print_dom_config_tree_int child, dom_components_str, indent + 1
+    end
+  elsif ['Witness', 'Component'].member?(type)
+    comp_uuid = dom_config['componentUuid']
+    info = dom_components_str[comp_uuid]
+    line = "#{pre}#{type}: #{info[0]}"
+    if info[2].length > 0
+      puts "#{line} (#{info[1]},"
+      puts "#{' ' * line.length} #{info[2]})"
+    else
+      puts "#{line} (#{info[1]})"
+    end
+  end
+end
+
+def _print_dom_config_tree dom_obj_uuid, obj_infos, indent = 0, opts = {}
+  pre = "  " * indent
+  dom_obj_infos = obj_infos['dom_objects'][dom_obj_uuid]
+  if !dom_obj_infos
+    puts "#{pre}Couldn't find info about DOM object '#{dom_obj_uuid}'"
+    return
+  end
+  dom_obj = dom_obj_infos['config']
+  policy = dom_obj_infos['policy']
+
+  dom_components = _components_in_dom_config(dom_obj['content'])
+  csn = nil
+  begin
+    csn = dom_obj['content']['attributes']['CSN']
+  rescue
+  end
+
+  dom_components_str = Hash[dom_components.map do |dom_comp|
+    attr = dom_comp['attributes']
+    state = attr['componentState']
+    comp_uuid = dom_comp['componentUuid']
+    state_names = {
+      '0' => 'FIRST',
+      '1' => 'NONE',
+      '2' => 'NEED_CONFIG',
+      '3' => 'INITIALIZE',
+      '4' => 'INITIALIZED',
+      '5' => 'ACTIVE',
+      '6' => 'ABSENT',
+      '7' => 'STALE',
+      '8' => 'RESYNCHING',
+      '9' => 'DEGRADED',
+      '10' => 'RECONFIGURING',
+      '11' => 'CLEANUP',
+      '12' => 'TRANSIENT',
+      '13' => 'LAST',
+    }
+    state_name = state.to_s
+    if state_names[state.to_s]
+      state_name = "#{state_names[state.to_s]} (#{state})"
+    end
+    props = {
+      'state' => state_name,
+    }
+
+    if state.to_s.to_i == 6 && attr['staleCsn']
+      if attr['staleCsn'] != csn
+        props['csn'] = "STALE (#{attr['staleCsn']}!=#{csn})"
+      end
+    end
+
+    comp_policy = {}
+    ['readOPS', 'writeOPS'].select{|x| attr[x]}.each do |x|
+      comp_policy[x] = attr[x]
+    end
+    if attr['readCacheReservation'] && attr['readCacheHitRate']
+      comp_policy['rc size/hitrate'] = "%.2fGB/%d%%" % [
+        attr['readCacheReservation'].to_f / 1024**3,
+        attr['readCacheHitRate'],
+      ]
+    end
+    if attr['bytesToSync']
+      comp_policy['dataToSync'] = "%.2f GB" % [
+        attr['bytesToSync'].to_f / 1024**3
+      ]
+    end
+
+    lsom_object = obj_infos['lsom_objects'][comp_uuid]
+    if lsom_object
+      host = obj_infos['host_vsan_uuids'][lsom_object['owner']]
+      if host
+        hostName = obj_infos['host_props'][host]['name']
+      else
+        hostName = "unknown"
+      end
+      md_uuid = dom_comp['diskUuid']
+      md = obj_infos['vsan_disk_uuids'][md_uuid]
+      ssd_uuid = obj_infos['disk_objects'][md_uuid]['content']['ssdUuid']
+      #pp ssd_uuid
+      ssd = obj_infos['vsan_disk_uuids'][ssd_uuid]
+      #pp ssd
+      props.merge!({
+        'host' => hostName,
+        'md' => md ? md.DisplayName : "unknown",
+        'ssd' => ssd ? ssd.DisplayName : "unknown",
+      })
+      if opts[:highlight_disk] && md_uuid == opts[:highlight_disk]
+        props['md'] = "**#{props['md']}**"
+      elsif opts[:highlight_disk] && ssd_uuid == opts[:highlight_disk]
+        props['ssd'] = "**#{props['ssd']}**"
+      end
+    else
+      props.merge!({
+        'host' => "LSOM object not found"
+      })
+    end
+    propsStr = props.map{|k,v| "#{k}: #{v}"}.join(", ")
+    comp_policy_str = comp_policy.map{|k,v| "#{k}: #{v}"}.join(", ")
+    [comp_uuid, [comp_uuid, propsStr, comp_policy_str]]
+  end]
+
+  if policy
+    policy = policy.map{|k,v| "#{k} = #{v}"}.join(", ")
+  else
+    policy = "No POLICY entry found in CMMDS"
+  end
+  owner = obj_infos['host_vsan_uuids'][dom_obj['owner']]
+  if owner
+    owner = obj_infos['host_props'][owner]['name']
+  else
+    owner = "unknown"
+  end
+
+  puts "#{pre}DOM Object: #{dom_obj['uuid']} (owner: #{owner}, policy: #{policy})"
+  if opts[:context]
+    puts "#{pre}  Context: #{opts[:context]}"
+  end
+  _print_dom_config_tree_int dom_obj['content'], dom_components_str, indent
+end
+
+# hosts is a hash: host => hostname
+def _vsan_host_disks_info hosts
+  hosts.each do |k,v|
+    if !v
+      hosts[k] = k.name
+    end
+  end
+
+  conn = hosts.keys.first._connection
+  vsanDiskUuids = {}
+  $disksCache ||= {}
+  if !hosts.keys.all?{|x| $disksCache[x]}
+    lock = Mutex.new
+    hosts.map do |host, hostname|
+      Thread.new do
+        if !$disksCache[host]
+          c1 = conn.spawn_additional_connection
+          host2 = host.dup_on_conn(c1)
+          $disksCache[host] = []
+          lock.synchronize do
+            puts "#{Time.now}: Fetching VSAN disk info from #{hostname} (may take a moment) ..."
+          end
+          begin
+            timeout(45) do
+              list = host2.esxcli.vsan.storage.list
+              list.each{|x| x._set_property :host, host}
+              $disksCache[host] = list
+            end
+          rescue Exception => ex
+            lock.synchronize do
+              puts "#{Time.now}: Failed to gather from #{hostname}: #{ex.class}: #{ex.message}"
+            end
+          end
+        end
+      end
+    end.each{|t| t.join}
+    puts "#{Time.now}: Done fetching VSAN disk info"
+  end
+
+  hosts.map do |host, hostname|
+    disks = $disksCache[host]
+    disks.each do |disk|
+      vsanDiskUuids[disk.VSANUUID] = disk
+    end
+  end
+
+  vsanDiskUuids
+end
+
+def _vsan_cluster_disks_info cluster, opts = {}
+  pc = cluster._connection.propertyCollector
+  if cluster.is_a?(VIM::HostSystem)
+    hosts = [cluster]
+  else
+    hosts = cluster.host
+  end
+  if opts[:hosts_props]
+    hosts_props = opts[:hosts_props]
+  else
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem',
+    )
+  end
+  hosts_props = hosts_props.select do |k,v|
+    v['runtime.connectionState'] == 'connected'
+  end
+  vsan_systems = hosts_props.map{|h,p| p['configManager.vsanSystem']}
+  vsan_props = pc.collectMultiple(vsan_systems, 'config.clusterInfo')
+  host_vsan_uuids = Hash[hosts_props.map do |host, props|
+    vsan_system = props['configManager.vsanSystem']
+    vsan_info = vsan_props[vsan_system]['config.clusterInfo']
+    [vsan_info.nodeUuid, host]
+  end]
+  vsan_disk_uuids = {}
+  vsan_disk_uuids.merge!(
+    _vsan_host_disks_info(Hash[hosts_props.map{|h, p| [h, p['name']]}])
+  )
+
+  [host_vsan_uuids, hosts_props, vsan_disk_uuids]
+end
+
+opts :object_info do
+  summary "Fetch information about a VSAN object"
+  arg :cluster, "Cluster on which to fetch the object info", :lookup => [VIM::HostSystem, VIM::ClusterComputeResource]
+  arg :obj_uuid, nil, :type => :string, :multi => true
+end
+
+def object_info cluster, obj_uuids, opts = {}
+  opts[:cluster] = cluster
+  objs = _object_info obj_uuids, opts
+  indent = 0
+  obj_uuids.each do |obj_uuid|
+    _print_dom_config_tree(obj_uuid, objs, indent)
+    puts ""
+  end
+end
+
+opts :disk_object_info do
+  summary "Fetch information about all VSAN objects on a given physical disk"
+  arg :cluster_or_host, "Cluster or host on which to fetch the object info", :lookup => VIM::ClusterComputeResource
+  arg :disk_uuid, nil, :type => :string, :multi => true
+end
+
+def disk_object_info cluster_or_host, disk_uuids, opts = {}
+  conn = cluster_or_host._connection
+  pc = conn.propertyCollector
+
+  if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+    cluster = cluster_or_host
+    hosts = cluster.host
+  else
+    hosts = [cluster_or_host]
+  end
+
+  _run_with_rev(conn, "dev") do
+    # XXX: This doesn't yet work when no cluster is given
+    host_vsan_uuids, hosts_props, vsan_disk_uuids = _vsan_cluster_disks_info(cluster)
+
+    input_disk_uuids = []
+    m_disk_uuids = []
+    disk_uuids.each do |disk_uuid|
+      disk = vsan_disk_uuids.find {|k,v| v.DisplayName == disk_uuid}
+      if disk
+        input_disk_uuids << disk
+        if disk[1].IsSSD
+          disks = vsan_disk_uuids.find_all do |k,v|
+            v.VSANDiskGroupName == disk_uuid unless v.IsSSD
+          end
+          m_disk_uuids += disks
+        else
+          m_disk_uuids << disk
+        end
+      else
+        input_disk_uuids << [disk_uuid]
+        m_disk_uuids << [disk_uuid]
+      end
+    end
+    input_disk_uuids.map! {|x| x[0]}
+    m_disk_uuids.map! {|x| x[0]}
+
+    connected_hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    hosts = connected_hosts
+
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    dslist = hosts.first.datastore
+    dslist_props = pc.collectMultiple(dslist, 'name', 'summary.type')
+    vsandslist = dslist_props.select{|k, v| v['summary.type'] == 'vsan'}.keys
+    vsands = vsandslist.first
+    if !vsands
+      err "Couldn't find VSAN datastore"
+    end
+    vms = vsands.vm
+    vms_props = pc.collectMultiple(vms,
+      'name', 'config.hardware.device',
+      'summary.config'
+    )
+    objects = {}
+    vms.each do |vm|
+      disks = vms_props[vm]['disks'] =
+        vms_props[vm]['config.hardware.device'].select{|x| x.is_a?(VIM::VirtualDisk)}
+      namespaceUuid = vms_props[vm]['namespaceUuid'] =
+        vms_props[vm]['summary.config'].vmPathName.split("] ")[1].split("/")[0]
+
+      objects[namespaceUuid] = [vm, :namespace]
+      disks.each do |disk|
+        backing = disk.backing
+        while backing
+          objects[backing.backingObjectId] = [vm, backing.fileName]
+          backing = backing.parent
+        end
+      end
+    end
+
+    vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
+    json = vsanIntSys.QueryObjectsOnPhysicalVsanDisk(:disks => m_disk_uuids)
+    if json == "BAD"
+      err "Server rejected VSAN object-on-disk query"
+    end
+    result = nil
+    begin
+      result = JSON.load(json)
+    rescue
+      err "Server failed to query VSAN objects-on-disk: #{json}"
+    end
+
+    result.merge!({
+      'host_vsan_uuids' => host_vsan_uuids,
+      'host_props' => hosts_props,
+      'vsan_disk_uuids' => vsan_disk_uuids,
+    })
+
+    input_disk_uuids.each do |disk_uuid|
+      dom_obj_uuids = []
+      disk_info = vsan_disk_uuids[disk_uuid]
+      if disk_info
+        name = "#{disk_info.DisplayName} (#{disk_uuid})"
+        if disk_info.IsSSD
+          m_disks = vsan_disk_uuids.find_all do |k, v|
+            v.VSANDiskGroupUUID == disk_uuid unless v.IsSSD
+          end
+          m_disks ? m_disks.map!{|x| x[0]} : disk_uuid
+          m_disks.each {|m_disk| dom_obj_uuids += result['objects_on_disks'][m_disk]}
+        else
+          dom_obj_uuids = result['objects_on_disks'][disk_uuid]
+        end
+      else
+        name = disk_uuid
+      end
+      puts "Physical disk #{name}:"
+      indent = 1
+      dom_obj_uuids.each do |obj_uuid|
+        object = objects[obj_uuid]
+        if object && object[1] == :namespace
+          vm_name = vms_props[object[0]]['name']
+          context = "Part of VM #{vm_name}: Namespace directory"
+        elsif object
+          vm_name = vms_props[object[0]]['name']
+          context = "Part of VM #{vm_name}: Disk: #{object[1]}"
+        else
+          context = "Can't attribute object to any VM, may be swap?"
+        end
+        _print_dom_config_tree(
+          obj_uuid, result, indent,
+          :highlight_disk => disk_uuid,
+          :context => context
+        )
+      end
+      puts ""
+    end
+  end
+end
+
+
+opts :cmmds_find do
+  summary "CMMDS Find"
+  arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+  opt :type, "CMMDS type, e.g. DOM_OBJECT, LSOM_OBJECT, POLICY, DISK etc.", :type => :string, :short => 't'
+  opt :uuid, "UUID of the entry.", :type => :string, :short => 'u'
+  opt :owner, "UUID of the owning node.", :type => :string, :short => 'o'
+end
+
+def cmmds_find cluster_or_host, opts
+  conn = cluster_or_host._connection
+  pc = conn.propertyCollector
+  host = cluster_or_host
+  entries = []
+  hostUuidMap = {}
+  _run_with_rev(conn, "dev") do
+    vsanIntSys = nil
+    if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+      cluster = cluster_or_host
+      hosts = cluster.host
+    else
+      hosts = [host]
+    end
+
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+    connected_hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    host = connected_hosts.first
+    if !host
+      err "Couldn't find any connected hosts"
+    end
+    vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+    vsanSysList = Hash[hosts_props.map do |host, props|
+      [props['name'], props['configManager.vsanSystem']]
+    end]
+    clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                      'config.clusterInfo')
+    hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+      [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+    end]
+    entries = vsanIntSys.query_cmmds([{
+      :owner => opts[:owner],
+      :uuid => opts[:uuid],
+      :type => opts[:type],
+    }], :gzip => true)
+  end
+
+  t = Terminal::Table.new()
+  t << ['#', 'Type', 'UUID', 'Owner', 'Health', 'Content']
+  t.add_separator
+  entries.each_with_index do |entry, i|
+    t << [
+      i + 1,
+      entry['type'],
+      entry['uuid'],
+      hostUuidMap[entry['owner']] || entry['owner'],
+      entry['health'],
+      PP.pp(entry['content'], ''),
+    ]
+  end
+
+  puts t
+end
+
+def _get_vm_obj_uuids vm, vmsProps
+  obj_uuids = {}
+  disks = vmsProps[vm]['disks'] =
+    vmsProps[vm]['config.hardware.device'].select{|x| x.is_a?(VIM::VirtualDisk)}
+  pathName = vmsProps[vm]['summary.config'].vmPathName
+  namespaceUuid = vmsProps[vm]['namespaceUuid'] =
+    pathName.split("] ")[1].split("/")[0]
+  obj_uuids[namespaceUuid] = pathName
+  disks.each do |disk|
+    backing = disk.backing
+    while backing
+      obj_uuids[backing.backingObjectId] = backing.fileName
+      backing = backing.parent
+    end
+  end
+  obj_uuids
+end
+
+def convert_uuids uuids
+  nUuids = {}
+  uuids.each do |uuid|
+    begin
+      oUuid = uuid.split(' ').join()
+      nUuids[oUuid[0..7] + '-' + oUuid[8..11] + '-' +
+             oUuid[12..20] + '-' + oUuid[21..-1]] = true
+    rescue Exception => ex
+      puts "Ignoring malformed uuid #{uuid}: #{ex.class}: #{ex.message}"
+    end
+  end
+
+  return nUuids
+end
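# For illustration (not part of the shipped file): "esxcli vm process list"
# reports BIOS UUIDs as space-separated hex bytes with one embedded dash.
# After the spaces are stripped, that dash lands at index 16, which is what
# the [12..20] slice above relies on. The value is made up for the example.
convert_uuids(["56 4d bd a3 0e 68 58 15-53 ca 4f 21 45 a2 3e a1"])
# => {"564dbda3-0e68-5815-53ca-4f2145a23ea1"=>true}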
+
+# It is possible for the management stack (hostd and vc) to lose the handle of
+# a VM which is powered on (has a running vmx instance). No further operations
+# can be performed on the VM because the running vmx holds locks on the VM.
+# This API is intended to find such VMs. We look for VMs whose power state
+# is not poweredOn (poweredOff, unknown, etc.) for which there is a running vmx
+# instance on any host in the cluster.
+
+def find_inconsistent_vms cluster_or_host
+  if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+    hosts = cluster_or_host.host
+  else
+    hosts = [cluster_or_host]
+  end
+
+  # Find all non-poweredon vms.
+  conn = hosts.first._connection
+  pc = conn.propertyCollector
+  vms = pc.collectMultiple(hosts, 'vm').values.map{|x| x['vm']}.flatten
+  vmProps = pc.collectMultiple(vms, 'name', 'runtime.powerState',
+                               'summary.config.uuid')
+  notOnVMs = vmProps.select{|vm, p| p['runtime.powerState'] !=
+                                    'poweredOn'}.keys
+
+  # Get list of all running vms on all hosts in parallel.
+  threads = []
+  processList = {}
+  hosts.each do |host|
+    threads << Thread.new do
+      begin
+        processList[host] = host.esxcli.vm.process.list
+      rescue Exception => ex
+        puts "Error getting vm process list on #{host.name}: " \
+             "#{ex.class}: #{ex.message}"
+      end
+    end
+  end
+  threads.each{|t| t.join}
+  uuids = convert_uuids(processList.values.flatten.map{|x| x.UUID})
+
+  inconsistentVMs = notOnVMs.select{|vm|
+    uuids.has_key?(vmProps[vm]['summary.config.uuid'])}
+  if not inconsistentVMs.empty?
+    puts "Found VMs for which VC/hostd/vmx are out of sync:"
+    inconsistentVMs.each do |vm|
+      puts "#{vmProps[vm]['name']}"
+    end
+  else
+    puts "Did not find VMs for which VC/hostd/vmx are out of sync"
+  end
+
+  return inconsistentVMs
+end
+
+def fix_inconsistent_vms vms
+  begin
+    tasks = []
+    vms.each do |vm|
+      begin
+        path = vm.summary.config.vmPathName
+        rp = vm.resourcePool
+        folder = vm.parent
+        name = vm.name
+        host = vm.summary.runtime.host
+        puts("Unregistering VM #{name}")
+        vm.UnregisterVM()
+        puts("Registering VM #{name}")
+        tasks << folder.RegisterVM_Task(:path => path,
+                                        :name => name,
+                                        :asTemplate => false,
+                                        :pool => rp,
+                                        :host => host)
+      rescue Exception => ex
+        puts "Skipping VM #{name} due to exception: " \
+             "#{ex.class}: #{ex.message}"
+      end
+    end
+    progress(tasks)
+  end
+end
+
+opts :fix_renamed_vms do
+  summary "This command can be used to rename some VMs which get renamed " \
+          "by the VC in case of storage inaccessibility. It is " \
+          "possible for some VMs to get renamed to the vmx file path, " \
+          "e.g. \"/vmfs/volumes/vsanDatastore/foo/foo.vmx\". This command " \
+          "will rename this VM to \"foo\". This is the best we can do. " \
+          "This VM may have been named something else but we have no way " \
+          "to know. In this best-effort command, we simply rename it to " \
+          "the name of its config file (without the full path and .vmx " \
+          "extension, of course!)."
+  arg :vms, nil, :lookup => VIM::VirtualMachine, :multi => true
+end
+
+def fix_renamed_vms vms
+  begin
+    conn = vms.first._connection
+    pc = conn.propertyCollector
+    vmProps = pc.collectMultiple(vms, 'name', 'summary.config.vmPathName')
+
+    rename = {}
+    puts "Continuing this command will rename the following VMs:"
+    begin
+      vmProps.each do |k,v|
+        name = v['name']
+        cfgPath = v['summary.config.vmPathName']
+        if /.*vmfs.*volumes.*/.match(name)
+          m = /.+\/(.+)\.vmx/.match(cfgPath)
+          if name != m[1]
+            # Save it in a hash so we don't have to do it again if
+            # the user chooses Y.
+            rename[k] = m[1]
+            puts "#{name} -> #{m[1]}"
+          end
+        end
+      end
+    rescue Exception => ex
+      # Swallow the exception. No need to stop other vms.
+      puts "Skipping VM due to exception: #{ex.class}: #{ex.message}"
+    end
+
+    if rename.length == 0
+      puts "Nothing to do"
+      return
+    end
+
+    puts "Do you want to continue [y/N]?"
+    opt = $stdin.gets.chomp
+    if opt == 'y' || opt == 'Y'
+      puts "Renaming..."
+      tasks = rename.keys.map do |vm|
+        vm.Rename_Task(:newName => rename[vm])
+      end
+      progress(tasks)
+    end
+  end
+end
+
+opts :vm_object_info do
+  summary "Fetch VSAN object information about a VM"
+  arg :vms, nil, :lookup => VIM::VirtualMachine, :multi => true
+  opt :cluster, "Cluster on which to fetch the object info", :lookup => VIM::ClusterComputeResource
+  opt :perspective_from_host, "Host to query object info from", :lookup => VIM::HostSystem
+end
+
+def vm_object_info vms, opts
+  begin
+    conn = vms.first._connection
+    pc = conn.propertyCollector
+    firstVm = vms.first
+    host = firstVm.runtime.host
+    if !host
+      err "VM #{firstVm.name} doesn't have an assigned host (yet?)"
+    end
+    opts[:cluster] ||= host.parent
+    _run_with_rev(conn, "dev") do
+      vmsProps = pc.collectMultiple(vms,
+        'name', 'config.hardware.device', 'summary.config',
+        'runtime.host',
+      )
+      obj_uuids = []
+      objToHostMap = {}
+      vms.each do |vm|
+        vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps).keys
+        vm_obj_uuids.each{|x| objToHostMap[x] = vmsProps[vm]['runtime.host']}
+        obj_uuids += vm_obj_uuids
+      end
+      opts[:objToHostMap] = objToHostMap
+
+      objs = _object_info(obj_uuids, opts)
+      hosts_props = objs['host_props']
+
+      vms.each do |vm|
+        vmProps = vmsProps[vm]
+        disks = vmProps['disks']
+        puts "VM #{vmProps['name']}:"
+        if objs['has_partitions']
+          vmHost = vmProps['runtime.host']
+          puts "  VM registered on host: #{hosts_props[vmHost]['name']}"
+        end
+
+        indent = 1
+        pre = "  " * indent
+        puts "#{pre}Namespace directory"
+        obj_uuid = vmsProps[vm]['namespaceUuid']
+        if objs['has_partitions'] && objs['obj_uuid_from_host'][obj_uuid]
+          objHost = objs['obj_uuid_from_host'][obj_uuid]
+          puts "#{pre}  Shown from perspective of host #{hosts_props[objHost]['name']}"
+        end
+        _print_dom_config_tree(obj_uuid, objs, indent + 1)
+
+        disks.each do |disk|
+          indent = 1
+          backing = disk.backing
+          while backing
+            pre = "  " * indent
+            puts "#{pre}Disk backing: #{backing.fileName}"
+            obj_uuid = backing.backingObjectId
+            if objs['has_partitions'] && objs['obj_uuid_from_host'][obj_uuid]
+              objHost = objs['obj_uuid_from_host'][obj_uuid]
+              puts "#{pre}  Shown from perspective of host #{hosts_props[objHost]['name']}"
+            end
+            _print_dom_config_tree(obj_uuid, objs, indent + 1)
+
+            backing = backing.parent
+            indent += 1
+          end
+        end
+      end
+    end
+  rescue Exception => ex
+    puts ex.message
+    puts ex.backtrace
+    raise
+  end
+end
+
+def _object_info obj_uuids, opts
+  if !opts[:cluster]
+    err "Must specify a VSAN Cluster"
+  end
+  host = opts[:host]
+  if opts[:cluster].is_a?(VIM::HostSystem)
+    host = opts[:cluster]
+  end
+  # XXX: Verify VSAN is enabled on the cluster
+  if host
+    hosts = [host]
+    conn = host._connection
+  else
+    hosts = opts[:cluster].host
+    conn = opts[:cluster]._connection
+  end
+
+  _run_with_rev(conn, "dev") do
+    pc = conn.propertyCollector
+
+    hosts_props = pc.collectMultiple(hosts,
+      'name', 'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+    connected_hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    hosts = connected_hosts
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    if opts[:perspective_from_host]
+      if !connected_hosts.member?(opts[:perspective_from_host])
+        err "Perspective-Host not connected, or not in considered group of hosts"
+      end
+    end
+
+    # Detect partitions:
+    # We need to ask every host which other hosts it believes to share a
+    # VSAN cluster (partition) with. This is a call down to ESX, so we spawn
+    # one connection and one thread per host to parallelize. We detect
+    # partitions by grouping hosts based on quoting the same cluster members.
+    hosts_props.map do |host, props|
+      if !connected_hosts.member?(host)
+        next
+      end
+      Thread.new do
+        begin
+          vsanSys = props['configManager.vsanSystem']
+          c1 = conn.spawn_additional_connection
+          vsanSys = vsanSys.dup_on_conn(c1)
+          res = vsanSys.QueryHostStatus()
+          hosts_props[host]['vsanCluster'] = res
+        rescue Exception => ex
+          puts "Failed to gather host status from #{props['name']}: #{ex.class}: #{ex.message}"
+        end
+      end
+    end.compact.each{|t| t.join}
+
+    partitions = hosts_props.select do |h, p|
+      connected_hosts.member?(h)
+    end.group_by{|h, p| p['vsanCluster'].memberUuid}
+    partition_exists = (partitions.length > 1)
+    if partition_exists
+      puts "#{Time.now}: WARNING: VSAN Cluster network partition detected."
+      puts "#{Time.now}: The individual partitions of the cluster will have "
+      puts "#{Time.now}: different views on object/component availability. An "
+      puts "#{Time.now}: attempt is made to show VM object accessibility from the "
+      puts "#{Time.now}: perspective of the host on which a VM is registered. "
+      puts "#{Time.now}: Please fix the network partition as soon as possible "
+      puts "#{Time.now}: as it will seriously impact the availability of your "
+      puts "#{Time.now}: VMs in your VSAN cluster. Check vsan.cluster_info for"
+      puts "#{Time.now}: more details."
+      puts "#{Time.now}: "
+      puts "#{Time.now}: The following partitions were detected:"
+      i = 1
+      partitions.values.map do |part|
+        part_hosts = part.map{|x| hosts_props[x[0]]}.compact.map{|x| x['name']}
+        puts "#{Time.now}: #{i}) #{part_hosts.join(", ")}"
+        i += 1
+      end
+      puts ""
+      if opts[:perspective_from_host]
+        name = hosts_props[opts[:perspective_from_host]]['name']
+        puts "Showing data from perspective of host #{name} as requested"
+        puts ""
+      end
+    end
+
+    host_vsan_uuids, host_props, vsan_disk_uuids = _vsan_cluster_disks_info(
+      opts[:cluster],
+      :hosts_props => hosts_props
+    )
+    extra_info = {
+      'host_vsan_uuids' => host_vsan_uuids,
+      'host_props' => host_props,
+      'vsan_disk_uuids' => vsan_disk_uuids,
+    }
+
+    obj_uuids = obj_uuids.compact.map{|x| _normalize_uuid(x)}
+    obj_uuids = obj_uuids.select{|x| is_uuid(x)}
+
+    objs = {'obj_uuid_from_host' => {}}
+    objs['has_partitions'] = partition_exists
+
+    # Dealing with partitions:
+    # In the non-partitioned case we can just select any host and ask it
+    # for the object info, given that CMMDS is (eventually) consistent
+    # across the cluster. But during a network partition it is most logical
+    # to ask the host on which a VM is registered about what it thinks about
+    # the objects in question. So in case of a network partition we fall
+    # back to a slower code path that asks each host individually about
+    # the objects it (hopefully) knows best about.
+    # Note: Upon power on, DRS will pick a host to power the VM on. That other
+    # host may not be in the same partition and DRS doesn't know about it,
+    # so although we tried to show the object from the "right" host's
+    # perspective, it may still not be the right host when debugging a
+    # power-on failure.
+    if opts[:objToHostMap] && partition_exists && !opts[:perspective_from_host]
+      obj_uuids_groups = obj_uuids.group_by{|x| opts[:objToHostMap][x]}
+      obj_uuids_groups.each do |host, group|
+        vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+        group_objs = vsanIntSys.query_vsan_objects(:uuids => group)
+
+        # Here we are merging and overriding potentially conflicting
+        # information about LSOM_OBJECT and DISK entries. No smarts are
+        # applied, as I am not aware of issues arising from those
+        # possible inconsistencies.
+        group_objs.each do |k,v|
+          objs[k] ||= {}
+          objs[k].merge!(v)
+        end
+        group.each do |uuid|
+          objs['obj_uuid_from_host'][uuid] = host
+        end
+      end
+    else
+      if opts[:perspective_from_host]
+        host = opts[:perspective_from_host]
+      else
+        host = hosts.first
+      end
+      vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+      objs = vsanIntSys.query_vsan_objects(:uuids => obj_uuids)
+    end
+
+    objs.merge!(extra_info)
+    objs
+  end
+end
+
+
+def _fetch_disk_stats obj, metrics, instances, opts = {}
+  conn = obj._connection
+  pm = conn.serviceContent.perfManager
+
+  metrics.each do |x|
+    err "no such metric #{x}" unless pm.perfcounter_hash.member? x
+  end
+
+  interval = pm.provider_summary(obj).refreshRate
+  start_time = nil
+  if interval == -1
+    # Object does not support real time stats
+    interval = 300
+    start_time = Time.now - 300 * 5
+  end
+  stat_opts = {
+    :interval => interval,
+    :startTime => start_time,
+    :instance => instances,
+    :multi_instance => true,
+  }
+  stat_opts[:max_samples] = opts[:samples] if opts[:samples]
+  res = pm.retrieve_stats [obj], metrics, stat_opts
+
+  out = {}
+  if res && res[obj]
+    res[obj][:metrics].each do |key, values|
+      metric, device = key
+      out[device] ||= {}
+      out[device][metric] = values
+    end
+  end
+  out
+end
+
+opts :disks_stats do
+  summary "Show stats on all disks in VSAN"
+  arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
+  opt :compute_number_of_components, "Deprecated", :type => :boolean
+  opt :show_iops, "Show deprecated fields", :type => :boolean
+end
+
+def disks_stats hosts_and_clusters, opts = {}
+  opts[:compute_number_of_components] = true
+  conn = hosts_and_clusters.first._connection
+  hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
+  clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
+  pc = conn.propertyCollector
+  cluster_hosts = pc.collectMultiple(clusters, 'host')
+  cluster_hosts.each do |cluster, props|
+    hosts += props['host']
+  end
+  hosts = hosts.uniq
+  _run_with_rev(conn, "dev") do
+    hosts_props = pc.collectMultiple(hosts,
+      'name',
+      'runtime.connectionState',
+      'configManager.vsanSystem',
+      'configManager.vsanInternalSystem'
+    )
+
+    hosts = hosts_props.select do |k,v|
+      v['runtime.connectionState'] == 'connected'
+    end.keys
+    if hosts.length == 0
+      err "Couldn't find any connected hosts"
+    end
+
+    hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
+    node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
+    node_uuids = Hash[node_uuids.map do |k, v|
+      [v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
+    end]
+
+    lock = Mutex.new
+    disks = {}
+    vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
+    disks = vsanIntSys.QueryPhysicalVsanDisks(:props => [
+      'lsom_objects_count',
+      'uuid',
+      'isSsd',
+      'capacity',
+      'capacityUsed',
+      'capacityReserved',
+      'iops',
+      'iopsReserved',
+      'disk_health',
+    ])
+    if disks == "BAD"
+      err "Server failed to gather VSAN disk info"
+    end
+    begin
+      disks = JSON.load(disks)
+    rescue
+      err "Server didn't provide VSAN disk info: #{disks}"
+    end
+    #pp disks
+
+    vsan_disks_info = {}
+    vsan_disks_info.merge!(
+      _vsan_host_disks_info(Hash[hosts.map{|h| [h, hosts_props[h]['name']]}])
+    )
+    disks.each do |k, v|
+      v['esxcli'] = vsan_disks_info[v['uuid']]
+      if v['esxcli']
+        v['host'] = v['esxcli']._get_property :host
+      end
+    end
+
+    #pp vsan_disks_info
+    #pp disks.values.map{|x| [x['host'], x['esxcli']]}
+    #pp disks.values.group_by{|x| x['host']}.keys
+
+    disks = disks.values.sort_by do |x|
+      host_props = hosts_props[x['host']]
+      host_props ? host_props['name'] : ''
+    end
+
+    # Stats are now better handled by observer
+    # disks.group_by{|x| x['host']}.each do |host, host_disks|
+    #   next if !host
+    #   devices = host_disks.map{|x| x['esxcli'].Device}
+    #   metrics = [
+    #     'disk.numberReadAveraged', 'disk.numberWriteAveraged',
+    #     'disk.deviceLatency', 'disk.maxTotalLatency',
+    #     'disk.queueLatency', 'disk.kernelLatency'
+    #   ]
+    #   stats = _fetch_disk_stats host, metrics, devices
+    #   disks.each do |v|
+    #     if v['esxcli'] && stats[v['esxcli'].Device]
+    #       v['stats'] = stats[v['esxcli'].Device]
+    #     else
+    #       v['stats'] ||= {}
+    #       metrics.each{|m| v['stats'][m] ||= [-1] }
+    #     end
+    #   end
+    # end
+
+    t = Terminal::Table.new()
+    if opts[:show_iops]
+      t << [nil, nil, nil, 'Num', 'Capacity', nil, nil, 'Iops', nil, nil, ]
+      t << ['DisplayName', 'Host', 'isSSD', 'Comp', 'Total', 'Used', 'Reserved', 'Total', 'Reserved', ]
+    else
+      t << [nil, nil, nil, 'Num', 'Capacity', nil, nil, 'Status']
+      t << ['DisplayName', 'Host', 'isSSD', 'Comp', 'Total', 'Used', 'Reserved', 'Health']
+    end
+    t.add_separator
+    # XXX: Would be nice to show displayName and host
+
+    groups = disks.group_by{|x| x['esxcli'] ? x['esxcli'].VSANDiskGroupUUID : nil}
+
+    groups.each do |group, disks|
+      disks.sort_by{|x| -x['isSsd']}.each do |x|
+        info = x['esxcli']
+        host_props = hosts_props[x['host']]
+        cols = [
+          info ? info.DisplayName : 'N/A',
+          host_props ? host_props['name'] : 'N/A',
+          #x['uuid'],
+          (x['isSsd'] == 1) ? 'SSD' : 'MD',
+          x['lsom_objects_count'] || 'N/A',
+          "%.2f GB" % [x['capacity'].to_f / 1024**3],
+          "%.0f %%" % [x['capacityUsed'].to_f * 100 / x['capacity'].to_f],
+          "%.0f %%" % [x['capacityReserved'].to_f * 100 / x['capacity'].to_f],
+        ]
+
+        if opts[:show_iops]
+          cols += [
+            "%d" % [x['iops']],
+            "%.0f %%" % [ x['iopsReserved'].to_f * 100 / x['iops'].to_f],
+          ]
+        end
+
+        # cols += [
+        #   "%dr/%dw" % [x['stats']['disk.numberReadAveraged'].first,
+        #                x['stats']['disk.numberWriteAveraged'].first],
+        #   "%dd/%dq/%dk" % [x['stats']['disk.deviceLatency'].first,
+        #                    x['stats']['disk.queueLatency'].first,
+        #                    x['stats']['disk.kernelLatency'].first,],
+        # ]
+
+        health = "N/A"
+        if x['disk_health'] && x['disk_health']['healthFlags']
+          flags = x['disk_health']['healthFlags']
+          health = []
+          {
+            4 => "FAILED",
+            5 => "OFFLINE",
+            6 => "DECOMMISSIONED",
+          }.each do |k, v|
+            if flags & (1 << k) != 0
+              health << v
+            end
+          end
+          if health.length == 0
+            health = "OK"
+          else
+            health = health.join(", ")
+          end
+
+        end
+        cols += [
+          health
+        ]
+
+        t << cols
+      end
+      if group != groups.keys.last
+        t.add_separator
+      end
+    end
+
+    puts t
+  end
+end
+
+
+opts :whatif_host_failures do
+  summary "Simulates how host failures impact VSAN resource usage"
+  banner <<-EOS
+
+The command shows current VSAN disk usage, but also simulates how
+disk usage would evolve under a host failure. Concretely, the simulation
+assumes that all objects would be brought back to full policy
+compliance by bringing up new mirrors of existing data.
+The command makes some simplifying assumptions about disk space
+balance in the cluster. It is mostly intended as a rough estimate
+of whether a host failure would drive the cluster close to being full.
+
+EOS
+  arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
+  opt :num_host_failures_to_simulate, "Number of host failures to simulate", :default => 1
+  opt :show_current_usage_per_host, "Show current resources used per host"
+end
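# To make the simulation concrete, a worked example on hypothetical numbers
# (not from a real cluster): four hosts with 1 TB of HDD capacity each and
# 512 GB used per host, simulating one host failure. This mirrors the
# "capacity after rebuild" formula used in the implementation below.
total_after_failure = 3 * 1024                   # GB on the 3 surviving hosts
used_after_failure  = 3 * 512                    # GB already used on them
additional          = 512                        # GB to re-mirror from the failed host
free = total_after_failure - used_after_failure  # 1536 GB
used_pct_after = 100.0 - ((free - additional) * 100.0 / total_after_failure)
# => ~66.7% used after re-protection, up from 50% before the failure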
1578
+
1579
+ def whatif_host_failures hosts_and_clusters, opts = {}
1580
+ opts[:compute_number_of_components] = true
1581
+ conn = hosts_and_clusters.first._connection
1582
+ hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
1583
+ clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
1584
+ pc = conn.propertyCollector
1585
+ cluster_hosts = pc.collectMultiple(clusters, 'host')
1586
+ cluster_hosts.each do |cluster, props|
1587
+ hosts += props['host']
1588
+ end
1589
+ hosts = hosts.uniq
1590
+
1591
+ if opts[:num_host_failures_to_simulate] != 1
1592
+ err "Only simulation of 1 host failure has been implemented"
1593
+ end
1594
+
1595
+ _run_with_rev(conn, "dev") do
1596
+ hosts_props = pc.collectMultiple(hosts,
1597
+ 'name',
1598
+ 'runtime.connectionState',
1599
+ 'configManager.vsanSystem',
1600
+ 'configManager.vsanInternalSystem'
1601
+ )
1602
+
1603
+ hosts = hosts_props.select do |k,v|
1604
+ v['runtime.connectionState'] == 'connected'
1605
+ end.keys
1606
+ if hosts.length == 0
1607
+ err "Couldn't find any connected hosts"
1608
+ end
1609
+
1610
+ hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
1611
+ node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
1612
+ node_uuids = Hash[node_uuids.map do |k, v|
1613
+ [v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
1614
+ end]
1615
+
1616
+ lock = Mutex.new
1617
+ disks = {}
1618
+ vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
1619
+ disks = vsanIntSys.QueryPhysicalVsanDisks(:props => [
1620
+ 'lsom_objects_count',
1621
+ 'uuid',
1622
+ 'isSsd',
1623
+ 'capacity',
1624
+ 'capacityUsed',
1625
+ 'capacityReserved',
1626
+ 'iops',
1627
+ 'iopsReserved',
1628
+ 'owner',
1629
+ ])
1630
+ if disks == "BAD"
1631
+ err "Server failed to gather VSAN disk info"
1632
+ end
1633
+ begin
1634
+ disks = JSON.load(disks)
1635
+ rescue
1636
+ err "Server didn't provide VSAN disk info: #{objs}"
1637
+ end
1638
+
1639
+ # XXX: Do this in threads
1640
+ hosts.map do |host|
1641
+ Thread.new do
1642
+ c1 = conn.spawn_additional_connection
1643
+ props = hosts_props[host]
1644
+ vsanIntSys2 = props['configManager.vsanInternalSystem']
1645
+ vsanIntSys3 = vsanIntSys2.dup_on_conn(c1)
1646
+ res = vsanIntSys3.query_vsan_statistics(:labels => ['lsom-node'])
1647
+ hosts_props[host]['lsom.node'] = res['lsom.node']
1648
+ end
1649
+ end.each{|t| t.join}
1650
+
+     hosts_disks = Hash[disks.values.group_by{|x| x['owner']}.map do |owner, hostDisks|
+       props = {}
+       hdds = hostDisks.select{|disk| disk['isSsd'] == 0}
+       ssds = hostDisks.select{|disk| disk['isSsd'] == 1}
+       hdds.each do |disk|
+         [
+           'capacityUsed', 'capacityReserved',
+           'capacity', 'lsom_objects_count'
+         ].each do |x|
+           props[x] ||= 0
+           props[x] += disk[x]
+         end
+       end
+       ssds.each do |disk|
+         [
+           'capacityReserved', 'capacity',
+         ].each do |x|
+           props["ssd_#{x}"] ||= 0
+           props["ssd_#{x}"] += disk[x]
+         end
+       end
+       h = node_uuids[owner]
+       props['host'] = h
+       props['hostname'] = h ? hosts_props[h]['name'] : owner
+       props['numHDDs'] = hdds.length
+       props['maxComponents'] = 3000
+       if h && hosts_props[h]['lsom.node']
+         props['maxComponents'] = hosts_props[h]['lsom.node']['numMaxComponents']
+       end
+       [owner, props]
+     end]
1682
+
+     sorted_hosts = hosts_disks.values.sort_by{|x| -x['capacityUsed']}
+
+     if opts[:show_current_usage_per_host]
+       puts "Current utilization of hosts:"
+       t = Terminal::Table.new()
+       t << [nil, nil, 'HDD Capacity', nil, nil, 'Components', 'SSD Capacity']
+       t << ['Host', 'NumHDDs', 'Total', 'Used', 'Reserved', 'Used', 'Reserved']
+       t.add_separator
+
+       hosts_disks.each do |owner, x|
+         cols = [
+           x['hostname'],
+           x['numHDDs'],
+           "%.2f GB" % [x['capacity'].to_f / 1024**3],
+           "%.0f %%" % [x['capacityUsed'].to_f * 100 / x['capacity'].to_f],
+           "%.0f %%" % [x['capacityReserved'].to_f * 100 / x['capacity'].to_f],
+           "%4u/%u (%.0f %%)" % [
+             x['lsom_objects_count'],
+             x['maxComponents'],
+             x['lsom_objects_count'].to_f * 100 / x['maxComponents'].to_f
+           ],
+           "%.0f %%" % [x['ssd_capacityReserved'].to_f * 100 / x['ssd_capacity'].to_f],
+         ]
+         t << cols
+       end
+       puts t
+       puts ""
+     end
1711
+
+     puts "Simulating #{opts[:num_host_failures_to_simulate]} host failures:"
+     puts ""
+     worst_host = sorted_hosts[0]
+
+     if sorted_hosts.length < 3
+       puts "Cluster unable to regain full policy compliance after host failure, "
+       puts "not enough hosts remaining."
+       return
+     end
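+
+     # Each of the three resource blocks below compares usage right now against
+     # usage after the most loaded host fails and its data is re-mirrored onto
+     # the survivors. Illustrative numbers (not from any real cluster): with 4
+     # hosts of 1 TB HDD each and the worst host using 600 GB, the surviving
+     # 3 TB must absorb an extra 600 GB, so the post-failure percentage adds
+     # 600 GB / 3 TB = 20 points on top of the survivors' own usage.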
1721
+
+     t = Terminal::Table.new()
+     t << ["Resource", "Usage right now", "Usage after failure/re-protection"]
+     t.add_separator
+     capacityRow = ["HDD capacity"]
+
+     # Capacity before failure
+     used = sorted_hosts.map{|x| x['capacityUsed']}.sum
+     total = sorted_hosts.map{|x| x['capacity']}.sum
+     free = total - used
+     usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
+     capacityRow << "%3.0f%% used (%.2f GB free)" % [
+       usedPctOriginal,
+       free.to_f / 1024**3,
+     ]
+
+     # Capacity after rebuild
+     used = sorted_hosts[1..-1].map{|x| x['capacityUsed']}.sum
+     total = sorted_hosts[1..-1].map{|x| x['capacity']}.sum
+     additional = worst_host['capacityUsed']
+     free = total - used
+     usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
+     usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
+     usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
+     capacityRow << "%3.0f%% used (%.2f GB free)" % [
+       usedPctAfterReMirror,
+       (free - additional).to_f / 1024**3,
+     ]
+     t << capacityRow
1750
+
+     # Components before failure
+     sorted_hosts = hosts_disks.values.sort_by{|x| -x['lsom_objects_count']}
+     worst_host = sorted_hosts[0]
+     used = sorted_hosts.map{|x| x['lsom_objects_count']}.sum
+     total = sorted_hosts.map{|x| x['maxComponents']}.sum
+     free = total - used
+     usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
+     componentsRow = ["Components"]
+     componentsRow << "%3.0f%% used (%u available)" % [
+       usedPctOriginal,
+       free,
+     ]
+
+     # Components after rebuild
+     used = sorted_hosts[1..-1].map{|x| x['lsom_objects_count']}.sum
+     total = sorted_hosts[1..-1].map{|x| x['maxComponents']}.sum
+     additional = worst_host['lsom_objects_count']
+     free = total - used
+     usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
+     usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
+     usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
+     componentsRow << "%3.0f%% used (%u available)" % [
+       usedPctAfterReMirror,
+       (free - additional),
+     ]
+     t << componentsRow
1777
+
+     # RC reservations before failure
+     sorted_hosts = hosts_disks.values.sort_by{|x| -x['ssd_capacityReserved']}
+     worst_host = sorted_hosts[0]
+     used = sorted_hosts.map{|x| x['ssd_capacityReserved']}.sum
+     total = sorted_hosts.map{|x| x['ssd_capacity']}.sum
+     free = total - used
+     usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
+     rcReservationsRow = ["RC reservations"]
+     rcReservationsRow << "%3.0f%% used (%.2f GB free)" % [
+       usedPctOriginal,
+       free.to_f / 1024**3,
+     ]
+
+     # RC reservations after rebuild
+     used = sorted_hosts[1..-1].map{|x| x['ssd_capacityReserved']}.sum
+     total = sorted_hosts[1..-1].map{|x| x['ssd_capacity']}.sum
+     additional = worst_host['ssd_capacityReserved']
+     free = total - used
+     usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
+     usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
+     usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
+     rcReservationsRow << "%3.0f%% used (%.2f GB free)" % [
+       usedPctAfterReMirror,
+       (free - additional).to_f / 1024**3,
+     ]
+     t << rcReservationsRow
1804
+
+     puts t
+   end
+ end
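+
+ # Example invocation from an RVC shell (cluster path illustrative):
+ #   > vsan.whatif_host_failures --show-current-usage-per-host ~/computers/mycluster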
1808
+
+
+ def _observe_snapshot conn, host, hosts, vmView, pc, hosts_props, vsanIntSys
+   startTime = Time.now
+   observation = {
+     'cmmds' => {
+       'clusterInfos' => {},
+       'clusterDirs' => {},
+     },
+     'vsi' => {},
+     'inventory' => {},
+   }
+   exceptions = []
+   threads = []
+   begin
+     threads << Thread.new do
+       begin
+         t1 = Time.now
+         vms = vmView.view
+
+         vmProperties = [
+           'name', 'runtime.powerState', 'datastore', 'config.annotation',
+           'parent', 'resourcePool', 'storage.perDatastoreUsage',
+           'summary.config.memorySizeMB', 'summary.config.numCpu',
+           'summary.config.vmPathName', 'config.hardware.device',
+           'runtime.connectionState',
+         ]
+         vmsProps = pc.collectMultiple(vms, *vmProperties)
+         t2 = Time.now
+         puts "Query VM properties: %.2f sec" % (t2 - t1)
+         observation['inventory']['vms'] = {}
+         vmsProps.each do |vm, vmProps|
+           vmProps['vsan-obj-uuids'] = {}
+           devices = vmProps['config.hardware.device'] || []
+           disks = devices.select{|x| x.is_a?(VIM::VirtualDisk)}
+           disks.each do |disk|
+             newBacking = {}
+             newDisk = {
+               'unitNumber' => disk.unitNumber,
+               'controllerKey' => disk.controllerKey,
+               'backing' => newBacking,
+             }
+             backing = disk.backing
+             if !backing.is_a?(VIM::VirtualDiskFlatVer2BackingInfo)
+               next
+             end
+             while backing
+               uuid = backing.backingObjectId
+               if uuid
+                 vmProps['vsan-obj-uuids'][uuid] = backing.fileName
+                 newBacking['uuid'] = uuid
+               end
+               newBacking['fileName'] = backing.fileName
+               backing = backing.parent
+
+               if backing
+                 newBacking['parent'] = {}
+                 newBacking = newBacking['parent']
+               end
+             end
+
+             vmProps['disks'] ||= []
+             vmProps['disks'] << newDisk
+           end
+           # Do not add devices to the snapshot as they are too big
+           vmProps.delete('config.hardware.device')
+
+           begin
+             vmPathName = vmProps['summary.config.vmPathName']
+             uuid = vmPathName.split("] ")[1].split("/")[0]
+             vmProps['vsan-obj-uuids'][uuid] = vmPathName
+           rescue
+           end
+
+           observation['inventory']['vms'][vm._ref] = vmProps
+         end
+       rescue Exception => ex
+         exceptions << ex
+       end
+     end
+     threads << Thread.new do
+       begin
+         sleep(20)
+         hostname = hosts_props[host]['name']
+         # XXX: Should pick one host per partition
+         c1 = conn.spawn_additional_connection
+         vsanIntSys1 = vsanIntSys.dup_on_conn(c1)
+
+         t1 = Time.now
+         res = vsanIntSys1.query_cmmds(
+           (1..30).map{|x| {:type => x}}
+         )
+         t2 = Time.now
+         puts "Query CMMDS from #{hostname}: %.2f sec (json size: %dKB)" % [
+           (t2 - t1), JSON.dump(res).length / 1024
+         ]
+         observation['cmmds']['clusterDirs'][hostname] = res
+       rescue Exception => ex
+         exceptions << ex
+       end
+     end
+     hosts.each do |host|
+       threads << Thread.new do
+         begin
+           hostname = hosts_props[host]['name']
+           vsanIntSys1 = hosts_props[host]['configManager.vsanInternalSystem']
+           c1 = conn.spawn_additional_connection
+           vsanIntSys1 = vsanIntSys1.dup_on_conn(c1)
+
+           t1 = Time.now
+           res = vsanIntSys1.QueryVsanStatistics(:labels =>
+             [
+               'dom', 'lsom', 'worldlets', 'plog',
+               'dom-objects',
+               'mem', 'cpus', 'slabs',
+               'vscsi', 'cbrc',
+               'disks',
+               #'rdtassocsets',
+               'system-mem', 'pnics',
+             ]
+           )
+           t2 = Time.now
+           res = JSON.load(res)
+           puts "Query Stats on #{host.name}: %.2f sec (on ESX: %.2f, json size: %dKB)" % [
+             (t2 - t1), res['on-esx-collect-duration'],
+             JSON.dump(res).length / 1024
+           ]
+           observation['vsi'][hostname] = res
+         rescue Exception => ex
+           exceptions << ex
+         end
+       end
+     end
+     threads.each{|x| x.join}
+     if exceptions.length > 0
+       raise exceptions.first
+     end
+   rescue Interrupt
+     threads.each{|t| t.terminate}
+   end
+
+   {
+     'type' => 'inventory-snapshot',
+     'snapshot' => observation,
+     'starttime' => startTime.to_f,
+     'endtime' => Time.now.to_f,
+   }
+ end
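+
+ # Assumed shape of the returned hash, matching the keys populated above:
+ #   { 'type' => 'inventory-snapshot',
+ #     'snapshot' => { 'cmmds' => {...}, 'vsi' => { '<hostname>' => {...} },
+ #                     'inventory' => { 'vms' => { '<vm-moref>' => {...} } } },
+ #     'starttime' => <epoch float>, 'endtime' => <epoch float> }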
1956
+
+ class VsanObserver
+   def generate_observer_html(tasksAnalyzer, inventoryAnalyzer,
+                              vcInfo, hosts_props)
+     opts = {}
+     refreshString = ""
+     vcOS = vcInfo['about']['osType']
+     vcFullName = vcInfo['about']['fullName']
+     testTitleString = "VC #{vcInfo['hostname']} (#{vcFullName} - #{vcOS})"
+     skipTasksTab = true
+     graphUpdateMsg = "XXX"
+     processed = 0
+     puts "#{Time.now}: Generating HTML"
+     inventoryAnalyzerTabs = inventoryAnalyzer.generateHtmlTabs(
+       true,
+       :skipLivenessTab => true,
+       :skipLsomExpert => true,
+     )
+     puts "#{Time.now}: Generating HTML (fill in template)"
+
+     erbFilename = "#{analyser_lib_dirname}/stats.erb.html"
+     @erbFileContent = open(erbFilename, 'r').read
+
+     template = ERB.new(@erbFileContent)
+     html = template.result(binding)
+     puts "#{Time.now}: HTML length: #{html.length}"
+
+     html
+   end
+
+   def generate_observer_bundle(bundlePath, tasksAnalyzer, inventoryAnalyzer,
+                                vcInfo, hosts_props)
+     require 'rubygems/package'
+     tarFilename = File.join(
+       bundlePath,
+       "vsan-observer-#{Time.now.strftime('%Y-%m-%d.%H-%M-%S')}.tar"
+     )
+     gzFilename = "%s.gz" % tarFilename
+
+     puts "#{Time.now}: Writing out an HTML bundle to #{gzFilename} ..."
+     tar = open(tarFilename, 'wb+')
+     Gem::Package::TarWriter.new(tar) do |writer|
+       inventoryAnalyzer.dump(:tar => writer)
+
+       writer.add_file('stats.html', 0644) do |io|
+         io.write(self.generate_observer_html(
+           tasksAnalyzer, inventoryAnalyzer, vcInfo,
+           hosts_props
+         ))
+       end
+
+       [
+         'graphs.html', 'bg_pattern.png', 'vmw_logo_white.png',
+         'graphs.js', 'observer.css', 'vm-graph.svg'
+       ].each do |filename|
+         writer.add_file(filename, 0644) do |io|
+           content = open("#{analyser_lib_dirname}/#{filename}", "r") do |src|
+             src.read
+           end
+           io.write(content)
+         end
+       end
+     end
+     tar.seek(0)
+
+     gz = Zlib::GzipWriter.new(File.new(gzFilename, 'wb'))
+     while (buffer = tar.read(10000))
+       gz.write(buffer)
+     end
+     tar.close
+     gz.close
+     FileUtils.rm(tarFilename)
+     puts "#{Time.now}: Done writing HTML bundle to #{gzFilename}"
+   end
+ end
2032
+
+ require 'webrick'
+ class SimpleGetForm < WEBrick::HTTPServlet::AbstractServlet
+   def initialize(server, tasksAnalyzer, inventoryAnalyzer,
+                  erbFileContent, vcInfo, hosts_props)
+     super server
+     @tasksAnalyzer = tasksAnalyzer
+     @inventoryAnalyzer = inventoryAnalyzer
+     @erbFileContent = erbFileContent
+     @vcInfo = vcInfo
+     @hosts_props = hosts_props
+   end
+
+   # Process the request, return response
+   def do_GET(request, response)
+     staticFiles = [
+       "/graphs.js", "/graphs.html",
+       "/observer.css",
+       "/vmw_logo_white.png",
+       "/bg_pattern.png",
+       "/vm-graph.svg"
+     ]
+     if request.path == "/"
+       status, content_type, body = mainpage(request)
+     elsif staticFiles.member?(request.path)
+       status, content_type, body = servefile(request)
+     # elsif request.path =~ /^\/css\//
+     #   status, content_type, body = servefile(request)
+     elsif request.path =~ /^\/jsonstats\/(dom|pcpu|mem|lsom|vm|cmmds|misc)\/(.*).json$/
+       group = $1
+       file = $2
+       opts = {}
+       if file =~ /^(.*)_thumb$/
+         file = $1
+         opts[:points] = 60
+       end
+       status, content_type, body = servejson(group, file, opts)
+     else
+       super(request, response)
+     end
+
+     response.status = status
+     response['Content-Type'] = content_type
+     response.body = body
+   end
+
+   def servefile request
+     filename = "#{analyser_lib_dirname}#{request.path}"
+     content = open(filename, 'r').read
+     if filename =~ /\.js$/
+       return [200, "text/javascript", content]
+     end
+     if filename =~ /\.html$/
+       return [200, "text/html", content]
+     end
+     if filename =~ /\.less$/
+       return [200, "text/css", content]
+     end
+     if filename =~ /\.css$/
+       return [200, "text/css", content]
+     end
+     if filename =~ /\.png$/
+       return [200, "image/png", content]
+     end
+     if filename =~ /\.svg$/
+       return [200, "image/svg+xml", content]
+     end
+
+     [404, "text/html", "Not found"]
+   end
+
+   def json_dump out
+     @inventoryAnalyzer.json_dump out
+   end
+
+   def servejson group, file, opts = {}
+     points = opts[:points]
+     if group == "misc"
+       if file =~ /^distribution$/
+         out = @inventoryAnalyzer.dumpDistribution(:points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^crbc-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpCbrc(hostname)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^pnics-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpPnics(hostname)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+     if group == "vm"
+       if file =~ /^list$/
+         out = @inventoryAnalyzer.dumpVmList()
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^vscsi-([^-]*)-(.*)$/
+         disk = $1
+         vm = $2
+         out = @inventoryAnalyzer.dumpVscsi(vm, disk, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+     if group == "cmmds"
+       if file =~ /^disks$/
+         out = @inventoryAnalyzer.dumpCmmdsDisks()
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^cmmds-(.*)$/
+         uuid = $1
+         out = @inventoryAnalyzer.dumpCmmdsUuid(uuid)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+     if group == "dom"
+       if file =~ /^domobj-(client|total|compmgr)-(.*)$/
+         uuid = "#{$1}-#{$2}"
+         out = @inventoryAnalyzer.dumpDom(uuid, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       elsif file =~ /^domobj-(.*)$/
+         uuid = $1
+         out = @inventoryAnalyzer.dumpDom(uuid, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+     if group == "pcpu"
+       if file =~ /^wdt-(.*)-([^-]*)$/
+         hostname = $1
+         wdt = $2
+         out = @inventoryAnalyzer.dumpWdt(hostname, wdt, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^pcpu-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpPcpu(hostname, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+     if group == "mem"
+       if file =~ /^heaps-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpHeaps(hostname, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^slabs-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpSlabs(hostname, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^system-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpSystemMem(hostname, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+     if group == "lsom"
+       if file =~ /^lsomcomp-(.*)$/
+         uuid = $1
+         out = @inventoryAnalyzer.dumpLsomComp(uuid, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^lsomhost-(.*)$/
+         hostname = $1
+         out = @inventoryAnalyzer.dumpLsomHost(hostname, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^ssd-(.*)$/
+         uuid = $1
+         out = @inventoryAnalyzer.dumpSsd(uuid, nil, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^plog-(.*)$/
+         dev = $1
+         out = @inventoryAnalyzer.dumpPlog(dev, nil, nil, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^disk-(.*)$/
+         dev = $1
+         out = @inventoryAnalyzer.dumpDisk(dev, nil, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+       if file =~ /^physdisk-(.*)-([^-]*)$/
+         hostname = $1
+         dev = $2
+         out = @inventoryAnalyzer.dumpPhysDisk(hostname, dev, nil, :points => points)
+         return [200, "text/json", json_dump(out)]
+       end
+     end
+
+     [404, "text/html", "Not found"]
+   end
+
+   def mainpage request
+     tasksAnalyzer = @tasksAnalyzer
+     inventoryAnalyzer = @inventoryAnalyzer
+
+     html = VsanObserver.new.generate_observer_html(
+       @tasksAnalyzer, @inventoryAnalyzer, @vcInfo, @hosts_props
+     )
+
+     [200, "text/html", html]
+   end
+ end
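+
+ # Example endpoints served above (URL shapes follow the regexes in servejson;
+ # the UUID and hostname values are illustrative):
+ #   GET /jsonstats/dom/domobj-client-52e1d1c2....json
+ #   GET /jsonstats/lsom/lsomhost-esx01.example.com.json
+ #   GET /jsonstats/misc/distribution_thumb.json   (a _thumb suffix caps :points at 60)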
2237
+
+ opts :observer do
+   summary "Run observer"
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :filename, "Output file path", :type => :string
+   opt :port, "Port on which to run webserver", :type => :int, :default => 8010
+   opt :run_webserver, "Run a webserver to view live stats", :type => :boolean
+   opt :force, "Apply force", :type => :boolean
+   opt :keep_observation_in_memory, "Keep observed stats in memory even when the command ends. Allows resuming later", :type => :boolean
+   opt :generate_html_bundle, "Generate an HTML bundle after completion. Pass an output directory", :type => :string
+   opt :interval, "Interval (in sec) at which to collect stats", :type => :int, :default => 60
+   opt :max_runtime, "Maximum number of hours to collect stats. Caps memory usage.", :type => :int, :default => 2
+ end
2250
+
+ def observer cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   host = cluster_or_host
+   entries = []
+   hostUuidMap = {}
+
+   vcAbout = conn.serviceContent.about
+   vcInfo = {
+     'hostname' => conn.host,
+     'about' => {
+       'fullName' => vcAbout.fullName,
+       'osType' => vcAbout.osType,
+       'apiVersion' => vcAbout.apiVersion,
+       'apiType' => vcAbout.apiType,
+       'build' => vcAbout.build,
+       'instanceUuid' => vcAbout.instanceUuid,
+       'version' => vcAbout.version,
+     },
+   }
+
+   if opts[:run_webserver] && !opts[:force]
+     puts "Running a webserver with unencrypted HTTP on the vCenter machine"
+     puts "could pose a security risk. This tool is an experimental debugging"
+     puts "tool, which has not been audited or tested for its security."
+     puts "If in doubt, you may want to create a dummy vCenter machine to run"
+     puts "just this tool, instead of running the tool on your production"
+     puts "vCenter machine."
+     puts "In order to run the webserver, please pass --force"
+     err "Force needs to be applied to run the webserver"
+   end
+
+   require 'rvc/observer/analyzer-lib'
+   require 'rvc/observer/tasks-analyzer'
+   require 'rvc/observer/inventory-analyzer'
+
+   inventoryAnalyzer = $inventoryAnalyzer
+   tasksAnalyzer = $tasksAnalyzer
+
+   inventoryAnalyzer ||= InventoryAnalyzer.new
+   tasksAnalyzer ||= TasksAnalyzer.new({})
+
+   file = nil
+   if opts[:filename]
+     file = open(opts[:filename], 'a')
+   end
+   server = nil
+   webrickThread = nil
+   hosts_props = nil
+
+   _run_with_rev(conn, "dev") do
+     vsanIntSys = nil
+     if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+       cluster = cluster_or_host
+       hosts = cluster.host
+     else
+       hosts = [host]
+     end
+
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem',
+       'summary.config.product',
+       'summary.hardware'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+
+     viewMgr = conn.serviceContent.viewManager
+     rootFolder = conn.serviceContent.rootFolder
+
+     vmView = viewMgr.CreateContainerView(
+       :container => rootFolder,
+       :type => ['VirtualMachine'],
+       :recursive => true
+     )
+
+     if opts[:run_webserver]
+       erbFilename = "#{analyser_lib_dirname}/stats.erb.html"
+       erbFileContent = open(erbFilename, 'r').read
+
+       server = WEBrick::HTTPServer.new(:Port => opts[:port])
+       server.mount(
+         "/", SimpleGetForm,
+         tasksAnalyzer, inventoryAnalyzer, erbFileContent, vcInfo,
+         JSON.load(JSON.dump(hosts_props))
+       )
+       webrickThread = Thread.new do
+         server.start
+       end
+     end
+
+     puts "Press <Ctrl>+<C> to stop observing at any point ..."
+     puts
+
+     startTime = Time.now
+     begin
+       while (Time.now - startTime) < opts[:max_runtime] * 3600
+         puts "#{Time.now}: Collect one inventory snapshot"
+         t1 = Time.now
+         begin
+           observation = _observe_snapshot(
+             conn, host, connected_hosts, vmView, pc, hosts_props, vsanIntSys
+           )
+           observation['snapshot']['vcinfo'] = vcInfo
+           observation['timestamp'] = Time.now.to_f
+           if file
+             file.write(JSON.dump(observation) + "\n")
+             file.flush()
+           else
+             puts "#{Time.now}: Live-Processing inventory snapshot"
+             tasksAnalyzer.processTrace(observation)
+             inventoryAnalyzer.processInventorySnapshot(observation)
+           end
+         rescue Interrupt
+           raise
+         rescue Exception => ex
+           puts "#{Time.now}: Got exception: #{ex.class}: #{ex.message}"
+         end
+         t2 = Time.now
+
+         intervalTime = opts[:interval]
+         time = t2 - t1
+         sleepTime = intervalTime - time
+         if sleepTime <= 0.0
+           puts "#{Time.now}: Collection took %.2fs (> %.2fs), no sleep ..." % [
+             time, intervalTime
+           ]
+         else
+           puts "#{Time.now}: Collection took %.2fs, sleeping for %.2fs" % [
+             time, sleepTime
+           ]
+           puts "#{Time.now}: Press <Ctrl>+<C> to stop observing"
+           sleep(sleepTime)
+         end
+       end
+     rescue Interrupt
+       puts "#{Time.now}: Execution interrupted, wrapping up ..."
+     end
+     #pp res
+     vmView.DestroyView()
+   end
+
+   if file
+     file.close()
+   end
+   if server
+     server.shutdown
+     webrickThread.join
+   end
+   if opts[:generate_html_bundle]
+     begin
+       VsanObserver.new.generate_observer_bundle(
+         opts[:generate_html_bundle], tasksAnalyzer, inventoryAnalyzer,
+         vcInfo, hosts_props
+       )
+     rescue Exception => ex
+       puts "#{Time.now}: Failed to generate HTML bundle: #{ex.class}: #{ex.message}"
+     end
+   end
+
+   if opts[:keep_observation_in_memory]
+     $inventoryAnalyzer = inventoryAnalyzer
+     $tasksAnalyzer = tasksAnalyzer
+   else
+     $inventoryAnalyzer = nil
+     $tasksAnalyzer = nil
+   end
+ end
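+
+ # Example invocations from an RVC shell (paths illustrative):
+ #   > vsan.observer ~/computers/mycluster --run-webserver --force
+ #   > vsan.observer ~/computers/mycluster --generate-html-bundle /tmp --interval 30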
2437
+
+ class RbVmomi::VIM
+   def initialize opts
+     super opts
+   end
+
+   def spawn_additional_connection
+     c1 = RbVmomi::VIM.new(@opts)
+     c1.cookie = self.cookie
+     c1.rev = self.rev
+     c1
+   end
+ end
+
+ RbVmomi::VIM::ManagedObject
+ class RbVmomi::VIM::ManagedObject
+   def dup_on_conn conn
+     self.class.new(conn, self._ref)
+   end
+ end
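+
+ # Note: the bare "RbVmomi::VIM::ManagedObject" reference above forces RbVmomi
+ # to autoload the class before it is reopened. spawn_additional_connection and
+ # dup_on_conn exist because an RbVmomi connection should not be shared across
+ # threads: each worker thread clones the session (cookie + API rev) and talks
+ # to the same managed object over its own HTTP connection.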
2457
+
+
+ opts :resync_dashboard do
+   summary "Resyncing dashboard"
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :refresh_rate, "Refresh interval (in sec). Default is no refresh", :type => :int
+ end
2464
+
+ def resync_dashboard cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+     cluster = cluster_or_host
+     hosts = cluster.host
+   else
+     hosts = [cluster_or_host]
+   end
+
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     hostname = hosts_props[host]['name']
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+
+     entries = nil
+
+     puts "#{Time.now}: Querying all VMs on VSAN ..."
+     ds_list = host.datastore
+     ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
+     ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
+     ds_name = ds_props[ds]['name']
+
+     vms = ds.vm
+     vmsProps = pc.collectMultiple(vms,
+       'name', 'runtime.connectionState',
+       'config.hardware.device', 'summary.config'
+     )
+
+     iter = 0
+     while (iter == 0) || opts[:refresh_rate]
+       puts "#{Time.now}: Querying all objects in the system from #{hostname} ..."
+
+       result = vsanIntSys.query_syncing_vsan_objects({})
+       if !result
+         err "Server failed to gather syncing objects"
+       end
+       objects = result['dom_objects']
+
+       puts "#{Time.now}: Got all the info, computing table ..."
+       objects = objects.map do |uuid, objInfo|
+         obj = objInfo['config']
+         comps = _components_in_dom_config(obj['content'])
+         bytesToSyncTotal = 0
+         recoveryETATotal = 0
+         comps = comps.select do |comp|
+           state = comp['attributes']['componentState']
+           bytesToSync = comp['attributes']['bytesToSync'] || 0
+           recoveryETA = comp['attributes']['recoveryETA'] || 0
+           resync = [10, 6].member?(state) && bytesToSync != 0
+           if resync
+             bytesToSyncTotal += bytesToSync
+             recoveryETATotal = [recoveryETA, recoveryETATotal].max
+           end
+           resync
+         end
+         obj['bytesToSync'] = bytesToSyncTotal
+         obj['recoveryETA'] = recoveryETATotal
+         if comps.length > 0
+           obj
+         end
+       end.compact
+       obj_uuids = objects.map{|x| x['uuid']}
+       objects = Hash[objects.map{|x| [x['uuid'], x]}]
+
+       all_obj_uuids = []
+       vmToObjMap = {}
+       vms.each do |vm|
+         vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
+         vm_obj_uuids = vm_obj_uuids.select{|x, v| obj_uuids.member?(x)}
+         vm_obj_uuids = vm_obj_uuids.reject{|x, v| all_obj_uuids.member?(x)}
+         all_obj_uuids += vm_obj_uuids.keys
+         if vm_obj_uuids.length > 0
+           vmToObjMap[vm] = vm_obj_uuids
+         end
+       end
+
+       t = Terminal::Table.new()
+       t << [
+         'VM/Object',
+         'Syncing objects',
+         'Bytes to sync',
+         #'ETA',
+       ]
+       t.add_separator
+       bytesToSyncGrandTotal = 0
+       objGrandTotal = 0
+       vmToObjMap.each do |vm, vm_obj_uuids|
+         vmProps = vmsProps[vm]
+         objs = vm_obj_uuids.keys.map{|x| objects[x]}
+         bytesToSyncTotal = objs.map{|obj| obj['bytesToSync']}.sum
+         recoveryETATotal = objs.map{|obj| obj['recoveryETA']}.max
+         t << [
+           vmProps['name'],
+           objs.length,
+           "", #"%.2f GB" % (bytesToSyncTotal.to_f / 1024**3),
+           #"%.2f min" % (recoveryETATotal.to_f / 60),
+         ]
+         objs.each do |obj|
+           t << [
+             " %s" % (vm_obj_uuids[obj['uuid']] || obj['uuid']),
+             '',
+             "%.2f GB" % (obj['bytesToSync'].to_f / 1024**3),
+             #"%.2f min" % (obj['recoveryETA'].to_f / 60),
+           ]
+         end
+         bytesToSyncGrandTotal += bytesToSyncTotal
+         objGrandTotal += objs.length
+       end
+       t.add_separator
+       t << [
+         'Total',
+         objGrandTotal,
+         "%.2f GB" % (bytesToSyncGrandTotal.to_f / 1024**3),
+         #"%.2f min" % (recoveryETATotal.to_f / 60),
+       ]
+       puts t
+       iter += 1
+
+       if opts[:refresh_rate]
+         sleep opts[:refresh_rate]
+       end
+     end
+   end
+ end
2611
+
+ opts :vm_perf_stats do
+   summary "VM perf stats"
+   arg :vms, nil, :lookup => [VIM::VirtualMachine], :multi => true
+   opt :interval, "Time interval to compute average over", :type => :int, :default => 20
+   opt :show_objects, "Show objects that are part of VM", :type => :boolean
+ end
2618
+
+ def vm_perf_stats vms, opts
+   conn = vms.first._connection
+   pc = conn.propertyCollector
+   cluster = vms.first.runtime.host.parent
+   hosts = cluster.host
+
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+     hostNameToMoMap = Hash[hosts_props.map do |host, props|
+       [props['name'], host]
+     end]
+
+     entries = nil
+
+     puts "#{Time.now}: Querying info about VMs ..."
+     vmsProps = pc.collectMultiple(vms,
+       'name', 'runtime.connectionState',
+       'config.hardware.device', 'summary.config'
+     )
+
+     obj_uuids = []
+     vms.each do |vm|
+       obj_uuids += _get_vm_obj_uuids(vm, vmsProps).keys
+     end
+
+     puts "#{Time.now}: Querying VSAN objects used by the VMs ..."
+
+     objects = vsanIntSys.query_cmmds(obj_uuids.map do |uuid|
+       {:type => 'CONFIG_STATUS', :uuid => uuid}
+     end)
+     if !objects
+       err "Server failed to gather CONFIG_STATUS entries"
+     end
+
+     objByHost = {}
+     objects.each do |entry|
+       host = hostUuidMap[entry['owner']]
+       if !host
+         next
+       end
+       host = hostNameToMoMap[host]
+       if !host
+         next
+       end
+       objByHost[host] ||= []
+       objByHost[host] << entry['uuid']
+     end
+
+     def fetchStats(objByHost, hosts_props)
+       stats = {}
+       objByHost.each do |host, obj_uuids|
+         vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+
+         res = vsanIntSys.QueryVsanStatistics(:labels => obj_uuids.map do |uuid|
+           "dom-object:#{uuid}"
+         end)
+         res = JSON.load(res)
+
+         obj_uuids.each do |uuid|
+           stats[uuid] = res['dom.owners.selected.stats'][uuid]
+           if stats[uuid]
+             stats[uuid]['ts'] = res['dom.owners.selected.stats-taken']
+           end
+         end
+       end
+       stats
+     end
+
+     puts "#{Time.now}: Fetching stats counters once ..."
+     stats1 = fetchStats(objByHost, hosts_props)
+     sleepTime = opts[:interval]
+     puts "#{Time.now}: Sleeping for #{sleepTime} seconds ..."
+     sleep(sleepTime)
+     puts "#{Time.now}: Fetching stats counters again to compute averages ..."
+     stats2 = fetchStats(objByHost, hosts_props)
+
+     puts "#{Time.now}: Got all data, computing table"
+     stats = {}
+     objects.each do |entry|
+       uuid = entry['uuid']
+       deltas = Hash[stats2[uuid].keys.map do |key|
+         [key, stats2[uuid][key] - stats1[uuid][key]]
+       end]
+       deltaT = deltas['ts']
+       stats[uuid] = deltas.merge({
+         :readIops => deltas['readCount'] / deltaT,
+         :writeIops => deltas['writeCount'] / deltaT,
+         :readTput => deltas['readBytes'] / deltaT,
+         :writeTput => deltas['writeBytes'] / deltaT,
+         :readLatency => 0,
+         :writeLatency => 0,
+       })
+       if deltas['readCount'] > 0
+         stats[uuid][:readLatency] = deltas['readLatencySumUs'] / deltas['readCount']
+       end
+       if deltas['writeCount'] > 0
+         stats[uuid][:writeLatency] = deltas['writeLatencySumUs'] / deltas['writeCount']
+       end
+     end
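+
+     # Rate math above, with illustrative numbers: two samples taken 20s apart
+     # with a readCount delta of 400 and a readLatencySumUs delta of 2,000,000
+     # yield 400 / 20 = 20 read IOPS and 2,000,000 / 400 = 5000us average
+     # latency (rendered as 5.0ms in the table below).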
2740
+
+     t = Terminal::Table.new()
+     t << [
+       'VM/Object',
+       'IOPS',
+       'Tput (KB/s)',
+       'Latency (ms)'
+     ]
+     t.add_separator
+     vms.each do |vm|
+       vmProps = vmsProps[vm]
+       vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
+
+       if !opts[:show_objects]
+         vmStats = {}
+         vmStats[:readLatency] ||= []
+         vmStats[:writeLatency] ||= []
+         [:readIops, :writeIops, :readTput, :writeTput].each do |key|
+           vmStats[key] ||= 0.0
+         end
+
+         vm_obj_uuids.each do |uuid, path|
+           path = path.gsub(/^\[([^\]]*)\] /, "")
+           objStats = stats[uuid]
+           if !objStats
+             next
+           end
+           [:readIops, :writeIops, :readTput, :writeTput].each do |key|
+             vmStats[key] += (objStats[key] || 0.0)
+           end
+           vmStats[:readLatency] << (objStats[:readLatency] * objStats[:readIops])
+           vmStats[:writeLatency] << (objStats[:writeLatency] * objStats[:writeIops])
+         end
+         if vmStats[:readLatency].length > 0 && vmStats[:readIops] > 0.0
+           vmStats[:readLatency] = vmStats[:readLatency].sum / vmStats[:readIops]
+         else
+           vmStats[:readLatency] = 0.0
+         end
+         if vmStats[:writeLatency].length > 0 && vmStats[:writeIops] > 0.0
+           vmStats[:writeLatency] = vmStats[:writeLatency].sum / vmStats[:writeIops]
+         else
+           vmStats[:writeLatency] = 0.0
+         end
+
+         t << [
+           vmProps['name'],
+           [
+             "%.1fr" % [vmStats[:readIops]],
+             "%.1fw" % [vmStats[:writeIops]],
+           ].join("/"),
+           [
+             "%.1fr" % [vmStats[:readTput] / 1024.0],
+             "%.1fw" % [vmStats[:writeTput] / 1024.0],
+           ].join("/"),
+           [
+             "%.1fr" % [vmStats[:readLatency] / 1000.0],
+             "%.1fw" % [vmStats[:writeLatency] / 1000.0],
+           ].join("/"),
+         ]
+       else
+         t << [
+           vmProps['name'],
+           "",
+           "",
+           "",
+         ]
+         vm_obj_uuids.each do |uuid, path|
+           path = path.gsub(/^\[([^\]]*)\] /, "")
+           objStats = stats[uuid]
+           if !objStats
+             t << [
+               " %s" % (path || uuid),
+               "N/A","N/A","N/A",
+             ]
+             next
+           end
+           t << [
+             " %s" % (path || uuid),
+             [
+               "%.1fr" % [objStats[:readIops]],
+               "%.1fw" % [objStats[:writeIops]],
+             ].join("/"),
+             [
+               "%.1fr" % [objStats[:readTput] / 1024.0],
+               "%.1fw" % [objStats[:writeTput] / 1024.0],
+             ].join("/"),
+             [
+               "%.1fr" % [objStats[:readLatency] / 1000.0],
+               "%.1fw" % [objStats[:writeLatency] / 1000.0],
+             ].join("/"),
+           ]
+         end
+       end
+     end
+     # t.add_separator
+     # t << [
+     #   'Total',
+     #   objGrandTotal,
+     #   "%.2f GB" % (bytesToSyncGrandTotal.to_f / 1024**3),
+     #   #"%.2f min" % (recoveryETATotal.to_f / 60),
+     # ]
+     puts t
+   end
+ end
2844
+
+
+ opts :enter_maintenance_mode do
+   summary "Put hosts into maintenance mode"
+   arg :host, nil, :lookup => VIM::HostSystem, :multi => true
+   opt :timeout, "Timeout", :default => 0
+   opt :evacuate_powered_off_vms, "Evacuate powered off VMs", :type => :boolean
+   opt :no_wait, "Don't wait for Task to complete", :type => :boolean
+   opt :vsan_mode, "Actions to take for VSAN backed storage", :type => :string, :default => "ensureObjectAccessibility"
+ end
2854
+
+ def enter_maintenance_mode hosts, opts
+   vsanChoices = ['ensureObjectAccessibility', 'evacuateAllData', 'noAction']
+   if !vsanChoices.member?(opts[:vsan_mode])
+     err "VSAN mode can only be one of these: #{vsanChoices}"
+   end
+   tasks = []
+   conn = hosts[0]._connection
+   _run_with_rev(conn, "dev") do
+     tasks = hosts.map do |host|
+       host.EnterMaintenanceMode_Task(
+         :timeout => opts[:timeout],
+         :evacuatePoweredOffVms => opts[:evacuate_powered_off_vms],
+         :maintenanceSpec => {
+           :vsanMode => {
+             :objectAction => opts[:vsan_mode],
+           }
+         }
+       )
+     end
+   end
+
+   if opts[:no_wait]
+     # Do nothing
+   else
+     results = progress(tasks)
+
+     results.each do |task, error|
+       if error.is_a?(VIM::LocalizedMethodFault)
+         state, entityName, name = task.collect('info.state',
+                                                'info.entityName',
+                                                'info.name')
+         puts "#{name} #{entityName}: #{error.fault.class.wsdl_name}: #{error.localizedMessage}"
+         error.fault.faultMessage.each do |msg|
+           puts "  #{msg.key}: #{msg.message}"
+         end
+       end
+     end
+   end
+ end
2895
+
+ RbVmomi::VIM::HostVsanInternalSystem
+ class RbVmomi::VIM::HostVsanInternalSystem
+   def _parseJson json
+     if json == "BAD"
+       return nil
+     end
+     begin
+       json = JSON.load(json)
+     rescue
+       nil
+     end
+   end
+
+   def query_cmmds queries, opts = {}
+     useGzip = (opts[:gzip]) && $vsanUseGzipApis
+     if useGzip
+       queries = queries + [{:type => "GZIP"}]
+     end
+     json = self.QueryCmmds(:queries => queries)
+     if useGzip
+       gzip = Base64.decode64(json)
+       gz = Zlib::GzipReader.new(StringIO.new(gzip))
+       json = gz.read
+     end
+     objects = _parseJson json
+     if !objects
+       raise "Server failed to gather CMMDS entries: JSON = '#{json}'"
+       # raise "Server failed to gather CMMDS entries: JSON = #{json.length}"
+     end
+     objects = objects['result']
+     objects
+   end
+
+   def query_vsan_objects(opts)
+     json = self.QueryVsanObjects(opts)
+     objects = _parseJson json
+     if !objects
+       raise "Server failed to gather VSAN object info for #{opts[:uuids]}: JSON = '#{json}'"
+     end
+     objects
+   end
+
+   def query_syncing_vsan_objects(opts = {})
+     json = self.QuerySyncingVsanObjects(opts)
+     objects = _parseJson json
+     if !objects
+       raise "Server failed to query syncing objects: JSON = '#{json}'"
+     end
+     objects
+   end
+
+   def query_vsan_statistics(opts = {})
+     json = self.QueryVsanStatistics(opts)
+     objects = _parseJson json
+     if !objects
+       raise "Server failed to query vsan stats: JSON = '#{json}'"
+     end
+     objects
+   end
+
+   def query_physical_vsan_disks(opts)
+     json = self.QueryPhysicalVsanDisks(opts)
+     objects = _parseJson json
+     if !objects
+       raise "Server failed to query vsan disks: JSON = '#{json}'"
+     end
+     objects
+   end
+
+   def query_objects_on_physical_vsan_disk(opts)
+     json = self.QueryObjectsOnPhysicalVsanDisk(opts)
+     objects = _parseJson json
+     if !objects
+       raise "Server failed to query objects on vsan disks: JSON = '#{json}'"
+     end
+     objects
+   end
+ end
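+
+ # Minimal usage sketch for the wrappers above (query arguments illustrative):
+ #   entries = vsanIntSys.query_cmmds([{:type => 'DOM_OBJECT'}], :gzip => true)
+ #   stats   = vsanIntSys.query_vsan_statistics(:labels => ['lsom-node'])
+ # Each wrapper parses the JSON string returned by the raw Query* API and
+ # raises if the server answered "BAD" or with unparseable JSON.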
2976
+
+ def _parseJson json
+   if json == "BAD"
+     return nil
+   end
+   begin
+     json = JSON.load(json)
+   rescue
+     nil
+   end
+ end
2987
+
+ def _assessAvailabilityByStatus state
+   mask = {
+     'DATA_AVAILABLE' => (1 << 0),
+     'QUORUM' => (1 << 1),
+     'PERF_COMPLIANT' => (1 << 2),
+     'INCOMPLETE' => (1 << 3),
+   }
+   Hash[mask.map{|k,v| [k, (state & v) != 0]}]
+ end
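+
+ # Worked example: a state value of 3 (binary 0011) decodes to
+ #   _assessAvailabilityByStatus(3)
+ #   # => {"DATA_AVAILABLE"=>true, "QUORUM"=>true,
+ #   #     "PERF_COMPLIANT"=>false, "INCOMPLETE"=>false}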
2997
+
+ opts :lldpnetmap do
+   summary "Gather LLDP mapping information from a set of hosts"
+   arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
+ end
3002
+
+ def lldpnetmap hosts_and_clusters, opts = {}
+   conn = hosts_and_clusters.first._connection
+   hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
+   clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
+   pc = conn.propertyCollector
+   cluster_hosts = pc.collectMultiple(clusters, 'host')
+   cluster_hosts.each do |cluster, props|
+     hosts += props['host']
+   end
+   hosts = hosts.uniq
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+
+     hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     if hosts.length == 0
+       err "Couldn't find any connected hosts"
+     end
+
+     hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
+     node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
+     node_uuids = Hash[node_uuids.map do |k, v|
+       [v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
+     end]
+
+     puts "#{Time.now}: This operation will take 30-60 seconds ..."
+     hosts_props.map do |host, props|
+       Thread.new do
+         begin
+           vsanIntSys = props['configManager.vsanInternalSystem']
+           c1 = conn.spawn_additional_connection
+           vsanIntSys = vsanIntSys.dup_on_conn(c1)
+           res = vsanIntSys.QueryVsanStatistics(:labels => ['lldpnetmap'])
+           hosts_props[host]['lldpnetmap'] = JSON.parse(res)['lldpnetmap']
+         rescue Exception => ex
+           puts "Failed to gather lldpnetmap from #{props['name']}: #{ex.class}: #{ex.message}"
+         end
+       end
+     end.each{|t| t.join}
+
+     t = Terminal::Table.new()
+     t << ['Host', 'LLDP info']
+     t.add_separator
+     hosts_props.each do |host, props|
+       t << [
+         props['name'],
+         (props['lldpnetmap'] || {}).map do |switch, pnics|
+           "#{switch}: #{pnics.join(',')}"
+         end.join("\n")
+       ]
+     end
+     puts t
+   end
+ end
3063
+
+ opts :check_limits do
+   summary "Gathers (and checks) counters against limits"
+   arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
+ end
3068
+
+ def check_limits hosts_and_clusters, opts = {}
+   conn = hosts_and_clusters.first._connection
+   hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
+   clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
+   pc = conn.propertyCollector
+   cluster_hosts = pc.collectMultiple(clusters, 'host')
+   cluster_hosts.each do |cluster, props|
+     hosts += props['host']
+   end
+   hosts = hosts.uniq
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+
+     hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     if hosts.length == 0
+       err "Couldn't find any connected hosts"
+     end
+
+     lock = Mutex.new
+     all_disks = {}
+     puts "#{Time.now}: Gathering stats from all hosts ..."
+     hosts_props.map do |host, props|
+       if props['runtime.connectionState'] != 'connected'
+         next
+       end
+       hosts_props[host]['profiling'] = {}
+       Thread.new do
+         vsanIntSys = props['configManager.vsanInternalSystem']
+         c1 = conn.spawn_additional_connection
+         vsanIntSys2 = vsanIntSys.dup_on_conn(c1)
+         begin
+           timeout(45) do
+             t1 = Time.now
+             res = vsanIntSys2.query_vsan_statistics(
+               :labels => ['rdtglobal', 'lsom-node']
+             )
+             t2 = Time.now
+             hosts_props[host]['profiling']['rdtglobal'] = t2 - t1
+             hosts_props[host]['rdtglobal'] = res['rdt.globalinfo']
+             hosts_props[host]['lsom.node'] = res['lsom.node']
+           end
+         rescue Exception => ex
+           puts "Failed to gather RDT info from #{props['name']}: #{ex.class}: #{ex.message}"
+         end
+
+         begin
+           timeout(60) do
+             t1 = Time.now
+             res = vsanIntSys2.QueryVsanStatistics(
+               :labels => ['dom', 'dom-objects-counts']
+             )
+             res = JSON.parse(res)
+             if res && !res['dom.owners.count']
+               # XXX: Remove me later
+               # This code is a fall back path in case we are dealing
+               # with an old ESX host (before Nov13 2013). As we only
+               # need to be compatible with VSAN GA, we can remove this
+               # code once everyone is upgraded.
+               res = vsanIntSys2.QueryVsanStatistics(
+                 :labels => ['dom', 'dom-objects']
+               )
+               res = JSON.parse(res)
+               numOwners = res['dom.owners.stats'].keys.length
+             else
+               numOwners = res['dom.owners.count'].keys.length
+             end
+             t2 = Time.now
+             hosts_props[host]['profiling']['domstats'] = t2 - t1
+             hosts_props[host]['dom'] = {
+               'numClients'=> res['dom.clients'].keys.length,
+               'numOwners'=> numOwners,
+             }
+           end
+         rescue Exception => ex
+           puts "Failed to gather DOM info from #{props['name']}: #{ex.class}: #{ex.message}"
+         end
+
+         begin
+           timeout(45) do
+             t1 = Time.now
+             disks = vsanIntSys2.QueryPhysicalVsanDisks(:props => [
+               'lsom_objects_count',
+               'uuid',
+               'isSsd',
+               'capacity',
+               'capacityUsed',
+             ])
+             t2 = Time.now
+             hosts_props[host]['profiling']['physdisk'] = t2 - t1
+             disks = JSON.load(disks)
+
+             # Getting the data from all hosts is kind of overkill, but
+             # this way we deal with partitions and get info on all disks
+             # everywhere. But we have duplicates, so need to merge.
+             lock.synchronize do
+               all_disks.merge!(disks)
+             end
+           end
+         rescue Exception => ex
+           puts "Failed to gather disks info from #{props['name']}: #{ex.class}: #{ex.message}"
+         end
+       end
+     end.compact.each{|t| t.join}
+
+     # hosts_props.each do |host, props|
+     #   puts "#{Time.now}: Host #{props['name']}: #{props['profiling']}"
+     # end
+
+     puts "#{Time.now}: Gathering disks info ..."
+     disks = all_disks
+     vsan_disks_info = {}
+     vsan_disks_info.merge!(
+       _vsan_host_disks_info(Hash[hosts.map{|h| [h, hosts_props[h]['name']]}])
+     )
+     disks.each do |k, v|
+       v['esxcli'] = vsan_disks_info[v['uuid']]
+       if v['esxcli']
+         v['host'] = v['esxcli']._get_property :host
+
+         hosts_props[v['host']]['components'] ||= 0
+         hosts_props[v['host']]['components'] += v['lsom_objects_count']
+         hosts_props[v['host']]['disks'] ||= []
+         hosts_props[v['host']]['disks'] << v
+       end
+     end
+
+     t = Terminal::Table.new()
+     t << ['Host', 'RDT', 'Disks']
+     t.add_separator
+     hosts_props.each do |host, props|
+       rdt = props['rdtglobal'] || {}
+       lsomnode = props['lsom.node'] || {}
+       dom = props['dom'] || {}
+       t << [
+         props['name'],
+         [
+           "Assocs: #{rdt['assocCount']}/#{rdt['maxAssocCount']}",
+           "Sockets: #{rdt['socketCount']}/#{rdt['maxSocketCount']}",
+           "Clients: #{dom['numClients'] || 'N/A'}",
+           "Owners: #{dom['numOwners'] || 'N/A'}",
+         ].join("\n"),
+         ([
+           "Components: #{props['components']}/%s" % [
+             lsomnode['numMaxComponents'] || 'N/A'
+           ],
+         ] + (props['disks'] || []).map do |disk|
+           if disk['capacity'] > 0
+             usage = disk['capacityUsed'] * 100 / disk['capacity']
+             usage = "#{usage}%"
+           else
+             usage = "N/A"
+           end
+           "#{disk['esxcli'].DisplayName}: #{usage}"
+         end).join("\n"),
+       ]
+     end
+     puts t
+   end
+ end
3235
+
+ opts :object_reconfigure do
+   summary "Reconfigure a VSAN object"
+   arg :cluster, "Cluster on which to execute the reconfig", :lookup => [VIM::HostSystem, VIM::ClusterComputeResource]
+   arg :obj_uuid, "Object UUID", :type => :string, :multi => true
+   opt :policy, "New policy", :type => :string, :required => true
+ end
3242
+
+ def object_reconfigure cluster_or_host, obj_uuids, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+     cluster = cluster_or_host
+     hosts = cluster.host
+   else
+     hosts = [cluster_or_host]
+   end
+
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+
+     obj_uuids.each do |uuid|
+       puts "Reconfiguring '#{uuid}' to #{opts[:policy]}"
+       puts vsanIntSys.ReconfigureDomObject(
+         :uuid => uuid,
+         :policy => opts[:policy]
+       )
+     end
+   end
+   puts "All reconfigs initiated. Syncing may be happening in the background"
+ end
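+
+ # Example invocation (policy string in VSAN's s-expression format; the values
+ # are illustrative):
+ #   > vsan.object_reconfigure ~cluster <obj-uuid> --policy '(("hostFailuresToTolerate" i1) ("forceProvisioning" i1))'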
3279
+
+
+ opts :obj_status_report do
+   summary "Print component status for objects in the cluster."
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :print_table, "Print a table of objects and their status, default all objects",
+       :short => 't', :type => :boolean, :default => false
+   opt :filter_table, "Filter the obj table based on status displayed in histogram, e.g. 2/3",
+       :short => 'f', :type => :string, :default => nil
+   opt :print_uuids, "In the table, print object UUIDs instead of vmdk and vm paths",
+       :short => 'u', :type => :boolean, :default => false
+   opt :ignore_node_uuid, "Estimate the status of objects if all comps on a given host were healthy.",
+       :short => 'i', :type => :string, :default => nil
+ end
3293
+
+ def obj_status_report cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+     cluster = cluster_or_host
+     hosts = cluster.host
+   else
+     hosts = [cluster_or_host]
+   end
+
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+
+     entries = nil
+
+     puts "#{Time.now}: Querying all VMs on VSAN ..."
+     ds_list = host.datastore
+     ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
+     ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
+     ds_name = ds_props[ds]['name']
+
+     vms = ds.vm
+     vmsProps = pc.collectMultiple(vms,
+       'name', 'runtime.connectionState',
+       'config.hardware.device', 'summary.config'
+     )
+
+     hostname = hosts_props[host]['name']
+     puts "#{Time.now}: Querying all objects in the system from #{hostname} ..."
+
+     objects = vsanIntSys.query_cmmds([
+       {:type => 'DOM_OBJECT'}
+     ], :gzip => true)
+     if !objects
+       err "Server failed to gather DOM_OBJECT entries"
+     end
+
+     puts "#{Time.now}: Querying all disks in the system ..."
+     # Need a list of live disk uuids to see if components are orphaned.
+     liveDisks = vsanIntSys.query_cmmds([{:type => 'DISK'}])
+     liveDisks = liveDisks.select do |disk|
+       disk['health'] == "Healthy"
+     end.map do |disk|
+       disk['uuid']
+     end
+
+     puts "#{Time.now}: Querying all components in the system ..."
+     # Need a list of live comp uuids to see if components are orphaned.
+     liveComps = vsanIntSys.query_cmmds(
+       [{:type => 'LSOM_OBJECT'}],
+       :gzip => true
+     )
+     liveComps = liveComps.select do |comp|
+       comp['health'] == "Healthy"
+     end
+     liveComps = liveComps.map do |comp|
+       comp['uuid']
+     end
+
+     #pp liveDisks
+     #puts "%d comps total" % liveComps.length
+
+     puts "#{Time.now}: Got all the info, computing table ..."
+
+     results = {}
+     orphanRes = {}
+     totalObjects = objects.length
+     totalOrphans = 0
+
+     objects = objects.select do |obj|
+       comps = _components_in_dom_config(obj['content'])
+       numHealthy = 0
+       numDeletedComps = 0
+
+       comps.each do |comp|
+         state = comp['attributes']['componentState']
+         bytesToSync = comp['attributes']['bytesToSync'] || 0
+         resync = [10, 6].member?(state) && bytesToSync != 0
+
+         # Should we count resyncing as healthy? For now, let's do that.
+         if resync || state == 5 ||
+            (opts[:ignore_node_uuid] &&
+             comp['attributes']['ownerId'] == opts[:ignore_node_uuid])
+           numHealthy += 1
+         elsif liveDisks.member?(comp['diskUuid']) &&
+               !liveComps.member?(comp['componentUuid'])
+           # A component is considered deleted if its disk is present
+           # and the component is not present in CMMDS.
+           numDeletedComps += 1
+         end
+       end
+       obj['numHealthy'] = numHealthy
+       obj['numComps'] = comps.length
+       status = [numHealthy, comps.length]
+
+       # An object can be orphaned if it is deleted while a minority of
+       # components are absent. To consider this an orphan, the total
+       # number of provably deleted components must be a quorum.
+       # If we have some deleted comps, but not a quorum, then mark it
+       # as an orphanCandidate instead of a full orphan. Orphan candidates
+       # still go into the normal results table.
+       isOrphan = numDeletedComps > 0 && numDeletedComps > comps.length / 2
+       if isOrphan
+         obj['isOrphan'] = true
+       elsif numDeletedComps > 0
+         obj['isOrphanCandidate'] = true
+       end
3423
+
3424
+ if isOrphan
3425
+ # All absent components are orphaned. Consider the object orphaned.
3426
+ totalOrphans += 1
3427
+ orphanRes[status] ||= 0
3428
+ orphanRes[status] += 1
3429
+ else
3430
+ results[status] ||= 0
3431
+ results[status] += 1
3432
+ end
3433
+
3434
+ if opts[:filter_table]
3435
+ ("%d/%d" % [numHealthy, comps.length]) == opts[:filter_table]
3436
+ else
3437
+ true
3438
+ end
3439
+ end
3440
+ obj_uuids = objects.map{|x| x['uuid']}
3441
+ objectUuidMap = Hash[objects.map{|x| [x['uuid'], x]}]
3442
+
3443
+ all_obj_uuids = []
3444
+ vmToObjMap = {}
3445
+ vms.each do |vm|
3446
+ vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
3447
+ vm_obj_uuids = vm_obj_uuids.select{|x, v| obj_uuids.member?(x)}
3448
+ vm_obj_uuids = vm_obj_uuids.reject{|x, v| all_obj_uuids.member?(x)}
3449
+ all_obj_uuids += vm_obj_uuids.keys
3450
+ if vm_obj_uuids.length > 0
3451
+ vmToObjMap[vm] = vm_obj_uuids
3452
+ end
3453
+ end
3454
+
3455
+ def printObjStatusHist results
3456
+ t = Terminal::Table.new()
3457
+ t << [
3458
+ 'Num Healthy Comps / Total Num Comps',
3459
+ 'Num objects with such status',
3460
+ ]
3461
+ t.add_separator
3462
+
3463
+ results.each do |key,val|
3464
+ t << [
3465
+ "%d/%d" % [key[0], key[1]],
3466
+ " %d" % val,
3467
+ ]
3468
+ end
3469
+ puts t
3470
+ end
3471
+
3472
+ puts ""
3473
+ puts "Histogram of component health for non-orphaned objects"
3474
+ puts ""
3475
+ printObjStatusHist(results)
3476
+ puts "Total non-orphans: %d" % (totalObjects - totalOrphans)
3477
+ puts ""
3478
+ puts ""
3479
+ puts "Histogram of component health for possibly orphaned objects"
3480
+ puts ""
3481
+ printObjStatusHist(orphanRes)
3482
+ puts "Total orphans: %d" % totalOrphans
3483
+ puts ""
3484
+
3485
+
3486
+ if opts[:print_table] || opts[:filter_table]
3487
+ t = Terminal::Table.new()
3488
+ t << [
3489
+ 'VM/Object',
3490
+ 'objects',
3491
+ 'num healthy / total comps',
3492
+ ]
3493
+ t.add_separator
3494
+ bytesToSyncGrandTotal = 0
3495
+ objGrandTotal = 0
3496
+ vmToObjMap.each do |vm, vm_obj_uuids|
3497
+ vmProps = vmsProps[vm]
3498
+ objs = vm_obj_uuids.keys.map{|x| objectUuidMap[x]}
3499
+ t << [
3500
+ vmProps['name'],
3501
+ objs.length,
3502
+ "",
3503
+ ]
3504
+ objs.each do |obj|
3505
+ if opts[:print_uuids]
3506
+ objName = obj['uuid']
3507
+ else
3508
+ objName = (vm_obj_uuids[obj['uuid']] || obj['uuid'])
3509
+ end
3510
+
3511
+ if obj['isOrphan']
3512
+ orphanStr = "*"
3513
+ elsif obj['isOrphanCandidate']
3514
+ orphanStr = "-"
3515
+ else
3516
+ orphanStr = ""
3517
+ end
3518
+
3519
+ t << [
3520
+ " %s" % objName,
3521
+ '',
3522
+ "%d/%d%s" % [obj['numHealthy'], obj['numComps'], orphanStr],
3523
+ ]
3524
+ objects.delete(obj)
3525
+ end
3526
+ end
3527
+
3528
+ # Okay, now print the remaining UUIDs which didn't map to any VM.
3529
+ if objects.length > 0
3530
+ if vmToObjMap.length > 0
3531
+ t.add_separator
3532
+ end
3533
+ t << [
3534
+ "Unassociated objects",
3535
+ '',
3536
+ '',
3537
+ ]
3538
+ end
3539
+ objects.each do |obj|
3540
+ if obj['isOrphan']
3541
+ orphanStr = "*"
3542
+ elsif obj['isOrphanCandidate']
3543
+ orphanStr = "-"
3544
+ else
3545
+ orphanStr = ""
3546
+ end
3547
+
3548
+ t << [
3549
+ " %s" % obj['uuid'],
3550
+ '',
3551
+ "%d/%d%s" % [obj['numHealthy'], obj['numComps'], orphanStr],
3552
+ ]
3553
+ end
3554
+ puts t
3555
+ puts ""
3556
+ puts "+------------------------------------------------------------------+"
3557
+ puts "| Legend: * = all unhealthy comps were deleted (disks present) |"
3558
+ puts "| - = some unhealthy comps deleted, some not or can't tell |"
3559
+ puts "| no symbol = We cannot conclude any comps were deleted |"
3560
+ puts "+------------------------------------------------------------------+"
3561
+ puts ""
3562
+ end
3563
+ end
3564
+ end
3565
+
3566
+
3567
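The quorum arithmetic above is the heart of the orphan detection: an object is flagged as an orphan only when the provably deleted components form a strict majority of its components. A minimal standalone sketch of that predicate (the method name and sample hashes are hypothetical; the real component entries come from _components_in_dom_config and CMMDS):

    # Sketch only: mirrors the quorum check in the command above.
    # comps stand in for CMMDS DOM_OBJECT component entries.
    def orphan?(comps, live_disk_uuids, live_comp_uuids)
      num_deleted = comps.count do |comp|
        # Provably deleted: its disk is alive, but the component left CMMDS.
        live_disk_uuids.member?(comp['diskUuid']) &&
          !live_comp_uuids.member?(comp['componentUuid'])
      end
      num_deleted > 0 && num_deleted > comps.length / 2
    end

    comps = [
      {'diskUuid' => 'disk-a', 'componentUuid' => 'comp-1'},
      {'diskUuid' => 'disk-a', 'componentUuid' => 'comp-2'},
      {'diskUuid' => 'disk-b', 'componentUuid' => 'comp-3'},
    ]
    orphan?(comps, ['disk-a', 'disk-b'], ['comp-3'])            # => true  (2 of 3 provably deleted)
    orphan?(comps, ['disk-a', 'disk-b'], ['comp-2', 'comp-3'])  # => false (1 of 3 is not a quorum)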
+ opts :apply_license_to_cluster do
+   summary "Apply license to VSAN"
+   arg :cluster, nil, :lookup => VIM::ClusterComputeResource
+   opt :license_key, "License key to be applied to the cluster", :short => 'k', :type => :string, :required => true
+   opt :null_reconfigure, "", :short => 'r', :type => :boolean, :default => true
+ end
+
+ def apply_license_to_cluster cluster, opts
+   conn = cluster._connection
+   puts "#{cluster.name}: Applying VSAN License on the cluster..."
+   licenseManager = conn.serviceContent.licenseManager
+   licenseAssignmentManager = licenseManager.licenseAssignmentManager
+   assignment = licenseAssignmentManager.UpdateAssignedLicense(
+     :entity => cluster._ref,
+     :licenseKey => opts[:license_key]
+   )
+   if opts[:null_reconfigure]
+     # Due to races in the cluster assignment mechanism in vSphere 5.5 GA,
+     # disks may or may not be auto-claimed as would normally be expected.
+     # Doing a Null-Reconfigure causes the license state to be synchronized
+     # correctly and allows auto-claim to work as expected.
+     puts "#{cluster.name}: Null-Reconfigure to force auto-claim..."
+     spec = VIM::ClusterConfigSpecEx()
+     task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
+     progress([task])
+     childtasks = task.child_tasks
+     if childtasks && childtasks.length > 0
+       progress(childtasks)
+     end
+   end
+ end
+
+
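For reference, a hypothetical RVC session (the inventory path and key below are placeholders, and the --no- negation assumes RVC's Trollop-style option parsing, which accepts it for flags defaulting to true):

    > vsan.apply_license_to_cluster ~/computers/mycluster -k XXXXX-XXXXX-XXXXX-XXXXX-XXXXX
    > vsan.apply_license_to_cluster ~/computers/mycluster -k XXXXX-XXXXX-XXXXX-XXXXX-XXXXX --no-null-reconfigure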
+ opts :check_state do
+   summary "Checks state of VMs and VSAN objects"
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :refresh_state, "Not just check state, but also refresh", :type => :boolean
+   opt :reregister_vms,
+       "Not just check for VMs with VC/hostd/vmx out of sync but also " \
+       "fix them by un-registering and re-registering them",
+       :type => :boolean
+ end
+
+ def check_state cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+     cluster = cluster_or_host
+     hosts = cluster.host
+   else
+     hosts = [cluster_or_host]
+   end
+
+   _run_with_rev(conn, "dev") do
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+
+     entries = nil
+
+     ds_list = host.datastore
+     ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
+     ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
+     ds_name = ds_props[ds]['name']
+
+     vms = ds.vm
+     vms_props = pc.collectMultiple(vms, 'name', 'runtime.connectionState')
+
+     puts "#{Time.now}: Step 1: Check for inaccessible VSAN objects"
+
+     statusses = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS'}])
+     bad = statusses.select do |x|
+       state = _assessAvailabilityByStatus(x['content']['state'])
+       !state['DATA_AVAILABLE'] || !state['QUORUM']
+     end
+
+     if !opts[:refresh_state]
+ puts "Detected #{bad.length} objects to not be inaccessible"
3665
+ bad.each do |x|
3666
+ uuid = x['uuid']
3667
+ hostname = hostUuidMap[x['owner']]
3668
+ puts "Detected #{uuid} on #{hostname} to be inaccessible"
3669
+ end
3670
+ else
3671
+ bad.group_by{|x| hostUuidMap[x['owner']]}.each do |hostname, badOnHost|
3672
+ owner = hosts_props.select{|k,v| v['name'] == hostname}.keys.first
3673
+ owner_props = hosts_props[owner]
3674
+ owner_vsanIntSys = owner_props['configManager.vsanInternalSystem']
3675
+ badOnHost.each do |x|
3676
+ uuid = x['uuid']
3677
+ puts "Detected #{uuid} to not be inaccessible, refreshing state"
3678
+         if badOnHost.length > 0
+           badUuids = badOnHost.map{|x| x['uuid']}
+           owner_vsanIntSys.AbdicateDomOwnership(:uuids => badUuids)
+         end
+       end
+       puts ""
+
+       puts "#{Time.now}: Step 1b: Check for inaccessible VSAN objects, again"
+       statusses = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS'}])
+       bad = statusses.select do |x|
+         state = _assessAvailabilityByStatus(x['content']['state'])
+         !state['DATA_AVAILABLE'] || !state['QUORUM']
+       end
+       bad.each do |x|
+         puts "Detected #{x['uuid']} is still inaccessible"
+       end
+     end
+     puts ""
+
+     puts "#{Time.now}: Step 2: Check for invalid/inaccessible VMs"
+     invalid_vms = vms_props.select do |k,v|
+       ['invalid', 'inaccessible', 'orphaned'].member?(v['runtime.connectionState'])
+     end.keys
+     tasks = []
+     invalid_vms.each do |vm|
+       vm_props = vms_props[vm]
+       vm_state = vm_props['runtime.connectionState']
+       if !opts[:refresh_state]
+         puts "Detected VM '#{vm_props['name']}' as being '#{vm_state}'"
+       else
+         puts "Detected VM '#{vm_props['name']}' as being '#{vm_state}', reloading ..."
+         begin
+           if vm_state == 'orphaned'
+             path = vm.summary.config.vmPathName
+             tasks << vm.reloadVirtualMachineFromPath_Task(
+               :configurationPath => path
+             )
+           else
+             vm.Reload
+           end
+         rescue Exception => ex
+           puts "#{ex.class}: #{ex.message}"
+         end
+       end
+     end
+     tasks = tasks.compact
+     if tasks.length > 0
+       progress(tasks)
+     end
+     puts ""
+
+     if opts[:refresh_state]
+       puts "#{Time.now}: Step 2b: Check for invalid/inaccessible VMs again"
+       vms_props = pc.collectMultiple(vms, 'name', 'runtime.connectionState')
+       invalid_vms = vms_props.select do |k,v|
+         ['invalid', 'inaccessible', 'orphaned'].member?(v['runtime.connectionState'])
+       end.keys
+       invalid_vms.each do |vm|
+         vm_props = vms_props[vm]
+         vm_state = vm_props['runtime.connectionState']
+         puts "Detected VM '#{vm_props['name']}' as still '#{vm_state}'"
+       end
+       puts ""
+     end
+
+     puts "#{Time.now}: Step 3: Check for VMs for which VC/hostd/vmx" \
+          " are out of sync"
+     inconsistent_vms = find_inconsistent_vms(cluster_or_host)
+     if opts[:reregister_vms] and not inconsistent_vms.empty?
+ puts "You have chosen to fix these VMs. This involves re-registering" \
3750
+ " the VM which will cause loss of some of the management state of"\
3751
+ " this VM (for eg. storage policy, permissions, tags," \
3752
+ " scheduled tasks, etc. but NO data loss). Do you want to" \
3753
+ " continue [y/N] ?"
3754
+ opt = $stdin.gets.chomp
3755
+ if opt == 'y' || opt == 'Y'
3756
+ puts "Attempting to fix these vms..."
3757
+ fix_inconsistent_vms(inconsistent_vms)
3758
+ end
3759
+ end
3760
+ puts ""
3761
+
3762
+ end
3763
+ end
3764
+
3765
+
3766
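A hypothetical escalation path for this command (inventory paths are placeholders): start read-only, then opt in to the riskier repairs.

    > vsan.check_state ~/computers/mycluster                    # report problems only
    > vsan.check_state ~/computers/mycluster --refresh-state    # also abdicate DOM owners and reload VMs
    > vsan.check_state ~/computers/mycluster --reregister-vms   # also offer to re-register out-of-sync VMs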
+ opts :reapply_vsan_vmknic_config do
+   summary "Unbinds and rebinds VSAN to its vmknics"
+   arg :host, nil, :lookup => [VIM::HostSystem], :multi => true
+   opt :vmknic, "Refresh a specific vmknic. Default is all vmknics", :type => :string
+   opt :dry_run, "Do a dry run: Show what changes would be made", :type => :boolean
+ end
+
+ def reapply_vsan_vmknic_config hosts, opts
+   hosts.each do |host|
+     hostname = host.name
+     net = host.esxcli.vsan.network
+     nics = net.list()
+     if opts[:vmknic]
+       nics = nics.select{|x| x.VmkNicName == opts[:vmknic]}
+     end
+     keys = {
+       :AgentGroupMulticastAddress => :agentmcaddr,
+       :AgentGroupMulticastPort => :agentmcport,
+       :IPProtocol => nil,
+       :InterfaceUUID => nil,
+       :MasterGroupMulticastAddress => :mastermcaddr,
+       :MasterGroupMulticastPort => :mastermcport,
+       :MulticastTTL => :multicastttl,
+     }
+     puts "Host: #{hostname}"
+     if opts[:dry_run]
+       nics.each do |nic|
+         puts "  Would reapply config of vmknic #{nic.VmkNicName}:"
+         keys.keys.each do |key|
+           puts "    #{key.to_s}: #{nic.send(key)}"
+         end
+       end
+     else
+       nics.each do |nic|
+         puts "  Reapplying config of #{nic.VmkNicName}:"
+         keys.keys.each do |key|
+           puts "    #{key.to_s}: #{nic.send(key)}"
+         end
+         puts "  Unbinding VSAN from vmknic #{nic.VmkNicName} ..."
+         net.ipv4.remove(:interfacename => nic.VmkNicName)
+         puts "  Rebinding VSAN to vmknic #{nic.VmkNicName} ..."
+         params = {
+           :agentmcaddr => nic.AgentGroupMulticastAddress,
+           :agentmcport => nic.AgentGroupMulticastPort,
+           :interfacename => nic.VmkNicName,
+           :mastermcaddr => nic.MasterGroupMulticastAddress,
+           :mastermcport => nic.MasterGroupMulticastPort,
+           :multicastttl => nic.MulticastTTL,
+         }
+         #pp params
+         net.ipv4.add(params)
+       end
+     end
+   end
+ end
+
+
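Because the rebind briefly removes VSAN from each vmknic, previewing with --dry-run first is the safer workflow; a hypothetical session (the host path is a placeholder):

    > vsan.reapply_vsan_vmknic_config ~/hosts/esx-01.example.com --dry-run
    > vsan.reapply_vsan_vmknic_config ~/hosts/esx-01.example.com --vmknic vmk2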
+ opts :recover_spbm do
+   summary "SPBM Recovery"
+   arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
+   opt :show_details, "Show all the details", :type => :boolean
+ end
+
+ def recover_spbm cluster_or_host, opts
+   conn = cluster_or_host._connection
+   pc = conn.propertyCollector
+   host = cluster_or_host
+   entries = []
+   hostUuidMap = {}
+   startTime = Time.now
+   _run_with_rev(conn, "dev") do
+     vsanIntSys = nil
+     puts "#{Time.now}: Fetching Host info"
+     if cluster_or_host.is_a?(VIM::ClusterComputeResource)
+       cluster = cluster_or_host
+       hosts = cluster.host
+     else
+       hosts = [host]
+     end
+
+     hosts_props = pc.collectMultiple(hosts,
+       'name',
+       'runtime.connectionState',
+       'configManager.vsanSystem',
+       'configManager.vsanInternalSystem',
+       'datastore'
+     )
+     connected_hosts = hosts_props.select do |k,v|
+       v['runtime.connectionState'] == 'connected'
+     end.keys
+     host = connected_hosts.first
+     if !host
+       err "Couldn't find any connected hosts"
+     end
+     vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
+     vsanSysList = Hash[hosts_props.map do |host, props|
+       [props['name'], props['configManager.vsanSystem']]
+     end]
+     clusterInfos = pc.collectMultiple(vsanSysList.values,
+                                       'config.clusterInfo')
+     hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
+       [clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
+     end]
+
+     puts "#{Time.now}: Fetching Datastore info"
+     datastores = hosts_props.values.map{|x| x['datastore']}.flatten
+     datastores_props = pc.collectMultiple(datastores, 'name', 'summary.type')
+     vsanDsList = datastores_props.select do |ds, props|
+       props['summary.type'] == "vsan"
+     end.keys
+     if vsanDsList.length > 1
+       err "Two VSAN datastores found, can't handle that"
+     end
+     vsanDs = vsanDsList[0]
+
+     puts "#{Time.now}: Fetching VM properties"
+     vms = vsanDs.vm
+     vms_props = pc.collectMultiple(vms, 'name', 'config.hardware.device')
+
+     puts "#{Time.now}: Fetching policies used on VSAN from CMMDS"
+     entries = vsanIntSys.query_cmmds([{
+       :type => "POLICY",
+     }], :gzip => true)
+
+     policies = entries.map{|x| x['content']}.uniq
+
+     puts "#{Time.now}: Fetching SPBM profiles"
+     pbm = conn.pbm
+     pm = pbm.serviceContent.profileManager
+     profileIds = pm.PbmQueryProfile(
+       :resourceType => {:resourceType => "STORAGE"},
+       :profileCategory => "REQUIREMENT"
+     )
+     if profileIds.length > 0
+       profiles = pm.PbmRetrieveContent(:profileIds => profileIds)
+     else
+       profiles = []
+     end
+     profilesMap = Hash[profiles.map do |x|
+       ["#{x.profileId.uniqueId}-gen#{x.generationId}", x]
+     end]
+
+     puts "#{Time.now}: Fetching VM <-> SPBM profile association"
+     vms_entities = vms.map do |vm|
+       vm.all_pbmobjref(:vms_props => vms_props)
+     end.flatten.map{|x| x.dynamicProperty = []; x}
+     associatedProfiles = pm.PbmQueryAssociatedProfiles(
+       :entities => vms_entities
+     )
+     associatedEntities = associatedProfiles.map{|x| x.object}.uniq
+     puts "#{Time.now}: Computing which VMs do not have a SPBM Profile ..."
+
+     nonAssociatedEntities = vms_entities - associatedEntities
+
+     vmsMap = Hash[vms.map{|x| [x._ref, x]}]
+     nonAssociatedVms = {}
+     nonAssociatedEntities.map do |entity|
+       vm = vmsMap[entity.key.split(":").first]
+       nonAssociatedVms[vm] ||= []
+       nonAssociatedVms[vm] << [entity.objectType, entity.key]
+     end
+     puts "#{Time.now}: Fetching additional info about some VMs"
+
+     vms_props2 = pc.collectMultiple(vms, 'summary.config.vmPathName')
+
+     puts "#{Time.now}: Got all info, computing after %.2f sec" % [
+       Time.now - startTime
+     ]
+
+     policies.each do |policy|
+       policy['spbmRecoveryCandidate'] = false
+       policy['spbmProfile'] = nil
+       if policy['spbmProfileId']
+         name = "%s-gen%s" % [
+           policy['spbmProfileId'],
+           policy['spbmProfileGenerationNumber'],
+         ]
+         policy['spbmName'] = name
+         policy['spbmProfile'] = profilesMap[name]
+         if policy['spbmProfile']
+           name = policy['spbmProfile'].name
+           policy['spbmName'] = name
+           name = "Existing SPBM Profile:\n#{name}"
+         else
+           policy['spbmRecoveryCandidate'] = true
+           profile = profiles.find do |profile|
+             profile.profileId.uniqueId == policy['spbmProfileId'] &&
+               profile.generationId > policy['spbmProfileGenerationNumber']
+           end
+           # XXX: We should check if there is a profile that matches
+           # one we recovered
+           if profile
+             name = profile.name
+ name = "Old generation of SPBM Profile:\n#{name}"
3960
+ else
3961
+ name = "Unknown SPBM Profile. UUID:\n#{name}"
3962
+ end
3963
+ end
3964
+ else
3965
+ name = "Not managed by SPBM"
3966
+ policy['spbmName'] = name
3967
+ end
3968
+ propCap = policy['proportionalCapacity']
3969
+ if propCap && propCap.is_a?(Array) && propCap.length == 2
3970
+ policy['proportionalCapacity'] = policy['proportionalCapacity'][0]
3971
+ end
3972
+
3973
+ policy['spbmDescr'] = name
3974
+ end
3975
+ entriesMap = Hash[entries.map{|x| [x['uuid'], x]}]
3976
+
3977
+ nonAssociatedEntities = []
3978
+ nonAssociatedVms.each do |vm, entities|
3979
+ if entities.any?{|x| x == ["virtualMachine", vm._ref]}
3980
+ vmxPath = vms_props2[vm]['summary.config.vmPathName']
3981
+         if vmxPath =~ /^\[([^\]]*)\] ([^\/]*)\//
+           nsUuid = $2
+           entry = entriesMap[nsUuid]
+           if entry && entry['content']['spbmProfileId']
+             # This is a candidate
+             nonAssociatedEntities << {
+               :objUuid => nsUuid,
+               :type => "virtualMachine",
+               :key => vm._ref,
+               :entry => entry,
+               :vm => vm,
+               :label => "VM Home",
+             }
+           end
+         end
+       end
+       devices = vms_props[vm]['config.hardware.device']
+       disks = devices.select{|x| x.is_a?(VIM::VirtualDisk)}
+       disks.each do |disk|
+         key = "#{vm._ref}:#{disk.key}"
+         if entities.any?{|x| x == ["virtualDiskId", key]}
+           objUuid = disk.backing.backingObjectId
+           if objUuid
+             entry = entriesMap[objUuid]
+             if entry && entry['content']['spbmProfileId']
+               # This is a candidate
+               nonAssociatedEntities << {
+                 :objUuid => objUuid,
+                 :type => "virtualDiskId",
+                 :key => key,
+                 :entry => entry,
+                 :vm => vm,
+                 :label => disk.deviceInfo.label,
+               }
+             end
+           end
+         end
+       end
+     end
+     nonAssociatedEntities.each do |entity|
+       policy = policies.find do |policy|
+         match = true
+         ['spbmProfileId', 'spbmProfileGenerationNumber'].each do |k|
+           match = match && policy[k] == entity[:entry]['content'][k]
+         end
+         match
+       end
+       entity[:policy] = policy
+     end
+
+     candidates = policies.select{|p| p['spbmRecoveryCandidate'] == true}
+
+     puts "#{Time.now}: Done computing"
+
+     if !opts[:show_details]
+       puts ""
+       puts "Found %d missing SPBM Profiles." % candidates.length
+       puts "Found %d entities not associated with their SPBM Profiles." % nonAssociatedEntities.length
+       puts ""
+       puts "You have a number of options (can be combined):"
+       puts "1) Run command with --show-details to see a full report about missing"
+       puts "SPBM Profiles and missing VM <-> SPBM Profile associations."
+       puts "2) Run command with --create-missing-profiles to automatically create"
+       puts "all missing SPBM profiles."
+       puts "3) Run command with --create-missing-associations to automatically"
+       puts "create all missing VM <-> SPBM Profile associations."
+     end
+
+     if opts[:show_details]
+       puts "SPBM Profiles used by VSAN:"
+       t = Terminal::Table.new()
+       t << ['SPBM ID', 'policy']
+       policies.each do |policy|
+         t.add_separator
+         t << [
+           policy['spbmDescr'],
+           policy.select{|k,v| k !~ /spbm/}.map{|k,v| "#{k}: #{v}"}.join("\n")
+         ]
+       end
+       puts t
+       puts ""
+
+       if candidates.length > 0
+         puts "Recreate missing SPBM Profiles using following RVC commands:"
+         candidates.each do |policy|
+           rules = policy.select{|k,v| k !~ /spbm/}
+           s = rules.map{|k,v| "--rule VSAN.#{k}=#{v}"}.join(" ")
+           puts "spbm.profile_create #{s} #{policy['spbmName']}"
+         end
+         puts ""
+       end
+     end
+
+     if opts[:show_details] && nonAssociatedEntities.length > 0
+       puts "Following missing VM <-> SPBM Profile associations were found:"
+       t = Terminal::Table.new()
+       t << ['Entity', 'VM', 'Profile']
+       t.add_separator
+       nonAssociatedEntities.each do |entity|
+         #puts "'%s' of VM '%s' should be associated with profile '%s' but isn't." % [
+         t << [
+           entity[:label],
+           vms_props[entity[:vm]]['name'],
+           entity[:policy]['spbmName'],
+         ]
+
+         # Fix up the associations. Disabled for now until I can check
+         # with Sudarsan
+         # profile = entity[:policy]['spbmProfile']
+         # if profile
+         #   pm.PbmAssociate(
+         #     :entity => PBM::PbmServerObjectRef(
+         #       :objectType => entity[:type],
+         #       :key => entity[:key],
+         #       :serverUuid => conn.serviceContent.about.instanceUuid
+         #     ),
+         #     :profile => profile.profileId
+         #   )
+         # end
+       end
+       puts t
+     end
+   end
+
+ end
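The recovery logic above hinges on a composite profile identity: SPBM profiles are keyed by uniqueId plus generationId, and the CMMDS policy entries carry the same pair. A minimal sketch of that lookup, with hypothetical sample data shaped like the structures above:

    # Sketch only: how recover_spbm decides whether a CMMDS policy still has
    # a matching SPBM profile, or must be flagged as a recovery candidate.
    profiles_map = {
      'aa11-gen3' => :some_pbm_profile,  # stands in for a PbmProfile object
    }

    policy = {
      'spbmProfileId' => 'aa11',
      'spbmProfileGenerationNumber' => 3,
    }

    key = "%s-gen%s" % [
      policy['spbmProfileId'],
      policy['spbmProfileGenerationNumber'],
    ]
    # A hit means the profile still exists; a miss marks the policy as a
    # recovery candidate, to be recreated via spbm.profile_create.
    puts(profiles_map.key?(key) ? "profile exists" : "recovery candidate")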