rvc 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/lib/rvc/completion.rb +1 -1
- data/lib/rvc/extensions/ClusterComputeResource.rb +11 -0
- data/lib/rvc/extensions/ComputeResource.rb +5 -1
- data/lib/rvc/extensions/Datacenter.rb +1 -1
- data/lib/rvc/extensions/HostSystem.rb +15 -0
- data/lib/rvc/extensions/VirtualMachine.rb +7 -2
- data/lib/rvc/fs.rb +2 -2
- data/lib/rvc/inventory.rb +9 -1
- data/lib/rvc/modules/basic.rb +4 -1
- data/lib/rvc/modules/cluster.rb +99 -38
- data/lib/rvc/modules/connection.rb +2 -0
- data/lib/rvc/modules/device.rb +37 -1
- data/lib/rvc/modules/diagnostics.rb +119 -11
- data/lib/rvc/modules/find.rb +1 -1
- data/lib/rvc/modules/host.rb +116 -4
- data/lib/rvc/modules/perf.rb +53 -7
- data/lib/rvc/modules/snapshot.rb +7 -1
- data/lib/rvc/modules/spbm.rb +728 -0
- data/lib/rvc/modules/syslog.rb +103 -0
- data/lib/rvc/modules/vds.rb +59 -2
- data/lib/rvc/modules/vim.rb +1 -1
- data/lib/rvc/modules/vm.rb +70 -7
- data/lib/rvc/modules/vm_guest.rb +190 -91
- data/lib/rvc/modules/vnc.rb +29 -5
- data/lib/rvc/modules/vsan.rb +4105 -0
- data/lib/rvc/util.rb +31 -11
- metadata +7 -3
data/lib/rvc/modules/vnc.rb
CHANGED
@@ -25,19 +25,38 @@ VNC = ENV['VNC'] || search_path('tightvnc') || search_path('vncviewer') || searc
|
|
25
25
|
opts :view do
|
26
26
|
summary "Spawn a VNC client"
|
27
27
|
arg :vm, nil, :lookup => VIM::VirtualMachine
|
28
|
+
opt :ws, "Enable VNC websocket proxy"
|
28
29
|
end
|
29
30
|
|
30
31
|
rvc_alias :view, :vnc
|
31
32
|
rvc_alias :view, :V
|
32
33
|
|
33
|
-
def view vm
|
34
|
+
def view vm, opts
|
34
35
|
ip = reachable_ip vm.collect('runtime.host')[0]
|
35
36
|
extraConfig, = vm.collect('config.extraConfig')
|
36
37
|
already_enabled = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.enabled' && x.value.downcase == 'true' }
|
38
|
+
|
39
|
+
if opts[:ws]
|
40
|
+
opt = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.webSocket.port' }
|
41
|
+
if opt.nil?
|
42
|
+
ws_port = unused_vnc_port ip
|
43
|
+
vm.ReconfigVM_Task(:spec => {
|
44
|
+
:extraConfig => [
|
45
|
+
{ :key => 'RemoteDisplay.vnc.webSocket.port', :value => ws_port.to_s }
|
46
|
+
]
|
47
|
+
}).wait_for_completion
|
48
|
+
else
|
49
|
+
ws_port = opt.value
|
50
|
+
end
|
51
|
+
end
|
37
52
|
if already_enabled
|
38
53
|
puts "VNC already enabled"
|
39
|
-
port = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.port' }
|
40
|
-
|
54
|
+
port = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.port' }
|
55
|
+
if !port
|
56
|
+
err "VNC enabled but no port assigned. Use vnc.off to reset config"
|
57
|
+
end
|
58
|
+
password = extraConfig.find { |x| x.key == 'RemoteDisplay.vnc.password' }
|
59
|
+
password = password ? password.value : ""
|
41
60
|
else
|
42
61
|
port = unused_vnc_port ip
|
43
62
|
password = vnc_password
|
@@ -49,7 +68,11 @@ def view vm
|
|
49
68
|
]
|
50
69
|
}).wait_for_completion
|
51
70
|
end
|
52
|
-
|
71
|
+
if opts[:ws]
|
72
|
+
puts "open http://novnc.com?host=#{ip}&port=#{ws_port}&password=#{password}"
|
73
|
+
else
|
74
|
+
vnc_client ip, port, password
|
75
|
+
end
|
53
76
|
end
|
54
77
|
|
55
78
|
|
@@ -63,7 +86,8 @@ def off vm
|
|
63
86
|
:extraConfig => [
|
64
87
|
{ :key => 'RemoteDisplay.vnc.enabled', :value => 'false' },
|
65
88
|
{ :key => 'RemoteDisplay.vnc.password', :value => '' },
|
66
|
-
{ :key => 'RemoteDisplay.vnc.port', :value => '' }
|
89
|
+
{ :key => 'RemoteDisplay.vnc.port', :value => '' },
|
90
|
+
{ :key => 'RemoteDisplay.vnc.webSocket.port', :value => '' }
|
67
91
|
]
|
68
92
|
}).wait_for_completion
|
69
93
|
end
|
@@ -0,0 +1,4105 @@
|
|
1
|
+
# Copyright (c) 2013 VMware, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'rvc/vim'
|
22
|
+
require 'json'
|
23
|
+
require 'time'
|
24
|
+
VIM::ClusterComputeResource
|
25
|
+
|
26
|
+
# Patch in some last minute additions to the API
|
27
|
+
db = VIM.loader.instance_variable_get(:@db)
|
28
|
+
db['HostVsanInternalSystem']['methods']["QuerySyncingVsanObjects"] =
|
29
|
+
{"params"=>
|
30
|
+
[{"name"=>"uuids",
|
31
|
+
"is-array"=>true,
|
32
|
+
"is-optional"=>true,
|
33
|
+
"version-id-ref"=>nil,
|
34
|
+
"wsdl_type"=>"xsd:string"}],
|
35
|
+
"result"=>
|
36
|
+
{"is-array"=>false,
|
37
|
+
"is-optional"=>false,
|
38
|
+
"is-task"=>false,
|
39
|
+
"version-id-ref"=>nil,
|
40
|
+
"wsdl_type"=>"xsd:string"}}
|
41
|
+
db['HostVsanInternalSystem']['methods']["GetVsanObjExtAttrs"] =
|
42
|
+
{"params"=>
|
43
|
+
[{"name"=>"uuids",
|
44
|
+
"is-array"=>true,
|
45
|
+
"is-optional"=>true,
|
46
|
+
"version-id-ref"=>nil,
|
47
|
+
"wsdl_type"=>"xsd:string"}],
|
48
|
+
"result"=>
|
49
|
+
{"is-array"=>false,
|
50
|
+
"is-optional"=>false,
|
51
|
+
"is-task"=>false,
|
52
|
+
"version-id-ref"=>nil,
|
53
|
+
"wsdl_type"=>"xsd:string"}}
|
54
|
+
db = nil
|
55
|
+
|
56
|
+
$vsanUseGzipApis = false
|
57
|
+
|
58
|
+
def is_uuid str
|
59
|
+
str =~ /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/
|
60
|
+
end
|
61
|
+
|
62
|
+
opts :enable_vsan_on_cluster do
|
63
|
+
summary "Enable VSAN on a cluster"
|
64
|
+
arg :cluster, nil, :lookup => VIM::ClusterComputeResource
|
65
|
+
opt :disable_storage_auto_claim, "Disable auto disk-claim", :type => :boolean
|
66
|
+
end
|
67
|
+
|
68
|
+
def enable_vsan_on_cluster cluster, opts
|
69
|
+
conn = cluster._connection
|
70
|
+
_run_with_rev(conn, "dev") do
|
71
|
+
spec = VIM::ClusterConfigSpecEx(
|
72
|
+
:vsanConfig => {
|
73
|
+
:enabled => true,
|
74
|
+
:defaultConfig => {
|
75
|
+
:autoClaimStorage => (!(opts[:disable_storage_auto_claim] || false)),
|
76
|
+
}
|
77
|
+
}
|
78
|
+
)
|
79
|
+
task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
|
80
|
+
progress([task])
|
81
|
+
childtasks = task.child_tasks
|
82
|
+
if childtasks && childtasks.length > 0
|
83
|
+
progress(childtasks)
|
84
|
+
end
|
85
|
+
childtasks = task.child_tasks
|
86
|
+
if childtasks && childtasks.length > 0
|
87
|
+
progress(childtasks)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
opts :disable_vsan_on_cluster do
|
93
|
+
summary "Disable VSAN on a cluster"
|
94
|
+
arg :cluster, nil, :lookup => VIM::ClusterComputeResource
|
95
|
+
end
|
96
|
+
|
97
|
+
def disable_vsan_on_cluster cluster
|
98
|
+
conn = cluster._connection
|
99
|
+
_run_with_rev(conn, "dev") do
|
100
|
+
spec = VIM::ClusterConfigSpecEx(
|
101
|
+
:vsanConfig => {
|
102
|
+
:enabled => false,
|
103
|
+
}
|
104
|
+
)
|
105
|
+
task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
|
106
|
+
progress([task])
|
107
|
+
childtasks = task.child_tasks
|
108
|
+
if childtasks && childtasks.length > 0
|
109
|
+
progress(childtasks)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
VIM::HostSystem
|
115
|
+
class VIM::HostSystem
|
116
|
+
def filtered_disks_for_vsan opts = {}
|
117
|
+
vsan = opts[:vsanSystem] || self.configManager.vsanSystem
|
118
|
+
stateFilter = opts[:state_filter] || /^eligible$/
|
119
|
+
disks = vsan.QueryDisksForVsan()
|
120
|
+
|
121
|
+
disks = disks.select do |disk|
|
122
|
+
disk.state =~ stateFilter
|
123
|
+
end
|
124
|
+
|
125
|
+
if opts[:filter_ssd_by_model]
|
126
|
+
disks = disks.select do |disk|
|
127
|
+
model = [
|
128
|
+
disk.disk.vendor,
|
129
|
+
disk.disk.model
|
130
|
+
].compact.map{|x| x.strip}.join(" ")
|
131
|
+
model_match = (model =~ opts[:filter_ssd_by_model])
|
132
|
+
!disk.disk.ssd || model_match
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
disks = disks.map{|x| x.disk}
|
137
|
+
|
138
|
+
disks
|
139
|
+
end
|
140
|
+
|
141
|
+
def consume_disks_for_vsan opts = {}
|
142
|
+
vsan = opts[:vsanSystem] || self.configManager.vsanSystem
|
143
|
+
disks = filtered_disks_for_vsan(opts.merge(
|
144
|
+
:state_filter => /^eligible$/,
|
145
|
+
:vsanSystem => vsan
|
146
|
+
))
|
147
|
+
if disks.length > 0
|
148
|
+
vsan.AddDisks_Task(:disk => disks)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
opts :host_consume_disks do
|
154
|
+
summary "Consumes all eligible disks on a host"
|
155
|
+
arg :host_or_cluster, nil, :lookup => [VIM::ComputeResource, VIM::HostSystem], :multi => true
|
156
|
+
opt :filter_ssd_by_model, "Regex to apply as ssd model filter", :type => :string
|
157
|
+
end
|
158
|
+
|
159
|
+
def host_consume_disks hosts_or_clusters, opts
|
160
|
+
conn = hosts_or_clusters.first._connection
|
161
|
+
hosts = []
|
162
|
+
hosts_or_clusters.each do |host_or_cluster|
|
163
|
+
if host_or_cluster.is_a?(VIM::HostSystem)
|
164
|
+
hosts << host_or_cluster
|
165
|
+
else
|
166
|
+
hosts += host_or_cluster.host
|
167
|
+
end
|
168
|
+
end
|
169
|
+
if opts[:filter_ssd_by_model]
|
170
|
+
opts[:filter_ssd_by_model] = /#{opts[:filter_ssd_by_model]}/
|
171
|
+
end
|
172
|
+
tasks = []
|
173
|
+
results = {}
|
174
|
+
_run_with_rev(conn, "dev") do
|
175
|
+
tasks = hosts.map do |host|
|
176
|
+
host.consume_disks_for_vsan(opts)
|
177
|
+
end.compact
|
178
|
+
if tasks.length > 0
|
179
|
+
results = progress(tasks)
|
180
|
+
pp results.values.flatten.map{|x| x.error}.compact
|
181
|
+
else
|
182
|
+
puts "No disks were consumed."
|
183
|
+
end
|
184
|
+
$claimResults = results
|
185
|
+
end
|
186
|
+
$disksCache = {}
|
187
|
+
end
|
188
|
+
|
189
|
+
opts :host_wipe_vsan_disks do
|
190
|
+
summary "Wipes content of all VSAN disks on a host"
|
191
|
+
arg :host, nil, :lookup => VIM::HostSystem, :multi => true
|
192
|
+
opt :force, "Apply force", :type => :boolean
|
193
|
+
end
|
194
|
+
|
195
|
+
def host_wipe_vsan_disks hosts, opts
|
196
|
+
conn = hosts.first._connection
|
197
|
+
tasks = []
|
198
|
+
_run_with_rev(conn, "dev") do
|
199
|
+
tasks = hosts.map do |host|
|
200
|
+
hostname = host.name
|
201
|
+
disks = host.filtered_disks_for_vsan(:state_filter => /^inUse$/)
|
202
|
+
if disks.length == 0
|
203
|
+
next
|
204
|
+
end
|
205
|
+
if !opts[:force]
|
206
|
+
# Don't actually wipe, but show a warning.
|
207
|
+
disks.each do |disk|
|
208
|
+
model = [
|
209
|
+
disk.vendor,
|
210
|
+
disk.model
|
211
|
+
].compact.map{|x| x.strip}.join(" ")
|
212
|
+
puts "Would wipe disk #{disk.displayName} (#{model}, ssd = #{disk.ssd})"
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
if opts[:force]
|
217
|
+
#disks = disks.select{|x| x.ssd}
|
218
|
+
#host.configManager.vsanSystem.RemoveDisk_Task(:disk => disks)
|
219
|
+
# See PR 1077658
|
220
|
+
vsan = host.configManager.vsanSystem
|
221
|
+
vsan.RemoveDiskMapping_Task(:mapping => vsan.config.storageInfo.diskMapping)
|
222
|
+
end
|
223
|
+
end.compact
|
224
|
+
if tasks.length > 0
|
225
|
+
results = progress(tasks)
|
226
|
+
pp results.values.flatten.map{|x| x.error}.compact
|
227
|
+
$wipeResults = results
|
228
|
+
end
|
229
|
+
end
|
230
|
+
if !opts[:force]
|
231
|
+
puts ""
|
232
|
+
puts "NO ACTION WAS TAKEN. Use --force to actually wipe."
|
233
|
+
puts "CAUTION: Wiping disks means all user data will be destroyed!"
|
234
|
+
end
|
235
|
+
$disksCache = {}
|
236
|
+
end
|
237
|
+
|
238
|
+
opts :host_info do
|
239
|
+
summary "Print VSAN info about a host"
|
240
|
+
arg :host, nil, :lookup => VIM::HostSystem
|
241
|
+
end
|
242
|
+
|
243
|
+
def host_info host
|
244
|
+
conn = host._connection
|
245
|
+
_run_with_rev(conn, "dev") do
|
246
|
+
_host_info host
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
opts :cluster_info do
|
251
|
+
summary "Print VSAN info about a cluster"
|
252
|
+
arg :cluster, nil, :lookup => VIM::ClusterComputeResource
|
253
|
+
end
|
254
|
+
|
255
|
+
def cluster_info cluster
|
256
|
+
conn = cluster._connection
|
257
|
+
pc = conn.propertyCollector
|
258
|
+
|
259
|
+
hosts = cluster.host
|
260
|
+
|
261
|
+
hosts_props = pc.collectMultiple(hosts, 'name', 'runtime.connectionState')
|
262
|
+
connected_hosts = hosts_props.select do |k,v|
|
263
|
+
v['runtime.connectionState'] == 'connected'
|
264
|
+
end.keys
|
265
|
+
hosts = connected_hosts
|
266
|
+
|
267
|
+
_run_with_rev(conn, "dev") do
|
268
|
+
hosts.each do |host|
|
269
|
+
begin
|
270
|
+
puts "Host: #{hosts_props[host]['name']}"
|
271
|
+
_host_info host, " "
|
272
|
+
rescue Exception => ex
|
273
|
+
puts "#{Time.now}: Got exception: #{ex.class}: #{ex.message}"
|
274
|
+
end
|
275
|
+
puts ""
|
276
|
+
end
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
opts :disks_info do
|
281
|
+
summary "Print physical disk info about a host"
|
282
|
+
arg :host, nil, :lookup => VIM::HostSystem, :multi => true
|
283
|
+
end
|
284
|
+
|
285
|
+
def disks_info hosts
|
286
|
+
conn = hosts.first._connection
|
287
|
+
pc = conn.propertyCollector
|
288
|
+
_run_with_rev(conn, "dev") do
|
289
|
+
hosts.each do |host|
|
290
|
+
if hosts.length > 0
|
291
|
+
puts "Disks on host #{host.name}:"
|
292
|
+
end
|
293
|
+
|
294
|
+
dsList = host.datastore
|
295
|
+
dsListProps = pc.collectMultiple(dsList, 'summary', 'name', 'info')
|
296
|
+
vmfsDsList = dsListProps.select do |ds, props|
|
297
|
+
props['summary'].type == "VMFS"
|
298
|
+
end.keys
|
299
|
+
|
300
|
+
vsan = host.configManager.vsanSystem
|
301
|
+
disks = vsan.QueryDisksForVsan()
|
302
|
+
partitions = host.esxcli.storage.core.device.partition.list
|
303
|
+
|
304
|
+
t = Terminal::Table.new()
|
305
|
+
t << ['DisplayName', 'isSSD', 'Size', 'State']
|
306
|
+
needSep = true
|
307
|
+
disks.each do |disk|
|
308
|
+
capacity = disk.disk.capacity
|
309
|
+
size = capacity.block * capacity.blockSize
|
310
|
+
sizeStr = "#{size / 1024**3} GB"
|
311
|
+
state = disk.state
|
312
|
+
# if needSep
|
313
|
+
t.add_separator
|
314
|
+
needSep = false
|
315
|
+
# end
|
316
|
+
if state != 'eligible' && disk.error
|
317
|
+
state += " (#{disk.error.localizedMessage})"
|
318
|
+
if disk.error.fault.is_a?(VIM::DiskHasPartitions)
|
319
|
+
state += "\n"
|
320
|
+
state += "\n"
|
321
|
+
state += "Partition table:\n"
|
322
|
+
|
323
|
+
partitions.select do |x|
|
324
|
+
x.Device == disk.disk.canonicalName && x.Type != 0
|
325
|
+
end.each do |x|
|
326
|
+
partSize = x.Size.to_f / 1024**3
|
327
|
+
types = {
|
328
|
+
0xfb => 'vmfs',
|
329
|
+
0xfc => 'coredump',
|
330
|
+
0xfa => 'vsan',
|
331
|
+
0x0 => 'unused',
|
332
|
+
0x6 => 'vfat',
|
333
|
+
}
|
334
|
+
type = types[x.Type] || x.Type
|
335
|
+
state += "#{x.Partition}: %.2f GB, type = #{type}" % partSize
|
336
|
+
|
337
|
+
if type == "vmfs"
|
338
|
+
vmfsStr = vmfsDsList.select do |vmfsDs|
|
339
|
+
props = dsListProps[vmfsDs]
|
340
|
+
props['info'].vmfs.extent.any? do |ext|
|
341
|
+
ext.diskName == x.Device && x.Partition == ext.partition
|
342
|
+
end
|
343
|
+
end.map do |vmfsDs|
|
344
|
+
"'#{dsListProps[vmfsDs]['name']}'"
|
345
|
+
end.join(", ")
|
346
|
+
if vmfsStr
|
347
|
+
state += " (#{vmfsStr})"
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
state += "\n"
|
352
|
+
end
|
353
|
+
needSep = true
|
354
|
+
end
|
355
|
+
end
|
356
|
+
t << [
|
357
|
+
[
|
358
|
+
disk.disk.displayName,
|
359
|
+
[
|
360
|
+
disk.disk.vendor,
|
361
|
+
disk.disk.model
|
362
|
+
].compact.map{|x| x.strip}.join(" ")
|
363
|
+
].join("\n"),
|
364
|
+
disk.disk.ssd ? "SSD" : "MD",
|
365
|
+
sizeStr,
|
366
|
+
state
|
367
|
+
]
|
368
|
+
end
|
369
|
+
puts t
|
370
|
+
if hosts.length > 0
|
371
|
+
puts ""
|
372
|
+
end
|
373
|
+
end
|
374
|
+
end
|
375
|
+
end
|
376
|
+
|
377
|
+
def _host_info host, prefix = ''
|
378
|
+
configManager = host.configManager
|
379
|
+
netSys = configManager.networkSystem
|
380
|
+
vsan = configManager.vsanSystem
|
381
|
+
config = vsan.config
|
382
|
+
enabled = config.enabled
|
383
|
+
line = lambda{|x| puts "#{prefix}#{x}" }
|
384
|
+
line.call "VSAN enabled: %s" % (enabled ? "yes" : "no")
|
385
|
+
if !enabled
|
386
|
+
return
|
387
|
+
end
|
388
|
+
status = vsan.QueryHostStatus()
|
389
|
+
line.call "Cluster info:"
|
390
|
+
line.call " Cluster role: #{status.nodeState.state}"
|
391
|
+
line.call " Cluster UUID: #{config.clusterInfo.uuid}"
|
392
|
+
line.call " Node UUID: #{config.clusterInfo.nodeUuid}"
|
393
|
+
line.call " Member UUIDs: #{status.memberUuid} (#{status.memberUuid.length})"
|
394
|
+
line.call "Storage info:"
|
395
|
+
line.call " Auto claim: %s" % (config.storageInfo.autoClaimStorage ? "yes" : "no")
|
396
|
+
line.call " Disk Mappings:"
|
397
|
+
if config.storageInfo.diskMapping.length == 0
|
398
|
+
line.call " None"
|
399
|
+
end
|
400
|
+
config.storageInfo.diskMapping.each do |mapping|
|
401
|
+
capacity = mapping.ssd.capacity
|
402
|
+
size = capacity.block * capacity.blockSize
|
403
|
+
line.call " SSD: #{mapping.ssd.displayName} - #{size / 1024**3} GB"
|
404
|
+
mapping.nonSsd.map do |md|
|
405
|
+
capacity = md.capacity
|
406
|
+
size = capacity.block * capacity.blockSize
|
407
|
+
line.call " MD: #{md.displayName} - #{size / 1024**3} GB"
|
408
|
+
end
|
409
|
+
end
|
410
|
+
line.call "NetworkInfo:"
|
411
|
+
if config.networkInfo.port.length == 0
|
412
|
+
line.call " Not configured"
|
413
|
+
end
|
414
|
+
vmknics, = netSys.collect 'networkConfig.vnic'
|
415
|
+
config.networkInfo.port.each do |port|
|
416
|
+
dev = port.device
|
417
|
+
vmknic = vmknics.find{|x| x.device == dev}
|
418
|
+
ip = "IP unknown"
|
419
|
+
if vmknic
|
420
|
+
ip = vmknic.spec.ip.ipAddress
|
421
|
+
end
|
422
|
+
line.call " Adapter: #{dev} (#{ip})"
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
def _run_with_rev conn, rev
|
427
|
+
old_rev = conn.rev
|
428
|
+
begin
|
429
|
+
conn.rev = rev
|
430
|
+
yield
|
431
|
+
ensure
|
432
|
+
conn.rev = old_rev
|
433
|
+
end
|
434
|
+
end
|
435
|
+
|
436
|
+
|
437
|
+
opts :cluster_set_default_policy do
|
438
|
+
summary "Set default policy on a cluster"
|
439
|
+
arg :cluster, nil, :lookup => VIM::ClusterComputeResource
|
440
|
+
arg :policy, nil, :type => :string
|
441
|
+
end
|
442
|
+
|
443
|
+
def cluster_set_default_policy cluster, policy
|
444
|
+
hosts = cluster.host
|
445
|
+
conn = cluster._connection
|
446
|
+
pc = conn.propertyCollector
|
447
|
+
_run_with_rev(conn, "dev") do
|
448
|
+
vsan, = hosts.first.collect 'configManager.vsanSystem'
|
449
|
+
cluster_uuid, = vsan.collect 'config.clusterInfo.uuid'
|
450
|
+
|
451
|
+
hosts.each do |host|
|
452
|
+
policy_node = host.esxcli.vsan.policy
|
453
|
+
['cluster', 'vdisk', 'vmnamespace', 'vmswap'].each do |policy_class|
|
454
|
+
policy_node.setdefault(
|
455
|
+
:clusteruuid => cluster_uuid,
|
456
|
+
:policy => policy,
|
457
|
+
:policyclass => policy_class,
|
458
|
+
)
|
459
|
+
end
|
460
|
+
end
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
464
|
+
def _components_in_dom_config dom_config
|
465
|
+
out = []
|
466
|
+
if ['Component', 'Witness'].member?(dom_config['type'])
|
467
|
+
out << dom_config
|
468
|
+
else
|
469
|
+
dom_config.select{|k,v| k =~ /child-\d+/}.each do |k, v|
|
470
|
+
out += _components_in_dom_config v
|
471
|
+
end
|
472
|
+
end
|
473
|
+
out
|
474
|
+
end
|
475
|
+
|
476
|
+
def _normalize_uuid uuid
|
477
|
+
uuid = uuid.gsub("-", "")
|
478
|
+
uuid = "%s-%s-%s-%s-%s" % [
|
479
|
+
uuid[0..7], uuid[8..11], uuid[12..15],
|
480
|
+
uuid[16..19], uuid[20..31]
|
481
|
+
]
|
482
|
+
uuid
|
483
|
+
end
|
484
|
+
|
485
|
+
def _print_dom_config_tree_int dom_config, dom_components_str, indent = 0
|
486
|
+
pre = " " * indent
|
487
|
+
type = dom_config['type']
|
488
|
+
children = dom_config.select{|k,v| k =~ /child-\d+/}.values
|
489
|
+
if ['RAID_0', 'RAID_1', 'Concatenation'].member?(type)
|
490
|
+
puts "#{pre}#{type}"
|
491
|
+
children.each do |child|
|
492
|
+
_print_dom_config_tree_int child, dom_components_str, indent + 1
|
493
|
+
end
|
494
|
+
elsif ['Configuration'].member?(type)
|
495
|
+
# puts "#{pre}#{type}"
|
496
|
+
children.each do |child|
|
497
|
+
_print_dom_config_tree_int child, dom_components_str, indent + 1
|
498
|
+
end
|
499
|
+
elsif ['Witness', 'Component'].member?(type)
|
500
|
+
comp_uuid = dom_config['componentUuid']
|
501
|
+
info = dom_components_str[comp_uuid]
|
502
|
+
line = "#{pre}#{type}: #{info[0]}"
|
503
|
+
if info[2].length > 0
|
504
|
+
puts "#{line} (#{info[1]},"
|
505
|
+
puts "#{' ' * line.length} #{info[2]})"
|
506
|
+
else
|
507
|
+
puts "#{line} (#{info[1]})"
|
508
|
+
end
|
509
|
+
end
|
510
|
+
end
|
511
|
+
|
512
|
+
def _print_dom_config_tree dom_obj_uuid, obj_infos, indent = 0, opts = {}
|
513
|
+
pre = " " * indent
|
514
|
+
dom_obj_infos = obj_infos['dom_objects'][dom_obj_uuid]
|
515
|
+
if !dom_obj_infos
|
516
|
+
puts "#{pre}Couldn't find info about DOM object '#{dom_obj_uuid}'"
|
517
|
+
return
|
518
|
+
end
|
519
|
+
dom_obj = dom_obj_infos['config']
|
520
|
+
policy = dom_obj_infos['policy']
|
521
|
+
|
522
|
+
dom_components = _components_in_dom_config(dom_obj['content'])
|
523
|
+
csn = nil
|
524
|
+
begin
|
525
|
+
csn = dom_obj['content']['attributes']['CSN']
|
526
|
+
rescue
|
527
|
+
end
|
528
|
+
|
529
|
+
dom_components_str = Hash[dom_components.map do |dom_comp|
|
530
|
+
attr = dom_comp['attributes']
|
531
|
+
state = attr['componentState']
|
532
|
+
comp_uuid = dom_comp['componentUuid']
|
533
|
+
state_names = {
|
534
|
+
'0' => 'FIRST',
|
535
|
+
'1' => 'NONE',
|
536
|
+
'2' => 'NEED_CONFIG',
|
537
|
+
'3' => 'INITIALIZE',
|
538
|
+
'4' => 'INITIALIZED',
|
539
|
+
'5' => 'ACTIVE',
|
540
|
+
'6' => 'ABSENT',
|
541
|
+
'7' => 'STALE',
|
542
|
+
'8' => 'RESYNCHING',
|
543
|
+
'9' => 'DEGRADED',
|
544
|
+
'10' => 'RECONFIGURING',
|
545
|
+
'11' => 'CLEANUP',
|
546
|
+
'12' => 'TRANSIENT',
|
547
|
+
'13' => 'LAST',
|
548
|
+
}
|
549
|
+
state_name = state.to_s
|
550
|
+
if state_names[state.to_s]
|
551
|
+
state_name = "#{state_names[state.to_s]} (#{state})"
|
552
|
+
end
|
553
|
+
props = {
|
554
|
+
'state' => state_name,
|
555
|
+
}
|
556
|
+
|
557
|
+
if state.to_s.to_i == 6 && attr['staleCsn']
|
558
|
+
if attr['staleCsn'] != csn
|
559
|
+
props['csn'] = "STALE (#{attr['staleCsn']}!=#{csn})"
|
560
|
+
end
|
561
|
+
end
|
562
|
+
|
563
|
+
comp_policy = {}
|
564
|
+
['readOPS', 'writeOPS'].select{|x| attr[x]}.each do |x|
|
565
|
+
comp_policy[x] = attr[x]
|
566
|
+
end
|
567
|
+
if attr['readCacheReservation'] && attr['readCacheHitRate']
|
568
|
+
comp_policy['rc size/hitrate'] = "%.2fGB/%d%%" % [
|
569
|
+
attr['readCacheReservation'].to_f / 1024**3,
|
570
|
+
attr['readCacheHitRate'],
|
571
|
+
]
|
572
|
+
end
|
573
|
+
if attr['bytesToSync']
|
574
|
+
comp_policy['dataToSync'] = "%.2f GB" % [
|
575
|
+
attr['bytesToSync'].to_f / 1024**3
|
576
|
+
]
|
577
|
+
end
|
578
|
+
|
579
|
+
lsom_object = obj_infos['lsom_objects'][comp_uuid]
|
580
|
+
if lsom_object
|
581
|
+
host = obj_infos['host_vsan_uuids'][lsom_object['owner']]
|
582
|
+
if host
|
583
|
+
hostName = obj_infos['host_props'][host]['name']
|
584
|
+
else
|
585
|
+
hostName = "unknown"
|
586
|
+
end
|
587
|
+
md_uuid = dom_comp['diskUuid']
|
588
|
+
md = obj_infos['vsan_disk_uuids'][md_uuid]
|
589
|
+
ssd_uuid = obj_infos['disk_objects'][md_uuid]['content']['ssdUuid']
|
590
|
+
#pp ssd_uuid
|
591
|
+
ssd = obj_infos['vsan_disk_uuids'][ssd_uuid]
|
592
|
+
#pp ssd
|
593
|
+
props.merge!({
|
594
|
+
'host' => hostName,
|
595
|
+
'md' => md ? md.DisplayName : "unknown",
|
596
|
+
'ssd' => ssd ? ssd.DisplayName : "unknown",
|
597
|
+
})
|
598
|
+
if opts[:highlight_disk] && md_uuid == opts[:highlight_disk]
|
599
|
+
props['md'] = "**#{props['md']}**"
|
600
|
+
elsif opts[:highlight_disk] && ssd_uuid == opts[:highlight_disk]
|
601
|
+
props['ssd'] = "**#{props['ssd']}**"
|
602
|
+
end
|
603
|
+
else
|
604
|
+
props.merge!({
|
605
|
+
'host' => "LSOM object not found"
|
606
|
+
})
|
607
|
+
end
|
608
|
+
propsStr = props.map{|k,v| "#{k}: #{v}"}.join(", ")
|
609
|
+
comp_policy_str = comp_policy.map{|k,v| "#{k}: #{v}"}.join(", ")
|
610
|
+
[comp_uuid, [comp_uuid, propsStr, comp_policy_str]]
|
611
|
+
end]
|
612
|
+
|
613
|
+
if policy
|
614
|
+
policy = policy.map{|k,v| "#{k} = #{v}"}.join(", ")
|
615
|
+
else
|
616
|
+
policy = "No POLICY entry found in CMMDS"
|
617
|
+
end
|
618
|
+
owner = obj_infos['host_vsan_uuids'][dom_obj['owner']]
|
619
|
+
if owner
|
620
|
+
owner = obj_infos['host_props'][owner]['name']
|
621
|
+
else
|
622
|
+
owner = "unknown"
|
623
|
+
end
|
624
|
+
|
625
|
+
puts "#{pre}DOM Object: #{dom_obj['uuid']} (owner: #{owner}, policy: #{policy})"
|
626
|
+
if opts[:context]
|
627
|
+
puts "#{pre} Context: #{opts[:context]}"
|
628
|
+
end
|
629
|
+
_print_dom_config_tree_int dom_obj['content'], dom_components_str, indent
|
630
|
+
end
|
631
|
+
|
632
|
+
# hosts is a hash: host => hostname
|
633
|
+
def _vsan_host_disks_info hosts
|
634
|
+
hosts.each do |k,v|
|
635
|
+
if !v
|
636
|
+
hosts[k] = k.name
|
637
|
+
end
|
638
|
+
end
|
639
|
+
|
640
|
+
conn = hosts.keys.first._connection
|
641
|
+
vsanDiskUuids = {}
|
642
|
+
$disksCache ||= {}
|
643
|
+
if !hosts.keys.all?{|x| $disksCache[x]}
|
644
|
+
lock = Mutex.new
|
645
|
+
hosts.map do |host, hostname|
|
646
|
+
Thread.new do
|
647
|
+
if !$disksCache[host]
|
648
|
+
c1 = conn.spawn_additional_connection
|
649
|
+
host2 = host.dup_on_conn(c1)
|
650
|
+
$disksCache[host] = []
|
651
|
+
lock.synchronize do
|
652
|
+
puts "#{Time.now}: Fetching VSAN disk info from #{hostname} (may take a moment) ..."
|
653
|
+
end
|
654
|
+
begin
|
655
|
+
timeout(45) do
|
656
|
+
list = host2.esxcli.vsan.storage.list
|
657
|
+
list.each{|x| x._set_property :host, host}
|
658
|
+
$disksCache[host] = list
|
659
|
+
end
|
660
|
+
rescue Exception => ex
|
661
|
+
lock.synchronize do
|
662
|
+
puts "#{Time.now}: Failed to gather from #{hostname}: #{ex.class}: #{ex.message}"
|
663
|
+
end
|
664
|
+
end
|
665
|
+
end
|
666
|
+
end
|
667
|
+
end.each{|t| t.join}
|
668
|
+
puts "#{Time.now}: Done fetching VSAN disk infos"
|
669
|
+
end
|
670
|
+
|
671
|
+
hosts.map do |host, hostname|
|
672
|
+
disks = $disksCache[host]
|
673
|
+
disks.each do |disk|
|
674
|
+
vsanDiskUuids[disk.VSANUUID] = disk
|
675
|
+
end
|
676
|
+
end
|
677
|
+
|
678
|
+
vsanDiskUuids
|
679
|
+
end
|
680
|
+
|
681
|
+
def _vsan_cluster_disks_info cluster, opts = {}
|
682
|
+
pc = cluster._connection.propertyCollector
|
683
|
+
if cluster.is_a?(VIM::HostSystem)
|
684
|
+
hosts = [cluster]
|
685
|
+
else
|
686
|
+
hosts = cluster.host
|
687
|
+
end
|
688
|
+
if opts[:hosts_props]
|
689
|
+
hosts_props = opts[:hosts_props]
|
690
|
+
else
|
691
|
+
hosts_props = pc.collectMultiple(hosts,
|
692
|
+
'name',
|
693
|
+
'runtime.connectionState',
|
694
|
+
'configManager.vsanSystem',
|
695
|
+
'configManager.vsanInternalSystem',
|
696
|
+
)
|
697
|
+
end
|
698
|
+
hosts_props = hosts_props.select do |k,v|
|
699
|
+
v['runtime.connectionState'] == 'connected'
|
700
|
+
end
|
701
|
+
vsan_systems = hosts_props.map{|h,p| p['configManager.vsanSystem']}
|
702
|
+
vsan_props = pc.collectMultiple(vsan_systems, 'config.clusterInfo')
|
703
|
+
host_vsan_uuids = Hash[hosts_props.map do |host, props|
|
704
|
+
vsan_system = props['configManager.vsanSystem']
|
705
|
+
vsan_info = vsan_props[vsan_system]['config.clusterInfo']
|
706
|
+
[vsan_info.nodeUuid, host]
|
707
|
+
end]
|
708
|
+
vsan_disk_uuids = {}
|
709
|
+
vsan_disk_uuids.merge!(
|
710
|
+
_vsan_host_disks_info(Hash[hosts_props.map{|h, p| [h, p['name']]}])
|
711
|
+
)
|
712
|
+
|
713
|
+
[host_vsan_uuids, hosts_props, vsan_disk_uuids]
|
714
|
+
end
|
715
|
+
|
716
|
+
opts :object_info do
|
717
|
+
summary "Fetch information about a VSAN object"
|
718
|
+
arg :cluster, "Cluster on which to fetch the object info", :lookup => [VIM::HostSystem, VIM::ClusterComputeResource]
|
719
|
+
arg :obj_uuid, nil, :type => :string, :multi => true
|
720
|
+
end
|
721
|
+
|
722
|
+
def object_info cluster, obj_uuids, opts = {}
|
723
|
+
opts[:cluster] = cluster
|
724
|
+
objs = _object_info obj_uuids, opts
|
725
|
+
indent = 0
|
726
|
+
obj_uuids.each do |obj_uuid|
|
727
|
+
_print_dom_config_tree(obj_uuid, objs, indent)
|
728
|
+
puts ""
|
729
|
+
end
|
730
|
+
end
|
731
|
+
|
732
|
+
opts :disk_object_info do
|
733
|
+
summary "Fetch information about all VSAN objects on a given physical disk"
|
734
|
+
arg :cluster_or_host, "Cluster or host on which to fetch the object info", :lookup => VIM::ClusterComputeResource
|
735
|
+
arg :disk_uuid, nil, :type => :string, :multi => true
|
736
|
+
end
|
737
|
+
|
738
|
+
def disk_object_info cluster_or_host, disk_uuids, opts = {}
|
739
|
+
conn = cluster_or_host._connection
|
740
|
+
pc = conn.propertyCollector
|
741
|
+
|
742
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
743
|
+
cluster = cluster_or_host
|
744
|
+
hosts = cluster.host
|
745
|
+
else
|
746
|
+
hosts = [cluster_or_host]
|
747
|
+
end
|
748
|
+
|
749
|
+
_run_with_rev(conn, "dev") do
|
750
|
+
# XXX: This doesn't yet work when no cluster is given
|
751
|
+
host_vsan_uuids, hosts_props, vsan_disk_uuids = _vsan_cluster_disks_info(cluster)
|
752
|
+
|
753
|
+
input_disk_uuids = []
|
754
|
+
m_disk_uuids = []
|
755
|
+
disk_uuids.each do |disk_uuid|
|
756
|
+
disk = vsan_disk_uuids.find {|k,v| v.DisplayName == disk_uuid}
|
757
|
+
if disk
|
758
|
+
input_disk_uuids << disk
|
759
|
+
if disk[1].IsSSD
|
760
|
+
disks = vsan_disk_uuids.find_all do |k,v|
|
761
|
+
v.VSANDiskGroupName == disk_uuid unless v.IsSSD
|
762
|
+
end
|
763
|
+
m_disk_uuids += disks
|
764
|
+
else
|
765
|
+
m_disk_uuids << disk
|
766
|
+
end
|
767
|
+
else
|
768
|
+
input_disk_uuids << [disk_uuid]
|
769
|
+
m_disk_uuids << [disk_uuid]
|
770
|
+
end
|
771
|
+
end
|
772
|
+
input_disk_uuids.map! {|x| x[0]}
|
773
|
+
m_disk_uuids.map! {|x| x[0]}
|
774
|
+
|
775
|
+
connected_hosts = hosts_props.select do |k,v|
|
776
|
+
v['runtime.connectionState'] == 'connected'
|
777
|
+
end.keys
|
778
|
+
hosts = connected_hosts
|
779
|
+
|
780
|
+
if hosts.length == 0
|
781
|
+
err "Couldn't find any connected hosts"
|
782
|
+
end
|
783
|
+
|
784
|
+
dslist = hosts.first.datastore
|
785
|
+
dslist_props = pc.collectMultiple(dslist, 'name', 'summary.type')
|
786
|
+
vsandslist = dslist_props.select{|k, v| v['summary.type'] == 'vsan'}.keys
|
787
|
+
vsands = vsandslist.first
|
788
|
+
if !vsands
|
789
|
+
err "Couldn't find VSAN datastore"
|
790
|
+
end
|
791
|
+
vms = vsands.vm
|
792
|
+
vms_props = pc.collectMultiple(vms,
|
793
|
+
'name', 'config.hardware.device',
|
794
|
+
'summary.config'
|
795
|
+
)
|
796
|
+
objects = {}
|
797
|
+
vms.each do |vm|
|
798
|
+
disks = vms_props[vm]['disks'] =
|
799
|
+
vms_props[vm]['config.hardware.device'].select{|x| x.is_a?(VIM::VirtualDisk)}
|
800
|
+
namespaceUuid = vms_props[vm]['namespaceUuid'] =
|
801
|
+
vms_props[vm]['summary.config'].vmPathName.split("] ")[1].split("/")[0]
|
802
|
+
|
803
|
+
objects[namespaceUuid] = [vm, :namespace]
|
804
|
+
disks.each do |disk|
|
805
|
+
backing = disk.backing
|
806
|
+
while backing
|
807
|
+
objects[backing.backingObjectId] = [vm, backing.fileName]
|
808
|
+
backing = backing.parent
|
809
|
+
end
|
810
|
+
end
|
811
|
+
end
|
812
|
+
|
813
|
+
vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
|
814
|
+
json = vsanIntSys.QueryObjectsOnPhysicalVsanDisk(:disks => m_disk_uuids)
|
815
|
+
if json == "BAD"
|
816
|
+
err "Server rejected VSAN object-on-disk query"
|
817
|
+
end
|
818
|
+
result = nil
|
819
|
+
begin
|
820
|
+
result = JSON.load(json)
|
821
|
+
rescue
|
822
|
+
err "Server failed to query VSAN objects-on-disk: #{json}"
|
823
|
+
end
|
824
|
+
|
825
|
+
result.merge!({
|
826
|
+
'host_vsan_uuids' => host_vsan_uuids,
|
827
|
+
'host_props' => hosts_props,
|
828
|
+
'vsan_disk_uuids' => vsan_disk_uuids,
|
829
|
+
})
|
830
|
+
|
831
|
+
input_disk_uuids.each do |disk_uuid|
|
832
|
+
dom_obj_uuids = []
|
833
|
+
disk_info = vsan_disk_uuids[disk_uuid]
|
834
|
+
if disk_info
|
835
|
+
name = "#{disk_info.DisplayName} (#{disk_uuid})"
|
836
|
+
if disk_info.IsSSD
|
837
|
+
m_disks = vsan_disk_uuids.find_all do
|
838
|
+
|k, v| v.VSANDiskGroupUUID == disk_uuid unless v.IsSSD
|
839
|
+
end
|
840
|
+
m_disks ? m_disks.map!{|x| x[0]} : disk_uuid
|
841
|
+
m_disks.each {|m_disk| dom_obj_uuids += result['objects_on_disks'][m_disk]}
|
842
|
+
else
|
843
|
+
dom_obj_uuids = result['objects_on_disks'][disk_uuid]
|
844
|
+
end
|
845
|
+
else
|
846
|
+
name = disk_uuid
|
847
|
+
end
|
848
|
+
puts "Physical disk #{name}:"
|
849
|
+
indent = 1
|
850
|
+
dom_obj_uuids.each do |obj_uuid|
|
851
|
+
object = objects[obj_uuid]
|
852
|
+
if object && object[1] == :namespace
|
853
|
+
vm_name = vms_props[object[0]]['name']
|
854
|
+
context = "Part of VM #{vm_name}: Namespace directory"
|
855
|
+
elsif object
|
856
|
+
vm_name = vms_props[object[0]]['name']
|
857
|
+
context = "Part of VM #{vm_name}: Disk: #{object[1]}"
|
858
|
+
else
|
859
|
+
context = "Can't attribute object to any VM, may be swap?"
|
860
|
+
end
|
861
|
+
_print_dom_config_tree(
|
862
|
+
obj_uuid, result, indent,
|
863
|
+
:highlight_disk => disk_uuid,
|
864
|
+
:context => context
|
865
|
+
)
|
866
|
+
end
|
867
|
+
puts ""
|
868
|
+
end
|
869
|
+
end
|
870
|
+
end
|
871
|
+
|
872
|
+
|
873
|
+
opts :cmmds_find do
|
874
|
+
summary "CMMDS Find"
|
875
|
+
arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
|
876
|
+
opt :type, "CMMDS type, e.g. DOM_OBJECT, LSOM_OBJECT, POLICY, DISK etc.", :type => :string, :short => 't'
|
877
|
+
opt :uuid, "UUID of the entry.", :type => :string, :short => 'u'
|
878
|
+
opt :owner, "UUID of the owning node.", :type => :string, :short => 'o'
|
879
|
+
end
|
880
|
+
|
881
|
+
def cmmds_find cluster_or_host, opts
|
882
|
+
conn = cluster_or_host._connection
|
883
|
+
pc = conn.propertyCollector
|
884
|
+
host = cluster_or_host
|
885
|
+
entries = []
|
886
|
+
hostUuidMap = {}
|
887
|
+
_run_with_rev(conn, "dev") do
|
888
|
+
vsanIntSys = nil
|
889
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
890
|
+
cluster = cluster_or_host
|
891
|
+
hosts = cluster.host
|
892
|
+
else
|
893
|
+
hosts = [host]
|
894
|
+
end
|
895
|
+
|
896
|
+
hosts_props = pc.collectMultiple(hosts,
|
897
|
+
'name',
|
898
|
+
'runtime.connectionState',
|
899
|
+
'configManager.vsanSystem',
|
900
|
+
'configManager.vsanInternalSystem'
|
901
|
+
)
|
902
|
+
connected_hosts = hosts_props.select do |k,v|
|
903
|
+
v['runtime.connectionState'] == 'connected'
|
904
|
+
end.keys
|
905
|
+
host = connected_hosts.first
|
906
|
+
if !host
|
907
|
+
err "Couldn't find any connected hosts"
|
908
|
+
end
|
909
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
910
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
911
|
+
[props['name'], props['configManager.vsanSystem']]
|
912
|
+
end]
|
913
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
914
|
+
'config.clusterInfo')
|
915
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
916
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
917
|
+
end]
|
918
|
+
entries = vsanIntSys.query_cmmds([{
|
919
|
+
:owner => opts[:owner],
|
920
|
+
:uuid => opts[:uuid],
|
921
|
+
:type => opts[:type],
|
922
|
+
}], :gzip => true)
|
923
|
+
end
|
924
|
+
|
925
|
+
t = Terminal::Table.new()
|
926
|
+
t << ['#', 'Type', 'UUID', 'Owner', 'Health', 'Content']
|
927
|
+
t.add_separator
|
928
|
+
entries.each_with_index do |entry, i|
|
929
|
+
t << [
|
930
|
+
i + 1,
|
931
|
+
entry['type'],
|
932
|
+
entry['uuid'],
|
933
|
+
hostUuidMap[entry['owner']] || entry['owner'],
|
934
|
+
entry['health'],
|
935
|
+
PP.pp(entry['content'], ''),
|
936
|
+
]
|
937
|
+
end
|
938
|
+
|
939
|
+
puts t
|
940
|
+
end
|
941
|
+
|
942
|
+
def _get_vm_obj_uuids vm, vmsProps
|
943
|
+
obj_uuids = {}
|
944
|
+
disks = vmsProps[vm]['disks'] =
|
945
|
+
vmsProps[vm]['config.hardware.device'].select{|x| x.is_a?(VIM::VirtualDisk)}
|
946
|
+
pathName = vmsProps[vm]['summary.config'].vmPathName
|
947
|
+
namespaceUuid = vmsProps[vm]['namespaceUuid'] =
|
948
|
+
pathName.split("] ")[1].split("/")[0]
|
949
|
+
obj_uuids[namespaceUuid] = pathName
|
950
|
+
disks.each do |disk|
|
951
|
+
backing = disk.backing
|
952
|
+
while backing
|
953
|
+
obj_uuids[backing.backingObjectId] = backing.fileName
|
954
|
+
backing = backing.parent
|
955
|
+
end
|
956
|
+
end
|
957
|
+
obj_uuids
|
958
|
+
end
|
959
|
+
|
960
|
+
def convert_uuids uuids
|
961
|
+
nUuids = {}
|
962
|
+
uuids.each do |uuid|
|
963
|
+
begin
|
964
|
+
oUuid = uuid.split(' ').join()
|
965
|
+
nUuids[oUuid[0..7] + '-' + oUuid[8..11] + '-' +
|
966
|
+
oUuid[12..20] + '-' + oUuid[21..-1]] = true
|
967
|
+
rescue Exception => ex
|
968
|
+
puts "Ignoring malformed uuid #{uuid}: #{ex.class}: #{ex.message}"
|
969
|
+
end
|
970
|
+
end
|
971
|
+
|
972
|
+
return nUuids
|
973
|
+
end
|
974
|
+
|
975
|
+
# It is possible for the management stack (hostd and vc) to lose the handle of
|
976
|
+
# a VM which is powered on (has a running vmx instance). No further operations
|
977
|
+
# can be performed on the VM because the running vmx holds locks on the VM.
|
978
|
+
# This API is intended to find such VMs. We look for VMs who's power state
|
979
|
+
# is not poweredOn (poweredOff, unknown, etc) for which there is a running vmx
|
980
|
+
# instance on any host in the cluster.
|
981
|
+
|
982
|
+
def find_inconsistent_vms cluster_or_host
|
983
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
984
|
+
hosts = cluster_or_host.host
|
985
|
+
else
|
986
|
+
hosts = [host]
|
987
|
+
end
|
988
|
+
|
989
|
+
# Find all non-poweredon vms.
|
990
|
+
conn = hosts.first._connection
|
991
|
+
pc = conn.propertyCollector
|
992
|
+
vms = pc.collectMultiple(hosts, 'vm').values.map{|x| x['vm']}.flatten
|
993
|
+
vmProps = pc.collectMultiple(vms, 'name', 'runtime.powerState',
|
994
|
+
'summary.config.uuid')
|
995
|
+
notOnVMs = vmProps.select{|vm, p| p['runtime.powerState'] !=
|
996
|
+
'poweredOn'}.keys
|
997
|
+
|
998
|
+
# Get list of all running vms on all hosts in parallel.
|
999
|
+
threads = []
|
1000
|
+
processList = {}
|
1001
|
+
hosts.each do |host|
|
1002
|
+
threads << Thread.new do
|
1003
|
+
begin
|
1004
|
+
processList[host] = host.esxcli.vm.process.list
|
1005
|
+
rescue Exception => ex
|
1006
|
+
puts "Error getting vm process list on #{host.name}: " \
|
1007
|
+
"#{ex.class}: #{ex.message}"
|
1008
|
+
end
|
1009
|
+
end
|
1010
|
+
end
|
1011
|
+
threads.each{|t| t.join}
|
1012
|
+
uuids = convert_uuids(processList.values.flatten.map{|x| x.UUID})
|
1013
|
+
|
1014
|
+
inconsistentVMs = notOnVMs.select{|vm|
|
1015
|
+
uuids.has_key?(vmProps[vm]['summary.config.uuid'])}
|
1016
|
+
if not inconsistentVMs.empty?
|
1017
|
+
puts "Found VMs for which VC/hostd/vmx are out of sync:"
|
1018
|
+
inconsistentVMs.each do |vm|
|
1019
|
+
puts "#{vmProps[vm]['name']}"
|
1020
|
+
end
|
1021
|
+
else
|
1022
|
+
puts "Did not find VMs for which VC/hostd/vmx are out of sync"
|
1023
|
+
end
|
1024
|
+
|
1025
|
+
return inconsistentVMs
|
1026
|
+
end
|
1027
|
+
|
1028
|
+
def fix_inconsistent_vms vms
|
1029
|
+
begin
|
1030
|
+
tasks = []
|
1031
|
+
vms.each do |vm|
|
1032
|
+
begin
|
1033
|
+
path = vm.summary.config.vmPathName
|
1034
|
+
rp = vm.resourcePool
|
1035
|
+
folder = vm.parent
|
1036
|
+
name = vm.name
|
1037
|
+
host = vm.summary.runtime.host
|
1038
|
+
puts("Unregistering VM #{name}")
|
1039
|
+
vm.UnregisterVM()
|
1040
|
+
puts("Registering VM #{name}")
|
1041
|
+
tasks << folder.RegisterVM_Task(:path => path,
|
1042
|
+
:name => name,
|
1043
|
+
:asTemplate => false,
|
1044
|
+
:pool => rp,
|
1045
|
+
:host => host)
|
1046
|
+
rescue Exception => ex
|
1047
|
+
puts "Skipping VM #{name} due to exception: " \
|
1048
|
+
"#{ex.class}: #{ex.message}"
|
1049
|
+
end
|
1050
|
+
end
|
1051
|
+
progress(tasks)
|
1052
|
+
end
|
1053
|
+
end
|
1054
|
+
|
1055
|
+
opts :fix_renamed_vms do
|
1056
|
+
summary "This command can be used to rename some VMs which get renamed " \
|
1057
|
+
"by the VC in case of storage inaccessibility. It is " \
|
1058
|
+
"possible for some VMs to get renamed to vmx file path. " \
|
1059
|
+
"eg. \"/vmfs/volumes/vsanDatastore/foo/foo.vmx\". This command " \
|
1060
|
+
"will rename this VM to \"foo\". This is the best we can do. " \
|
1061
|
+
"This VM may have been named something else but we have no way " \
|
1062
|
+
"to know. In this best effort command, we simply rename it to " \
|
1063
|
+
"the name of its config file (without the full path and .vmx " \
|
1064
|
+
"extension ofcourse!)."
|
1065
|
+
arg :vms, nil, :lookup => VIM::VirtualMachine, :multi => true
|
1066
|
+
end
|
1067
|
+
|
1068
|
+
def fix_renamed_vms vms
|
1069
|
+
begin
|
1070
|
+
conn = vms.first._connection
|
1071
|
+
pc = conn.propertyCollector
|
1072
|
+
vmProps = pc.collectMultiple(vms, 'name', 'summary.config.vmPathName')
|
1073
|
+
|
1074
|
+
rename = {}
|
1075
|
+
puts "Continuing this command will rename the following VMs:"
|
1076
|
+
begin
|
1077
|
+
vmProps.each do |k,v|
|
1078
|
+
name = v['name']
|
1079
|
+
cfgPath = v['summary.config.vmPathName']
|
1080
|
+
if /.*vmfs.*volumes.*/.match(name)
|
1081
|
+
m = /.+\/(.+)\.vmx/.match(cfgPath)
|
1082
|
+
if name != m[1]
|
1083
|
+
# Save it in a hash so we don't have to do it again if
|
1084
|
+
# user choses Y.
|
1085
|
+
rename[k] = m[1]
|
1086
|
+
puts "#{name} -> #{m[1]}"
|
1087
|
+
end
|
1088
|
+
end
|
1089
|
+
end
|
1090
|
+
rescue Exception => ex
|
1091
|
+
# Swallow the exception. No need to stop other vms.
|
1092
|
+
puts "Skipping VM due to exception: #{ex.class}: #{ex.message}"
|
1093
|
+
end
|
1094
|
+
|
1095
|
+
if rename.length == 0
|
1096
|
+
puts "Nothing to do"
|
1097
|
+
return
|
1098
|
+
end
|
1099
|
+
|
1100
|
+
puts "Do you want to continue [y/N]?"
|
1101
|
+
opt = $stdin.gets.chomp
|
1102
|
+
if opt == 'y' || opt == 'Y'
|
1103
|
+
puts "Renaming..."
|
1104
|
+
tasks = rename.keys.map do |vm|
|
1105
|
+
vm.Rename_Task(:newName => rename[vm])
|
1106
|
+
end
|
1107
|
+
progress(tasks)
|
1108
|
+
end
|
1109
|
+
end
|
1110
|
+
end
|
1111
|
+
|
1112
|
+
opts :vm_object_info do
|
1113
|
+
summary "Fetch VSAN object information about a VM"
|
1114
|
+
arg :vms, nil, :lookup => VIM::VirtualMachine, :multi => true
|
1115
|
+
opt :cluster, "Cluster on which to fetch the object info", :lookup => VIM::ClusterComputeResource
|
1116
|
+
opt :perspective_from_host, "Host to query object info from", :lookup => VIM::HostSystem
|
1117
|
+
end
|
1118
|
+
|
1119
|
+
def vm_object_info vms, opts
|
1120
|
+
begin
|
1121
|
+
conn = vms.first._connection
|
1122
|
+
pc = conn.propertyCollector
|
1123
|
+
firstVm = vms.first
|
1124
|
+
host = firstVm.runtime.host
|
1125
|
+
if !host
|
1126
|
+
err "VM #{firstVm.name} doesn't have an assigned host (yet?)"
|
1127
|
+
end
|
1128
|
+
opts[:cluster] ||= host.parent
|
1129
|
+
_run_with_rev(conn, "dev") do
|
1130
|
+
vmsProps = pc.collectMultiple(vms,
|
1131
|
+
'name', 'config.hardware.device', 'summary.config',
|
1132
|
+
'runtime.host',
|
1133
|
+
)
|
1134
|
+
obj_uuids = []
|
1135
|
+
objToHostMap = {}
|
1136
|
+
vms.each do |vm|
|
1137
|
+
vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps).keys
|
1138
|
+
vm_obj_uuids.each{|x| objToHostMap[x] = vmsProps[vm]['runtime.host']}
|
1139
|
+
obj_uuids += vm_obj_uuids
|
1140
|
+
end
|
1141
|
+
opts[:objToHostMap] = objToHostMap
|
1142
|
+
|
1143
|
+
objs = _object_info(obj_uuids, opts)
|
1144
|
+
hosts_props = objs['host_props']
|
1145
|
+
|
1146
|
+
vms.each do |vm|
|
1147
|
+
vmProps = vmsProps[vm]
|
1148
|
+
disks = vmProps['disks']
|
1149
|
+
puts "VM #{vmProps['name']}:"
|
1150
|
+
if objs['has_partitions']
|
1151
|
+
vmHost = vmProps['runtime.host']
|
1152
|
+
puts " VM registered on host: #{hosts_props[vmHost]['name']}"
|
1153
|
+
end
|
1154
|
+
|
1155
|
+
indent = 1
|
1156
|
+
pre = " " * indent
|
1157
|
+
puts "#{pre}Namespace directory"
|
1158
|
+
obj_uuid = vmsProps[vm]['namespaceUuid']
|
1159
|
+
if objs['has_partitions'] && objs['obj_uuid_from_host'][obj_uuid]
|
1160
|
+
objHost = objs['obj_uuid_from_host'][obj_uuid]
|
1161
|
+
puts "#{pre} Shown from perspective of host #{hosts_props[objHost]['name']}"
|
1162
|
+
end
|
1163
|
+
_print_dom_config_tree(obj_uuid, objs, indent + 1)
|
1164
|
+
|
1165
|
+
disks.each do |disk|
|
1166
|
+
indent = 1
|
1167
|
+
backing = disk.backing
|
1168
|
+
while backing
|
1169
|
+
pre = " " * indent
|
1170
|
+
puts "#{pre}Disk backing: #{backing.fileName}"
|
1171
|
+
obj_uuid = backing.backingObjectId
|
1172
|
+
if objs['has_partitions'] && objs['obj_uuid_from_host'][obj_uuid]
|
1173
|
+
objHost = objs['obj_uuid_from_host'][obj_uuid]
|
1174
|
+
puts "#{pre} Shown from perspective of host #{hosts_props[objHost]['name']}"
|
1175
|
+
end
|
1176
|
+
_print_dom_config_tree(obj_uuid, objs, indent + 1)
|
1177
|
+
|
1178
|
+
backing = backing.parent
|
1179
|
+
indent += 1
|
1180
|
+
end
|
1181
|
+
end
|
1182
|
+
end
|
1183
|
+
end
|
1184
|
+
rescue Exception => ex
|
1185
|
+
puts ex.message
|
1186
|
+
puts ex.backtrace
|
1187
|
+
raise
|
1188
|
+
end
|
1189
|
+
end
|
1190
|
+
|
1191
|
+
def _object_info obj_uuids, opts
|
1192
|
+
if !opts[:cluster]
|
1193
|
+
err "Must specify a VSAN Cluster"
|
1194
|
+
end
|
1195
|
+
host = opts[:host]
|
1196
|
+
if opts[:cluster].is_a?(VIM::HostSystem)
|
1197
|
+
host = opts[:cluster]
|
1198
|
+
end
|
1199
|
+
# XXX: Verify VSAN is enabled on the cluster
|
1200
|
+
if host
|
1201
|
+
hosts = [host]
|
1202
|
+
conn = host._connection
|
1203
|
+
else
|
1204
|
+
hosts = opts[:cluster].host
|
1205
|
+
conn = opts[:cluster]._connection
|
1206
|
+
end
|
1207
|
+
|
1208
|
+
_run_with_rev(conn, "dev") do
|
1209
|
+
pc = conn.propertyCollector
|
1210
|
+
|
1211
|
+
hosts_props = pc.collectMultiple(hosts,
|
1212
|
+
'name', 'runtime.connectionState',
|
1213
|
+
'configManager.vsanSystem',
|
1214
|
+
'configManager.vsanInternalSystem'
|
1215
|
+
)
|
1216
|
+
connected_hosts = hosts_props.select do |k,v|
|
1217
|
+
v['runtime.connectionState'] == 'connected'
|
1218
|
+
end.keys
|
1219
|
+
hosts = connected_hosts
|
1220
|
+
if hosts.length == 0
|
1221
|
+
err "Couldn't find any connected hosts"
|
1222
|
+
end
|
1223
|
+
|
1224
|
+
if opts[:perspective_from_host]
|
1225
|
+
if !connected_hosts.member?(opts[:perspective_from_host])
|
1226
|
+
err "Perspective-Host not connected, or not in considered group of hosts"
|
1227
|
+
end
|
1228
|
+
end
|
1229
|
+
|
1230
|
+
# Detect partitions:
|
1231
|
+
# We need to ask every host which other hosts it believes to share a
|
1232
|
+
# VSAN cluster (partition) with. This is a call down to ESX, so we spawn
|
1233
|
+
# one connection and one thread per host to parallelize. We detect
|
1234
|
+
# partitions by grouping VMs based on quoting the same cluster members.
|
1235
|
+
hosts_props.map do |host, props|
|
1236
|
+
if !connected_hosts.member?(host)
|
1237
|
+
next
|
1238
|
+
end
|
1239
|
+
Thread.new do
|
1240
|
+
begin
|
1241
|
+
vsanSys = props['configManager.vsanSystem']
|
1242
|
+
c1 = conn.spawn_additional_connection
|
1243
|
+
vsanSys = vsanSys.dup_on_conn(c1)
|
1244
|
+
res = vsanSys.QueryHostStatus()
|
1245
|
+
hosts_props[host]['vsanCluster'] = res
|
1246
|
+
rescue Exception => ex
|
1247
|
+
puts "Failed to gather host status from #{props['name']}: #{ex.class}: #{ex.message}"
|
1248
|
+
end
|
1249
|
+
end
|
1250
|
+
end.compact.each{|t| t.join}
|
1251
|
+
|
1252
|
+
partitions = hosts_props.select do |h, p|
|
1253
|
+
connected_hosts.member?(h)
|
1254
|
+
end.group_by{|h, p| p['vsanCluster'].memberUuid}
|
1255
|
+
partition_exists = (partitions.length > 1)
|
1256
|
+
if partition_exists
|
1257
|
+
puts "#{Time.now}: WARNING: VSAN Cluster network partition detected."
|
1258
|
+
puts "#{Time.now}: The individual partitions of the cluster will have "
|
1259
|
+
puts "#{Time.now}: different views on object/component availablity. An "
|
1260
|
+
puts "#{Time.now}: attempt is made to show VM object accessibility from the "
|
1261
|
+
puts "#{Time.now}: perspective of the host on which a VM is registered. "
|
1262
|
+
puts "#{Time.now}: Please fix the network partition as soon as possible "
|
1263
|
+
puts "#{Time.now}: as it will seriously impact the availability of your "
|
1264
|
+
puts "#{Time.now}: VMs in your VSAN cluster. Check vsan.cluster_info for"
|
1265
|
+
puts "#{Time.now}: more details."
|
1266
|
+
puts "#{Time.now}: "
|
1267
|
+
puts "#{Time.now}: The following partitions were detected:"
|
1268
|
+
i = 1
|
1269
|
+
partitions.values.map do |part|
|
1270
|
+
part_hosts = part.map{|x| hosts_props[x[0]]}.compact.map{|x| x['name']}
|
1271
|
+
puts "#{Time.now}: #{i}) #{part_hosts.join(", ")}"
|
1272
|
+
i += 1
|
1273
|
+
end
|
1274
|
+
puts ""
|
1275
|
+
if opts[:perspective_from_host]
|
1276
|
+
name = hosts_props[opts[:perspective_from_host]]['name']
|
1277
|
+
puts "Showing data from perspective of host #{name} as requested"
|
1278
|
+
puts ""
|
1279
|
+
end
|
1280
|
+
end
|
1281
|
+
|
1282
|
+
host_vsan_uuids, host_props, vsan_disk_uuids = _vsan_cluster_disks_info(
|
1283
|
+
opts[:cluster],
|
1284
|
+
:hosts_props => hosts_props
|
1285
|
+
)
|
1286
|
+
extra_info = {
|
1287
|
+
'host_vsan_uuids' => host_vsan_uuids,
|
1288
|
+
'host_props' => host_props,
|
1289
|
+
'vsan_disk_uuids' => vsan_disk_uuids,
|
1290
|
+
}
|
1291
|
+
|
1292
|
+
obj_uuids = obj_uuids.compact.map{|x| _normalize_uuid(x)}
|
1293
|
+
obj_uuids = obj_uuids.select{|x| is_uuid(x)}
|
1294
|
+
|
1295
|
+
objs = {'obj_uuid_from_host' => {}}
|
1296
|
+
objs['has_partitions'] = partition_exists
|
1297
|
+
|
1298
|
+
# Dealing with partitions:
|
1299
|
+
# In the non-partitioned case we can just select any host and ask it
|
1300
|
+
# for the object info, given that CMMDS is (eventual) consistent
|
1301
|
+
# across the cluster. But during a network partition it is most logical
|
1302
|
+
# to ask the host on which a VM is registered about what it thinks about
|
1303
|
+
# the objects in question. So in case of a network partition we fall
|
1304
|
+
# back to a slower code path that asks each host individually about
|
1305
|
+
# the objects it (hopefully) knows best about.
|
1306
|
+
# Note: Upon power on DRS will pick a host to power the VM on. That other
|
1307
|
+
# host may not be in the same partition and DRS doesn't know about it,
|
1308
|
+
# so although we tried to show the object from the "right" hosts perspective
|
1309
|
+
# it may still not be the right host when debugging a power on failure.
|
1310
|
+
if opts[:objToHostMap] && partition_exists && !opts[:perspective_from_host]
|
1311
|
+
obj_uuids_groups = obj_uuids.group_by{|x| opts[:objToHostMap][x]}
|
1312
|
+
obj_uuids_groups.each do |host, group|
|
1313
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
1314
|
+
group_objs = vsanIntSys.query_vsan_objects(:uuids => group)
|
1315
|
+
|
1316
|
+
# Here we are merging and overriding potentially conflicting
|
1317
|
+
# information about LSOM_OBJECT and DISK entries. No smarts are
|
1318
|
+
# applied, as I am not aware of issues arising from those
|
1319
|
+
# possible inconsistencies.
|
1320
|
+
group_objs.each do |k,v|
|
1321
|
+
objs[k] ||= {}
|
1322
|
+
objs[k].merge!(v)
|
1323
|
+
end
|
1324
|
+
group.each do |uuid|
|
1325
|
+
objs['obj_uuid_from_host'][uuid] = host
|
1326
|
+
end
|
1327
|
+
end
|
1328
|
+
else
|
1329
|
+
if opts[:perspective_from_host]
|
1330
|
+
host = opts[:perspective_from_host]
|
1331
|
+
else
|
1332
|
+
host = hosts.first
|
1333
|
+
end
|
1334
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
1335
|
+
objs = vsanIntSys.query_vsan_objects(:uuids => obj_uuids)
|
1336
|
+
end
|
1337
|
+
|
1338
|
+
objs.merge!(extra_info)
|
1339
|
+
objs
|
1340
|
+
end
|
1341
|
+
end
|
1342
|
+
|
1343
|
+
|
1344
|
+
def _fetch_disk_stats obj, metrics, instances, opts = {}
|
1345
|
+
conn = obj._connection
|
1346
|
+
pm = conn.serviceContent.perfManager
|
1347
|
+
|
1348
|
+
metrics.each do |x|
|
1349
|
+
err "no such metric #{x}" unless pm.perfcounter_hash.member? x
|
1350
|
+
end
|
1351
|
+
|
1352
|
+
interval = pm.provider_summary(obj).refreshRate
|
1353
|
+
start_time = nil
|
1354
|
+
if interval == -1
|
1355
|
+
# Object does not support real time stats
|
1356
|
+
interval = 300
|
1357
|
+
start_time = Time.now - 300 * 5
|
1358
|
+
end
|
1359
|
+
stat_opts = {
|
1360
|
+
:interval => interval,
|
1361
|
+
:startTime => start_time,
|
1362
|
+
:instance => instances,
|
1363
|
+
:multi_instance => true,
|
1364
|
+
}
|
1365
|
+
stat_opts[:max_samples] = opts[:samples] if opts[:samples]
|
1366
|
+
res = pm.retrieve_stats [obj], metrics, stat_opts
|
1367
|
+
|
1368
|
+
out = {}
|
1369
|
+
if res && res[obj]
|
1370
|
+
res[obj][:metrics].each do |key, values|
|
1371
|
+
metric, device = key
|
1372
|
+
out[device] ||= {}
|
1373
|
+
out[device][metric] = values
|
1374
|
+
end
|
1375
|
+
end
|
1376
|
+
out
|
1377
|
+
end
|
1378
|
+
|
1379
|
+
opts :disks_stats do
|
1380
|
+
summary "Show stats on all disks in VSAN"
|
1381
|
+
arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
|
1382
|
+
opt :compute_number_of_components, "Deprecated", :type => :boolean
|
1383
|
+
opt :show_iops, "Show deprecated fields", :type => :boolean
|
1384
|
+
end
|
1385
|
+
|
1386
|
+
def disks_stats hosts_and_clusters, opts = {}
|
1387
|
+
opts[:compute_number_of_components] = true
|
1388
|
+
conn = hosts_and_clusters.first._connection
|
1389
|
+
hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
|
1390
|
+
clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
|
1391
|
+
pc = conn.propertyCollector
|
1392
|
+
cluster_hosts = pc.collectMultiple(clusters, 'host')
|
1393
|
+
cluster_hosts.each do |cluster, props|
|
1394
|
+
hosts += props['host']
|
1395
|
+
end
|
1396
|
+
hosts = hosts.uniq
|
1397
|
+
_run_with_rev(conn, "dev") do
|
1398
|
+
hosts_props = pc.collectMultiple(hosts,
|
1399
|
+
'name',
|
1400
|
+
'runtime.connectionState',
|
1401
|
+
'configManager.vsanSystem',
|
1402
|
+
'configManager.vsanInternalSystem'
|
1403
|
+
)
|
1404
|
+
|
1405
|
+
hosts = hosts_props.select do |k,v|
|
1406
|
+
v['runtime.connectionState'] == 'connected'
|
1407
|
+
end.keys
|
1408
|
+
if hosts.length == 0
|
1409
|
+
err "Couldn't find any connected hosts"
|
1410
|
+
end
|
1411
|
+
|
1412
|
+
hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
|
1413
|
+
node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
|
1414
|
+
node_uuids = Hash[node_uuids.map do |k, v|
|
1415
|
+
[v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
|
1416
|
+
end]
|
1417
|
+
|
1418
|
+
lock = Mutex.new
|
1419
|
+
disks = {}
|
1420
|
+
vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
|
1421
|
+
disks = vsanIntSys.QueryPhysicalVsanDisks(:props => [
|
1422
|
+
'lsom_objects_count',
|
1423
|
+
'uuid',
|
1424
|
+
'isSsd',
|
1425
|
+
'capacity',
|
1426
|
+
'capacityUsed',
|
1427
|
+
'capacityReserved',
|
1428
|
+
'iops',
|
1429
|
+
'iopsReserved',
|
1430
|
+
'disk_health',
|
1431
|
+
])
|
1432
|
+
if disks == "BAD"
|
1433
|
+
err "Server failed to gather VSAN disk info"
|
1434
|
+
end
|
1435
|
+
begin
|
1436
|
+
disks = JSON.load(disks)
|
1437
|
+
rescue
|
1438
|
+
err "Server didn't provide VSAN disk info: #{disks}"
|
1439
|
+
end
|
1440
|
+
#pp disks
|
1441
|
+
|
1442
|
+
vsan_disks_info = {}
|
1443
|
+
vsan_disks_info.merge!(
|
1444
|
+
_vsan_host_disks_info(Hash[hosts.map{|h| [h, hosts_props[h]['name']]}])
|
1445
|
+
)
|
1446
|
+
disks.each do |k, v|
|
1447
|
+
v['esxcli'] = vsan_disks_info[v['uuid']]
|
1448
|
+
if v['esxcli']
|
1449
|
+
v['host'] = v['esxcli']._get_property :host
|
1450
|
+
end
|
1451
|
+
end
|
1452
|
+
|
1453
|
+
#pp vsan_disks_info
|
1454
|
+
#pp disks.values.map{|x| [x['host'], x['esxcli']]}
|
1455
|
+
#pp disks.values.group_by{|x| x['host']}.keys
|
1456
|
+
|
1457
|
+
disks = disks.values.sort_by do |x|
|
1458
|
+
host_props = hosts_props[x['host']]
|
1459
|
+
host_props ? host_props['name'] : ''
|
1460
|
+
end
|
1461
|
+
|
1462
|
+
# Stats are now better handled by observer
|
1463
|
+
# disks.group_by{|x| x['host']}.each do |host, host_disks|
|
1464
|
+
# next if !host
|
1465
|
+
# devices = host_disks.map{|x| x['esxcli'].Device}
|
1466
|
+
# metrics = [
|
1467
|
+
# 'disk.numberReadAveraged', 'disk.numberWriteAveraged',
|
1468
|
+
# 'disk.deviceLatency', 'disk.maxTotalLatency',
|
1469
|
+
# 'disk.queueLatency', 'disk.kernelLatency'
|
1470
|
+
# ]
|
1471
|
+
# stats = _fetch_disk_stats host, metrics, devices
|
1472
|
+
# disks.each do |v|
|
1473
|
+
# if v['esxcli'] && stats[v['esxcli'].Device]
|
1474
|
+
# v['stats'] = stats[v['esxcli'].Device]
|
1475
|
+
# else
|
1476
|
+
# v['stats'] ||= {}
|
1477
|
+
# metrics.each{|m| v['stats'][m] ||= [-1] }
|
1478
|
+
# end
|
1479
|
+
# end
|
1480
|
+
# end
|
1481
|
+
|
1482
|
+
t = Terminal::Table.new()
|
1483
|
+
if opts[:show_iops]
|
1484
|
+
t << [nil, nil, nil, 'Num', 'Capacity', nil, nil, 'Iops', nil, nil, ]
|
1485
|
+
t << ['DisplayName', 'Host', 'isSSD', 'Comp', 'Total', 'Used', 'Reserved', 'Total', 'Reserved', ]
|
1486
|
+
else
|
1487
|
+
t << [nil, nil, nil, 'Num', 'Capacity', nil, nil, 'Status']
|
1488
|
+
t << ['DisplayName', 'Host', 'isSSD', 'Comp', 'Total', 'Used', 'Reserved', 'Health']
|
1489
|
+
end
|
1490
|
+
t.add_separator
|
1491
|
+
# XXX: Would be nice to show displayName and host
|
1492
|
+
|
1493
|
+
groups = disks.group_by{|x| x['esxcli'] ? x['esxcli'].VSANDiskGroupUUID : nil}
|
1494
|
+
|
1495
|
+
groups.each do |group, disks|
|
1496
|
+
disks.sort_by{|x| -x['isSsd']}.each do |x|
|
1497
|
+
info = x['esxcli']
|
1498
|
+
host_props = hosts_props[x['host']]
|
1499
|
+
cols = [
|
1500
|
+
info ? info.DisplayName : 'N/A',
|
1501
|
+
host_props ? host_props['name'] : 'N/A',
|
1502
|
+
#x['uuid'],
|
1503
|
+
(x['isSsd'] == 1) ? 'SSD' : 'MD',
|
1504
|
+
x['lsom_objects_count'] || 'N/A',
|
1505
|
+
"%.2f GB" % [x['capacity'].to_f / 1024**3],
|
1506
|
+
"%.0f %%" % [x['capacityUsed'].to_f * 100 / x['capacity'].to_f],
|
1507
|
+
"%.0f %%" % [x['capacityReserved'].to_f * 100 / x['capacity'].to_f],
|
1508
|
+
]
|
1509
|
+
|
1510
|
+
if opts[:show_iops]
|
1511
|
+
cols += [
|
1512
|
+
"%d" % [x['iops']],
|
1513
|
+
"%.0f %%" % [ x['iopsReserved'].to_f * 100 / x['iops'].to_f],
|
1514
|
+
]
|
1515
|
+
end
|
1516
|
+
|
1517
|
+
# cols += [
|
1518
|
+
# "%dr/%dw" % [x['stats']['disk.numberReadAveraged'].first,
|
1519
|
+
# x['stats']['disk.numberWriteAveraged'].first],
|
1520
|
+
# "%dd/%dq/%dk" % [x['stats']['disk.deviceLatency'].first,
|
1521
|
+
# x['stats']['disk.queueLatency'].first,
|
1522
|
+
# x['stats']['disk.kernelLatency'].first,],
|
1523
|
+
# ]
|
1524
|
+
|
1525
|
+
health = "N/A"
|
1526
|
+
if x['disk_health'] && x['disk_health']['healthFlags']
|
1527
|
+
flags = x['disk_health']['healthFlags']
|
1528
|
+
health = []
|
1529
|
+
{
|
1530
|
+
4 => "FAILED",
|
1531
|
+
5 => "OFFLINE",
|
1532
|
+
6 => "DECOMMISSIONED",
|
1533
|
+
}.each do |k, v|
|
1534
|
+
if flags & (1 << k) != 0
|
1535
|
+
health << v
|
1536
|
+
end
|
1537
|
+
end
|
1538
|
+
if health.length == 0
|
1539
|
+
health = "OK"
|
1540
|
+
else
|
1541
|
+
health = health.join(", ")
|
1542
|
+
end
|
1543
|
+
|
1544
|
+
end
|
1545
|
+
cols += [
|
1546
|
+
health
|
1547
|
+
]
|
1548
|
+
|
1549
|
+
t << cols
|
1550
|
+
end
|
1551
|
+
if group != groups.keys.last
|
1552
|
+
t.add_separator
|
1553
|
+
end
|
1554
|
+
end
|
1555
|
+
|
1556
|
+
puts t
|
1557
|
+
end
|
1558
|
+
end
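
For reference, the Health column above is built by testing individual bits of the disk_health healthFlags value. A minimal standalone sketch of the same decoding; the bit positions 4/5/6 and their names are taken from the code above, all other bits are ignored here:

# Decode the subset of healthFlags bits used by disks_stats above.
def decode_disk_health flags
  names = { 4 => "FAILED", 5 => "OFFLINE", 6 => "DECOMMISSIONED" }
  set = []
  names.each { |bit, label| set << label if flags & (1 << bit) != 0 }
  set.empty? ? "OK" : set.join(", ")
end

decode_disk_health(0)                     # => "OK"
decode_disk_health(1 << 4)                # => "FAILED"
decode_disk_health((1 << 5) | (1 << 6))   # => "OFFLINE, DECOMMISSIONED"
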
|
1559
|
+
|
1560
|
+
|
1561
|
+
opts :whatif_host_failures do
|
1562
|
+
summary "Simulates how host failures impact VSAN resource usage"
|
1563
|
+
banner <<-EOS
|
1564
|
+
|
1565
|
+
The command shows current VSAN disk usage, but also simulates how
|
1566
|
+
disk usage would evolve under a host failure. Concretely the simulation
|
1567
|
+
assumes that all objects would be brought back to full policy
|
1568
|
+
compliance by bringing up new mirrors of existing data.
|
1569
|
+
The command makes some simplifying assumptions about disk space
|
1570
|
+
balance in the cluster. It is mostly intended to give a rough estimate of
|
1571
|
+
whether a host failure would drive the cluster close to being full.
|
1572
|
+
|
1573
|
+
EOS
|
1574
|
+
arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
|
1575
|
+
opt :num_host_failures_to_simulate, "Number of host failures to simulate", :default => 1
|
1576
|
+
opt :show_current_usage_per_host, "Show current resources used per host"
|
1577
|
+
end
|
1578
|
+
|
1579
|
+
def whatif_host_failures hosts_and_clusters, opts = {}
|
1580
|
+
opts[:compute_number_of_components] = true
|
1581
|
+
conn = hosts_and_clusters.first._connection
|
1582
|
+
hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
|
1583
|
+
clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
|
1584
|
+
pc = conn.propertyCollector
|
1585
|
+
cluster_hosts = pc.collectMultiple(clusters, 'host')
|
1586
|
+
cluster_hosts.each do |cluster, props|
|
1587
|
+
hosts += props['host']
|
1588
|
+
end
|
1589
|
+
hosts = hosts.uniq
|
1590
|
+
|
1591
|
+
if opts[:num_host_failures_to_simulate] != 1
|
1592
|
+
err "Only simulation of 1 host failure has been implemented"
|
1593
|
+
end
|
1594
|
+
|
1595
|
+
_run_with_rev(conn, "dev") do
|
1596
|
+
hosts_props = pc.collectMultiple(hosts,
|
1597
|
+
'name',
|
1598
|
+
'runtime.connectionState',
|
1599
|
+
'configManager.vsanSystem',
|
1600
|
+
'configManager.vsanInternalSystem'
|
1601
|
+
)
|
1602
|
+
|
1603
|
+
hosts = hosts_props.select do |k,v|
|
1604
|
+
v['runtime.connectionState'] == 'connected'
|
1605
|
+
end.keys
|
1606
|
+
if hosts.length == 0
|
1607
|
+
err "Couldn't find any connected hosts"
|
1608
|
+
end
|
1609
|
+
|
1610
|
+
hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
|
1611
|
+
node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
|
1612
|
+
node_uuids = Hash[node_uuids.map do |k, v|
|
1613
|
+
[v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
|
1614
|
+
end]
|
1615
|
+
|
1616
|
+
lock = Mutex.new
|
1617
|
+
disks = {}
|
1618
|
+
vsanIntSys = hosts_props[hosts.first]['configManager.vsanInternalSystem']
|
1619
|
+
disks = vsanIntSys.QueryPhysicalVsanDisks(:props => [
|
1620
|
+
'lsom_objects_count',
|
1621
|
+
'uuid',
|
1622
|
+
'isSsd',
|
1623
|
+
'capacity',
|
1624
|
+
'capacityUsed',
|
1625
|
+
'capacityReserved',
|
1626
|
+
'iops',
|
1627
|
+
'iopsReserved',
|
1628
|
+
'owner',
|
1629
|
+
])
|
1630
|
+
if disks == "BAD"
|
1631
|
+
err "Server failed to gather VSAN disk info"
|
1632
|
+
end
|
1633
|
+
begin
|
1634
|
+
disks = JSON.load(disks)
|
1635
|
+
rescue
|
1636
|
+
err "Server didn't provide VSAN disk info: #{objs}"
|
1637
|
+
end
|
1638
|
+
|
1639
|
+
# XXX: Do this in threads
|
1640
|
+
hosts.map do |host|
|
1641
|
+
Thread.new do
|
1642
|
+
c1 = conn.spawn_additional_connection
|
1643
|
+
props = hosts_props[host]
|
1644
|
+
vsanIntSys2 = props['configManager.vsanInternalSystem']
|
1645
|
+
vsanIntSys3 = vsanIntSys2.dup_on_conn(c1)
|
1646
|
+
res = vsanIntSys3.query_vsan_statistics(:labels => ['lsom-node'])
|
1647
|
+
hosts_props[host]['lsom.node'] = res['lsom.node']
|
1648
|
+
end
|
1649
|
+
end.each{|t| t.join}
|
1650
|
+
|
1651
|
+
hosts_disks = Hash[disks.values.group_by{|x| x['owner']}.map do |owner, hostDisks|
|
1652
|
+
props = {}
|
1653
|
+
hdds = hostDisks.select{|disk| disk['isSsd'] == 0}
|
1654
|
+
ssds = hostDisks.select{|disk| disk['isSsd'] == 1}
|
1655
|
+
hdds.each do |disk|
|
1656
|
+
[
|
1657
|
+
'capacityUsed', 'capacityReserved',
|
1658
|
+
'capacity', 'lsom_objects_count'
|
1659
|
+
].each do |x|
|
1660
|
+
props[x] ||= 0
|
1661
|
+
props[x] += disk[x]
|
1662
|
+
end
|
1663
|
+
end
|
1664
|
+
ssds.each do |disk|
|
1665
|
+
[
|
1666
|
+
'capacityReserved', 'capacity',
|
1667
|
+
].each do |x|
|
1668
|
+
props["ssd_#{x}"] ||= 0
|
1669
|
+
props["ssd_#{x}"] += disk[x]
|
1670
|
+
end
|
1671
|
+
end
|
1672
|
+
h = node_uuids[owner]
|
1673
|
+
props['host'] = h
|
1674
|
+
props['hostname'] = h ? hosts_props[h]['name'] : owner
|
1675
|
+
props['numHDDs'] = hdds.length
|
1676
|
+
props['maxComponents'] = 3000
|
1677
|
+
if hosts_props[h]['lsom.node']
|
1678
|
+
props['maxComponents'] = hosts_props[h]['lsom.node']['numMaxComponents']
|
1679
|
+
end
|
1680
|
+
[owner, props]
|
1681
|
+
end]
|
1682
|
+
|
1683
|
+
sorted_hosts = hosts_disks.values.sort_by{|x| -x['capacityUsed']}
|
1684
|
+
|
1685
|
+
if opts[:show_current_usage_per_host]
|
1686
|
+
puts "Current utilization of hosts:"
|
1687
|
+
t = Terminal::Table.new()
|
1688
|
+
t << [nil, nil, 'HDD Capacity', nil, nil, 'Components', 'SSD Capacity']
|
1689
|
+
t << ['Host', 'NumHDDs', 'Total', 'Used', 'Reserved', 'Used', 'Reserved']
|
1690
|
+
t.add_separator
|
1691
|
+
|
1692
|
+
hosts_disks.each do |owner, x|
|
1693
|
+
cols = [
|
1694
|
+
x['hostname'],
|
1695
|
+
x['numHDDs'],
|
1696
|
+
"%.2f GB" % [x['capacity'].to_f / 1024**3],
|
1697
|
+
"%.0f %%" % [x['capacityUsed'].to_f * 100 / x['capacity'].to_f],
|
1698
|
+
"%.0f %%" % [x['capacityReserved'].to_f * 100 / x['capacity'].to_f],
|
1699
|
+
"%4u/%u (%.0f %%)" % [
|
1700
|
+
x['lsom_objects_count'],
|
1701
|
+
x['maxComponents'],
|
1702
|
+
x['lsom_objects_count'].to_f * 100 / x['maxComponents'].to_f
|
1703
|
+
],
|
1704
|
+
"%.0f %%" % [x['ssd_capacityReserved'].to_f * 100 / x['ssd_capacity'].to_f],
|
1705
|
+
]
|
1706
|
+
t << cols
|
1707
|
+
end
|
1708
|
+
puts t
|
1709
|
+
puts ""
|
1710
|
+
end
|
1711
|
+
|
1712
|
+
puts "Simulating #{opts[:num_host_failures_to_simulate]} host failures:"
|
1713
|
+
puts ""
|
1714
|
+
worst_host = sorted_hosts[0]
|
1715
|
+
|
1716
|
+
if sorted_hosts.length < 3
|
1717
|
+
puts "Cluster unable to regain full policy compliance after host failure, "
|
1718
|
+
puts "not enough hosts remaining."
|
1719
|
+
return
|
1720
|
+
end
|
1721
|
+
|
1722
|
+
t = Terminal::Table.new()
|
1723
|
+
t << ["Resource", "Usage right now", "Usage after failure/re-protection"]
|
1724
|
+
t.add_separator
|
1725
|
+
capacityRow = ["HDD capacity"]
|
1726
|
+
|
1727
|
+
# Capacity before failure
|
1728
|
+
used = sorted_hosts.map{|x| x['capacityUsed']}.sum
|
1729
|
+
total = sorted_hosts.map{|x| x['capacity']}.sum
|
1730
|
+
free = total - used
|
1731
|
+
usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
|
1732
|
+
capacityRow << "%3.0f%% used (%.2f GB free)" % [
|
1733
|
+
usedPctOriginal,
|
1734
|
+
free.to_f / 1024**3,
|
1735
|
+
]
|
1736
|
+
|
1737
|
+
# Capacity after rebuild
|
1738
|
+
used = sorted_hosts[1..-1].map{|x| x['capacityUsed']}.sum
|
1739
|
+
total = sorted_hosts[1..-1].map{|x| x['capacity']}.sum
|
1740
|
+
additional = worst_host['capacityUsed']
|
1741
|
+
free = total - used
|
1742
|
+
usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
|
1743
|
+
usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
|
1744
|
+
usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
|
1745
|
+
capacityRow << "%3.0f%% used (%.2f GB free)" % [
|
1746
|
+
usedPctAfterReMirror,
|
1747
|
+
(free - additional).to_f / 1024**3,
|
1748
|
+
]
|
1749
|
+
t << capacityRow
|
1750
|
+
|
1751
|
+
# Components before failure
|
1752
|
+
sorted_hosts = hosts_disks.values.sort_by{|x| -x['lsom_objects_count']}
|
1753
|
+
worst_host = sorted_hosts[0]
|
1754
|
+
used = sorted_hosts.map{|x| x['lsom_objects_count']}.sum
|
1755
|
+
total = sorted_hosts.map{|x| x['maxComponents']}.sum
|
1756
|
+
free = total - used
|
1757
|
+
usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
|
1758
|
+
componentsRow = ["Components"]
|
1759
|
+
componentsRow << "%3.0f%% used (%u available)" % [
|
1760
|
+
usedPctOriginal,
|
1761
|
+
free,
|
1762
|
+
]
|
1763
|
+
|
1764
|
+
# Components after rebuild
|
1765
|
+
used = sorted_hosts[1..-1].map{|x| x['lsom_objects_count']}.sum
|
1766
|
+
total = sorted_hosts[1..-1].map{|x| x['maxComponents']}.sum
|
1767
|
+
additional = worst_host['lsom_objects_count']
|
1768
|
+
free = total - used
|
1769
|
+
usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
|
1770
|
+
usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
|
1771
|
+
usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
|
1772
|
+
componentsRow << "%3.0f%% used (%u available)" % [
|
1773
|
+
usedPctAfterReMirror,
|
1774
|
+
(free - additional),
|
1775
|
+
]
|
1776
|
+
t << componentsRow
|
1777
|
+
|
1778
|
+
# RC reservations before failure
|
1779
|
+
sorted_hosts = hosts_disks.values.sort_by{|x| -x['ssd_capacityReserved']}
|
1780
|
+
worst_host = sorted_hosts[0]
|
1781
|
+
used = sorted_hosts.map{|x| x['ssd_capacityReserved']}.sum
|
1782
|
+
total = sorted_hosts.map{|x| x['ssd_capacity']}.sum
|
1783
|
+
free = total - used
|
1784
|
+
usedPctOriginal = 100.0 - (free.to_f * 100 / total.to_f)
|
1785
|
+
rcReservationsRow = ["RC reservations"]
|
1786
|
+
rcReservationsRow << "%3.0f%% used (%.2f GB free)" % [
|
1787
|
+
usedPctOriginal,
|
1788
|
+
free.to_f / 1024**3,
|
1789
|
+
]
|
1790
|
+
|
1791
|
+
# RC reservations after rebuild
|
1792
|
+
used = sorted_hosts[1..-1].map{|x| x['ssd_capacityReserved']}.sum
|
1793
|
+
total = sorted_hosts[1..-1].map{|x| x['ssd_capacity']}.sum
|
1794
|
+
additional = worst_host['ssd_capacityReserved']
|
1795
|
+
free = total - used
|
1796
|
+
usedPctBeforeReMirror = 100.0 - (free.to_f * 100 / total.to_f)
|
1797
|
+
usedPctAfterReMirror = 100.0 - ((free - additional).to_f * 100 / total.to_f)
|
1798
|
+
usedPctIncrease = usedPctAfterReMirror - usedPctOriginal
|
1799
|
+
rcReservationsRow << "%3.0f%% used (%.2f GB free)" % [
|
1800
|
+
usedPctAfterReMirror,
|
1801
|
+
(free - additional).to_f / 1024**3,
|
1802
|
+
]
|
1803
|
+
t << rcReservationsRow
|
1804
|
+
|
1805
|
+
puts t
|
1806
|
+
end
|
1807
|
+
end
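
The capacity row above boils down to: drop the most-utilized host, then assume its used bytes get re-mirrored onto the remaining hosts. A minimal sketch of that arithmetic, with hypothetical per-host byte counts as input:

# Usage (in %) after the worst host fails and its data is re-mirrored elsewhere.
def usage_after_failure per_host_used, per_host_total
  worst = per_host_used.index(per_host_used.max)
  used = 0
  total = 0
  per_host_used.each_with_index  { |u, i| used  += u unless i == worst }
  per_host_total.each_with_index { |c, i| total += c unless i == worst }
  free = total - used
  100.0 - ((free - per_host_used[worst]).to_f * 100 / total)
end

usage_after_failure([400, 300, 300], [1000, 1000, 1000])  # => 50.0
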
|
1808
|
+
|
1809
|
+
|
1810
|
+
def _observe_snapshot conn, host, hosts, vmView, pc, hosts_props, vsanIntSys
|
1811
|
+
startTime = Time.now
|
1812
|
+
observation = {
|
1813
|
+
'cmmds' => {
|
1814
|
+
'clusterInfos' => {},
|
1815
|
+
'clusterDirs' => {},
|
1816
|
+
},
|
1817
|
+
'vsi' => {},
|
1818
|
+
'inventory' => {},
|
1819
|
+
}
|
1820
|
+
exceptions = []
|
1821
|
+
threads = []
|
1822
|
+
begin
|
1823
|
+
threads << Thread.new do
|
1824
|
+
begin
|
1825
|
+
t1 = Time.now
|
1826
|
+
vms = vmView.view
|
1827
|
+
|
1828
|
+
vmProperties = [
|
1829
|
+
'name', 'runtime.powerState', 'datastore', 'config.annotation',
|
1830
|
+
'parent', 'resourcePool', 'storage.perDatastoreUsage',
|
1831
|
+
'summary.config.memorySizeMB', 'summary.config.numCpu',
|
1832
|
+
'summary.config.vmPathName', 'config.hardware.device',
|
1833
|
+
'runtime.connectionState',
|
1834
|
+
]
|
1835
|
+
vmsProps = pc.collectMultiple(vms, *vmProperties)
|
1836
|
+
t2 = Time.now
|
1837
|
+
puts "Query VM properties: %.2f sec" % (t2 - t1)
|
1838
|
+
observation['inventory']['vms'] = {}
|
1839
|
+
vmsProps.each do |vm, vmProps|
|
1840
|
+
vmProps['vsan-obj-uuids'] = {}
|
1841
|
+
devices = vmProps['config.hardware.device'] || []
|
1842
|
+
disks = devices.select{|x| x.is_a?(VIM::VirtualDisk)}
|
1843
|
+
disks.each do |disk|
|
1844
|
+
newBacking = {}
|
1845
|
+
newDisk = {
|
1846
|
+
'unitNumber' => disk.unitNumber,
|
1847
|
+
'controllerKey' => disk.controllerKey,
|
1848
|
+
'backing' => newBacking,
|
1849
|
+
}
|
1850
|
+
backing = disk.backing
|
1851
|
+
if !backing.is_a?(VIM::VirtualDiskFlatVer2BackingInfo)
|
1852
|
+
next
|
1853
|
+
end
|
1854
|
+
while backing
|
1855
|
+
uuid = backing.backingObjectId
|
1856
|
+
if uuid
|
1857
|
+
vmProps['vsan-obj-uuids'][uuid] = backing.fileName
|
1858
|
+
newBacking['uuid'] = uuid
|
1859
|
+
end
|
1860
|
+
newBacking['fileName'] = backing.fileName
|
1861
|
+
backing = backing.parent
|
1862
|
+
|
1863
|
+
if backing
|
1864
|
+
newBacking['parent'] = {}
|
1865
|
+
newBacking = newBacking['parent']
|
1866
|
+
end
|
1867
|
+
end
|
1868
|
+
|
1869
|
+
vmProps['disks'] ||= []
|
1870
|
+
vmProps['disks'] << newDisk
|
1871
|
+
end
|
1872
|
+
# Do not add devices to the snapshot as they are too big
|
1873
|
+
vmProps.delete('config.hardware.device')
|
1874
|
+
|
1875
|
+
begin
|
1876
|
+
vmPathName = vmProps['summary.config.vmPathName']
|
1877
|
+
uuid = vmPathName.split("] ")[1].split("/")[0]
|
1878
|
+
vmProps['vsan-obj-uuids'][uuid] = vmPathName
|
1879
|
+
rescue
|
1880
|
+
end
|
1881
|
+
|
1882
|
+
observation['inventory']['vms'][vm._ref] = vmProps
|
1883
|
+
end
|
1884
|
+
rescue Exception => ex
|
1885
|
+
exceptions << ex
|
1886
|
+
end
|
1887
|
+
end
|
1888
|
+
threads << Thread.new do
|
1889
|
+
begin
|
1890
|
+
sleep(20)
|
1891
|
+
hostname = hosts_props[host]['name']
|
1892
|
+
# XXX: Should pick one host per partition
|
1893
|
+
c1 = conn.spawn_additional_connection
|
1894
|
+
vsanIntSys1 = vsanIntSys.dup_on_conn(c1)
|
1895
|
+
|
1896
|
+
t1 = Time.now
|
1897
|
+
res = vsanIntSys1.query_cmmds(
|
1898
|
+
(1..30).map{|x| {:type => x}}
|
1899
|
+
)
|
1900
|
+
t2 = Time.now
|
1901
|
+
puts "Query CMMDS from #{hostname}: %.2f sec (json size: %dKB)" % [
|
1902
|
+
(t2 - t1), JSON.dump(res).length / 1024
|
1903
|
+
]
|
1904
|
+
observation['cmmds']['clusterDirs'][hostname] = res
|
1905
|
+
rescue Exception => ex
|
1906
|
+
exceptions << ex
|
1907
|
+
end
|
1908
|
+
end
|
1909
|
+
hosts.each do |host|
|
1910
|
+
threads << Thread.new do
|
1911
|
+
begin
|
1912
|
+
hostname = hosts_props[host]['name']
|
1913
|
+
vsanIntSys1 = hosts_props[host]['configManager.vsanInternalSystem']
|
1914
|
+
c1 = conn.spawn_additional_connection
|
1915
|
+
vsanIntSys1 = vsanIntSys1.dup_on_conn(c1)
|
1916
|
+
|
1917
|
+
t1 = Time.now
|
1918
|
+
res = vsanIntSys1.QueryVsanStatistics(:labels =>
|
1919
|
+
[
|
1920
|
+
'dom', 'lsom', 'worldlets', 'plog',
|
1921
|
+
'dom-objects',
|
1922
|
+
'mem', 'cpus', 'slabs',
|
1923
|
+
'vscsi', 'cbrc',
|
1924
|
+
'disks',
|
1925
|
+
#'rdtassocsets',
|
1926
|
+
'system-mem', 'pnics',
|
1927
|
+
]
|
1928
|
+
)
|
1929
|
+
t2 = Time.now
|
1930
|
+
res = JSON.load(res)
|
1931
|
+
puts "Query Stats on #{host.name}: %.2f sec (on ESX: %.2f, json size: %dKB)" % [
|
1932
|
+
(t2 - t1), res['on-esx-collect-duration'],
|
1933
|
+
JSON.dump(res).length / 1024
|
1934
|
+
]
|
1935
|
+
observation['vsi'][hostname] = res
|
1936
|
+
rescue Exception => ex
|
1937
|
+
exceptions << ex
|
1938
|
+
end
|
1939
|
+
end
|
1940
|
+
end
|
1941
|
+
threads.each{|x| x.join}
|
1942
|
+
if exceptions.length > 0
|
1943
|
+
raise exceptions.first
|
1944
|
+
end
|
1945
|
+
rescue Interrupt
|
1946
|
+
threads.each{|t| t.terminate}
|
1947
|
+
end
|
1948
|
+
|
1949
|
+
{
|
1950
|
+
'type' => 'inventory-snapshot',
|
1951
|
+
'snapshot' => observation,
|
1952
|
+
'starttime' => startTime.to_f,
|
1953
|
+
'endtime' => Time.now.to_f,
|
1954
|
+
}
|
1955
|
+
end
|
1956
|
+
|
1957
|
+
class VsanObserver
|
1958
|
+
def generate_observer_html(tasksAnalyzer, inventoryAnalyzer,
|
1959
|
+
vcInfo, hosts_props)
|
1960
|
+
opts = {}
|
1961
|
+
refreshString = ""
|
1962
|
+
vcOS = vcInfo['about']['osType']
|
1963
|
+
vcFullName = vcInfo['about']['fullName']
|
1964
|
+
testTitleString = "VC #{vcInfo['hostname']} (#{vcFullName} - #{vcOS})"
|
1965
|
+
skipTasksTab = true
|
1966
|
+
graphUpdateMsg = "XXX"
|
1967
|
+
processed = 0
|
1968
|
+
puts "#{Time.now}: Generating HTML"
|
1969
|
+
inventoryAnalyzerTabs = inventoryAnalyzer.generateHtmlTabs(
|
1970
|
+
true,
|
1971
|
+
:skipLivenessTab => true,
|
1972
|
+
:skipLsomExpert => true,
|
1973
|
+
)
|
1974
|
+
puts "#{Time.now}: Generating HTML (fill in template)"
|
1975
|
+
|
1976
|
+
erbFilename = "#{analyser_lib_dirname}/stats.erb.html"
|
1977
|
+
@erbFileContent = open(erbFilename, 'r').read
|
1978
|
+
|
1979
|
+
template = ERB.new(@erbFileContent)
|
1980
|
+
html = template.result(binding)
|
1981
|
+
puts "#{Time.now}: HTML length: #{html.length}"
|
1982
|
+
|
1983
|
+
html
|
1984
|
+
end
|
1985
|
+
|
1986
|
+
def generate_observer_bundle(bundlePath, tasksAnalyzer, inventoryAnalyzer,
|
1987
|
+
vcInfo, hosts_props)
|
1988
|
+
require 'rubygems/package'
|
1989
|
+
tarFilename = File.join(
|
1990
|
+
bundlePath,
|
1991
|
+
"vsan-observer-#{Time.now.strftime('%Y-%m-%d.%H-%M-%S')}.tar"
|
1992
|
+
)
|
1993
|
+
gzFilename = "%s.gz" % tarFilename
|
1994
|
+
|
1995
|
+
puts "#{Time.now}: Writing out an HTML bundle to #{gzFilename} ..."
|
1996
|
+
tar = open(tarFilename, 'wb+')
|
1997
|
+
Gem::Package::TarWriter.new(tar) do |writer|
|
1998
|
+
inventoryAnalyzer.dump(:tar => writer)
|
1999
|
+
|
2000
|
+
writer.add_file('stats.html', 0644) do |io|
|
2001
|
+
io.write(self.generate_observer_html(
|
2002
|
+
tasksAnalyzer, inventoryAnalyzer, vcInfo,
|
2003
|
+
hosts_props
|
2004
|
+
)
|
2005
|
+
)
|
2006
|
+
end
|
2007
|
+
|
2008
|
+
[
|
2009
|
+
'graphs.html', 'bg_pattern.png', 'vmw_logo_white.png',
|
2010
|
+
'graphs.js', 'observer.css', 'vm-graph.svg'
|
2011
|
+
].each do |filename|
|
2012
|
+
writer.add_file(filename, 0644) do |io|
|
2013
|
+
content = open("#{analyser_lib_dirname}/#{filename}", "r") do |src|
|
2014
|
+
src.read
|
2015
|
+
end
|
2016
|
+
io.write(content)
|
2017
|
+
end
|
2018
|
+
end
|
2019
|
+
end
|
2020
|
+
tar.seek(0)
|
2021
|
+
|
2022
|
+
gz = Zlib::GzipWriter.new(File.new(gzFilename, 'wb'))
|
2023
|
+
while (buffer = tar.read(10000))
|
2024
|
+
gz.write(buffer)
|
2025
|
+
end
|
2026
|
+
tar.close
|
2027
|
+
gz.close
|
2028
|
+
FileUtils.rm(tarFilename)
|
2029
|
+
puts "#{Time.now}: Done writing HTML bundle to #{gzFilename}"
|
2030
|
+
end
|
2031
|
+
end
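
The bundle written by generate_observer_bundle is a plain gzip'ed tar containing stats.html plus the static assets listed above. A minimal sketch of listing its contents; the bundle filename is hypothetical:

require 'zlib'
require 'rubygems/package'

Zlib::GzipReader.open('vsan-observer-2013-01-01.00-00-00.tar.gz') do |gz|
  tar = Gem::Package::TarReader.new(gz)
  tar.each { |entry| puts entry.full_name }   # stats.html, graphs.html, graphs.js, ...
end
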
|
2032
|
+
|
2033
|
+
require 'webrick'
|
2034
|
+
class SimpleGetForm < WEBrick::HTTPServlet::AbstractServlet
|
2035
|
+
def initialize(server, tasksAnalyzer, inventoryAnalyzer,
|
2036
|
+
erbFileContent, vcInfo, hosts_props)
|
2037
|
+
super server
|
2038
|
+
@tasksAnalyzer = tasksAnalyzer
|
2039
|
+
@inventoryAnalyzer = inventoryAnalyzer
|
2040
|
+
@erbFileContent = erbFileContent
|
2041
|
+
@vcInfo = vcInfo
|
2042
|
+
@hosts_props = hosts_props
|
2043
|
+
end
|
2044
|
+
|
2045
|
+
# Process the request, return response
|
2046
|
+
def do_GET(request, response)
|
2047
|
+
staticFiles = [
|
2048
|
+
"/graphs.js", "/graphs.html",
|
2049
|
+
"/observer.css",
|
2050
|
+
"/vmw_logo_white.png",
|
2051
|
+
"/bg_pattern.png",
|
2052
|
+
"/vm-graph.svg"
|
2053
|
+
]
|
2054
|
+
if request.path == "/"
|
2055
|
+
status, content_type, body = mainpage(request)
|
2056
|
+
elsif staticFiles.member?(request.path)
|
2057
|
+
status, content_type, body = servefile(request)
|
2058
|
+
# elsif request.path =~ /^\/css\//
|
2059
|
+
# status, content_type, body = servefile(request)
|
2060
|
+
elsif request.path =~ /^\/jsonstats\/(dom|pcpu|mem|lsom|vm|cmmds|misc)\/(.*).json$/
|
2061
|
+
group = $1
|
2062
|
+
file = $2
|
2063
|
+
opts = {}
|
2064
|
+
if file =~ /^(.*)_thumb$/
|
2065
|
+
file = $1
|
2066
|
+
opts[:points] = 60
|
2067
|
+
end
|
2068
|
+
status, content_type, body = servejson(group, file, opts)
|
2069
|
+
else
|
2070
|
+
super(request, response)
|
2071
|
+
end
|
2072
|
+
|
2073
|
+
response.status = status
|
2074
|
+
response['Content-Type'] = content_type
|
2075
|
+
response.body = body
|
2076
|
+
end
|
2077
|
+
|
2078
|
+
def servefile request
|
2079
|
+
filename = "#{analyser_lib_dirname}#{request.path}"
|
2080
|
+
content = open(filename, 'r').read
|
2081
|
+
if filename =~ /\.js$/
|
2082
|
+
return [200, "text/javascript", content]
|
2083
|
+
end
|
2084
|
+
if filename =~ /\.html$/
|
2085
|
+
return [200, "text/html", content]
|
2086
|
+
end
|
2087
|
+
if filename =~ /\.less$/
|
2088
|
+
return [200, "text/css", content]
|
2089
|
+
end
|
2090
|
+
if filename =~ /\.css$/
|
2091
|
+
return [200, "text/css", content]
|
2092
|
+
end
|
2093
|
+
if filename =~ /\.png$/
|
2094
|
+
return [200, "image/png", content]
|
2095
|
+
end
|
2096
|
+
if filename =~ /\.svg$/
|
2097
|
+
return [200, "image/svg+xml", content]
|
2098
|
+
end
|
2099
|
+
|
2100
|
+
[404, "text/html", "Not found"]
|
2101
|
+
end
|
2102
|
+
|
2103
|
+
def json_dump out
|
2104
|
+
@inventoryAnalyzer.json_dump out
|
2105
|
+
end
|
2106
|
+
|
2107
|
+
def servejson group, file, opts = {}
|
2108
|
+
points = opts[:points]
|
2109
|
+
if group == "misc"
|
2110
|
+
if file =~ /^distribution$/
|
2111
|
+
out = @inventoryAnalyzer.dumpDistribution(:points => points)
|
2112
|
+
return [200, "text/json", json_dump(out)]
|
2113
|
+
end
|
2114
|
+
if file =~ /^crbc-(.*)$/
|
2115
|
+
hostname = $1
|
2116
|
+
out = @inventoryAnalyzer.dumpCbrc(hostname)
|
2117
|
+
return [200, "text/json", json_dump(out)]
|
2118
|
+
end
|
2119
|
+
if file =~ /^pnics-(.*)$/
|
2120
|
+
hostname = $1
|
2121
|
+
out = @inventoryAnalyzer.dumpPnics(hostname)
|
2122
|
+
return [200, "text/json", json_dump(out)]
|
2123
|
+
end
|
2124
|
+
end
|
2125
|
+
if group == "vm"
|
2126
|
+
if file =~ /^list$/
|
2127
|
+
out = @inventoryAnalyzer.dumpVmList()
|
2128
|
+
return [200, "text/json", json_dump(out)]
|
2129
|
+
end
|
2130
|
+
if file =~ /^vscsi-([^-]*)-(.*)$/
|
2131
|
+
disk = $1
|
2132
|
+
vm = $2
|
2133
|
+
out = @inventoryAnalyzer.dumpVscsi(vm, disk, nil, :points => points)
|
2134
|
+
return [200, "text/json", json_dump(out)]
|
2135
|
+
end
|
2136
|
+
end
|
2137
|
+
if group == "cmmds"
|
2138
|
+
if file =~ /^disks$/
|
2139
|
+
out = @inventoryAnalyzer.dumpCmmdsDisks()
|
2140
|
+
return [200, "text/json", json_dump(out)]
|
2141
|
+
end
|
2142
|
+
if file =~ /^cmmds-(.*)$/
|
2143
|
+
uuid = $1
|
2144
|
+
out = @inventoryAnalyzer.dumpCmmdsUuid(uuid)
|
2145
|
+
return [200, "text/json", json_dump(out)]
|
2146
|
+
end
|
2147
|
+
end
|
2148
|
+
if group == "dom"
|
2149
|
+
if file =~ /^domobj-(client|total|compmgr)-(.*)$/
|
2150
|
+
uuid = "#{$1}-#{$2}"
|
2151
|
+
out = @inventoryAnalyzer.dumpDom(uuid, nil, :points => points)
|
2152
|
+
return [200, "text/json", json_dump(out)]
|
2153
|
+
elsif file =~ /^domobj-(.*)$/
|
2154
|
+
uuid = $1
|
2155
|
+
out = @inventoryAnalyzer.dumpDom(uuid, nil, :points => points)
|
2156
|
+
return [200, "text/json", json_dump(out)]
|
2157
|
+
end
|
2158
|
+
end
|
2159
|
+
if group == "pcpu"
|
2160
|
+
if file =~ /^wdt-(.*)-([^-]*)$/
|
2161
|
+
hostname = $1
|
2162
|
+
wdt = $2
|
2163
|
+
out = @inventoryAnalyzer.dumpWdt(hostname, wdt, nil, :points => points)
|
2164
|
+
return [200, "text/json", json_dump(out)]
|
2165
|
+
end
|
2166
|
+
if file =~ /^pcpu-(.*)$/
|
2167
|
+
hostname = $1
|
2168
|
+
out = @inventoryAnalyzer.dumpPcpu(hostname, :points => points)
|
2169
|
+
return [200, "text/json", json_dump(out)]
|
2170
|
+
end
|
2171
|
+
end
|
2172
|
+
if group == "mem"
|
2173
|
+
if file =~ /^heaps-(.*)$/
|
2174
|
+
hostname = $1
|
2175
|
+
out = @inventoryAnalyzer.dumpHeaps(hostname, nil, :points => points)
|
2176
|
+
return [200, "text/json", json_dump(out)]
|
2177
|
+
end
|
2178
|
+
if file =~ /^slabs-(.*)$/
|
2179
|
+
hostname = $1
|
2180
|
+
out = @inventoryAnalyzer.dumpSlabs(hostname, nil, :points => points)
|
2181
|
+
return [200, "text/json", json_dump(out)]
|
2182
|
+
end
|
2183
|
+
if file =~ /^system-(.*)$/
|
2184
|
+
hostname = $1
|
2185
|
+
out = @inventoryAnalyzer.dumpSystemMem(hostname, nil, :points => points)
|
2186
|
+
return [200, "text/json", json_dump(out)]
|
2187
|
+
end
|
2188
|
+
end
|
2189
|
+
if group == "lsom"
|
2190
|
+
if file =~ /^lsomcomp-(.*)$/
|
2191
|
+
uuid = $1
|
2192
|
+
out = @inventoryAnalyzer.dumpLsomComp(uuid, nil, :points => points)
|
2193
|
+
return [200, "text/json", json_dump(out)]
|
2194
|
+
end
|
2195
|
+
if file =~ /^lsomhost-(.*)$/
|
2196
|
+
hostname = $1
|
2197
|
+
out = @inventoryAnalyzer.dumpLsomHost(hostname, nil, :points => points)
|
2198
|
+
return [200, "text/json", json_dump(out)]
|
2199
|
+
end
|
2200
|
+
if file =~ /^ssd-(.*)$/
|
2201
|
+
uuid = $1
|
2202
|
+
out = @inventoryAnalyzer.dumpSsd(uuid, nil, nil, :points => points)
|
2203
|
+
return [200, "text/json", json_dump(out)]
|
2204
|
+
end
|
2205
|
+
if file =~ /^plog-(.*)$/
|
2206
|
+
dev = $1
|
2207
|
+
out = @inventoryAnalyzer.dumpPlog(dev, nil, nil, nil, :points => points)
|
2208
|
+
return [200, "text/json", json_dump(out)]
|
2209
|
+
end
|
2210
|
+
if file =~ /^disk-(.*)$/
|
2211
|
+
dev = $1
|
2212
|
+
out = @inventoryAnalyzer.dumpDisk(dev, nil, nil, :points => points)
|
2213
|
+
return [200, "text/json", json_dump(out)]
|
2214
|
+
end
|
2215
|
+
if file =~ /^physdisk-(.*)-([^-]*)$/
|
2216
|
+
hostname = $1
|
2217
|
+
dev = $2
|
2218
|
+
out = @inventoryAnalyzer.dumpPhysDisk(hostname, dev, nil, :points => points)
|
2219
|
+
return [200, "text/json", json_dump(out)]
|
2220
|
+
end
|
2221
|
+
end
|
2222
|
+
|
2223
|
+
[404, "text/html", "Not found"]
|
2224
|
+
end
|
2225
|
+
|
2226
|
+
def mainpage request
|
2227
|
+
tasksAnalyzer = @tasksAnalyzer
|
2228
|
+
inventoryAnalyzer = @inventoryAnalyzer
|
2229
|
+
|
2230
|
+
html = VsanObserver.new.generate_observer_html(
|
2231
|
+
@tasksAnalyzer, @inventoryAnalyzer, @vcInfo, @hosts_props
|
2232
|
+
)
|
2233
|
+
|
2234
|
+
[200, "text/html", html]
|
2235
|
+
end
|
2236
|
+
end
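
When the observer webserver is running (see the observer command below), the stats behind the graphs are also exposed as JSON under /jsonstats/<group>/<file>.json, as routed in do_GET above. A minimal sketch of pulling one endpoint, assuming the default port 8010 on localhost:

require 'net/http'
require 'json'

uri  = URI("http://localhost:8010/jsonstats/misc/distribution.json")
res  = Net::HTTP.get_response(uri)
data = JSON.parse(res.body) if res.code == "200"
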
|
2237
|
+
|
2238
|
+
opts :observer do
|
2239
|
+
summary "Run observer"
|
2240
|
+
arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
|
2241
|
+
opt :filename, "Output file path", :type => :string
|
2242
|
+
opt :port, "Port on which to run webserver", :type => :int, :default => 8010
|
2243
|
+
opt :run_webserver, "Run a webserver to view live stats", :type => :boolean
|
2244
|
+
opt :force, "Apply force", :type => :boolean
|
2245
|
+
opt :keep_observation_in_memory, "Keep observed stats in memory even when the command ends. Allows resuming later", :type => :boolean
|
2246
|
+
opt :generate_html_bundle, "Generates an HTML bundle after completion. Pass an output directory", :type => :string
|
2247
|
+
opt :interval, "Interval (in sec) at which to collect stats", :type => :int, :default => 60
|
2248
|
+
opt :max_runtime, "Maximum number of hours to collect stats. Caps memory usage.", :type => :int, :default => 2
|
2249
|
+
end
|
2250
|
+
|
2251
|
+
def observer cluster_or_host, opts
|
2252
|
+
conn = cluster_or_host._connection
|
2253
|
+
pc = conn.propertyCollector
|
2254
|
+
host = cluster_or_host
|
2255
|
+
entries = []
|
2256
|
+
hostUuidMap = {}
|
2257
|
+
|
2258
|
+
vcAbout = conn.serviceContent.about
|
2259
|
+
vcInfo = {
|
2260
|
+
'hostname' => conn.host,
|
2261
|
+
'about' => {
|
2262
|
+
'fullName' => vcAbout.fullName,
|
2263
|
+
'osType' => vcAbout.osType,
|
2264
|
+
'apiVersion' => vcAbout.apiVersion,
|
2265
|
+
'apiType' => vcAbout.apiType,
|
2266
|
+
'build' => vcAbout.build,
|
2267
|
+
'instanceUuid' => vcAbout.instanceUuid,
|
2268
|
+
'version' => vcAbout.version,
|
2269
|
+
},
|
2270
|
+
}
|
2271
|
+
|
2272
|
+
if opts[:run_webserver] && !opts[:force]
|
2273
|
+
puts "Running a webserver with unencrypted HTTP on the vCenter machine "
|
2274
|
+
puts "could pose a security risk. This tool is an experimenal debugging "
|
2275
|
+
puts "tool, which has not been audited or tested for its security."
|
2276
|
+
puts "If in doubt, you may want to create a dummy vCenter machine to run"
|
2277
|
+
puts "just this tool, instead of running the tool on your production "
|
2278
|
+
puts "vCenter machine."
|
2279
|
+
puts "In order to run the webserver, please pass --force"
|
2280
|
+
err "Force needs to be applied to run the webserver"
|
2281
|
+
end
|
2282
|
+
|
2283
|
+
require 'rvc/observer/analyzer-lib'
|
2284
|
+
require 'rvc/observer/tasks-analyzer'
|
2285
|
+
require 'rvc/observer/inventory-analyzer'
|
2286
|
+
|
2287
|
+
inventoryAnalyzer = $inventoryAnalyzer
|
2288
|
+
tasksAnalyzer = $tasksAnalyzer
|
2289
|
+
|
2290
|
+
inventoryAnalyzer ||= InventoryAnalyzer.new
|
2291
|
+
tasksAnalyzer ||= TasksAnalyzer.new({})
|
2292
|
+
|
2293
|
+
file = nil
|
2294
|
+
if opts[:filename]
|
2295
|
+
file = open(opts[:filename], 'a')
|
2296
|
+
end
|
2297
|
+
server = nil
|
2298
|
+
webrickThread = nil
|
2299
|
+
hosts_props = nil
|
2300
|
+
|
2301
|
+
_run_with_rev(conn, "dev") do
|
2302
|
+
vsanIntSys = nil
|
2303
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
2304
|
+
cluster = cluster_or_host
|
2305
|
+
hosts = cluster.host
|
2306
|
+
else
|
2307
|
+
hosts = [host]
|
2308
|
+
end
|
2309
|
+
|
2310
|
+
hosts_props = pc.collectMultiple(hosts,
|
2311
|
+
'name',
|
2312
|
+
'runtime.connectionState',
|
2313
|
+
'configManager.vsanSystem',
|
2314
|
+
'configManager.vsanInternalSystem',
|
2315
|
+
'summary.config.product',
|
2316
|
+
'summary.hardware'
|
2317
|
+
)
|
2318
|
+
connected_hosts = hosts_props.select do |k,v|
|
2319
|
+
v['runtime.connectionState'] == 'connected'
|
2320
|
+
end.keys
|
2321
|
+
host = connected_hosts.first
|
2322
|
+
if !host
|
2323
|
+
err "Couldn't find any connected hosts"
|
2324
|
+
end
|
2325
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
2326
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
2327
|
+
[props['name'], props['configManager.vsanSystem']]
|
2328
|
+
end]
|
2329
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
2330
|
+
'config.clusterInfo')
|
2331
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
2332
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
2333
|
+
end]
|
2334
|
+
|
2335
|
+
viewMgr = conn.serviceContent.viewManager
|
2336
|
+
rootFolder = conn.serviceContent.rootFolder
|
2337
|
+
|
2338
|
+
vmView = viewMgr.CreateContainerView(
|
2339
|
+
:container => rootFolder,
|
2340
|
+
:type => ['VirtualMachine'],
|
2341
|
+
:recursive => true
|
2342
|
+
)
|
2343
|
+
|
2344
|
+
if opts[:run_webserver]
|
2345
|
+
erbFilename = "#{analyser_lib_dirname}/stats.erb.html"
|
2346
|
+
erbFileContent = open(erbFilename, 'r').read
|
2347
|
+
|
2348
|
+
server = WEBrick::HTTPServer.new(:Port => opts[:port])
|
2349
|
+
server.mount(
|
2350
|
+
"/", SimpleGetForm,
|
2351
|
+
tasksAnalyzer, inventoryAnalyzer, erbFileContent, vcInfo,
|
2352
|
+
JSON.load(JSON.dump(hosts_props))
|
2353
|
+
)
|
2354
|
+
webrickThread = Thread.new do
|
2355
|
+
server.start
|
2356
|
+
end
|
2357
|
+
end
|
2358
|
+
|
2359
|
+
puts "Press <Ctrl>+<C> to stop observing at any point ..."
|
2360
|
+
puts
|
2361
|
+
|
2362
|
+
startTime = Time.now
|
2363
|
+
begin
|
2364
|
+
while (Time.now - startTime) < opts[:max_runtime] * 3600
|
2365
|
+
puts "#{Time.now}: Collect one inventory snapshot"
|
2366
|
+
t1 = Time.now
|
2367
|
+
begin
|
2368
|
+
observation = _observe_snapshot(
|
2369
|
+
conn, host, connected_hosts, vmView, pc, hosts_props, vsanIntSys
|
2370
|
+
)
|
2371
|
+
observation['snapshot']['vcinfo'] = vcInfo
|
2372
|
+
observation['timestamp'] = Time.now.to_f
|
2373
|
+
if file
|
2374
|
+
file.write(JSON.dump(observation) + "\n")
|
2375
|
+
file.flush()
|
2376
|
+
else
|
2377
|
+
puts "#{Time.now}: Live-Processing inventory snapshot"
|
2378
|
+
tasksAnalyzer.processTrace(observation)
|
2379
|
+
inventoryAnalyzer.processInventorySnapshot(observation)
|
2380
|
+
end
|
2381
|
+
rescue Interrupt
|
2382
|
+
raise
|
2383
|
+
rescue Exception => ex
|
2384
|
+
puts "#{Time.now}: Got exception: #{ex.class}: #{ex.message}"
|
2385
|
+
end
|
2386
|
+
t2 = Time.now
|
2387
|
+
|
2388
|
+
intervalTime = opts[:interval]
|
2389
|
+
time = t2 - t1
|
2390
|
+
sleepTime = intervalTime - time
|
2391
|
+
if sleepTime <= 0.0
|
2392
|
+
puts "#{Time.now}: Collection took %.2fs (> %.2fs), no sleep ..." % [
|
2393
|
+
time, intervalTime
|
2394
|
+
]
|
2395
|
+
else
|
2396
|
+
puts "#{Time.now}: Collection took %.2fs, sleeping for %.2fs" % [
|
2397
|
+
time, sleepTime
|
2398
|
+
]
|
2399
|
+
puts "#{Time.now}: Press <Ctrl>+<C> to stop observing"
|
2400
|
+
sleep(sleepTime)
|
2401
|
+
end
|
2402
|
+
end
|
2403
|
+
rescue Interrupt
|
2404
|
+
puts "#{Time.now}: Execution interrupted, wrapping up ..."
|
2405
|
+
end
|
2406
|
+
#pp res
|
2407
|
+
vmView.DestroyView()
|
2408
|
+
|
2409
|
+
end
|
2410
|
+
|
2411
|
+
if file
|
2412
|
+
file.close()
|
2413
|
+
end
|
2414
|
+
if server
|
2415
|
+
server.shutdown
|
2416
|
+
webrickThread.join
|
2417
|
+
end
|
2418
|
+
if opts[:generate_html_bundle]
|
2419
|
+
begin
|
2420
|
+
VsanObserver.new.generate_observer_bundle(
|
2421
|
+
opts[:generate_html_bundle], tasksAnalyzer, inventoryAnalyzer,
|
2422
|
+
vcInfo, hosts_props
|
2423
|
+
)
|
2424
|
+
rescue Exception => ex
|
2425
|
+
puts "#{Time.now}: Failed to generate HTML bundle: #{ex.class}: #{ex.message}"
|
2426
|
+
end
|
2427
|
+
end
|
2428
|
+
|
2429
|
+
if opts[:keep_observation_in_memory]
|
2430
|
+
$inventoryAnalyzer = inventoryAnalyzer
|
2431
|
+
$tasksAnalyzer = tasksAnalyzer
|
2432
|
+
else
|
2433
|
+
$inventoryAnalyzer = nil
|
2434
|
+
$tasksAnalyzer = nil
|
2435
|
+
end
|
2436
|
+
end
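
When --filename is given, observer appends one JSON document per collection interval instead of processing it live. A minimal sketch of reading such a trace back; the path is hypothetical, the field names match what observer writes above:

require 'json'

File.foreach('/tmp/observer.trace') do |line|
  obs = JSON.parse(line)
  next unless obs['type'] == 'inventory-snapshot'
  vms = obs['snapshot']['inventory']['vms'] || {}
  puts "#{Time.at(obs['timestamp'])}: #{vms.length} VMs, #{obs['snapshot']['vsi'].length} hosts"
end
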
|
2437
|
+
|
2438
|
+
class RbVmomi::VIM
|
2439
|
+
def initialize opts
|
2440
|
+
super opts
|
2441
|
+
end
|
2442
|
+
|
2443
|
+
def spawn_additional_connection
|
2444
|
+
c1 = RbVmomi::VIM.new(@opts)
|
2445
|
+
c1.cookie = self.cookie
|
2446
|
+
c1.rev = self.rev
|
2447
|
+
c1
|
2448
|
+
end
|
2449
|
+
end
|
2450
|
+
|
2451
|
+
RbVmomi::VIM::ManagedObject
|
2452
|
+
class RbVmomi::VIM::ManagedObject
|
2453
|
+
def dup_on_conn conn
|
2454
|
+
self.class.new(conn, self._ref)
|
2455
|
+
end
|
2456
|
+
end
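
The two monkey patches above exist so that per-host queries can run in parallel without serializing on a single HTTP session: each worker thread clones the session and re-binds the managed object to the clone. A minimal sketch of the pattern; conn, hosts and hosts_props are assumed to come from the surrounding commands:

threads = hosts.map do |host|
  Thread.new do
    c1  = conn.spawn_additional_connection                        # same cookie + rev, new socket
    sys = hosts_props[host]['configManager.vsanInternalSystem'].dup_on_conn(c1)
    sys.query_vsan_statistics(:labels => ['lsom-node'])           # runs independently per thread
  end
end
threads.each(&:join)
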
|
2457
|
+
|
2458
|
+
|
2459
|
+
opts :resync_dashboard do
|
2460
|
+
summary "Resyncing dashboard"
|
2461
|
+
arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
|
2462
|
+
opt :refresh_rate, "Refresh interval (in sec). Default is no refresh", :type => :int
|
2463
|
+
end
|
2464
|
+
|
2465
|
+
def resync_dashboard cluster_or_host, opts
|
2466
|
+
conn = cluster_or_host._connection
|
2467
|
+
pc = conn.propertyCollector
|
2468
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
2469
|
+
cluster = cluster_or_host
|
2470
|
+
hosts = cluster.host
|
2471
|
+
else
|
2472
|
+
hosts = [host]
|
2473
|
+
end
|
2474
|
+
|
2475
|
+
_run_with_rev(conn, "dev") do
|
2476
|
+
hosts_props = pc.collectMultiple(hosts,
|
2477
|
+
'name',
|
2478
|
+
'runtime.connectionState',
|
2479
|
+
'configManager.vsanSystem',
|
2480
|
+
'configManager.vsanInternalSystem'
|
2481
|
+
)
|
2482
|
+
connected_hosts = hosts_props.select do |k,v|
|
2483
|
+
v['runtime.connectionState'] == 'connected'
|
2484
|
+
end.keys
|
2485
|
+
host = connected_hosts.first
|
2486
|
+
if !host
|
2487
|
+
err "Couldn't find any connected hosts"
|
2488
|
+
end
|
2489
|
+
hostname = hosts_props[host]['name']
|
2490
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
2491
|
+
|
2492
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
2493
|
+
[props['name'], props['configManager.vsanSystem']]
|
2494
|
+
end]
|
2495
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
2496
|
+
'config.clusterInfo')
|
2497
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
2498
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
2499
|
+
end]
|
2500
|
+
|
2501
|
+
entries = nil
|
2502
|
+
|
2503
|
+
puts "#{Time.now}: Querying all VMs on VSAN ..."
|
2504
|
+
ds_list = host.datastore
|
2505
|
+
ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
|
2506
|
+
ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
|
2507
|
+
ds_name = ds_props[ds]['name']
|
2508
|
+
|
2509
|
+
vms = ds.vm
|
2510
|
+
vmsProps = pc.collectMultiple(vms,
|
2511
|
+
'name', 'runtime.connectionState',
|
2512
|
+
'config.hardware.device', 'summary.config'
|
2513
|
+
)
|
2514
|
+
|
2515
|
+
iter = 0
|
2516
|
+
while (iter == 0) || opts[:refresh_rate]
|
2517
|
+
puts "#{Time.now}: Querying all objects in the system from #{hostname} ..."
|
2518
|
+
|
2519
|
+
result = vsanIntSys.query_syncing_vsan_objects({})
|
2520
|
+
if !result
|
2521
|
+
err "Server failed to gather syncing objects"
|
2522
|
+
end
|
2523
|
+
objects = result['dom_objects']
|
2524
|
+
|
2525
|
+
puts "#{Time.now}: Got all the info, computing table ..."
|
2526
|
+
objects = objects.map do |uuid, objInfo|
|
2527
|
+
obj = objInfo['config']
|
2528
|
+
comps = _components_in_dom_config(obj['content'])
|
2529
|
+
bytesToSyncTotal = 0
|
2530
|
+
recoveryETATotal = 0
|
2531
|
+
comps = comps.select do |comp|
|
2532
|
+
state = comp['attributes']['componentState']
|
2533
|
+
bytesToSync = comp['attributes']['bytesToSync'] || 0
|
2534
|
+
recoveryETA = comp['attributes']['recoveryETA'] || 0
|
2535
|
+
resync = [10, 6].member?(state) && bytesToSync != 0
|
2536
|
+
if resync
|
2537
|
+
bytesToSyncTotal += bytesToSync
|
2538
|
+
recoveryETATotal = [recoveryETA, recoveryETATotal].max
|
2539
|
+
end
|
2540
|
+
resync
|
2541
|
+
end
|
2542
|
+
obj['bytesToSync'] = bytesToSyncTotal
|
2543
|
+
obj['recoveryETA'] = recoveryETATotal
|
2544
|
+
if comps.length > 0
|
2545
|
+
obj
|
2546
|
+
end
|
2547
|
+
end.compact
|
2548
|
+
obj_uuids = objects.map{|x| x['uuid']}
|
2549
|
+
objects = Hash[objects.map{|x| [x['uuid'], x]}]
|
2550
|
+
|
2551
|
+
all_obj_uuids = []
|
2552
|
+
vmToObjMap = {}
|
2553
|
+
vms.each do |vm|
|
2554
|
+
vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
|
2555
|
+
vm_obj_uuids = vm_obj_uuids.select{|x, v| obj_uuids.member?(x)}
|
2556
|
+
vm_obj_uuids = vm_obj_uuids.reject{|x, v| all_obj_uuids.member?(x)}
|
2557
|
+
all_obj_uuids += vm_obj_uuids.keys
|
2558
|
+
if vm_obj_uuids.length > 0
|
2559
|
+
vmToObjMap[vm] = vm_obj_uuids
|
2560
|
+
end
|
2561
|
+
end
|
2562
|
+
|
2563
|
+
t = Terminal::Table.new()
|
2564
|
+
t << [
|
2565
|
+
'VM/Object',
|
2566
|
+
'Syncing objects',
|
2567
|
+
'Bytes to sync',
|
2568
|
+
#'ETA',
|
2569
|
+
]
|
2570
|
+
t.add_separator
|
2571
|
+
bytesToSyncGrandTotal = 0
|
2572
|
+
objGrandTotal = 0
|
2573
|
+
vmToObjMap.each do |vm, vm_obj_uuids|
|
2574
|
+
vmProps = vmsProps[vm]
|
2575
|
+
objs = vm_obj_uuids.keys.map{|x| objects[x]}
|
2576
|
+
bytesToSyncTotal = objs.map{|obj| obj['bytesToSync']}.sum
|
2577
|
+
recoveryETATotal = objs.map{|obj| obj['recoveryETA']}.max
|
2578
|
+
t << [
|
2579
|
+
vmProps['name'],
|
2580
|
+
objs.length,
|
2581
|
+
"", #"%.2f GB" % (bytesToSyncTotal.to_f / 1024**3),
|
2582
|
+
#"%.2f min" % (recoveryETATotal.to_f / 60),
|
2583
|
+
]
|
2584
|
+
objs.each do |obj|
|
2585
|
+
t << [
|
2586
|
+
" %s" % (vm_obj_uuids[obj['uuid']] || obj['uuid']),
|
2587
|
+
'',
|
2588
|
+
"%.2f GB" % (obj['bytesToSync'].to_f / 1024**3),
|
2589
|
+
#"%.2f min" % (obj['recoveryETA'].to_f / 60),
|
2590
|
+
]
|
2591
|
+
end
|
2592
|
+
bytesToSyncGrandTotal += bytesToSyncTotal
|
2593
|
+
objGrandTotal += objs.length
|
2594
|
+
end
|
2595
|
+
t.add_separator
|
2596
|
+
t << [
|
2597
|
+
'Total',
|
2598
|
+
objGrandTotal,
|
2599
|
+
"%.2f GB" % (bytesToSyncGrandTotal.to_f / 1024**3),
|
2600
|
+
#"%.2f min" % (recoveryETATotal.to_f / 60),
|
2601
|
+
]
|
2602
|
+
puts t
|
2603
|
+
iter += 1
|
2604
|
+
|
2605
|
+
if opts[:refresh_rate]
|
2606
|
+
sleep opts[:refresh_rate]
|
2607
|
+
end
|
2608
|
+
end
|
2609
|
+
end
|
2610
|
+
end
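
For reference, a component above counts as "resyncing" when its componentState is 6 or 10 and it still reports bytes left to sync. A minimal standalone restatement of that test:

RESYNC_STATES = [6, 10]

def resyncing? comp
  attrs = comp['attributes']
  RESYNC_STATES.member?(attrs['componentState']) && (attrs['bytesToSync'] || 0) != 0
end

resyncing?({'attributes' => {'componentState' => 10, 'bytesToSync' => 4096}})  # => true
resyncing?({'attributes' => {'componentState' => 5,  'bytesToSync' => 4096}})  # => false
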
|
2611
|
+
|
2612
|
+
opts :vm_perf_stats do
|
2613
|
+
summary "VM perf stats"
|
2614
|
+
arg :vms, nil, :lookup => [VIM::VirtualMachine], :multi => true
|
2615
|
+
opt :interval, "Time interval to compute average over", :type => :int, :default => 20
|
2616
|
+
opt :show_objects, "Show objects that are part of VM", :type => :boolean
|
2617
|
+
end
|
2618
|
+
|
2619
|
+
def vm_perf_stats vms, opts
|
2620
|
+
conn = vms.first._connection
|
2621
|
+
pc = conn.propertyCollector
|
2622
|
+
cluster = vms.first.runtime.host.parent
|
2623
|
+
hosts = cluster.host
|
2624
|
+
|
2625
|
+
_run_with_rev(conn, "dev") do
|
2626
|
+
hosts_props = pc.collectMultiple(hosts,
|
2627
|
+
'name',
|
2628
|
+
'runtime.connectionState',
|
2629
|
+
'configManager.vsanSystem',
|
2630
|
+
'configManager.vsanInternalSystem'
|
2631
|
+
)
|
2632
|
+
connected_hosts = hosts_props.select do |k,v|
|
2633
|
+
v['runtime.connectionState'] == 'connected'
|
2634
|
+
end.keys
|
2635
|
+
host = connected_hosts.first
|
2636
|
+
if !host
|
2637
|
+
err "Couldn't find any connected hosts"
|
2638
|
+
end
|
2639
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
2640
|
+
|
2641
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
2642
|
+
[props['name'], props['configManager.vsanSystem']]
|
2643
|
+
end]
|
2644
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
2645
|
+
'config.clusterInfo')
|
2646
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
2647
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
2648
|
+
end]
|
2649
|
+
hostNameToMoMap = Hash[hosts_props.map do |host, props|
|
2650
|
+
[props['name'], host]
|
2651
|
+
end]
|
2652
|
+
|
2653
|
+
entries = nil
|
2654
|
+
|
2655
|
+
puts "#{Time.now}: Querying info about VMs ..."
|
2656
|
+
vmsProps = pc.collectMultiple(vms,
|
2657
|
+
'name', 'runtime.connectionState',
|
2658
|
+
'config.hardware.device', 'summary.config'
|
2659
|
+
)
|
2660
|
+
|
2661
|
+
obj_uuids = []
|
2662
|
+
vms.each do |vm|
|
2663
|
+
obj_uuids += _get_vm_obj_uuids(vm, vmsProps).keys
|
2664
|
+
end
|
2665
|
+
|
2666
|
+
puts "#{Time.now}: Querying VSAN objects used by the VMs ..."
|
2667
|
+
|
2668
|
+
objects = vsanIntSys.query_cmmds(obj_uuids.map do |uuid|
|
2669
|
+
{:type => 'CONFIG_STATUS', :uuid => uuid}
|
2670
|
+
end)
|
2671
|
+
if !objects
|
2672
|
+
err "Server failed to gather CONFIG_STATUS entries"
|
2673
|
+
end
|
2674
|
+
|
2675
|
+
objByHost = {}
|
2676
|
+
objects.each do |entry|
|
2677
|
+
host = hostUuidMap[entry['owner']]
|
2678
|
+
if !host
|
2679
|
+
next
|
2680
|
+
end
|
2681
|
+
host = hostNameToMoMap[host]
|
2682
|
+
if !host
|
2683
|
+
next
|
2684
|
+
end
|
2685
|
+
objByHost[host] ||= []
|
2686
|
+
objByHost[host] << entry['uuid']
|
2687
|
+
end
|
2688
|
+
|
2689
|
+
def fetchStats(objByHost, hosts_props)
|
2690
|
+
stats = {}
|
2691
|
+
objByHost.each do |host, obj_uuids|
|
2692
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
2693
|
+
|
2694
|
+
res = vsanIntSys.QueryVsanStatistics(:labels => obj_uuids.map do |uuid|
|
2695
|
+
"dom-object:#{uuid}"
|
2696
|
+
end)
|
2697
|
+
res = JSON.load(res)
|
2698
|
+
|
2699
|
+
obj_uuids.each do |uuid|
|
2700
|
+
stats[uuid] = res['dom.owners.selected.stats'][uuid]
|
2701
|
+
if stats[uuid]
|
2702
|
+
stats[uuid]['ts'] = res['dom.owners.selected.stats-taken']
|
2703
|
+
end
|
2704
|
+
end
|
2705
|
+
end
|
2706
|
+
stats
|
2707
|
+
end
|
2708
|
+
|
2709
|
+
puts "#{Time.now}: Fetching stats counters once ..."
|
2710
|
+
stats1 = fetchStats(objByHost, hosts_props)
|
2711
|
+
sleepTime = opts[:interval]
|
2712
|
+
puts "#{Time.now}: Sleeping for #{sleepTime} seconds ..."
|
2713
|
+
sleep(sleepTime)
|
2714
|
+
puts "#{Time.now}: Fetching stats counters again to compute averages ..."
|
2715
|
+
stats2 = fetchStats(objByHost, hosts_props)
|
2716
|
+
|
2717
|
+
puts "#{Time.now}: Got all data, computing table"
|
2718
|
+
stats = {}
|
2719
|
+
objects.each do |entry|
|
2720
|
+
uuid = entry['uuid']
|
2721
|
+
deltas = Hash[stats2[uuid].keys.map do |key|
|
2722
|
+
[key, stats2[uuid][key] - stats1[uuid][key]]
|
2723
|
+
end]
|
2724
|
+
deltaT = deltas['ts']
|
2725
|
+
stats[uuid] = deltas.merge({
|
2726
|
+
:readIops => deltas['readCount'] / deltaT,
|
2727
|
+
:writeIops => deltas['writeCount'] / deltaT,
|
2728
|
+
:readTput => deltas['readBytes'] / deltaT,
|
2729
|
+
:writeTput => deltas['writeBytes'] / deltaT,
|
2730
|
+
:readLatency => 0,
|
2731
|
+
:writeLatency => 0,
|
2732
|
+
})
|
2733
|
+
if deltas['readCount'] > 0
|
2734
|
+
stats[uuid][:readLatency] = deltas['readLatencySumUs'] / deltas['readCount']
|
2735
|
+
end
|
2736
|
+
if deltas['writeCount'] > 0
|
2737
|
+
stats[uuid][:writeLatency] = deltas['writeLatencySumUs'] / deltas['writeCount']
|
2738
|
+
end
|
2739
|
+
end
|
2740
|
+
|
2741
|
+
t = Terminal::Table.new()
|
2742
|
+
t << [
|
2743
|
+
'VM/Object',
|
2744
|
+
'IOPS',
|
2745
|
+
'Tput (KB/s)',
|
2746
|
+
'Latency (ms)'
|
2747
|
+
]
|
2748
|
+
t.add_separator
|
2749
|
+
vms.each do |vm|
|
2750
|
+
vmProps = vmsProps[vm]
|
2751
|
+
vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
|
2752
|
+
|
2753
|
+
if !opts[:show_objects]
|
2754
|
+
vmStats = {}
|
2755
|
+
vmStats[:readLatency] ||= []
|
2756
|
+
vmStats[:writeLatency] ||= []
|
2757
|
+
[:readIops, :writeIops, :readTput, :writeTput].each do |key|
|
2758
|
+
vmStats[key] ||= 0.0
|
2759
|
+
end
|
2760
|
+
|
2761
|
+
vm_obj_uuids.each do |uuid, path|
|
2762
|
+
path = path.gsub(/^\[([^\]]*)\] /, "")
|
2763
|
+
objStats = stats[uuid]
|
2764
|
+
if !objStats
|
2765
|
+
next
|
2766
|
+
end
|
2767
|
+
[:readIops, :writeIops, :readTput, :writeTput].each do |key|
|
2768
|
+
vmStats[key] += (objStats[key] || 0.0)
|
2769
|
+
end
|
2770
|
+
vmStats[:readLatency] << (objStats[:readLatency] * objStats[:readIops])
|
2771
|
+
vmStats[:writeLatency] << (objStats[:writeLatency] * objStats[:writeIops])
|
2772
|
+
end
|
2773
|
+
if vmStats[:readLatency].length > 0 && vmStats[:readIops] > 0.0
|
2774
|
+
vmStats[:readLatency] = vmStats[:readLatency].sum / vmStats[:readIops]
|
2775
|
+
else
|
2776
|
+
vmStats[:readLatency] = 0.0
|
2777
|
+
end
|
2778
|
+
if vmStats[:writeLatency].length > 0 && vmStats[:writeIops] > 0.0
|
2779
|
+
vmStats[:writeLatency] = vmStats[:writeLatency].sum / vmStats[:writeIops]
|
2780
|
+
else
|
2781
|
+
vmStats[:writeLatency] = 0.0
|
2782
|
+
end
|
2783
|
+
|
2784
|
+
t << [
|
2785
|
+
vmProps['name'],
|
2786
|
+
[
|
2787
|
+
"%.1fr" % [vmStats[:readIops]],
|
2788
|
+
"%.1fw" % [vmStats[:writeIops]],
|
2789
|
+
].join("/"),
|
2790
|
+
[
|
2791
|
+
"%.1fr" % [vmStats[:readTput] / 1024.0],
|
2792
|
+
"%.1fw" % [vmStats[:writeTput] / 1024.0],
|
2793
|
+
].join("/"),
|
2794
|
+
[
|
2795
|
+
"%.1fr" % [vmStats[:readLatency] / 1000.0],
|
2796
|
+
"%.1fw" % [vmStats[:writeLatency] / 1000.0],
|
2797
|
+
].join("/"),
|
2798
|
+
]
|
2799
|
+
else
|
2800
|
+
t << [
|
2801
|
+
vmProps['name'],
|
2802
|
+
"",
|
2803
|
+
"",
|
2804
|
+
"",
|
2805
|
+
]
|
2806
|
+
vm_obj_uuids.each do |uuid, path|
|
2807
|
+
path = path.gsub(/^\[([^\]]*)\] /, "")
|
2808
|
+
objStats = stats[uuid]
|
2809
|
+
if !objStats
|
2810
|
+
t << [
|
2811
|
+
" %s" % (path || uuid),
|
2812
|
+
"N/A","N/A","N/A",
|
2813
|
+
]
|
2814
|
+
next
|
2815
|
+
end
|
2816
|
+
t << [
|
2817
|
+
" %s" % (path || uuid),
|
2818
|
+
[
|
2819
|
+
"%.1fr" % [objStats[:readIops]],
|
2820
|
+
"%.1fw" % [objStats[:writeIops]],
|
2821
|
+
].join("/"),
|
2822
|
+
[
|
2823
|
+
"%.1fr" % [objStats[:readTput] / 1024.0],
|
2824
|
+
"%.1fw" % [objStats[:writeTput] / 1024.0],
|
2825
|
+
].join("/"),
|
2826
|
+
[
|
2827
|
+
"%.1fr" % [objStats[:readLatency] / 1000.0],
|
2828
|
+
"%.1fw" % [objStats[:writeLatency] / 1000.0],
|
2829
|
+
].join("/"),
|
2830
|
+
]
|
2831
|
+
end
|
2832
|
+
end
|
2833
|
+
end
|
2834
|
+
# t.add_separator
|
2835
|
+
# t << [
|
2836
|
+
# 'Total',
|
2837
|
+
# objGrandTotal,
|
2838
|
+
# "%.2f GB" % (bytesToSyncGrandTotal.to_f / 1024**3),
|
2839
|
+
# #"%.2f min" % (recoveryETATotal.to_f / 60),
|
2840
|
+
# ]
|
2841
|
+
puts t
|
2842
|
+
end
|
2843
|
+
end
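
The averaging above is plain counter math: take two snapshots of the per-object counters, divide the deltas by the elapsed 'ts' delta for IOPS and throughput, and divide the latency-sum delta by the count delta for latency. A minimal sketch with the same field names (read side only):

def read_rates s1, s2
  dt     = (s2['ts'] - s1['ts']).to_f
  dcount = s2['readCount'] - s1['readCount']
  {
    :readIops    => dcount / dt,
    :readTput    => (s2['readBytes'] - s1['readBytes']) / dt,
    :readLatency => dcount > 0 ? (s2['readLatencySumUs'] - s1['readLatencySumUs']) / dcount : 0,
  }
end
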
|
2844
|
+
|
2845
|
+
|
2846
|
+
opts :enter_maintenance_mode do
|
2847
|
+
summary "Put hosts into maintenance mode"
|
2848
|
+
arg :host, nil, :lookup => VIM::HostSystem, :multi => true
|
2849
|
+
opt :timeout, "Timeout", :default => 0
|
2850
|
+
opt :evacuate_powered_off_vms, "Evacuate powered off vms", :type => :boolean
|
2851
|
+
opt :no_wait, "Don't wait for Task to complete", :type => :boolean
|
2852
|
+
opt :vsan_mode, "Actions to take for VSAN backed storage", :type => :string, :default => "ensureObjectAccessibility"
|
2853
|
+
end
|
2854
|
+
|
2855
|
+
def enter_maintenance_mode hosts, opts
|
2856
|
+
vsanChoices = ['ensureObjectAccessibility', 'evacuateAllData', 'noAction']
|
2857
|
+
if !vsanChoices.member?(opts[:vsan_mode])
|
2858
|
+
err "VSAN mode can only be one of these: #{vsanChoices}"
|
2859
|
+
end
|
2860
|
+
tasks = []
|
2861
|
+
conn = hosts[0]._connection
|
2862
|
+
_run_with_rev(conn, "dev") do
|
2863
|
+
tasks = hosts.map do |host|
|
2864
|
+
host.EnterMaintenanceMode_Task(
|
2865
|
+
:timeout => opts[:timeout],
|
2866
|
+
:evacuatePoweredOffVms => opts[:evacuate_powered_off_vms],
|
2867
|
+
:maintenanceSpec => {
|
2868
|
+
:vsanMode => {
|
2869
|
+
:objectAction => opts[:vsan_mode],
|
2870
|
+
}
|
2871
|
+
}
|
2872
|
+
)
|
2873
|
+
end
|
2874
|
+
end
|
2875
|
+
|
2876
|
+
if opts[:no_wait]
|
2877
|
+
# Do nothing
|
2878
|
+
else
|
2879
|
+
results = progress(tasks)
|
2880
|
+
|
2881
|
+
results.each do |task, error|
|
2882
|
+
if error.is_a?(VIM::LocalizedMethodFault)
|
2883
|
+
state, entityName, name = task.collect('info.state',
|
2884
|
+
'info.entityName',
|
2885
|
+
'info.name')
|
2886
|
+
puts "#{name} #{entityName}: #{error.fault.class.wsdl_name}: #{error.localizedMessage}"
|
2887
|
+
error.fault.faultMessage.each do |msg|
|
2888
|
+
puts " #{msg.key}: #{msg.message}"
|
2889
|
+
end
|
2890
|
+
|
2891
|
+
end
|
2892
|
+
end
|
2893
|
+
end
|
2894
|
+
end
|
2895
|
+
|
2896
|
+
RbVmomi::VIM::HostVsanInternalSystem
|
2897
|
+
class RbVmomi::VIM::HostVsanInternalSystem
|
2898
|
+
def _parseJson json
|
2899
|
+
if json == "BAD"
|
2900
|
+
return nil
|
2901
|
+
end
|
2902
|
+
begin
|
2903
|
+
json = JSON.load(json)
|
2904
|
+
rescue
|
2905
|
+
nil
|
2906
|
+
end
|
2907
|
+
end
|
2908
|
+
|
2909
|
+
def query_cmmds queries, opts = {}
|
2910
|
+
useGzip = (opts[:gzip]) && $vsanUseGzipApis
|
2911
|
+
if useGzip
|
2912
|
+
queries = queries + [{:type => "GZIP"}]
|
2913
|
+
end
|
2914
|
+
json = self.QueryCmmds(:queries => queries)
|
2915
|
+
if useGzip
|
2916
|
+
gzip = Base64.decode64(json)
|
2917
|
+
gz = Zlib::GzipReader.new(StringIO.new(gzip))
|
2918
|
+
json = gz.read
|
2919
|
+
end
|
2920
|
+
objects = _parseJson json
|
2921
|
+
if !objects
|
2922
|
+
raise "Server failed to gather CMMDS entries: JSON = '#{json}'"
|
2923
|
+
# raise "Server failed to gather CMMDS entries: JSON = #{json.length}"
|
2924
|
+
end
|
2925
|
+
objects = objects['result']
|
2926
|
+
objects
|
2927
|
+
end
|
2928
|
+
|
2929
|
+
def query_vsan_objects(opts)
|
2930
|
+
json = self.QueryVsanObjects(opts)
|
2931
|
+
objects = _parseJson json
|
2932
|
+
if !objects
|
2933
|
+
raise "Server failed to gather VSAN object info for #{obj_uuids}: JSON = '#{json}'"
|
2934
|
+
end
|
2935
|
+
objects
|
2936
|
+
end
|
2937
|
+
|
2938
|
+
def query_syncing_vsan_objects(opts = {})
|
2939
|
+
json = self.QuerySyncingVsanObjects(opts)
|
2940
|
+
objects = _parseJson json
|
2941
|
+
if !objects
|
2942
|
+
raise "Server failed to query syncing objects: JSON = '#{json}'"
|
2943
|
+
end
|
2944
|
+
objects
|
2945
|
+
end
|
2946
|
+
|
2947
|
+
def query_vsan_statistics(opts = {})
|
2948
|
+
json = self.QueryVsanStatistics(opts)
|
2949
|
+
objects = _parseJson json
|
2950
|
+
if !objects
|
2951
|
+
raise "Server failed to query vsan stats: JSON = '#{json}'"
|
2952
|
+
end
|
2953
|
+
objects
|
2954
|
+
end
|
2955
|
+
|
2956
|
+
def query_physical_vsan_disks(opts)
|
2957
|
+
json = self.QueryPhysicalVsanDisks(opts)
|
2958
|
+
objects = _parseJson json
|
2959
|
+
if !objects
|
2960
|
+
raise "Server failed to query vsan disks: JSON = '#{json}'"
|
2961
|
+
end
|
2962
|
+
objects
|
2963
|
+
end
|
2964
|
+
|
2965
|
+
def query_objects_on_physical_vsan_disk(opts)
|
2966
|
+
json = self.QueryObjectsOnPhysicalVsanDisk(opts)
|
2967
|
+
objects = _parseJson json
|
2968
|
+
if !objects
|
2969
|
+
raise "Server failed to query objects on vsan disks: JSON = '#{json}'"
|
2970
|
+
end
|
2971
|
+
objects
|
2972
|
+
end
|
2973
|
+
|
2974
|
+
|
2975
|
+
end
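
The wrappers above all follow the same shape: call the raw API, optionally un-gzip, parse the JSON, and raise on the "BAD" sentinel. A minimal usage sketch, assuming a vsanIntSys handle from one of the surrounding commands; the uuid is hypothetical and :gzip only has an effect when $vsanUseGzipApis is set:

uuid    = '00000000-0000-0000-0000-000000000000'   # hypothetical object UUID
entries = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS', :uuid => uuid}], :gzip => true)
entries.each { |e| puts "#{e['uuid']} owned by #{e['owner']}" }
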
|
2976
|
+
|
2977
|
+
def _parseJson json
|
2978
|
+
if json == "BAD"
|
2979
|
+
return nil
|
2980
|
+
end
|
2981
|
+
begin
|
2982
|
+
json = JSON.load(json)
|
2983
|
+
rescue
|
2984
|
+
nil
|
2985
|
+
end
|
2986
|
+
end
|
2987
|
+
|
2988
|
+
def _assessAvailabilityByStatus state
|
2989
|
+
mask = {
|
2990
|
+
'DATA_AVAILABLE' => (1 << 0),
|
2991
|
+
'QUORUM' => (1 << 1),
|
2992
|
+
'PERF_COMPLIANT' => (1 << 2),
|
2993
|
+
'INCOMPLETE' => (1 << 3),
|
2994
|
+
}
|
2995
|
+
Hash[mask.map{|k,v| [k, (state & v) != 0]}]
|
2996
|
+
end
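
For example (state value chosen for illustration):

_assessAvailabilityByStatus(0b0011)
# => {"DATA_AVAILABLE"=>true, "QUORUM"=>true, "PERF_COMPLIANT"=>false, "INCOMPLETE"=>false}
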
|
2997
|
+
|
2998
|
+
opts :lldpnetmap do
|
2999
|
+
summary "Gather LLDP mapping information from a set of hosts"
|
3000
|
+
arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
|
3001
|
+
end
|
3002
|
+
|
3003
|
+
def lldpnetmap hosts_and_clusters, opts = {}
|
3004
|
+
conn = hosts_and_clusters.first._connection
|
3005
|
+
hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
|
3006
|
+
clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
|
3007
|
+
pc = conn.propertyCollector
|
3008
|
+
cluster_hosts = pc.collectMultiple(clusters, 'host')
|
3009
|
+
cluster_hosts.each do |cluster, props|
|
3010
|
+
hosts += props['host']
|
3011
|
+
end
|
3012
|
+
hosts = hosts.uniq
|
3013
|
+
_run_with_rev(conn, "dev") do
|
3014
|
+
hosts_props = pc.collectMultiple(hosts,
|
3015
|
+
'name',
|
3016
|
+
'runtime.connectionState',
|
3017
|
+
'configManager.vsanSystem',
|
3018
|
+
'configManager.vsanInternalSystem'
|
3019
|
+
)
|
3020
|
+
|
3021
|
+
hosts = hosts_props.select do |k,v|
|
3022
|
+
v['runtime.connectionState'] == 'connected'
|
3023
|
+
end.keys
|
3024
|
+
if hosts.length == 0
|
3025
|
+
err "Couldn't find any connected hosts"
|
3026
|
+
end
|
3027
|
+
|
3028
|
+
hosts_vsansys = Hash[hosts_props.map{|k,v| [v['configManager.vsanSystem'], k]}]
|
3029
|
+
node_uuids = pc.collectMultiple(hosts_vsansys.keys, 'config.clusterInfo.nodeUuid')
|
3030
|
+
node_uuids = Hash[node_uuids.map do |k, v|
|
3031
|
+
[v['config.clusterInfo.nodeUuid'], hosts_vsansys[k]]
|
3032
|
+
end]
|
3033
|
+
|
3034
|
+
puts "#{Time.now}: This operation will take 30-60 seconds ..."
|
3035
|
+
hosts_props.map do |host, props|
|
3036
|
+
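# Query the 'lldpnetmap' statistics label from every host in parallel,
# each on its own spawned connection; it maps LLDP-discovered switches
# to the physical NICs that see them.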
Thread.new do
|
3037
|
+
begin
|
3038
|
+
vsanIntSys = props['configManager.vsanInternalSystem']
|
3039
|
+
c1 = conn.spawn_additional_connection
|
3040
|
+
vsanIntSys = vsanIntSys.dup_on_conn(c1)
|
3041
|
+
res = vsanIntSys.QueryVsanStatistics(:labels => ['lldpnetmap'])
|
3042
|
+
hosts_props[host]['lldpnetmap'] = JSON.parse(res)['lldpnetmap']
|
3043
|
+
rescue Exception => ex
|
3044
|
+
puts "Failed to gather lldpnetmap from #{props['name']}: #{ex.class}: #{ex.message}"
|
3045
|
+
end
|
3046
|
+
end
|
3047
|
+
end.each{|t| t.join}
|
3048
|
+
|
3049
|
+
t = Terminal::Table.new()
|
3050
|
+
t << ['Host', 'LLDP info']
|
3051
|
+
t.add_separator
|
3052
|
+
hosts_props.each do |host, props|
|
3053
|
+
t << [
|
3054
|
+
props['name'],
|
3055
|
+
props['lldpnetmap'].map do |switch, pnics|
|
3056
|
+
"#{switch}: #{pnics.join(',')}"
|
3057
|
+
end.join("\n")
|
3058
|
+
]
|
3059
|
+
end
|
3060
|
+
puts t
|
3061
|
+
end
|
3062
|
+
end
|
3063
|
+
|
3064
|
+
opts :check_limits do
|
3065
|
+
summary "Gathers (and checks) counters against limits"
|
3066
|
+
arg :hosts_and_clusters, nil, :lookup => [VIM::HostSystem, VIM::ClusterComputeResource], :multi => true
|
3067
|
+
end
|
3068
|
+
|
3069
|
+
def check_limits hosts_and_clusters, opts = {}
|
3070
|
+
conn = hosts_and_clusters.first._connection
|
3071
|
+
hosts = hosts_and_clusters.select{|x| x.is_a?(VIM::HostSystem)}
|
3072
|
+
clusters = hosts_and_clusters.select{|x| x.is_a?(VIM::ClusterComputeResource)}
|
3073
|
+
pc = conn.propertyCollector
|
3074
|
+
cluster_hosts = pc.collectMultiple(clusters, 'host')
|
3075
|
+
cluster_hosts.each do |cluster, props|
|
3076
|
+
hosts += props['host']
|
3077
|
+
end
|
3078
|
+
hosts = hosts.uniq
|
3079
|
+
_run_with_rev(conn, "dev") do
|
3080
|
+
hosts_props = pc.collectMultiple(hosts,
|
3081
|
+
'name',
|
3082
|
+
'runtime.connectionState',
|
3083
|
+
'configManager.vsanSystem',
|
3084
|
+
'configManager.vsanInternalSystem'
|
3085
|
+
)
|
3086
|
+
|
3087
|
+
hosts = hosts_props.select do |k,v|
|
3088
|
+
v['runtime.connectionState'] == 'connected'
|
3089
|
+
end.keys
|
3090
|
+
if hosts.length == 0
|
3091
|
+
err "Couldn't find any connected hosts"
|
3092
|
+
end
|
3093
|
+
|
3094
|
+
lock = Mutex.new
|
3095
|
+
all_disks = {}
|
3096
|
+
puts "#{Time.now}: Gathering stats from all hosts ..."
|
3097
|
+
hosts_props.map do |host, props|
|
3098
|
+
if props['runtime.connectionState'] != 'connected'
|
3099
|
+
next
|
3100
|
+
end
|
3101
|
+
hosts_props[host]['profiling'] = {}
|
3102
|
+
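# Gather the stats from each host in parallel on a dedicated connection;
# the per-call timeouts keep one slow host from stalling the whole report.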
Thread.new do
|
3103
|
+
vsanIntSys = props['configManager.vsanInternalSystem']
|
3104
|
+
c1 = conn.spawn_additional_connection
|
3105
|
+
vsanIntSys2 = vsanIntSys.dup_on_conn(c1)
|
3106
|
+
begin
|
3107
|
+
timeout(45) do
|
3108
|
+
t1 = Time.now
|
3109
|
+
res = vsanIntSys2.query_vsan_statistics(
|
3110
|
+
:labels => ['rdtglobal', 'lsom-node']
|
3111
|
+
)
|
3112
|
+
t2 = Time.now
|
3113
|
+
hosts_props[host]['profiling']['rdtglobal'] = t2 - t1
|
3114
|
+
hosts_props[host]['rdtglobal'] = res['rdt.globalinfo']
|
3115
|
+
hosts_props[host]['lsom.node'] = res['lsom.node']
|
3116
|
+
end
|
3117
|
+
rescue Exception => ex
|
3118
|
+
puts "Failed to gather RDT info from #{props['name']}: #{ex.class}: #{ex.message}"
|
3119
|
+
end
|
3120
|
+
|
3121
|
+
begin
|
3122
|
+
timeout(60) do
|
3123
|
+
t1 = Time.now
|
3124
|
+
res = vsanIntSys2.QueryVsanStatistics(
|
3125
|
+
:labels => ['dom', 'dom-objects-counts']
|
3126
|
+
)
|
3127
|
+
res = JSON.parse(res)
|
3128
|
+
if res && !res['dom.owners.count']
|
3129
|
+
# XXX: Remove me later
|
3130
|
+
# This code is a fall back path in case we are dealing
|
3131
|
+
# with an old ESX host (before Nov13 2013). As we only
|
3132
|
+
# need to be compatible with VSAN GA, we can remove this
|
3133
|
+
# code once everyone is upgraded.
|
3134
|
+
res = vsanIntSys2.QueryVsanStatistics(
|
3135
|
+
:labels => ['dom', 'dom-objects']
|
3136
|
+
)
|
3137
|
+
res = JSON.parse(res)
|
3138
|
+
numOwners = res['dom.owners.stats'].keys.length
|
3139
|
+
else
|
3140
|
+
numOwners = res['dom.owners.count'].keys.length
|
3141
|
+
end
|
3142
|
+
t2 = Time.now
|
3143
|
+
hosts_props[host]['profiling']['domstats'] = t2 - t1
|
3144
|
+
hosts_props[host]['dom'] = {
|
3145
|
+
'numClients'=> res['dom.clients'].keys.length,
|
3146
|
+
'numOwners'=> numOwners,
|
3147
|
+
}
|
3148
|
+
end
|
3149
|
+
rescue Exception => ex
|
3150
|
+
puts "Failed to gather DOM info from #{props['name']}: #{ex.class}: #{ex.message}"
|
3151
|
+
end
|
3152
|
+
|
3153
|
+
begin
|
3154
|
+
timeout(45) do
|
3155
|
+
t1 = Time.now
|
3156
|
+
disks = vsanIntSys2.QueryPhysicalVsanDisks(:props => [
|
3157
|
+
'lsom_objects_count',
|
3158
|
+
'uuid',
|
3159
|
+
'isSsd',
|
3160
|
+
'capacity',
|
3161
|
+
'capacityUsed',
|
3162
|
+
])
|
3163
|
+
t2 = Time.now
|
3164
|
+
hosts_props[host]['profiling']['physdisk'] = t2 - t1
|
3165
|
+
disks = JSON.load(disks)
|
3166
|
+
|
3167
|
+
# Getting the data from all hosts is kind of overkill, but
|
3168
|
+
# this way we deal with partitions and get info on all disks
|
3169
|
+
# everywhere. But we have duplicates, so need to merge.
|
3170
|
+
lock.synchronize do
|
3171
|
+
all_disks.merge!(disks)
|
3172
|
+
end
|
3173
|
+
end
|
3174
|
+
rescue Exception => ex
|
3175
|
+
puts "Failed to gather disks info from #{props['name']}: #{ex.class}: #{ex.message}"
|
3176
|
+
end
|
3177
|
+
end
|
3178
|
+
end.compact.each{|t| t.join}
|
3179
|
+
|
3180
|
+
# hosts_props.each do |host, props|
|
3181
|
+
# puts "#{Time.now}: Host #{props['name']}: #{props['profiling']}"
|
3182
|
+
# end
|
3183
|
+
|
3184
|
+
puts "#{Time.now}: Gathering disks info ..."
|
3185
|
+
disks = all_disks
|
3186
|
+
vsan_disks_info = {}
|
3187
|
+
vsan_disks_info.merge!(
|
3188
|
+
_vsan_host_disks_info(Hash[hosts.map{|h| [h, hosts_props[h]['name']]}])
|
3189
|
+
)
|
3190
|
+
disks.each do |k, v|
|
3191
|
+
v['esxcli'] = vsan_disks_info[v['uuid']]
|
3192
|
+
if v['esxcli']
|
3193
|
+
v['host'] = v['esxcli']._get_property :host
|
3194
|
+
|
3195
|
+
hosts_props[v['host']]['components'] ||= 0
|
3196
|
+
hosts_props[v['host']]['components'] += v['lsom_objects_count']
|
3197
|
+
hosts_props[v['host']]['disks'] ||= []
|
3198
|
+
hosts_props[v['host']]['disks'] << v
|
3199
|
+
end
|
3200
|
+
end
|
3201
|
+
|
3202
|
+
t = Terminal::Table.new()
|
3203
|
+
t << ['Host', 'RDT', 'Disks']
|
3204
|
+
t.add_separator
|
3205
|
+
hosts_props.each do |host, props|
|
3206
|
+
rdt = props['rdtglobal'] || {}
|
3207
|
+
lsomnode = props['lsom.node'] || {}
|
3208
|
+
dom = props['dom'] || {}
|
3209
|
+
t << [
|
3210
|
+
props['name'],
|
3211
|
+
[
|
3212
|
+
"Assocs: #{rdt['assocCount']}/#{rdt['maxAssocCount']}",
|
3213
|
+
"Sockets: #{rdt['socketCount']}/#{rdt['maxSocketCount']}",
|
3214
|
+
"Clients: #{dom['numClients'] || 'N/A'}",
|
3215
|
+
"Owners: #{dom['numOwners'] || 'N/A'}",
|
3216
|
+
].join("\n"),
|
3217
|
+
([
|
3218
|
+
"Components: #{props['components']}/%s" % [
|
3219
|
+
lsomnode['numMaxComponents'] || 'N/A'
|
3220
|
+
],
|
3221
|
+
] + (props['disks'] || []).map do |disk|
|
3222
|
+
if disk['capacity'] > 0
|
3223
|
+
usage = disk['capacityUsed'] * 100 / disk['capacity']
|
3224
|
+
usage = "#{usage}%"
|
3225
|
+
else
|
3226
|
+
usage = "N/A"
|
3227
|
+
end
|
3228
|
+
"#{disk['esxcli'].DisplayName}: #{usage}"
|
3229
|
+
end).join("\n"),
|
3230
|
+
]
|
3231
|
+
end
|
3232
|
+
puts t
|
3233
|
+
end
|
3234
|
+
end
|
3235
|
+
|
3236
|
+
opts :object_reconfigure do
|
3237
|
+
summary "Reconfigure a VSAN object"
|
3238
|
+
arg :cluster, "Cluster on which to execute the reconfig", :lookup => [VIM::HostSystem, VIM::ClusterComputeResource]
|
3239
|
+
arg :obj_uuid, "Object UUID", :type => :string, :multi => true
|
3240
|
+
opt :policy, "New policy", :type => :string, :required => true
|
3241
|
+
end
|
3242
|
+
|
3243
|
+
def object_reconfigure cluster_or_host, obj_uuids, opts
|
3244
|
+
conn = cluster_or_host._connection
|
3245
|
+
pc = conn.propertyCollector
|
3246
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
3247
|
+
cluster = cluster_or_host
|
3248
|
+
hosts = cluster.host
|
3249
|
+
else
|
3250
|
+
hosts = [cluster_or_host]
|
3251
|
+
end
|
3252
|
+
|
3253
|
+
_run_with_rev(conn, "dev") do
|
3254
|
+
hosts_props = pc.collectMultiple(hosts,
|
3255
|
+
'name',
|
3256
|
+
'runtime.connectionState',
|
3257
|
+
'configManager.vsanSystem',
|
3258
|
+
'configManager.vsanInternalSystem'
|
3259
|
+
)
|
3260
|
+
connected_hosts = hosts_props.select do |k,v|
|
3261
|
+
v['runtime.connectionState'] == 'connected'
|
3262
|
+
end.keys
|
3263
|
+
host = connected_hosts.first
|
3264
|
+
if !host
|
3265
|
+
err "Couldn't find any connected hosts"
|
3266
|
+
end
|
3267
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
3268
|
+
|
3269
|
+
obj_uuids.each do |uuid|
|
3270
|
+
puts "Reconfiguring '#{uuid}' to #{opts[:policy]}"
|
3271
|
+
puts vsanIntSys.ReconfigureDomObject(
|
3272
|
+
:uuid => uuid,
|
3273
|
+
:policy => opts[:policy]
|
3274
|
+
)
|
3275
|
+
end
|
3276
|
+
end
|
3277
|
+
puts "All reconfigs initiated. Synching operation may be happening in the background"
|
3278
|
+
end
|
3279
|
+
|
3280
|
+
|
3281
|
+
opts :obj_status_report do
|
3282
|
+
summary "Print component status for objects in the cluster."
|
3283
|
+
arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
|
3284
|
+
opt :print_table, "Print a table of object and their status, default all objects",
|
3285
|
+
:short => 't', :type => :boolean, :default => false
|
3286
|
+
opt :filter_table, "Filter the obj table based on status displayed in histogram, e.g. 2/3",
|
3287
|
+
:short => 'f', :type => :string, :default => nil
|
3288
|
+
opt :print_uuids, "In the table, print object UUIDs instead of vmdk and vm paths",
|
3289
|
+
:short => 'u', :type => :boolean, :default => false
|
3290
|
+
opt :ignore_node_uuid, "Estimate the status of objects if all comps on a given host were healthy.",
|
3291
|
+
:short => 'i', :type => :string, :default => nil
|
3292
|
+
end
|
3293
|
+
|
3294
|
+
def obj_status_report cluster_or_host, opts
|
3295
|
+
conn = cluster_or_host._connection
|
3296
|
+
pc = conn.propertyCollector
|
3297
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
3298
|
+
cluster = cluster_or_host
|
3299
|
+
hosts = cluster.host
|
3300
|
+
else
|
3301
|
+
hosts = [cluster_or_host]
|
3302
|
+
end
|
3303
|
+
|
3304
|
+
_run_with_rev(conn, "dev") do
|
3305
|
+
hosts_props = pc.collectMultiple(hosts,
|
3306
|
+
'name',
|
3307
|
+
'runtime.connectionState',
|
3308
|
+
'configManager.vsanSystem',
|
3309
|
+
'configManager.vsanInternalSystem'
|
3310
|
+
)
|
3311
|
+
connected_hosts = hosts_props.select do |k,v|
|
3312
|
+
v['runtime.connectionState'] == 'connected'
|
3313
|
+
end.keys
|
3314
|
+
host = connected_hosts.first
|
3315
|
+
if !host
|
3316
|
+
err "Couldn't find any connected hosts"
|
3317
|
+
end
|
3318
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
3319
|
+
|
3320
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
3321
|
+
[props['name'], props['configManager.vsanSystem']]
|
3322
|
+
end]
|
3323
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
3324
|
+
'config.clusterInfo')
|
3325
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
3326
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
3327
|
+
end]
|
3328
|
+
|
3329
|
+
entries = nil
|
3330
|
+
|
3331
|
+
puts "#{Time.now}: Querying all VMs on VSAN ..."
|
3332
|
+
ds_list = host.datastore
|
3333
|
+
ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
|
3334
|
+
ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
|
3335
|
+
ds_name = ds_props[ds]['name']
|
3336
|
+
|
3337
|
+
vms = ds.vm
|
3338
|
+
vmsProps = pc.collectMultiple(vms,
|
3339
|
+
'name', 'runtime.connectionState',
|
3340
|
+
'config.hardware.device', 'summary.config'
|
3341
|
+
)
|
3342
|
+
|
3343
|
+
hostname = hosts_props[host]['name']
|
3344
|
+
puts "#{Time.now}: Querying all objects in the system from #{hostname} ..."
|
3345
|
+
|
3346
|
+
objects = vsanIntSys.query_cmmds([
|
3347
|
+
{:type => 'DOM_OBJECT'}
|
3348
|
+
], :gzip => true)
|
3349
|
+
if !objects
|
3350
|
+
err "Server failed to gather DOM_OBJECT entries"
|
3351
|
+
end
|
3352
|
+
|
3353
|
+
puts "#{Time.now}: Querying all disks in the system ..."
|
3354
|
+
# Need a list of live disk uuids to see if components are orphaned.
|
3355
|
+
liveDisks = vsanIntSys.query_cmmds([{:type => 'DISK'}])
|
3356
|
+
liveDisks = liveDisks.select do |disk|
|
3357
|
+
disk['health'] == "Healthy"
|
3358
|
+
end.map do |disk|
|
3359
|
+
disk['uuid']
|
3360
|
+
end
|
3361
|
+
|
3362
|
+
puts "#{Time.now}: Querying all components in the system ..."
|
3363
|
+
# Need a list of live comp uuids to see if components are orphaned.
|
3364
|
+
liveComps = vsanIntSys.query_cmmds(
|
3365
|
+
[{:type => 'LSOM_OBJECT'}],
|
3366
|
+
:gzip => true
|
3367
|
+
)
|
3368
|
+
liveComps = liveComps.select do |comp|
|
3369
|
+
comp['health'] == "Healthy"
|
3370
|
+
end
|
3371
|
+
liveComps = liveComps.map do |comp|
|
3372
|
+
comp['uuid']
|
3373
|
+
end
|
3374
|
+
|
3375
|
+
#pp liveDisks
|
3376
|
+
#puts "%d comps total" % liveComps.length
|
3377
|
+
|
3378
|
+
puts "#{Time.now}: Got all the info, computing table ..."
|
3379
|
+
|
3380
|
+
results = {}
|
3381
|
+
orphanRes = {}
|
3382
|
+
totalObjects = objects.length
|
3383
|
+
totalOrphans = 0
|
3384
|
+
|
3385
|
+
objects = objects.select do |obj|
|
3386
|
+
comps = _components_in_dom_config(obj['content'])
|
3387
|
+
numHealthy = 0
|
3388
|
+
numDeletedComps = 0
|
3389
|
+
|
3390
|
+
comps.each do |comp|
|
3391
|
+
state = comp['attributes']['componentState']
|
3392
|
+
bytesToSync = comp['attributes']['bytesToSync'] || 0
|
3393
|
+
resync = [10, 6].member?(state) && bytesToSync != 0
|
3394
|
+
|
3395
|
+
# Should we count resyncing as healthy? For now, let's do that.
|
3396
|
+
if resync || state == 5 ||
|
3397
|
+
(opts[:ignore_node_uuid] &&
|
3398
|
+
comp['attributes']['ownerId'] == opts[:ignore_node_uuid])
|
3399
|
+
numHealthy += 1
|
3400
|
+
elsif liveDisks.member?(comp['diskUuid']) &&
|
3401
|
+
!liveComps.member?(comp['componentUuid'])
|
3402
|
+
# A component is considered deleted if its disk is present
|
3403
|
+
# and the component is not present in CMMDS.
|
3404
|
+
numDeletedComps += 1
|
3405
|
+
end
|
3406
|
+
end
|
3407
|
+
obj['numHealthy'] = numHealthy
|
3408
|
+
obj['numComps'] = comps.length
|
3409
|
+
status = [numHealthy, comps.length]
|
3410
|
+
|
3411
|
+
# An object can be orphaned if it is deleted while a minority of
|
3412
|
+
# components are absent. To consider this an orphan, the total
|
3413
|
+
# number of provably deleted components must be a quorum.
|
3414
|
+
# If we have some deleted comps, but not a quorum, then mark it
|
3415
|
+
# as an orphanCandidate instead of a full orphan. Orphan candidates
|
3416
|
+
# still go into the normal results table.
|
3417
|
+
isOrphan = numDeletedComps > 0 && numDeletedComps > comps.length / 2
|
3418
|
+
if isOrphan
|
3419
|
+
obj['isOrphan'] = true
|
3420
|
+
elsif numDeletedComps > 0
|
3421
|
+
obj['isOrphanCandidate'] = true
|
3422
|
+
end
|
3423
|
+
|
3424
|
+
if isOrphan
|
3425
|
+
# All absent components are orphaned. Consider the object orphaned.
|
3426
|
+
totalOrphans += 1
|
3427
|
+
orphanRes[status] ||= 0
|
3428
|
+
orphanRes[status] += 1
|
3429
|
+
else
|
3430
|
+
results[status] ||= 0
|
3431
|
+
results[status] += 1
|
3432
|
+
end
|
3433
|
+
|
3434
|
+
if opts[:filter_table]
|
3435
|
+
("%d/%d" % [numHealthy, comps.length]) == opts[:filter_table]
|
3436
|
+
else
|
3437
|
+
true
|
3438
|
+
end
|
3439
|
+
end
|
3440
|
+
obj_uuids = objects.map{|x| x['uuid']}
|
3441
|
+
objectUuidMap = Hash[objects.map{|x| [x['uuid'], x]}]
|
3442
|
+
|
3443
|
+
all_obj_uuids = []
|
3444
|
+
vmToObjMap = {}
|
3445
|
+
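# Map each VM to the subset of its object UUIDs that passed the filter
# above, skipping UUIDs already claimed by an earlier VM.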
vms.each do |vm|
|
3446
|
+
vm_obj_uuids = _get_vm_obj_uuids(vm, vmsProps)
|
3447
|
+
vm_obj_uuids = vm_obj_uuids.select{|x, v| obj_uuids.member?(x)}
|
3448
|
+
vm_obj_uuids = vm_obj_uuids.reject{|x, v| all_obj_uuids.member?(x)}
|
3449
|
+
all_obj_uuids += vm_obj_uuids.keys
|
3450
|
+
if vm_obj_uuids.length > 0
|
3451
|
+
vmToObjMap[vm] = vm_obj_uuids
|
3452
|
+
end
|
3453
|
+
end
|
3454
|
+
|
3455
|
+
def printObjStatusHist results
|
3456
|
+
t = Terminal::Table.new()
|
3457
|
+
t << [
|
3458
|
+
'Num Healthy Comps / Total Num Comps',
|
3459
|
+
'Num objects with such status',
|
3460
|
+
]
|
3461
|
+
t.add_separator
|
3462
|
+
|
3463
|
+
results.each do |key,val|
|
3464
|
+
t << [
|
3465
|
+
"%d/%d" % [key[0], key[1]],
|
3466
|
+
" %d" % val,
|
3467
|
+
]
|
3468
|
+
end
|
3469
|
+
puts t
|
3470
|
+
end
|
3471
|
+
|
3472
|
+
puts ""
|
3473
|
+
puts "Histogram of component health for non-orphaned objects"
|
3474
|
+
puts ""
|
3475
|
+
printObjStatusHist(results)
|
3476
|
+
puts "Total non-orphans: %d" % (totalObjects - totalOrphans)
|
3477
|
+
puts ""
|
3478
|
+
puts ""
|
3479
|
+
puts "Histogram of component health for possibly orphaned objects"
|
3480
|
+
puts ""
|
3481
|
+
printObjStatusHist(orphanRes)
|
3482
|
+
puts "Total orphans: %d" % totalOrphans
|
3483
|
+
puts ""
|
3484
|
+
|
3485
|
+
|
3486
|
+
if opts[:print_table] || opts[:filter_table]
|
3487
|
+
t = Terminal::Table.new()
|
3488
|
+
t << [
|
3489
|
+
'VM/Object',
|
3490
|
+
'objects',
|
3491
|
+
'num healthy / total comps',
|
3492
|
+
]
|
3493
|
+
t.add_separator
|
3494
|
+
bytesToSyncGrandTotal = 0
|
3495
|
+
objGrandTotal = 0
|
3496
|
+
vmToObjMap.each do |vm, vm_obj_uuids|
|
3497
|
+
vmProps = vmsProps[vm]
|
3498
|
+
objs = vm_obj_uuids.keys.map{|x| objectUuidMap[x]}
|
3499
|
+
t << [
|
3500
|
+
vmProps['name'],
|
3501
|
+
objs.length,
|
3502
|
+
"",
|
3503
|
+
]
|
3504
|
+
objs.each do |obj|
|
3505
|
+
if opts[:print_uuids]
|
3506
|
+
objName = obj['uuid']
|
3507
|
+
else
|
3508
|
+
objName = (vm_obj_uuids[obj['uuid']] || obj['uuid'])
|
3509
|
+
end
|
3510
|
+
|
3511
|
+
if obj['isOrphan']
|
3512
|
+
orphanStr = "*"
|
3513
|
+
elsif obj['isOrphanCandidate']
|
3514
|
+
orphanStr = "-"
|
3515
|
+
else
|
3516
|
+
orphanStr = ""
|
3517
|
+
end
|
3518
|
+
|
3519
|
+
t << [
|
3520
|
+
" %s" % objName,
|
3521
|
+
'',
|
3522
|
+
"%d/%d%s" % [obj['numHealthy'], obj['numComps'], orphanStr],
|
3523
|
+
]
|
3524
|
+
objects.delete(obj)
|
3525
|
+
end
|
3526
|
+
end
|
3527
|
+
|
3528
|
+
# Okay, now print the remaining UUIDs which didn't map to any VM.
|
3529
|
+
if objects.length > 0
|
3530
|
+
if vmToObjMap.length > 0
|
3531
|
+
t.add_separator
|
3532
|
+
end
|
3533
|
+
t << [
|
3534
|
+
"Unassociated objects",
|
3535
|
+
'',
|
3536
|
+
'',
|
3537
|
+
]
|
3538
|
+
end
|
3539
|
+
objects.each do |obj|
|
3540
|
+
if obj['isOrphan']
|
3541
|
+
orphanStr = "*"
|
3542
|
+
elsif obj['isOrphanCandidate']
|
3543
|
+
orphanStr = "-"
|
3544
|
+
else
|
3545
|
+
orphanStr = ""
|
3546
|
+
end
|
3547
|
+
|
3548
|
+
t << [
|
3549
|
+
" %s" % obj['uuid'],
|
3550
|
+
'',
|
3551
|
+
"%d/%d%s" % [obj['numHealthy'], obj['numComps'], orphanStr],
|
3552
|
+
]
|
3553
|
+
end
|
3554
|
+
puts t
|
3555
|
+
puts ""
|
3556
|
+
puts "+------------------------------------------------------------------+"
|
3557
|
+
puts "| Legend: * = all unhealthy comps were deleted (disks present) |"
|
3558
|
+
puts "| - = some unhealthy comps deleted, some not or can't tell |"
|
3559
|
+
puts "| no symbol = We cannot conclude any comps were deleted |"
|
3560
|
+
puts "+------------------------------------------------------------------+"
|
3561
|
+
puts ""
|
3562
|
+
end
|
3563
|
+
end
|
3564
|
+
end
|
3565
|
+
|
3566
|
+
|
3567
|
+
opts :apply_license_to_cluster do
|
3568
|
+
summary "Apply license to VSAN "
|
3569
|
+
arg :cluster, nil, :lookup => VIM::ClusterComputeResource
|
3570
|
+
opt :license_key, "License key to be applied to the cluster", :short => 'k', :type => :string, :required => true
|
3571
|
+
opt :null_reconfigure, "", :short => 'r', :type => :boolean, :default => true
|
3572
|
+
end
|
3573
|
+
|
3574
|
+
def apply_license_to_cluster cluster, opts
|
3575
|
+
conn = cluster._connection
|
3576
|
+
puts "#{cluster.name}: Applying VSAN License on the cluster..."
|
3577
|
+
licenseManager = conn.serviceContent.licenseManager
|
3578
|
+
licenseAssignmentManager = licenseManager.licenseAssignmentManager
|
3579
|
+
assignment = licenseAssignmentManager.UpdateAssignedLicense(
|
3580
|
+
:entity => cluster._ref,
|
3581
|
+
:licenseKey => opts[:license_key]
|
3582
|
+
)
|
3583
|
+
if opts[:null_reconfigure]
|
3584
|
+
# Due to races in the cluster assignment mechanism in vSphere 5.5 GA,
|
3585
|
+
# disks may or may not be auto-claimed as would normally be expected. Doing
|
3586
|
+
# a Null-Reconfigure causes the license state to be synchronized correctly and
|
3587
|
+
# allows auto-claim to work as expected.
|
3588
|
+
puts "#{cluster.name}: Null-Reconfigure to force auto-claim..."
|
3589
|
+
spec = VIM::ClusterConfigSpecEx()
|
3590
|
+
task = cluster.ReconfigureComputeResource_Task(:spec => spec, :modify => true)
|
3591
|
+
progress([task])
|
3592
|
+
childtasks = task.child_tasks
|
3593
|
+
if childtasks && childtasks.length > 0
|
3594
|
+
progress(childtasks)
|
3595
|
+
end
|
3596
|
+
end
|
3597
|
+
end
|
3598
|
+
|
3599
|
+
|
3600
|
+
opts :check_state do
|
3601
|
+
summary "Checks state of VMs and VSAN objects"
|
3602
|
+
arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
|
3603
|
+
opt :refresh_state, "Not just check state, but also refresh", :type => :boolean
|
3604
|
+
opt :reregister_vms,
|
3605
|
+
"Not just check for vms with VC/hostd/vmx out of sync but also " \
|
3606
|
+
"fix them by un-registering and re-registering them",
|
3607
|
+
:type => :boolean
|
3608
|
+
end
|
3609
|
+
|
3610
|
+
def check_state cluster_or_host, opts
|
3611
|
+
conn = cluster_or_host._connection
|
3612
|
+
pc = conn.propertyCollector
|
3613
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
3614
|
+
cluster = cluster_or_host
|
3615
|
+
hosts = cluster.host
|
3616
|
+
else
|
3617
|
+
hosts = [cluster_or_host]
|
3618
|
+
end
|
3619
|
+
|
3620
|
+
_run_with_rev(conn, "dev") do
|
3621
|
+
hosts_props = pc.collectMultiple(hosts,
|
3622
|
+
'name',
|
3623
|
+
'runtime.connectionState',
|
3624
|
+
'configManager.vsanSystem',
|
3625
|
+
'configManager.vsanInternalSystem'
|
3626
|
+
)
|
3627
|
+
connected_hosts = hosts_props.select do |k,v|
|
3628
|
+
v['runtime.connectionState'] == 'connected'
|
3629
|
+
end.keys
|
3630
|
+
host = connected_hosts.first
|
3631
|
+
if !host
|
3632
|
+
err "Couldn't find any connected hosts"
|
3633
|
+
end
|
3634
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
3635
|
+
|
3636
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
3637
|
+
[props['name'], props['configManager.vsanSystem']]
|
3638
|
+
end]
|
3639
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
3640
|
+
'config.clusterInfo')
|
3641
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
3642
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
3643
|
+
end]
|
3644
|
+
|
3645
|
+
entries = nil
|
3646
|
+
|
3647
|
+
ds_list = host.datastore
|
3648
|
+
ds_props = pc.collectMultiple(ds_list, 'name', 'summary.type')
|
3649
|
+
ds = ds_props.select{|k, x| x['summary.type'] == "vsan"}.keys.first
|
3650
|
+
ds_name = ds_props[ds]['name']
|
3651
|
+
|
3652
|
+
vms = ds.vm
|
3653
|
+
vms_props = pc.collectMultiple(vms, 'name', 'runtime.connectionState')
|
3654
|
+
|
3655
|
+
puts "#{Time.now}: Step 1: Check for inaccessible VSAN objects"
|
3656
|
+
|
3657
|
+
statusses = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS'}])
|
3658
|
+
bad = statusses.select do |x|
|
3659
|
+
state = _assessAvailabilityByStatus(x['content']['state'])
|
3660
|
+
!state['DATA_AVAILABLE'] || !state['QUORUM']
|
3661
|
+
end
|
3662
|
+
|
3663
|
+
if !opts[:refresh_state]
|
3664
|
+
puts "Detected #{bad.length} objects to not be inaccessible"
|
3665
|
+
bad.each do |x|
|
3666
|
+
uuid = x['uuid']
|
3667
|
+
hostname = hostUuidMap[x['owner']]
|
3668
|
+
puts "Detected #{uuid} on #{hostname} to be inaccessible"
|
3669
|
+
end
|
3670
|
+
else
|
3671
|
+
bad.group_by{|x| hostUuidMap[x['owner']]}.each do |hostname, badOnHost|
|
3672
|
+
owner = hosts_props.select{|k,v| v['name'] == hostname}.keys.first
|
3673
|
+
owner_props = hosts_props[owner]
|
3674
|
+
owner_vsanIntSys = owner_props['configManager.vsanInternalSystem']
|
3675
|
+
badOnHost.each do |x|
|
3676
|
+
uuid = x['uuid']
|
3677
|
+
puts "Detected #{uuid} to not be inaccessible, refreshing state"
|
3678
|
+
end
|
3679
|
+
if badOnHost.length > 0
|
3680
|
+
badUuids = badOnHost.map{|x| x['uuid']}
|
3681
|
+
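# Asking the current DOM owner to abdicate forces ownership to be
# re-established, which re-evaluates the accessibility of these objects.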
owner_vsanIntSys.AbdicateDomOwnership(:uuids => badUuids)
|
3682
|
+
end
|
3683
|
+
end
|
3684
|
+
puts ""
|
3685
|
+
|
3686
|
+
puts "#{Time.now}: Step 1b: Check for inaccessible VSAN objects, again"
|
3687
|
+
statusses = vsanIntSys.query_cmmds([{:type => 'CONFIG_STATUS'}])
|
3688
|
+
bad = statusses.select do |x|
|
3689
|
+
state = _assessAvailabilityByStatus(x['content']['state'])
|
3690
|
+
!state['DATA_AVAILABLE'] || !state['QUORUM']
|
3691
|
+
end
|
3692
|
+
bad.each do |x|
|
3693
|
+
puts "Detected #{x['uuid']} is still inaccessible"
|
3694
|
+
end
|
3695
|
+
end
|
3696
|
+
puts ""
|
3697
|
+
|
3698
|
+
puts "#{Time.now}: Step 2: Check for invalid/inaccessible VMs"
|
3699
|
+
invalid_vms = vms_props.select do |k,v|
|
3700
|
+
['invalid', 'inaccessible', 'orphaned'].member?(v['runtime.connectionState'])
|
3701
|
+
end.keys
|
3702
|
+
tasks = []
|
3703
|
+
invalid_vms.each do |vm|
|
3704
|
+
vm_props = vms_props[vm]
|
3705
|
+
vm_state = vm_props['runtime.connectionState']
|
3706
|
+
if !opts[:refresh_state]
|
3707
|
+
puts "Detected VM '#{vm_props['name']}' as being '#{vm_state}'"
|
3708
|
+
else
|
3709
|
+
puts "Detected VM '#{vm_props['name']}' as being '#{vm_state}', reloading ..."
|
3710
|
+
begin
|
3711
|
+
if vm_state == 'orphaned'
|
3712
|
+
path = vm.summary.config.vmPathName
|
3713
|
+
tasks << vm.reloadVirtualMachineFromPath_Task(
|
3714
|
+
:configurationPath => path
|
3715
|
+
)
|
3716
|
+
else
|
3717
|
+
vm.Reload
|
3718
|
+
vm.Reload
|
3719
|
+
end
|
3720
|
+
rescue Exception => ex
|
3721
|
+
puts "#{ex.class}: #{ex.message}"
|
3722
|
+
end
|
3723
|
+
end
|
3724
|
+
end
|
3725
|
+
tasks = tasks.compact
|
3726
|
+
if tasks.length > 0
|
3727
|
+
progress(tasks)
|
3728
|
+
end
|
3729
|
+
puts ""
|
3730
|
+
|
3731
|
+
if opts[:refresh_state]
|
3732
|
+
puts "#{Time.now}: Step 2b: Check for invalid/inaccessible VMs again"
|
3733
|
+
vms_props = pc.collectMultiple(vms, 'name', 'runtime.connectionState')
|
3734
|
+
invalid_vms = vms_props.select do |k,v|
|
3735
|
+
['invalid', 'inaccessible', 'orphaned'].member?(v['runtime.connectionState'])
|
3736
|
+
end.keys
|
3737
|
+
invalid_vms.each do |vm|
|
3738
|
+
vm_props = vms_props[vm]
|
3739
|
+
vm_state = vm_props['runtime.connectionState']
|
3740
|
+
puts "Detected VM '#{vm_props['name']}' as still '#{vm_state}'"
|
3741
|
+
end
|
3742
|
+
puts ""
|
3743
|
+
end
|
3744
|
+
|
3745
|
+
puts "#{Time.now}: Step 3: Check for VMs for which VC/hostd/vmx" \
|
3746
|
+
" are out of sync"
|
3747
|
+
inconsistent_vms = find_inconsistent_vms(cluster_or_host)
|
3748
|
+
if opts[:reregister_vms] and not inconsistent_vms.empty?
|
3749
|
+
puts "You have chosen to fix these VMs. This involves re-registering" \
|
3750
|
+
" the VM which will cause loss of some of the management state of"\
|
3751
|
+
" this VM (for eg. storage policy, permissions, tags," \
|
3752
|
+
" scheduled tasks, etc. but NO data loss). Do you want to" \
|
3753
|
+
" continue [y/N] ?"
|
3754
|
+
opt = $stdin.gets.chomp
|
3755
|
+
if opt == 'y' || opt == 'Y'
|
3756
|
+
puts "Attempting to fix these vms..."
|
3757
|
+
fix_inconsistent_vms(inconsistent_vms)
|
3758
|
+
end
|
3759
|
+
end
|
3760
|
+
puts ""
|
3761
|
+
|
3762
|
+
end
|
3763
|
+
end
|
3764
|
+
|
3765
|
+
|
3766
|
+
opts :reapply_vsan_vmknic_config do
|
3767
|
+
summary "Unbinds and rebinds VSAN to its vmknics"
|
3768
|
+
arg :host, nil, :lookup => [VIM::HostSystem], :multi => true
|
3769
|
+
opt :vmknic, "Refresh a specific vmknic. default is all vmknics", :type => :string
|
3770
|
+
opt :dry_run, "Do a dry run: Show what changes would be made", :type => :boolean
|
3771
|
+
end
|
3772
|
+
|
3773
|
+
def reapply_vsan_vmknic_config hosts, opts
|
3774
|
+
hosts.each do |host|
|
3775
|
+
hostname = host.name
|
3776
|
+
net = host.esxcli.vsan.network
|
3777
|
+
nics = net.list()
|
3778
|
+
if opts[:vmknic]
|
3779
|
+
nics = nics.select{|x| x.VmkNicName == opts[:vmknic]}
|
3780
|
+
end
|
3781
|
+
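# vmknic attributes to display; the values are the corresponding
# 'esxcli vsan network ipv4 add' parameter names (nil = display only).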
keys = {
|
3782
|
+
:AgentGroupMulticastAddress => :agentmcaddr,
|
3783
|
+
:AgentGroupMulticastPort => :agentmcport,
|
3784
|
+
:IPProtocol => nil,
|
3785
|
+
:InterfaceUUID => nil,
|
3786
|
+
:MasterGroupMulticastAddress => :mastermcaddr,
|
3787
|
+
:MasterGroupMulticastPort => :mastermcport,
|
3788
|
+
:MulticastTTL => :multicastttl,
|
3789
|
+
}
|
3790
|
+
puts "Host: #{hostname}"
|
3791
|
+
if opts[:dry_run]
|
3792
|
+
nics.each do |nic|
|
3793
|
+
puts " Would reapply config of vmknic #{nic.VmkNicName}:"
|
3794
|
+
keys.keys.each do |key|
|
3795
|
+
puts " #{key.to_s}: #{nic.send(key)}"
|
3796
|
+
end
|
3797
|
+
end
|
3798
|
+
else
|
3799
|
+
nics.each do |nic|
|
3800
|
+
puts " Reapplying config of #{nic.VmkNicName}:"
|
3801
|
+
keys.keys.each do |key|
|
3802
|
+
puts " #{key.to_s}: #{nic.send(key)}"
|
3803
|
+
end
|
3804
|
+
puts " Unbinding VSAN from vmknic #{nic.VmkNicName} ..."
|
3805
|
+
net.ipv4.remove(:interfacename => nic.VmkNicName)
|
3806
|
+
puts " Rebinding VSAN to vmknic #{nic.VmkNicName} ..."
|
3807
|
+
params = {
|
3808
|
+
:agentmcaddr => nic.AgentGroupMulticastAddress,
|
3809
|
+
:agentmcport => nic.AgentGroupMulticastPort,
|
3810
|
+
:interfacename => nic.VmkNicName,
|
3811
|
+
:mastermcaddr => nic.MasterGroupMulticastAddress,
|
3812
|
+
:mastermcport => nic.MasterGroupMulticastPort,
|
3813
|
+
:multicastttl => nic.MulticastTTL,
|
3814
|
+
}
|
3815
|
+
#pp params
|
3816
|
+
net.ipv4.add(params)
|
3817
|
+
end
|
3818
|
+
end
|
3819
|
+
end
|
3820
|
+
end
|
3821
|
+
|
3822
|
+
|
3823
|
+
opts :recover_spbm do
|
3824
|
+
summary "SPBM Recovery"
|
3825
|
+
arg :cluster_or_host, nil, :lookup => [VIM::ClusterComputeResource, VIM::HostSystem]
|
3826
|
+
opt :show_details, "Show all the details", :type => :boolean
|
3827
|
+
end
|
3828
|
+
|
3829
|
+
def recover_spbm cluster_or_host, opts
|
3830
|
+
conn = cluster_or_host._connection
|
3831
|
+
pc = conn.propertyCollector
|
3832
|
+
host = cluster_or_host
|
3833
|
+
entries = []
|
3834
|
+
hostUuidMap = {}
|
3835
|
+
startTime = Time.now
|
3836
|
+
_run_with_rev(conn, "dev") do
|
3837
|
+
vsanIntSys = nil
|
3838
|
+
puts "#{Time.now}: Fetching Host info"
|
3839
|
+
if cluster_or_host.is_a?(VIM::ClusterComputeResource)
|
3840
|
+
cluster = cluster_or_host
|
3841
|
+
hosts = cluster.host
|
3842
|
+
else
|
3843
|
+
hosts = [host]
|
3844
|
+
end
|
3845
|
+
|
3846
|
+
hosts_props = pc.collectMultiple(hosts,
|
3847
|
+
'name',
|
3848
|
+
'runtime.connectionState',
|
3849
|
+
'configManager.vsanSystem',
|
3850
|
+
'configManager.vsanInternalSystem',
|
3851
|
+
'datastore'
|
3852
|
+
)
|
3853
|
+
connected_hosts = hosts_props.select do |k,v|
|
3854
|
+
v['runtime.connectionState'] == 'connected'
|
3855
|
+
end.keys
|
3856
|
+
host = connected_hosts.first
|
3857
|
+
if !host
|
3858
|
+
err "Couldn't find any connected hosts"
|
3859
|
+
end
|
3860
|
+
vsanIntSys = hosts_props[host]['configManager.vsanInternalSystem']
|
3861
|
+
vsanSysList = Hash[hosts_props.map do |host, props|
|
3862
|
+
[props['name'], props['configManager.vsanSystem']]
|
3863
|
+
end]
|
3864
|
+
clusterInfos = pc.collectMultiple(vsanSysList.values,
|
3865
|
+
'config.clusterInfo')
|
3866
|
+
hostUuidMap = Hash[vsanSysList.map do |hostname, sys|
|
3867
|
+
[clusterInfos[sys]['config.clusterInfo'].nodeUuid, hostname]
|
3868
|
+
end]
|
3869
|
+
|
3870
|
+
puts "#{Time.now}: Fetching Datastore info"
|
3871
|
+
datastores = hosts_props.values.map{|x| x['datastore']}.flatten
|
3872
|
+
datastores_props = pc.collectMultiple(datastores, 'name', 'summary.type')
|
3873
|
+
vsanDsList = datastores_props.select do |ds, props|
|
3874
|
+
props['summary.type'] == "vsan"
|
3875
|
+
end.keys
|
3876
|
+
if vsanDsList.length > 1
|
3877
|
+
err "Two VSAN datastores found, can't handle that"
|
3878
|
+
end
|
3879
|
+
vsanDs = vsanDsList[0]
|
3880
|
+
|
3881
|
+
puts "#{Time.now}: Fetching VM properties"
|
3882
|
+
vms = vsanDs.vm
|
3883
|
+
vms_props = pc.collectMultiple(vms, 'name', 'config.hardware.device')
|
3884
|
+
|
3885
|
+
puts "#{Time.now}: Fetching policies used on VSAN from CMMDS"
|
3886
|
+
entries = vsanIntSys.query_cmmds([{
|
3887
|
+
:type => "POLICY",
|
3888
|
+
}], :gzip => true)
|
3889
|
+
|
3890
|
+
policies = entries.map{|x| x['content']}.uniq
|
3891
|
+
|
3892
|
+
puts "#{Time.now}: Fetching SPBM profiles"
|
3893
|
+
pbm = conn.pbm
|
3894
|
+
pm = pbm.serviceContent.profileManager
|
3895
|
+
profileIds = pm.PbmQueryProfile(
|
3896
|
+
:resourceType => {:resourceType => "STORAGE"},
|
3897
|
+
:profileCategory => "REQUIREMENT"
|
3898
|
+
)
|
3899
|
+
if profileIds.length > 0
|
3900
|
+
profiles = pm.PbmRetrieveContent(:profileIds => profileIds)
|
3901
|
+
else
|
3902
|
+
profiles = []
|
3903
|
+
end
|
3904
|
+
profilesMap = Hash[profiles.map do |x|
|
3905
|
+
["#{x.profileId.uniqueId}-gen#{x.generationId}", x]
|
3906
|
+
end]
|
3907
|
+
|
3908
|
+
puts "#{Time.now}: Fetching VM <-> SPBM profile association"
|
3909
|
+
vms_entities = vms.map do |vm|
|
3910
|
+
vm.all_pbmobjref(:vms_props => vms_props)
|
3911
|
+
end.flatten.map{|x| x.dynamicProperty = []; x}
|
3912
|
+
associatedProfiles = pm.PbmQueryAssociatedProfiles(
|
3913
|
+
:entities => vms_entities
|
3914
|
+
)
|
3915
|
+
associatedEntities = associatedProfiles.map{|x| x.object}.uniq
|
3916
|
+
puts "#{Time.now}: Computing which VMs do not have a SPBM Profile ..."
|
3917
|
+
|
3918
|
+
nonAssociatedEntities = vms_entities - associatedEntities
|
3919
|
+
|
3920
|
+
vmsMap = Hash[vms.map{|x| [x._ref, x]}]
|
3921
|
+
nonAssociatedVms = {}
|
3922
|
+
nonAssociatedEntities.map do |entity|
|
3923
|
+
vm = vmsMap[entity.key.split(":").first]
|
3924
|
+
nonAssociatedVms[vm] ||= []
|
3925
|
+
nonAssociatedVms[vm] << [entity.objectType, entity.key]
|
3926
|
+
end
|
3927
|
+
puts "#{Time.now}: Fetching additional info about some VMs"
|
3928
|
+
|
3929
|
+
vms_props2 = pc.collectMultiple(vms, 'summary.config.vmPathName')
|
3930
|
+
|
3931
|
+
puts "#{Time.now}: Got all info, computing after %.2f sec" % [
|
3932
|
+
Time.now - startTime
|
3933
|
+
]
|
3934
|
+
|
3935
|
+
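# For every policy found in CMMDS, look up the matching SPBM profile
# (keyed by profile UUID and generation) and flag it for recovery if
# no such profile exists anymore.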
policies.each do |policy|
|
3936
|
+
policy['spbmRecoveryCandidate'] = false
|
3937
|
+
policy['spbmProfile'] = nil
|
3938
|
+
if policy['spbmProfileId']
|
3939
|
+
name = "%s-gen%s" % [
|
3940
|
+
policy['spbmProfileId'],
|
3941
|
+
policy['spbmProfileGenerationNumber'],
|
3942
|
+
]
|
3943
|
+
policy['spbmName'] = name
|
3944
|
+
policy['spbmProfile'] = profilesMap[name]
|
3945
|
+
if policy['spbmProfile']
|
3946
|
+
name = policy['spbmProfile'].name
|
3947
|
+
policy['spbmName'] = name
|
3948
|
+
name = "Existing SPBM Profile:\n#{name}"
|
3949
|
+
else
|
3950
|
+
policy['spbmRecoveryCandidate'] = true
|
3951
|
+
profile = profiles.find do |profile|
|
3952
|
+
profile.profileId.uniqueId == policy['spbmProfileId'] &&
|
3953
|
+
profile.generationId > policy['spbmProfileGenerationNumber']
|
3954
|
+
end
|
3955
|
+
# XXX: We should check if there is a profile that matches
|
3956
|
+
# one we recovered
|
3957
|
+
if profile
|
3958
|
+
name = profile.name
|
3959
|
+
name = "Old generation of SPBM Profile:\n#{name}"
|
3960
|
+
else
|
3961
|
+
name = "Unknown SPBM Profile. UUID:\n#{name}"
|
3962
|
+
end
|
3963
|
+
end
|
3964
|
+
else
|
3965
|
+
name = "Not managed by SPBM"
|
3966
|
+
policy['spbmName'] = name
|
3967
|
+
end
|
3968
|
+
propCap = policy['proportionalCapacity']
|
3969
|
+
if propCap && propCap.is_a?(Array) && propCap.length == 2
|
3970
|
+
policy['proportionalCapacity'] = policy['proportionalCapacity'][0]
|
3971
|
+
end
|
3972
|
+
|
3973
|
+
policy['spbmDescr'] = name
|
3974
|
+
end
|
3975
|
+
entriesMap = Hash[entries.map{|x| [x['uuid'], x]}]
|
3976
|
+
|
3977
|
+
nonAssociatedEntities = []
|
3978
|
+
nonAssociatedVms.each do |vm, entities|
|
3979
|
+
if entities.any?{|x| x == ["virtualMachine", vm._ref]}
|
3980
|
+
vmxPath = vms_props2[vm]['summary.config.vmPathName']
|
3981
|
+
if vmxPath =~ /^\[([^\]]*)\] ([^\/]+)\//
|
3982
|
+
nsUuid = $2
|
3983
|
+
entry = entriesMap[nsUuid]
|
3984
|
+
if entry && entry['content']['spbmProfileId']
|
3985
|
+
# This is a candidate
|
3986
|
+
nonAssociatedEntities << {
|
3987
|
+
:objUuid => nsUuid,
|
3988
|
+
:type => "virtualMachine",
|
3989
|
+
:key => vm._ref,
|
3990
|
+
:entry => entry,
|
3991
|
+
:vm => vm,
|
3992
|
+
:label => "VM Home",
|
3993
|
+
}
|
3994
|
+
end
|
3995
|
+
end
|
3996
|
+
end
|
3997
|
+
devices = vms_props[vm]['config.hardware.device']
|
3998
|
+
disks = devices.select{|x| x.is_a?(VIM::VirtualDisk)}
|
3999
|
+
disks.each do |disk|
|
4000
|
+
key = "#{vm._ref}:#{disk.key}"
|
4001
|
+
if entities.any?{|x| x == ["virtualDiskId", key]}
|
4002
|
+
objUuid = disk.backing.backingObjectId
|
4003
|
+
if objUuid
|
4004
|
+
entry = entriesMap[objUuid]
|
4005
|
+
if entry && entry['content']['spbmProfileId']
|
4006
|
+
# This is a candidate
|
4007
|
+
nonAssociatedEntities << {
|
4008
|
+
:objUuid => objUuid,
|
4009
|
+
:type => "virtualDiskId",
|
4010
|
+
:key => key,
|
4011
|
+
:entry => entry,
|
4012
|
+
:vm => vm,
|
4013
|
+
:label => disk.deviceInfo.label,
|
4014
|
+
}
|
4015
|
+
end
|
4016
|
+
end
|
4017
|
+
end
|
4018
|
+
end
|
4019
|
+
end
|
4020
|
+
nonAssociatedEntities.each do |entity|
|
4021
|
+
policy = policies.find do |policy|
|
4022
|
+
match = true
|
4023
|
+
['spbmProfileId', 'spbmProfileGenerationNumber'].each do |k|
|
4024
|
+
match = match && policy[k] == entity[:entry]['content'][k]
|
4025
|
+
end
|
4026
|
+
match
|
4027
|
+
end
|
4028
|
+
entity[:policy] = policy
|
4029
|
+
end
|
4030
|
+
|
4031
|
+
candidates = policies.select{|p| p['spbmRecoveryCandidate'] == true}
|
4032
|
+
|
4033
|
+
puts "#{Time.now}: Done computing"
|
4034
|
+
|
4035
|
+
if !opts[:show_details]
|
4036
|
+
puts ""
|
4037
|
+
puts "Found %d missing SPBM Profiles." % candidates.length
|
4038
|
+
puts "Found %d entities not associated with their SPBM Profiles." % nonAssociatedEntities.length
|
4039
|
+
puts ""
|
4040
|
+
puts "You have a number of options (can be combined):"
|
4041
|
+
puts "1) Run command with --show-details to see a full report about missing"
|
4042
|
+
puts "SPBM Profiles and missing VM <-> SPBM Profile associations."
|
4043
|
+
puts "2) Run command with --create-missing-profiles to automatically create"
|
4044
|
+
puts "all missing SPBM profiles."
|
4045
|
+
puts "3)Run command with --create-missing-associations to automatically"
|
4046
|
+
puts "create all missing VM <-> SPBM Profile associations."
|
4047
|
+
end
|
4048
|
+
|
4049
|
+
if opts[:show_details]
|
4050
|
+
puts "SPBM Profiles used by VSAN:"
|
4051
|
+
t = Terminal::Table.new()
|
4052
|
+
t << ['SPBM ID', 'policy']
|
4053
|
+
policies.each do |policy|
|
4054
|
+
t.add_separator
|
4055
|
+
t << [
|
4056
|
+
policy['spbmDescr'],
|
4057
|
+
policy.select{|k,v| k !~ /spbm/}.map{|k,v| "#{k}: #{v}"}.join("\n")
|
4058
|
+
]
|
4059
|
+
end
|
4060
|
+
puts t
|
4061
|
+
puts ""
|
4062
|
+
|
4063
|
+
if candidates.length > 0
|
4064
|
+
puts "Recreate missing SPBM Profiles using following RVC commands:"
|
4065
|
+
candidates.each do |policy|
|
4066
|
+
rules = policy.select{|k,v| k !~ /spbm/}
|
4067
|
+
s = rules.map{|k,v| "--rule VSAN.#{k}=#{v}"}.join(" ")
|
4068
|
+
puts "spbm.profile_create #{s} #{policy['spbmName']}"
|
4069
|
+
end
|
4070
|
+
puts ""
|
4071
|
+
end
|
4072
|
+
end
|
4073
|
+
|
4074
|
+
if opts[:show_details] && nonAssociatedEntities.length > 0
|
4075
|
+
puts "Following missing VM <-> SPBM Profile associations were found:"
|
4076
|
+
t = Terminal::Table.new()
|
4077
|
+
t << ['Entity', 'VM', 'Profile']
|
4078
|
+
t.add_separator
|
4079
|
+
nonAssociatedEntities.each do |entity|
|
4080
|
+
#puts "'%s' of VM '%s' should be associated with profile '%s' but isn't." % [
|
4081
|
+
t << [
|
4082
|
+
entity[:label],
|
4083
|
+
vms_props[entity[:vm]]['name'],
|
4084
|
+
entity[:policy]['spbmName'],
|
4085
|
+
]
|
4086
|
+
|
4087
|
+
# Fix up the associations. Disabled for now until I can check
|
4088
|
+
# with Sudarsan
|
4089
|
+
# profile = entity[:policy]['spbmProfile']
|
4090
|
+
# if profile
|
4091
|
+
# pm.PbmAssociate(
|
4092
|
+
# :entity => PBM::PbmServerObjectRef(
|
4093
|
+
# :objectType => entity[:type],
|
4094
|
+
# :key => entity[:key],
|
4095
|
+
# :serverUuid => conn.serviceContent.about.instanceUuid
|
4096
|
+
# ),
|
4097
|
+
# :profile => profile.profileId
|
4098
|
+
# )
|
4099
|
+
# end
|
4100
|
+
end
|
4101
|
+
puts t
|
4102
|
+
end
|
4103
|
+
end
|
4104
|
+
|
4105
|
+
end
|