pinokiod 7.3.4 → 7.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/kernel/resource_usage/gpu.js +1114 -165
- package/kernel/resource_usage/index.js +9 -4
- package/package.json +2 -1
- package/server/index.js +3 -0
- package/server/views/install.ejs +4 -7
- package/test/resource-usage-gpu.test.js +353 -0
|
@@ -3,12 +3,31 @@
|
|
|
3
3
|
const fs = require("fs")
|
|
4
4
|
const os = require("os")
|
|
5
5
|
const path = require("path")
|
|
6
|
-
const {
|
|
6
|
+
const { normalizePid } = require("./process_tree")
|
|
7
7
|
|
|
8
|
-
const DEFAULT_GPU_TTL_MS =
|
|
9
|
-
const
|
|
8
|
+
// Default refresh interval (ms); used below as the Windows PDH counter refresh period.
const DEFAULT_GPU_TTL_MS = 5000
// Upper bounds for the Linux /proc/<pid>/fdinfo scan.
const DEFAULT_DRM_FDINFO_MAX_PIDS = 4096
const DEFAULT_DRM_FDINFO_MAX_FDS_PER_PID = 1024
const MIB = 1024 * 1024

// Windows PDH: English counter path (wildcard over instances) and status codes.
const WINDOWS_GPU_PROCESS_COUNTER = "\\GPU Process Memory(*)\\Dedicated Usage"
const ERROR_SUCCESS = 0
const PDH_MORE_DATA = 0x800007D2
const PDH_INVALID_PATH = 0xC0000BC4
const PDH_INVALID_DATA = 0xC0000BC6
const PDH_NO_DATA = 0x800007D5
const PDH_FMT_LARGE = 0x00000400

// NVML return codes and the all-ones sentinel compared against usedGpuMemory.
const NVML_SUCCESS = 0
const NVML_ERROR_INSUFFICIENT_SIZE = 7
const NVML_VALUE_NOT_AVAILABLE = 0xFFFFFFFFFFFFFFFFn

// Init flags passed to the AMD SMI / ROCm SMI init entry points.
const AMDSMI_INIT_AMD_GPUS = 1 << 1
const RSMI_INIT_DEFAULT = 0

// Lazily resolved koffi module: undefined = not tried yet, null = unavailable.
let koffiModule
// Per-koffi-instance cache of struct type definitions (see getCachedKoffiTypes).
const koffiTypeCache = new WeakMap()
|
|
30
|
+
|
|
12
31
|
function unique(values) {
|
|
13
32
|
const seen = new Set()
|
|
14
33
|
const next = []
|
|
@@ -20,54 +39,198 @@ function unique(values) {
|
|
|
20
39
|
return next
|
|
21
40
|
}
|
|
22
41
|
|
|
23
|
-
function
|
|
42
|
+
/**
 * Lazily require the optional "koffi" FFI module and memoize the outcome.
 *
 * @returns {object|null} The koffi module, or null when requiring it failed.
 */
function loadKoffi() {
  // undefined means "not attempted yet"; null records a failed attempt so
  // the require is not retried on every call.
  if (koffiModule !== undefined) {
    return koffiModule
  }
  try {
    koffiModule = require("koffi")
  } catch (_) {
    koffiModule = null
  }
  return koffiModule
}
|
|
53
|
+
|
|
54
|
+
/**
 * Return the value produced by `factory` for `key`, building it at most once
 * per koffi instance.
 *
 * @param {object} koffi - koffi module instance used as the WeakMap key.
 * @param {string} key - Name of the type group (e.g. "nvml").
 * @param {Function} factory - Zero-argument function that defines the types.
 * @returns {*} The cached factory result.
 */
function getCachedKoffiTypes(koffi, key, factory) {
  let cache = koffiTypeCache.get(koffi)
  if (!cache) {
    cache = new Map()
    koffiTypeCache.set(koffi, cache)
  }
  if (!cache.has(key)) {
    cache.set(key, factory())
  }
  return cache.get(key)
}
|
|
65
|
+
|
|
66
|
+
/**
 * Define (once per koffi instance) the struct types used with pdh.dll.
 *
 * @param {object} koffi - koffi module instance.
 * @returns {{counterValue: object, counterInfo: object}} koffi struct types.
 */
function getWindowsPdhTypes(koffi) {
  return getCachedKoffiTypes(koffi, "windows-pdh", () => {
    // Only the 64-bit integer member of the value is declared because the
    // caller always requests PDH_FMT_LARGE.
    const counterValue = koffi.struct("PDH_FMT_COUNTERVALUE", {
      CStatus: "uint32_t",
      largeValue: "int64_t"
    })
    // Leading fields of the counter-info structure, up to the full path.
    const counterInfo = koffi.struct("PDH_COUNTER_INFO_W_PREFIX", {
      dwLength: "uint32_t",
      dwType: "uint32_t",
      CVersion: "uint32_t",
      CStatus: "uint32_t",
      lScale: "int32_t",
      lDefaultScale: "int32_t",
      dwUserData: "uintptr_t",
      dwQueryUserData: "uintptr_t",
      szFullPath: "str16"
    })
    return { counterValue, counterInfo }
  })
}
|
|
86
|
+
|
|
87
|
+
/**
 * Define (once per koffi instance) the two NVML process-info struct layouts.
 *
 * @param {object} koffi - koffi module instance.
 * @returns {{processInfoV1: object, processInfoV2: object}} koffi struct types.
 */
function getNvmlTypes(koffi) {
  return getCachedKoffiTypes(koffi, "nvml", () => {
    const processInfoV1 = koffi.struct("nvmlProcessInfo_v1_t", {
      pid: "uint32_t",
      usedGpuMemory: "uint64_t"
    })
    const processInfoV2 = koffi.struct("nvmlProcessInfo_v2_t", {
      pid: "uint32_t",
      usedGpuMemory: "uint64_t",
      gpuInstanceId: "uint32_t",
      computeInstanceId: "uint32_t"
    })
    return { processInfoV1, processInfoV2 }
  })
}
|
|
102
|
+
|
|
103
|
+
/**
 * Define (once per koffi instance) the AMD SMI per-process info struct and
 * the nested structs it embeds.
 *
 * @param {object} koffi - koffi module instance.
 * @returns {{procInfo: object}} koffi struct type for amdsmi_proc_info_t.
 */
function getAmdSmiTypes(koffi) {
  return getCachedKoffiTypes(koffi, "amdsmi", () => {
    const engineUsage = koffi.struct("amdsmi_engine_usage_process_t", {
      gfx: "uint64_t",
      enc: "uint64_t",
      reserved: koffi.array("uint32_t", 12)
    })
    const memoryUsage = koffi.struct("amdsmi_memory_usage_process_t", {
      gtt_mem: "uint64_t",
      cpu_mem: "uint64_t",
      vram_mem: "uint64_t",
      reserved: koffi.array("uint32_t", 10)
    })
    // NOTE(review): field layout presumably mirrors the amdsmi.h header of
    // the targeted AMD SMI release — confirm when bumping library versions.
    const procInfo = koffi.struct("amdsmi_proc_info_t", {
      name: koffi.array("char", 256),
      pid: "uint32_t",
      mem: "uint64_t",
      engine_usage: engineUsage,
      memory_usage: memoryUsage,
      container_name: koffi.array("char", 256),
      cu_occupancy: "uint32_t",
      evicted_time: "uint32_t",
      reserved: koffi.array("uint32_t", 10)
    })
    return { procInfo }
  })
}
|
|
130
|
+
|
|
131
|
+
/**
 * Define (once per koffi instance) the ROCm SMI per-process info struct.
 *
 * @param {object} koffi - koffi module instance.
 * @returns {{procInfo: object}} koffi struct type for rsmi_process_info_t.
 */
function getRocmSmiTypes(koffi) {
  return getCachedKoffiTypes(koffi, "rocm-smi", () => {
    const procInfo = koffi.struct("rsmi_process_info_t", {
      process_id: "uint32_t",
      pasid: "uint32_t",
      vram_usage: "uint64_t",
      sdma_usage: "uint64_t",
      cu_occupancy: "uint32_t"
    })
    return { procInfo }
  })
}
|
|
31
143
|
|
|
32
|
-
function
|
|
144
|
+
/**
 * De-duplicate library candidates and drop the ones that cannot be loaded.
 *
 * Bare names (non-absolute paths) are kept as-is; absolute paths are kept
 * only when they exist on disk.
 *
 * @param {Array<string|undefined|null>} candidates - Library names or paths.
 * @returns {string[]} Candidates worth handing to the loader, in order.
 */
function existingLibraryCandidates(candidates) {
  return unique(candidates).filter((candidate) => {
    if (!candidate) return false
    if (!path.isAbsolute(candidate)) return true
    try {
      return fs.existsSync(candidate)
    } catch (_) {
      return false
    }
  })
}
|
|
41
155
|
|
|
42
|
-
function
|
|
43
|
-
|
|
44
|
-
|
|
156
|
+
/**
 * Build the ordered list of places to look for a ROCm shared library.
 *
 * The bare filename comes first, followed by lib/ and lib64/ under each
 * ROCm root (ROCM_PATH, ROCM_HOME, /opt/rocm, /usr, /usr/local) and a few
 * fixed multiarch directories. The list is then filtered through
 * existingLibraryCandidates.
 *
 * @param {string} filename - Library file name, e.g. "libamd_smi.so".
 * @returns {string[]} Existing or bare-name candidates, in priority order.
 */
function rocmLibraryCandidates(filename) {
  // ROCM_PATH / ROCM_HOME are frequently unset. path.join throws a
  // TypeError on non-string arguments, so drop anything that is not a
  // non-empty string regardless of what unique() lets through.
  const roots = unique([
    process.env.ROCM_PATH,
    process.env.ROCM_HOME,
    "/opt/rocm",
    "/usr",
    "/usr/local"
  ]).filter((root) => typeof root === "string" && root.length > 0)
  const candidates = [filename]
  for (const root of roots) {
    candidates.push(
      path.join(root, "lib", filename),
      path.join(root, "lib64", filename)
    )
  }
  candidates.push(
    path.join("/usr/lib/x86_64-linux-gnu", filename),
    path.join("/usr/lib/aarch64-linux-gnu", filename),
    path.join("/usr/local/lib", filename)
  )
  return existingLibraryCandidates(candidates)
}
|
|
178
|
+
|
|
179
|
+
/**
 * Load the first candidate library that koffi can open.
 *
 * @param {object} koffi - koffi module instance.
 * @param {Array<string|undefined|null>} candidates - Names/paths to try in order.
 * @param {object} [options={}] - Passed through to koffi.load.
 * @returns {object} The loaded koffi library handle.
 * @throws {Error} The last load error, or a generic error when there was
 *   nothing to try.
 */
function loadFirstLibrary(koffi, candidates, options = {}) {
  let lastError = null
  for (const candidate of existingLibraryCandidates(candidates)) {
    try {
      return koffi.load(candidate, options)
    } catch (error) {
      // Remember the failure and fall through to the next candidate.
      lastError = error
    }
  }
  throw lastError || new Error("native GPU library unavailable")
}
|
|
190
|
+
|
|
191
|
+
/**
 * Bind the first function prototype that the library can resolve.
 *
 * @param {object} library - Object exposing `func(definition)`.
 * @param {string[]} definitions - Prototypes to try, most preferred first.
 * @returns {Function|null} The bound function, or null when none resolved.
 */
function optionalFunction(library, definitions) {
  for (const definition of definitions) {
    try {
      return library.func(definition)
    } catch (_) {
      // Binding failed for this prototype; try the next one.
    }
  }
  return null
}
|
|
199
|
+
|
|
200
|
+
/**
 * Coerce a native status value to an unsigned 32-bit integer so that signed
 * and unsigned representations of the same code compare equal.
 *
 * @param {*} value - Status returned by a native call.
 * @returns {number} The status as a uint32.
 */
function statusCode(value) {
  return Number(value) >>> 0
}
|
|
203
|
+
|
|
204
|
+
/**
 * Compare a native status against an expected code as unsigned 32-bit values.
 *
 * @param {*} value - Status returned by a native call.
 * @param {number} expected - Expected status code.
 * @returns {boolean} True when both denote the same uint32.
 */
function isStatus(value, expected) {
  return statusCode(value) === (expected >>> 0)
}
|
|
207
|
+
|
|
208
|
+
/**
 * @param {*} value - Status returned by a native call.
 * @returns {boolean} True when the status equals ERROR_SUCCESS.
 */
function isSuccess(value) {
  return isStatus(value, ERROR_SUCCESS)
}
|
|
211
|
+
|
|
212
|
+
/**
 * Tell whether a PDH status is one of the codes this module treats as
 * "nothing to report" (invalid path, invalid data, or no data).
 *
 * @param {*} value - Status returned by a PDH call.
 * @returns {boolean} True for PDH_INVALID_PATH, PDH_INVALID_DATA or PDH_NO_DATA.
 */
function isNoDataStatus(value) {
  return isStatus(value, PDH_INVALID_PATH) || isStatus(value, PDH_INVALID_DATA) || isStatus(value, PDH_NO_DATA)
}
|
|
215
|
+
|
|
216
|
+
/**
 * Convert a value to a non-negative finite Number.
 *
 * @param {*} value - BigInt, number, or anything Number() accepts.
 * @returns {number|null} The number, or null when the value is negative,
 *   not finite, or a BigInt above Number.MAX_SAFE_INTEGER.
 */
function toSafeNumber(value) {
  if (typeof value === "bigint") {
    // Reject BigInts that would lose precision when converted.
    if (value < 0n || value > BigInt(Number.MAX_SAFE_INTEGER)) return null
    return Number(value)
  }
  const number = Number(value)
  if (!Number.isFinite(number) || number < 0) return null
  return number
}
|
|
63
225
|
|
|
64
226
|
function parseMemoryToBytes(value, defaultUnit = "") {
|
|
65
227
|
if (value == null) return null
|
|
66
|
-
if (typeof value === "number") {
|
|
67
|
-
|
|
68
|
-
if (
|
|
69
|
-
if (defaultUnit === "
|
|
70
|
-
return Math.round(
|
|
228
|
+
if (typeof value === "number" || typeof value === "bigint") {
|
|
229
|
+
const number = toSafeNumber(value)
|
|
230
|
+
if (number == null) return null
|
|
231
|
+
if (defaultUnit === "mib") return Math.round(number * MIB)
|
|
232
|
+
if (defaultUnit === "kb") return Math.round(number * 1024)
|
|
233
|
+
return Math.round(number)
|
|
71
234
|
}
|
|
72
235
|
const raw = String(value).trim()
|
|
73
236
|
if (!raw || /N\/A|not supported|none/i.test(raw)) {
|
|
@@ -98,108 +261,739 @@ function addGpuProcess(processes, pid, bytes) {
|
|
|
98
261
|
processes.set(normalizedPid, current)
|
|
99
262
|
}
|
|
100
263
|
|
|
101
|
-
function
|
|
102
|
-
const
|
|
264
|
+
/**
 * Record a GPU memory reading for a PID, keeping the largest value seen.
 *
 * Mutates `processes` in place. Readings with an invalid PID or a
 * non-finite / negative byte count are ignored.
 *
 * @param {Map<number, {pid: number, usedGpuMemoryBytes: number}>} processes
 * @param {*} pid - PID in any form accepted by normalizePid.
 * @param {number} bytes - GPU memory in bytes.
 * @returns {void}
 */
function mergeGpuProcess(processes, pid, bytes) {
  const normalizedPid = normalizePid(pid)
  if (!normalizedPid || !Number.isFinite(bytes) || bytes < 0) {
    return
  }
  const current = processes.get(normalizedPid) || {
    pid: normalizedPid,
    usedGpuMemoryBytes: 0
  }
  // Max rather than sum: the same process may be reported by several lists.
  current.usedGpuMemoryBytes = Math.max(current.usedGpuMemoryBytes || 0, bytes)
  processes.set(normalizedPid, current)
}
|
|
276
|
+
|
|
277
|
+
/**
 * Normalize a collection of PIDs into a sorted, de-duplicated array.
 *
 * @param {Iterable<*>|null|undefined} values - Raw PID values.
 * @returns {number[]} Valid PIDs in ascending order (empty for null/undefined).
 */
function normalizePidSet(values) {
  const pids = []
  for (const value of values || []) {
    const pid = normalizePid(value)
    if (pid) pids.push(pid)
  }
  return Array.from(new Set(pids)).sort((a, b) => a - b)
}
|
|
285
|
+
|
|
286
|
+
/**
 * Restrict a process map to the requested PIDs.
 *
 * @param {Map<number, {pid: number}>} processes - Map keyed by PID.
 * @param {Iterable<*>|null|undefined} pids - Requested PIDs; null/undefined
 *   means "no filter".
 * @returns {Map<number, object>} `processes` itself when no filter was given,
 *   an empty map when a filter was given but held no valid PID, otherwise a
 *   new map with only the matching entries.
 */
function filterProcessMap(processes, pids) {
  const targetPids = normalizePidSet(pids)
  if (targetPids.length === 0 && pids != null) {
    return new Map()
  }
  if (targetPids.length === 0) {
    return processes
  }
  const targetSet = new Set(targetPids)
  const filtered = new Map()
  for (const entry of processes.values()) {
    if (targetSet.has(entry.pid)) {
      filtered.set(entry.pid, entry)
    }
  }
  return filtered
}
|
|
303
|
+
|
|
304
|
+
/**
 * @param {Map<number, *>} processes - Map keyed by PID.
 * @returns {Set<number>} The set of keys of `processes`.
 */
function coveredPids(processes) {
  return new Set(Array.from(processes.keys()))
}
|
|
307
|
+
|
|
308
|
+
/**
 * Decide whether any requested PID is still missing from `covered`.
 *
 * @param {Iterable<*>|null|undefined} pids - Requested PIDs; null/undefined
 *   means "all processes" and is always reported as uncovered.
 * @param {Set<number>} covered - PIDs that already have data.
 * @returns {boolean} True when at least one target lacks coverage.
 */
function hasUncoveredTarget(pids, covered) {
  if (pids == null) return true
  const targetPids = normalizePidSet(pids)
  if (targetPids.length === 0) return false
  for (const pid of targetPids) {
    if (!covered.has(pid)) return true
  }
  return false
}
|
|
317
|
+
|
|
318
|
+
/**
 * Pull the process id out of a string containing a "pid_<digits>" token
 * (case-insensitive; "_", "-" or whitespace may separate "pid" and digits).
 *
 * @param {*} instanceName - Counter instance name or full counter path.
 * @returns {*} Result of normalizePid on the captured digits (or on a
 *   null/undefined match).
 */
function extractPidFromWindowsGpuInstance(instanceName) {
  const match = /(?:^|[^a-z0-9])pid[_\s-]*(\d+)(?:\D|$)/i.exec(String(instanceName || ""))
  return normalizePid(match && match[1])
}
|
|
322
|
+
|
|
323
|
+
/**
 * Decode a UTF-16LE multi-string buffer (NUL-separated strings, terminated
 * by an empty string) into an array of strings.
 *
 * @param {Buffer} buffer - Raw UTF-16LE bytes.
 * @param {number} charCount - Number of 16-bit units to inspect; clamped to
 *   the buffer size.
 * @returns {string[]} Decoded non-empty strings. A trailing segment without
 *   a terminating NUL is not returned.
 */
function decodeWindowsMultiSz(buffer, charCount) {
  const values = []
  let start = 0
  const count = Math.max(0, Math.min(charCount || 0, Math.floor(buffer.length / 2)))
  for (let i = 0; i < count; i += 1) {
    const char = buffer.readUInt16LE(i * 2)
    if (char !== 0) continue
    // A NUL right at the start of a segment is the list terminator.
    if (i === start) break
    values.push(buffer.subarray(start * 2, i * 2).toString("utf16le"))
    start = i + 1
  }
  return values.filter(Boolean)
}
|
|
336
|
+
|
|
337
|
+
/**
 * Tell whether a DRM fdinfo memory-region name refers to dedicated GPU memory.
 *
 * Names are compared case-insensitively with separators removed. "vram" and
 * "local" (optionally followed by digits) count as dedicated; system, gtt,
 * memory, shared, stolen, cpu and host regions, and anything else, do not.
 *
 * @param {*} region - Region suffix taken from a drm-resident-* / drm-memory-* key.
 * @returns {boolean} True for dedicated (VRAM / local) regions.
 */
function isDedicatedDrmMemoryRegion(region) {
  const normalized = String(region || "")
    .trim()
    .toLowerCase()
    .replace(/[_\s]+/g, "-")
  const compact = normalized.replace(/[^a-z0-9]/g, "")
  if (!compact || /^(system|gtt|memory|shared|stolen|cpu|host)\d*$/.test(compact)) {
    return false
  }
  return /^vram\d*$/.test(compact) || /^local\d*$/.test(compact)
}
|
|
348
|
+
|
|
349
|
+
/**
 * Parse the text of a /proc/<pid>/fdinfo/<fd> file for DRM client details.
 *
 * Lines are read as "key: value" pairs (keys lower-cased; a repeated key
 * keeps its last value). Dedicated-memory regions are summed separately for
 * "drm-resident-*" and "drm-memory-*" keys; the resident total is preferred
 * whenever at least one valid resident value was present.
 *
 * @param {*} stdout - File contents.
 * @returns {{driver: string, pdev: string, clientId: string, dedicatedBytes: number}|null}
 *   Parsed info, or null when the file has no "drm-driver" field.
 */
function parseLinuxDrmFdinfo(stdout) {
  const fields = new Map()
  for (const line of String(stdout || "").split(/\r?\n/)) {
    const separator = line.indexOf(":")
    if (separator < 0) continue
    const key = line.slice(0, separator).trim().toLowerCase()
    const value = line.slice(separator + 1).trim()
    if (key) fields.set(key, value)
  }

  const driver = fields.get("drm-driver")
  if (!driver) {
    return null
  }

  let residentBytes = 0
  let legacyMemoryBytes = 0
  let hasResidentDedicatedMemory = false
  for (const [key, value] of fields.entries()) {
    const match = /^drm-(resident|memory)-(.+)$/.exec(key)
    if (!match || !isDedicatedDrmMemoryRegion(match[2])) continue
    const bytes = parseMemoryToBytes(value)
    if (!Number.isFinite(bytes) || bytes < 0) continue
    if (match[1] === "resident") {
      hasResidentDedicatedMemory = true
      residentBytes += bytes
    } else {
      legacyMemoryBytes += bytes
    }
  }

  return {
    driver,
    pdev: fields.get("drm-pdev") || "",
    clientId: fields.get("drm-client-id") || "",
    dedicatedBytes: hasResidentDedicatedMemory ? residentBytes : legacyMemoryBytes
  }
}
|
|
387
|
+
|
|
388
|
+
/**
 * Collect dedicated GPU memory per process by scanning fdinfo files.
 *
 * For each target PID, numeric entries of <procRoot>/<pid>/fdinfo are read
 * and parsed with parseLinuxDrmFdinfo. Readings are grouped by
 * (pid, driver, device, client id) keeping the largest value per group, so
 * several file descriptors of one DRM client are counted once; the groups
 * are then added up per PID via addGpuProcess.
 *
 * @param {Iterable<*>} pids - PIDs to inspect.
 * @param {object} [options={}]
 * @param {string} [options.procRoot="/proc"] - Root of the proc filesystem.
 * @param {number} [options.maxPids] - Cap on the number of PIDs scanned.
 * @param {number} [options.maxFdsPerPid] - Cap on fdinfo entries read per PID.
 * @returns {Promise<Map>} Map populated through addGpuProcess.
 */
async function collectLinuxDrmFdinfoProcesses(pids, options = {}) {
  const procRoot = options.procRoot || "/proc"
  const maxPids = options.maxPids || DEFAULT_DRM_FDINFO_MAX_PIDS
  const maxFdsPerPid = options.maxFdsPerPid || DEFAULT_DRM_FDINFO_MAX_FDS_PER_PID
  const targetPids = normalizePidSet(pids).slice(0, maxPids)
  const byClient = new Map()

  for (const pid of targetPids) {
    const fdinfoDir = path.join(procRoot, String(pid), "fdinfo")
    let entries = []
    try {
      entries = await fs.promises.readdir(fdinfoDir, { withFileTypes: true })
    } catch (_) {
      // Directory unreadable (e.g. process gone or no permission): skip PID.
      continue
    }

    let scannedFds = 0
    for (const entry of entries) {
      const name = entry && entry.name ? entry.name : ""
      if (!/^\d+$/.test(name)) continue
      scannedFds += 1
      if (scannedFds > maxFdsPerPid) break

      let stdout = ""
      try {
        stdout = await fs.promises.readFile(path.join(fdinfoDir, name), "utf8")
      } catch (_) {
        // The descriptor may have been closed in the meantime.
        continue
      }

      const parsed = parseLinuxDrmFdinfo(stdout)
      if (!parsed || !(parsed.dedicatedBytes > 0)) continue
      const clientKey = parsed.clientId
        ? `client:${parsed.clientId}`
        : "unknown-client"
      const key = [
        pid,
        parsed.driver || "unknown-driver",
        parsed.pdev || "unknown-device",
        clientKey
      ].join(":")
      const current = byClient.get(key)
      byClient.set(key, {
        pid,
        bytes: current ? Math.max(current.bytes, parsed.dedicatedBytes) : parsed.dedicatedBytes
      })
    }
  }

  const processes = new Map()
  for (const entry of byClient.values()) {
    addGpuProcess(processes, entry.pid, entry.bytes)
  }
  return processes
}
|
|
113
443
|
|
|
114
|
-
|
|
115
|
-
|
|
444
|
+
/**
 * Reads per-process dedicated GPU memory on Windows through pdh.dll
 * ("GPU Process Memory(*)\Dedicated Usage" counters), bound via koffi.
 *
 * The client keeps one open PDH query holding a counter per discovered
 * instance and rebuilds it at most every `counterRefreshMs` milliseconds.
 */
class WindowsPdhGpuMemoryClient {
  /**
   * @param {object} [options={}]
   * @param {object} [options.koffi] - koffi instance; defaults to loadKoffi().
   * @param {number} [options.counterRefreshMs] - Counter list refresh period.
   */
  constructor(options = {}) {
    this.koffi = options.koffi || loadKoffi()
    this.library = null
    this.query = null
    this.counters = []
    this.counterValueType = null
    this.counterInfoType = null
    this.functions = null
    this.counterRefreshMs = options.counterRefreshMs || DEFAULT_GPU_TTL_MS
    this.lastCounterRefreshAt = 0
  }

  /**
   * Load pdh.dll and bind the PDH functions. No-op once bound.
   *
   * @throws {Error} When koffi is unavailable or loading/binding fails.
   */
  init() {
    if (this.functions) return
    if (!this.koffi) {
      throw new Error("koffi unavailable")
    }

    const types = getWindowsPdhTypes(this.koffi)
    this.counterValueType = types.counterValue
    this.counterInfoType = types.counterInfo

    this.library = this.koffi.load("pdh.dll")
    this.functions = {
      openQuery: this.library.func("uint32_t __stdcall PdhOpenQueryW(const char16_t *szDataSource, uintptr_t dwUserData, _Out_ void **phQuery)"),
      addEnglishCounter: this.library.func("uint32_t __stdcall PdhAddEnglishCounterW(void *hQuery, const char16_t *szFullCounterPath, uintptr_t dwUserData, _Out_ void **phCounter)"),
      addCounter: this.library.func("uint32_t __stdcall PdhAddCounterW(void *hQuery, const char16_t *szFullCounterPath, uintptr_t dwUserData, _Out_ void **phCounter)"),
      collectQueryData: this.library.func("uint32_t __stdcall PdhCollectQueryData(void *hQuery)"),
      getCounterInfo: this.library.func("uint32_t __stdcall PdhGetCounterInfoW(void *hCounter, int bRetrieveExplainText, _Inout_ uint32_t *pdwBufferSize, _Out_ void *lpBuffer)"),
      expandWildCardPath: this.library.func("uint32_t __stdcall PdhExpandWildCardPathW(const char16_t *szDataSource, const char16_t *szWildCardPath, _Out_ char16_t *mszExpandedPathList, _Inout_ uint32_t *pcchPathListLength, uint32_t dwFlags)"),
      getFormattedCounterValue: this.library.func("uint32_t __stdcall PdhGetFormattedCounterValueW(void *hCounter, uint32_t dwFormat, _Out_ uint32_t *lpdwType, _Out_ PDH_FMT_COUNTERVALUE *pValue)"),
      closeQuery: this.library.func("uint32_t __stdcall PdhCloseQuery(void *hQuery)")
    }
  }

  /**
   * Open a new PDH query.
   *
   * @returns {*} The query handle.
   * @throws {Error} When PdhOpenQueryW does not report success.
   */
  openQuery() {
    const query = [null]
    const status = this.functions.openQuery(null, 0, query)
    if (!isSuccess(status)) {
      throw new Error(`PdhOpenQueryW failed: 0x${statusCode(status).toString(16)}`)
    }
    return query[0]
  }

  /**
   * Close a PDH query, ignoring errors. No-op for a falsy handle.
   *
   * @param {*} query - Handle returned by openQuery.
   */
  closeQuery(query) {
    if (!query || !this.functions) return
    try {
      this.functions.closeQuery(query)
    } catch (_) {
      // Best-effort cleanup.
    }
  }

  /**
   * Resolve the English wildcard counter path to the path PDH reports for
   * it, using a temporary query that is always closed afterwards.
   *
   * @returns {string} The reported full path, or the English path when PDH
   *   returns no buffer size or no path.
   * @throws {Error} When adding the counter or reading its info fails.
   */
  getLocalizedWildcardPath() {
    const query = this.openQuery()
    const counter = [null]
    try {
      let status = this.functions.addEnglishCounter(query, WINDOWS_GPU_PROCESS_COUNTER, 0, counter)
      if (!isSuccess(status)) {
        throw new Error(`PdhAddEnglishCounterW failed: 0x${statusCode(status).toString(16)}`)
      }

      // First call with a null buffer only asks for the required size.
      const bufferSize = [0]
      status = this.functions.getCounterInfo(counter[0], 0, bufferSize, null)
      if (!isStatus(status, PDH_MORE_DATA) && !isSuccess(status)) {
        throw new Error(`PdhGetCounterInfoW failed: 0x${statusCode(status).toString(16)}`)
      }
      if (bufferSize[0] <= 0) {
        return WINDOWS_GPU_PROCESS_COUNTER
      }

      const buffer = Buffer.alloc(bufferSize[0])
      status = this.functions.getCounterInfo(counter[0], 0, bufferSize, buffer)
      if (!isSuccess(status)) {
        throw new Error(`PdhGetCounterInfoW failed: 0x${statusCode(status).toString(16)}`)
      }

      const info = this.koffi.decode(buffer, this.counterInfoType)
      return info && info.szFullPath ? info.szFullPath : WINDOWS_GPU_PROCESS_COUNTER
    } finally {
      this.closeQuery(query)
    }
  }

  /**
   * Expand a wildcard counter path into concrete counter paths.
   *
   * @param {string} wildcardPath - Counter path containing a wildcard.
   * @returns {string[]} Expanded paths; empty on a "no data" status.
   * @throws {Error} On any other non-success status.
   */
  expandWildcardPath(wildcardPath) {
    // Size probe: null buffer, PDH writes the required character count.
    const charCount = [0]
    let status = this.functions.expandWildCardPath(null, wildcardPath, null, charCount, 0)
    if (isNoDataStatus(status)) {
      return []
    }
    if (!isStatus(status, PDH_MORE_DATA) && !isSuccess(status)) {
      throw new Error(`PdhExpandWildCardPathW failed: 0x${statusCode(status).toString(16)}`)
    }
    if (charCount[0] <= 0) {
      return []
    }

    const buffer = Buffer.alloc(charCount[0] * 2)
    status = this.functions.expandWildCardPath(null, wildcardPath, buffer, charCount, 0)
    if (isNoDataStatus(status)) {
      return []
    }
    if (!isSuccess(status)) {
      throw new Error(`PdhExpandWildCardPathW failed: 0x${statusCode(status).toString(16)}`)
    }
    return decodeWindowsMultiSz(buffer, charCount[0])
  }

  /**
   * Rebuild the query and its per-process counters.
   *
   * Skipped when a query exists and the last refresh is younger than
   * `counterRefreshMs`, unless `force` is set.
   * NOTE(review): with no counters found, `this.query` stays null, so the
   * throttle does not apply and every call re-enumerates — confirm intended.
   *
   * @param {boolean} [force=false] - Refresh regardless of age.
   */
  refreshCounters(force = false) {
    const now = Date.now()
    if (!force && this.query && now - this.lastCounterRefreshAt < this.counterRefreshMs) {
      return
    }

    const paths = this.expandWildcardPath(this.getLocalizedWildcardPath())
    const query = this.openQuery()
    const counters = []
    try {
      for (const counterPath of paths) {
        const pid = extractPidFromWindowsGpuInstance(counterPath)
        if (!pid) continue
        const counter = [null]
        const status = this.functions.addCounter(query, counterPath, 0, counter)
        if (isSuccess(status) && counter[0]) {
          counters.push({ handle: counter[0], pid })
        }
      }
    } catch (error) {
      this.closeQuery(query)
      throw error
    }

    // Swap in the new query, then release whichever handles are now unused.
    const previousQuery = this.query
    this.query = counters.length > 0 ? query : null
    this.counters = counters
    this.lastCounterRefreshAt = now
    if (this.query !== query) {
      this.closeQuery(query)
    }
    this.closeQuery(previousQuery)
  }

  /**
   * Read one counter as a 64-bit integer value.
   *
   * @param {{handle: *, pid: number}} counter - Entry from `this.counters`.
   * @returns {number|null} Result of parseMemoryToBytes on the value, or
   *   null when the call or the value status is not successful.
   */
  readCounterValue(counter) {
    const type = [0]
    const buffer = Buffer.alloc(this.koffi.sizeof(this.counterValueType))
    const status = this.functions.getFormattedCounterValue(counter.handle, PDH_FMT_LARGE, type, buffer)
    if (isNoDataStatus(status)) {
      return null
    }
    if (!isSuccess(status)) {
      return null
    }
    const value = this.koffi.decode(buffer, this.counterValueType)
    if (!value || !isSuccess(value.CStatus)) {
      return null
    }
    return parseMemoryToBytes(value.largeValue)
  }

  /**
   * Sample the counters and build a per-PID map via addGpuProcess.
   *
   * @param {Iterable<*>} [pids] - Restrict output to these PIDs; when no
   *   valid PID is supplied every counter is reported.
   * @returns {Map} Per-process readings; empty when there are no counters
   *   or PDH reports a "no data" status.
   * @throws {Error} When initialization, refresh, or collection fails.
   */
  collect(pids) {
    this.init()
    this.refreshCounters(false)
    if (!this.query || this.counters.length === 0) {
      return new Map()
    }

    const status = this.functions.collectQueryData(this.query)
    if (isNoDataStatus(status)) {
      return new Map()
    }
    if (!isSuccess(status)) {
      throw new Error(`PdhCollectQueryData failed: 0x${statusCode(status).toString(16)}`)
    }

    const targetPids = normalizePidSet(pids)
    const targetSet = targetPids.length > 0 ? new Set(targetPids) : null
    const processes = new Map()
    for (const counter of this.counters) {
      if (!counter || (targetSet && !targetSet.has(counter.pid))) continue
      addGpuProcess(processes, counter.pid, this.readCounterValue(counter))
    }
    return processes
  }

  /** Close the current query and forget its counters. */
  stop() {
    this.closeQuery(this.query)
    this.query = null
    this.counters = []
  }
}
|
|
633
|
+
|
|
634
|
+
/**
 * Reads per-process GPU memory from NVIDIA's NVML shared library via koffi.
 */
class NvmlGpuMemoryClient {
  /**
   * @param {object} [options={}]
   * @param {object} [options.koffi] - koffi instance; defaults to loadKoffi().
   */
  constructor(options = {}) {
    this.koffi = options.koffi || loadKoffi()
    this.library = null
    this.initialized = false
    this.processInfoV1 = null
    this.processInfoV2 = null
    this.functions = null
  }

  /**
   * Load libnvidia-ml, bind the needed entry points and call nvmlInit.
   * No-op once initialized.
   *
   * @throws {Error} When koffi or the library is unavailable, a required
   *   function is missing, or nvmlInit returns a non-success status.
   */
  init() {
    if (this.initialized) return
    if (!this.koffi) {
      throw new Error("koffi unavailable")
    }

    const types = getNvmlTypes(this.koffi)
    this.processInfoV1 = types.processInfoV1
    this.processInfoV2 = types.processInfoV2

    this.library = loadFirstLibrary(this.koffi, [
      process.env.NVIDIA_ML,
      "libnvidia-ml.so.1",
      "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
      "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.1",
      "/usr/lib64/libnvidia-ml.so.1",
      "/usr/local/nvidia/lib64/libnvidia-ml.so.1"
    ])
    // Versioned symbols are tried first, falling back to the unversioned name.
    this.functions = {
      init: optionalFunction(this.library, [
        "int nvmlInit_v2(void)",
        "int nvmlInit(void)"
      ]),
      shutdown: optionalFunction(this.library, [
        "int nvmlShutdown(void)"
      ]),
      getCount: optionalFunction(this.library, [
        "int nvmlDeviceGetCount_v2(_Out_ uint32_t *deviceCount)",
        "int nvmlDeviceGetCount(_Out_ uint32_t *deviceCount)"
      ]),
      getHandleByIndex: optionalFunction(this.library, [
        "int nvmlDeviceGetHandleByIndex_v2(uint32_t index, _Out_ void **device)",
        "int nvmlDeviceGetHandleByIndex(uint32_t index, _Out_ void **device)"
      ]),
      compute: this.pickProcessFunction("nvmlDeviceGetComputeRunningProcesses"),
      graphics: this.pickProcessFunction("nvmlDeviceGetGraphicsRunningProcesses"),
      mps: this.pickProcessFunction("nvmlDeviceGetMPSComputeRunningProcesses")
    }

    if (!this.functions.init || !this.functions.getCount || !this.functions.getHandleByIndex) {
      throw new Error("NVML process API unavailable")
    }
    const status = this.functions.init()
    if (status !== NVML_SUCCESS) {
      throw new Error(`nvmlInit failed: ${status}`)
    }
    this.initialized = true
  }

  /**
   * Bind the newest available variant of a "running processes" function
   * together with the struct layout it is paired with here (_v3/_v2 use the
   * v2 struct, the unversioned symbol uses the v1 struct).
   *
   * @param {string} baseName - Unversioned NVML function name.
   * @returns {{func: Function, type: object}|null} Binding, or null when no
   *   variant could be bound.
   */
  pickProcessFunction(baseName) {
    const candidates = [
      { suffix: "_v3", type: () => this.processInfoV2 },
      { suffix: "_v2", type: () => this.processInfoV2 },
      { suffix: "", type: () => this.processInfoV1 }
    ]
    for (const candidate of candidates) {
      const typeName = candidate.type() === this.processInfoV2 ? "nvmlProcessInfo_v2_t" : "nvmlProcessInfo_v1_t"
      const func = optionalFunction(this.library, [
        `int ${baseName}${candidate.suffix}(void *device, _Inout_ uint32_t *infoCount, _Out_ ${typeName} *infos)`
      ])
      if (func) {
        return { func, type: candidate.type() }
      }
    }
    return null
  }

  /**
   * Enumerate device handles; devices whose handle lookup fails are skipped.
   *
   * @returns {Array<*>} Device handles.
   * @throws {Error} When the device count cannot be read.
   */
  getDeviceHandles() {
    const count = [0]
    const status = this.functions.getCount(count)
    if (status !== NVML_SUCCESS) {
      throw new Error(`nvmlDeviceGetCount failed: ${status}`)
    }
    const handles = []
    for (let i = 0; i < count[0]; i += 1) {
      const handle = [null]
      const handleStatus = this.functions.getHandleByIndex(i, handle)
      if (handleStatus === NVML_SUCCESS && handle[0]) {
        handles.push(handle[0])
      }
    }
    return handles
  }

  /**
   * Fetch one process list for a device.
   *
   * @param {*} device - Device handle.
   * @param {{func: Function, type: object}|null} entry - Binding from
   *   pickProcessFunction.
   * @returns {Array<object>} Decoded process-info structs; empty when the
   *   binding is missing or any call fails.
   */
  collectProcessList(device, entry) {
    if (!entry || !entry.func) return []

    // Size probe with a null buffer.
    let count = [0]
    let status = entry.func(device, count, null)
    if (status === NVML_SUCCESS && count[0] === 0) {
      return []
    }
    if (status !== NVML_SUCCESS && status !== NVML_ERROR_INSUFFICIENT_SIZE) {
      return []
    }

    // Allocate some headroom in case processes start between the two calls;
    // retry once with a larger buffer if the list still does not fit.
    let capacity = Math.max(1, count[0] + 8)
    for (let attempt = 0; attempt < 2; attempt += 1) {
      count = [capacity]
      const buffer = Buffer.alloc(this.koffi.sizeof(entry.type) * capacity)
      status = entry.func(device, count, buffer)
      if (status === NVML_SUCCESS) {
        return this.koffi.decode(buffer, entry.type, Math.min(count[0], capacity))
      }
      if (status !== NVML_ERROR_INSUFFICIENT_SIZE || count[0] <= capacity) {
        return []
      }
      capacity = count[0] + 8
    }
    return []
  }

  /**
   * Collect GPU memory per process across all devices.
   *
   * Within one device, the compute, graphics and MPS lists are merged by
   * taking the largest value per PID; per-device results are then combined
   * via addGpuProcess.
   *
   * @param {Iterable<*>|null} [pids=null] - Restrict output to these PIDs.
   * @returns {Map} Process map filtered through filterProcessMap.
   * @throws {Error} When initialization or device enumeration fails.
   */
  collect(pids = null) {
    this.init()
    const processes = new Map()
    for (const device of this.getDeviceHandles()) {
      const deviceProcesses = new Map()
      for (const entry of [this.functions.compute, this.functions.graphics, this.functions.mps]) {
        for (const processInfo of this.collectProcessList(device, entry)) {
          if (!processInfo) continue
          const pid = normalizePid(processInfo.pid)
          if (!pid) continue
          // Skip entries carrying the all-ones "not available" sentinel.
          if (typeof processInfo.usedGpuMemory === "bigint" && processInfo.usedGpuMemory === NVML_VALUE_NOT_AVAILABLE) {
            continue
          }
          const bytes = parseMemoryToBytes(processInfo.usedGpuMemory)
          mergeGpuProcess(deviceProcesses, pid, bytes)
        }
      }
      for (const entry of deviceProcesses.values()) {
        addGpuProcess(processes, entry.pid, entry.usedGpuMemoryBytes)
      }
    }
    return filterProcessMap(processes, pids)
  }

  /** Call nvmlShutdown (errors ignored) and mark the client uninitialized. */
  stop() {
    if (this.initialized && this.functions && this.functions.shutdown) {
      try {
        this.functions.shutdown()
      } catch (_) {
        // Best-effort shutdown.
      }
    }
    this.initialized = false
  }
}
|
|
125
789
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
790
|
+
/**
 * Reads per-process GPU memory figures from the AMD SMI shared library
 * (`libamd_smi.so`) through koffi.
 *
 * The library is bound lazily by `init()`. `collect()` then enumerates every
 * socket handle, every processor handle under each socket, and the process
 * list reported for each processor.
 */
class AmdSmiGpuMemoryClient {
  /**
   * @param {object} [options]
   * @param {object} [options.koffi] koffi module to use; when omitted the
   *   value returned by `loadKoffi()` is used instead.
   */
  constructor(options = {}) {
    this.koffi = options.koffi || loadKoffi()
    this.library = null
    this.initialized = false
    this.procInfoType = null
    this.functions = null
  }

  /**
   * Loads the library, binds the entry points used by this client and calls
   * `amdsmi_init`. Returns immediately once a previous call has succeeded.
   *
   * @throws {Error} when koffi is unavailable or `amdsmi_init` returns a
   *   non-zero status. Errors raised while loading the library or binding a
   *   function propagate unchanged.
   */
  init() {
    if (this.initialized) return
    if (!this.koffi) {
      throw new Error("koffi unavailable")
    }

    this.procInfoType = getAmdSmiTypes(this.koffi).procInfo

    // An explicit AMD_SMI_LIBRARY path is tried before the ROCm candidates.
    this.library = loadFirstLibrary(this.koffi, [
      process.env.AMD_SMI_LIBRARY,
      ...rocmLibraryCandidates("libamd_smi.so")
    ])
    this.functions = {
      init: this.library.func("int amdsmi_init(uint64_t init_flags)"),
      shutdown: optionalFunction(this.library, [
        "int amdsmi_shut_down(void)"
      ]),
      getSocketHandles: this.library.func("int amdsmi_get_socket_handles(_Inout_ uint32_t *socket_count, _Out_ void **socket_handles)"),
      getProcessorHandles: this.library.func("int amdsmi_get_processor_handles(void *socket_handle, _Inout_ uint32_t *processor_count, _Out_ void **processor_handles)"),
      getProcessList: this.library.func("int amdsmi_get_gpu_process_list(void *processor_handle, _Inout_ uint32_t *max_processes, _Out_ amdsmi_proc_info_t *list)")
    }

    const status = this.functions.init(AMDSMI_INIT_AMD_GPUS)
    if (status !== 0) {
      throw new Error(`amdsmi_init failed: ${status}`)
    }
    this.initialized = true
  }

  /**
   * Runs a two-step handle enumeration: query the element count with a null
   * buffer, then fill a buffer sized for that count.
   *
   * @param {Function} countFunction called as `countFunction(count, buffer)`
   *   where `count` is a one-element array used as an in/out counter and
   *   `buffer` is `null` (size query) or a Buffer (fill). Must return the
   *   library status, with 0 meaning success.
   * @returns {Array} decoded handle values with falsy (null) entries removed;
   *   an empty array when nothing is reported or the fill call fails.
   */
  readPointerArray(countFunction) {
    const count = [0]
    // The status of the size query only matters when it leaves the count at
    // zero, which the count check below already covers.
    countFunction(count, null)
    if (count[0] <= 0) {
      return []
    }

    const capacity = count[0]
    const pointerSize = this.koffi.sizeof("void *")
    const buffer = Buffer.alloc(pointerSize * capacity)
    const status = countFunction(count, buffer)
    if (status !== 0) {
      return []
    }

    // Never decode more slots than were allocated, even if the library
    // raised the counter between the two calls.
    const length = Math.min(count[0], capacity)
    if (length <= 0) {
      return []
    }
    return this.koffi.decode(buffer, "uintptr_t", length).filter(Boolean)
  }

  /**
   * Collects the processor handles of every socket reported by the library.
   *
   * @returns {Array} processor handle values in socket order.
   */
  getProcessorHandles() {
    const sockets = this.readPointerArray((count, buffer) => {
      return this.functions.getSocketHandles(count, buffer)
    })
    const processors = []
    for (const socket of sockets) {
      processors.push(...this.readPointerArray((count, buffer) => {
        return this.functions.getProcessorHandles(socket, count, buffer)
      }))
    }
    return processors
  }

  /**
   * Fetches the decoded process-info records for one processor.
   *
   * After the size query the fill call is attempted at most twice: when it
   * fails but reports a larger count, the buffer is grown to that count and
   * the call repeated once.
   *
   * @param {*} processor processor handle obtained from `getProcessorHandles()`.
   * @returns {Array} decoded records of type `this.procInfoType`; empty on failure.
   */
  collectProcessorProcesses(processor) {
    let count = [0]
    let status = this.functions.getProcessList(processor, count, null)
    if (count[0] <= 0) {
      return []
    }

    let capacity = count[0]
    for (let attempt = 0; attempt < 2; attempt += 1) {
      count = [capacity]
      const buffer = Buffer.alloc(this.koffi.sizeof(this.procInfoType) * capacity)
      status = this.functions.getProcessList(processor, count, buffer)
      if (status === 0) {
        const length = Math.min(count[0], capacity)
        if (length <= 0) {
          return []
        }
        return this.koffi.decode(buffer, this.procInfoType, length)
      }
      if (count[0] <= capacity) {
        // Failure was not caused by a too-small buffer; retrying cannot help.
        return []
      }
      capacity = count[0]
    }
    return []
  }

  /**
   * Gathers GPU memory usage for the processes known to AMD SMI.
   *
   * @param {*} [pids=null] forwarded to `filterProcessMap` (presumably the
   *   set of PIDs to keep — confirm against that helper).
   * @returns {*} the result of `filterProcessMap(processes, pids)`.
   * @throws {Error} whatever `init()` throws.
   */
  collect(pids = null) {
    this.init()
    const processes = new Map()
    for (const processor of this.getProcessorHandles()) {
      for (const entry of this.collectProcessorProcesses(processor)) {
        if (!entry) continue
        const bytes = parseMemoryToBytes(entry.memory_usage && entry.memory_usage.vram_mem)
        addGpuProcess(processes, entry.pid, bytes)
      }
    }
    return filterProcessMap(processes, pids)
  }

  /**
   * Calls the optional `amdsmi_shut_down` binding when the client is
   * initialized, ignoring any error, and marks the client uninitialized.
   */
  stop() {
    if (this.initialized && this.functions && this.functions.shutdown) {
      try {
        this.functions.shutdown()
      } catch (_) {
        // Best-effort teardown: a failing shutdown must not propagate.
      }
    }
    this.initialized = false
  }
}
|
|
154
907
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
908
|
+
/**
 * Reads per-process GPU memory figures from the ROCm SMI shared library
 * (`librocm_smi64.so`) through koffi.
 *
 * The library is bound lazily by `init()`; `collect()` asks
 * `rsmi_compute_process_info_get` for the process list in two steps
 * (size query, then fill).
 */
class RocmSmiGpuMemoryClient {
  /**
   * @param {object} [options]
   * @param {object} [options.koffi] koffi module to use; when omitted the
   *   value returned by `loadKoffi()` is used instead.
   */
  constructor(options = {}) {
    this.koffi = options.koffi || loadKoffi()
    this.library = null
    this.initialized = false
    this.procInfoType = null
    this.functions = null
  }

  /**
   * Loads the library, binds the entry points used by this client and calls
   * `rsmi_init`. Returns immediately once a previous call has succeeded.
   *
   * @throws {Error} when koffi is unavailable or `rsmi_init` returns a
   *   non-zero status. Errors raised while loading the library or binding a
   *   function propagate unchanged.
   */
  init() {
    if (this.initialized) return
    if (!this.koffi) {
      throw new Error("koffi unavailable")
    }

    this.procInfoType = getRocmSmiTypes(this.koffi).procInfo

    // An explicit ROCM_SMI_LIBRARY path is tried before the ROCm candidates.
    this.library = loadFirstLibrary(this.koffi, [
      process.env.ROCM_SMI_LIBRARY,
      ...rocmLibraryCandidates("librocm_smi64.so")
    ])
    this.functions = {
      init: this.library.func("int rsmi_init(uint64_t init_flags)"),
      shutdown: optionalFunction(this.library, [
        "int rsmi_shut_down(void)"
      ]),
      getProcessInfo: this.library.func("int rsmi_compute_process_info_get(_Out_ rsmi_process_info_t *procs, _Inout_ uint32_t *num_items)")
    }

    const status = this.functions.init(RSMI_INIT_DEFAULT)
    if (status !== 0) {
      throw new Error(`rsmi_init failed: ${status}`)
    }
    this.initialized = true
  }

  /**
   * Gathers GPU memory usage for the processes known to ROCm SMI.
   *
   * @param {*} [pids=null] forwarded to `filterProcessMap` (presumably the
   *   set of PIDs to keep — confirm against that helper).
   * @returns {*} an empty Map when nothing is reported or the fill call
   *   fails, otherwise the result of `filterProcessMap(processes, pids)`.
   * @throws {Error} whatever `init()` throws.
   */
  collect(pids = null) {
    this.init()
    const count = [0]
    // The status of the size query only matters when it leaves the count at
    // zero, which the count check below already covers.
    this.functions.getProcessInfo(null, count)
    if (count[0] <= 0) {
      return new Map()
    }

    const capacity = count[0]
    const buffer = Buffer.alloc(this.koffi.sizeof(this.procInfoType) * capacity)
    const status = this.functions.getProcessInfo(buffer, count)
    if (status !== 0) {
      return new Map()
    }

    // Never decode more records than were allocated, even if the library
    // raised the counter between the two calls.
    const length = Math.min(count[0], capacity)
    if (length <= 0) {
      return new Map()
    }

    const processes = new Map()
    for (const entry of this.koffi.decode(buffer, this.procInfoType, length)) {
      if (!entry) continue
      addGpuProcess(processes, entry.process_id, parseMemoryToBytes(entry.vram_usage))
    }
    return filterProcessMap(processes, pids)
  }

  /**
   * Calls the optional `rsmi_shut_down` binding when the client is
   * initialized, ignoring any error, and marks the client uninitialized.
   */
  stop() {
    if (this.initialized && this.functions && this.functions.shutdown) {
      try {
        this.functions.shutdown()
      } catch (_) {
        // Best-effort teardown: a failing shutdown must not propagate.
      }
    }
    this.initialized = false
  }
}
|
|
159
978
|
|
|
160
979
|
class GpuSampler {
|
|
161
980
|
constructor(options = {}) {
|
|
162
981
|
this.kernel = options.kernel || null
|
|
982
|
+
this.platform = options.platform || (this.kernel && this.kernel.platform) || os.platform()
|
|
163
983
|
this.ttlMs = options.ttlMs || DEFAULT_GPU_TTL_MS
|
|
164
|
-
this.
|
|
984
|
+
this.procRoot = options.procRoot || "/proc"
|
|
985
|
+
this.drmFdinfoMaxPids = options.drmFdinfoMaxPids || DEFAULT_DRM_FDINFO_MAX_PIDS
|
|
986
|
+
this.drmFdinfoMaxFdsPerPid = options.drmFdinfoMaxFdsPerPid || DEFAULT_DRM_FDINFO_MAX_FDS_PER_PID
|
|
987
|
+
this.windowsPdhClient = options.windowsPdhClient || null
|
|
988
|
+
this.nvmlClient = options.nvmlClient || null
|
|
989
|
+
this.amdSmiClient = options.amdSmiClient || null
|
|
990
|
+
this.rocmSmiClient = options.rocmSmiClient || null
|
|
165
991
|
this.current = null
|
|
992
|
+
this.currentCacheKey = null
|
|
166
993
|
this.inFlight = null
|
|
994
|
+
this.inFlightCacheKey = null
|
|
167
995
|
this.providerBackoff = new Map()
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
nvidiaCandidates() {
|
|
171
|
-
const platform = os.platform()
|
|
172
|
-
const candidates = [
|
|
173
|
-
process.env.NVIDIA_SMI,
|
|
174
|
-
"nvidia-smi",
|
|
175
|
-
...getPinokioCondaCandidates(this.kernel, ["nvidia-smi"])
|
|
176
|
-
]
|
|
177
|
-
if (platform === "win32") {
|
|
178
|
-
candidates.push(
|
|
179
|
-
"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe",
|
|
180
|
-
"C:\\Windows\\System32\\nvidia-smi.exe"
|
|
181
|
-
)
|
|
182
|
-
} else if (platform === "linux") {
|
|
183
|
-
candidates.push(
|
|
184
|
-
"/usr/bin/nvidia-smi",
|
|
185
|
-
"/usr/local/bin/nvidia-smi",
|
|
186
|
-
"/usr/local/nvidia/bin/nvidia-smi",
|
|
187
|
-
"/usr/local/cuda/bin/nvidia-smi"
|
|
188
|
-
)
|
|
189
|
-
}
|
|
190
|
-
return executableCandidates(candidates)
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
amdCandidates() {
|
|
194
|
-
const candidates = [
|
|
195
|
-
process.env.AMD_SMI,
|
|
196
|
-
"amd-smi",
|
|
197
|
-
...getPinokioCondaCandidates(this.kernel, ["amd-smi"])
|
|
198
|
-
]
|
|
199
|
-
if (os.platform() === "linux") {
|
|
200
|
-
candidates.push("/opt/rocm/bin/amd-smi", "/usr/bin/amd-smi", "/usr/local/bin/amd-smi")
|
|
201
|
-
}
|
|
202
|
-
return executableCandidates(candidates)
|
|
996
|
+
this.providerLogBackoff = new Map()
|
|
203
997
|
}
|
|
204
998
|
|
|
205
999
|
isBackedOff(provider) {
|
|
@@ -211,75 +1005,161 @@ class GpuSampler {
|
|
|
211
1005
|
this.providerBackoff.set(provider, Date.now() + ms)
|
|
212
1006
|
}
|
|
213
1007
|
|
|
214
|
-
|
|
215
|
-
|
|
1008
|
+
/**
 * Emits a rate-limited warning describing a provider failure.
 *
 * At most one warning per provider is written every `ms` milliseconds;
 * calls made before that deadline return without logging.
 *
 * @param {string} provider provider identifier used as the rate-limit key.
 * @param {*} error the failure; its `message` and the first truthy of
 *   `code` / `errno` / `status` are included when present.
 * @param {*} pids passed to `normalizePidSet` to report how many PIDs were targeted.
 * @param {string} [fallbackMessage] text used when `error` has no message.
 * @param {number} [ms] minimum interval between two warnings for `provider`.
 */
logProviderFailure(provider, error, pids, fallbackMessage = "GPU provider unavailable", ms = 60000) {
  const now = Date.now()
  const mutedUntil = this.providerLogBackoff.get(provider) || 0
  if (now < mutedUntil) {
    return
  }
  this.providerLogBackoff.set(provider, now + ms)

  const summary = { provider, platform: this.platform }
  summary.pid_count = normalizePidSet(pids).length
  summary.error = error && error.message ? error.message : fallbackMessage

  const code = error ? (error.code || error.errno || error.status) : error
  if (code !== null && code !== undefined) {
    summary.code = String(code)
  }

  try {
    console.warn("[resource-usage:gpu] provider failed", summary)
  } catch (_) {
    // Logging must never turn into a failure of its own.
  }
}
|
|
1028
|
+
|
|
1029
|
+
getWindowsPdhClient() {
|
|
1030
|
+
if (!this.windowsPdhClient) {
|
|
1031
|
+
this.windowsPdhClient = new WindowsPdhGpuMemoryClient()
|
|
1032
|
+
}
|
|
1033
|
+
return this.windowsPdhClient
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
getNvmlClient() {
|
|
1037
|
+
if (!this.nvmlClient) {
|
|
1038
|
+
this.nvmlClient = new NvmlGpuMemoryClient()
|
|
1039
|
+
}
|
|
1040
|
+
return this.nvmlClient
|
|
1041
|
+
}
|
|
1042
|
+
|
|
1043
|
+
getAmdSmiClient() {
|
|
1044
|
+
if (!this.amdSmiClient) {
|
|
1045
|
+
this.amdSmiClient = new AmdSmiGpuMemoryClient()
|
|
1046
|
+
}
|
|
1047
|
+
return this.amdSmiClient
|
|
1048
|
+
}
|
|
1049
|
+
|
|
1050
|
+
getRocmSmiClient() {
|
|
1051
|
+
if (!this.rocmSmiClient) {
|
|
1052
|
+
this.rocmSmiClient = new RocmSmiGpuMemoryClient()
|
|
1053
|
+
}
|
|
1054
|
+
return this.rocmSmiClient
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
async collectWindowsPdh(pids) {
|
|
1058
|
+
if (this.platform !== "win32" || this.isBackedOff("windows-pdh")) {
|
|
216
1059
|
return null
|
|
217
1060
|
}
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
} catch (error) {
|
|
232
|
-
lastError = error
|
|
233
|
-
if (error && error.code === "ENOENT") {
|
|
234
|
-
continue
|
|
235
|
-
}
|
|
236
|
-
break
|
|
1061
|
+
try {
|
|
1062
|
+
return {
|
|
1063
|
+
provider: "windows-pdh",
|
|
1064
|
+
processes: this.getWindowsPdhClient().collect(pids),
|
|
1065
|
+
error: null
|
|
1066
|
+
}
|
|
1067
|
+
} catch (error) {
|
|
1068
|
+
this.logProviderFailure("windows-pdh", error, pids, "Windows PDH unavailable")
|
|
1069
|
+
this.backoff("windows-pdh", 60000)
|
|
1070
|
+
return {
|
|
1071
|
+
provider: "windows-pdh",
|
|
1072
|
+
processes: new Map(),
|
|
1073
|
+
error: error && error.message ? error.message : "Windows PDH unavailable"
|
|
237
1074
|
}
|
|
238
1075
|
}
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
1076
|
+
}
|
|
1077
|
+
|
|
1078
|
+
async collectLinuxDrmFdinfo(pids) {
|
|
1079
|
+
if (this.platform !== "linux" || this.isBackedOff("linux-drm-fdinfo") || pids == null) {
|
|
1080
|
+
return null
|
|
1081
|
+
}
|
|
1082
|
+
const targetPids = normalizePidSet(pids)
|
|
1083
|
+
if (targetPids.length === 0) {
|
|
1084
|
+
return null
|
|
1085
|
+
}
|
|
1086
|
+
try {
|
|
1087
|
+
const processes = await collectLinuxDrmFdinfoProcesses(targetPids, {
|
|
1088
|
+
procRoot: this.procRoot,
|
|
1089
|
+
maxPids: this.drmFdinfoMaxPids,
|
|
1090
|
+
maxFdsPerPid: this.drmFdinfoMaxFdsPerPid
|
|
1091
|
+
})
|
|
1092
|
+
if (processes.size === 0) {
|
|
1093
|
+
return null
|
|
1094
|
+
}
|
|
1095
|
+
return {
|
|
1096
|
+
provider: "linux-drm-fdinfo",
|
|
1097
|
+
processes,
|
|
1098
|
+
error: null
|
|
1099
|
+
}
|
|
1100
|
+
} catch (error) {
|
|
1101
|
+
this.logProviderFailure("linux-drm-fdinfo", error, pids, "Linux DRM fdinfo unavailable")
|
|
1102
|
+
this.backoff("linux-drm-fdinfo", 60000)
|
|
1103
|
+
return {
|
|
1104
|
+
provider: "linux-drm-fdinfo",
|
|
1105
|
+
processes: new Map(),
|
|
1106
|
+
error: error && error.message ? error.message : "Linux DRM fdinfo unavailable"
|
|
1107
|
+
}
|
|
244
1108
|
}
|
|
245
1109
|
}
|
|
246
1110
|
|
|
247
|
-
async
|
|
248
|
-
if (
|
|
1111
|
+
async collectNvml(pids) {
|
|
1112
|
+
if (this.platform !== "linux" || this.isBackedOff("linux-nvml")) {
|
|
249
1113
|
return null
|
|
250
1114
|
}
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
provider: "amd-smi",
|
|
257
|
-
processes: parseAmdJson(stdout),
|
|
258
|
-
error: null
|
|
259
|
-
}
|
|
260
|
-
} catch (error) {
|
|
261
|
-
lastError = error
|
|
262
|
-
if (error && error.code === "ENOENT") {
|
|
263
|
-
continue
|
|
264
|
-
}
|
|
265
|
-
break
|
|
1115
|
+
try {
|
|
1116
|
+
return {
|
|
1117
|
+
provider: "linux-nvml",
|
|
1118
|
+
processes: this.getNvmlClient().collect(pids),
|
|
1119
|
+
error: null
|
|
266
1120
|
}
|
|
1121
|
+
} catch (error) {
|
|
1122
|
+
this.logProviderFailure("linux-nvml", error, pids, "Linux NVML unavailable")
|
|
1123
|
+
this.backoff("linux-nvml", 60000)
|
|
1124
|
+
return null
|
|
267
1125
|
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
async collectAmdSmi(pids) {
|
|
1129
|
+
if (this.platform !== "linux" || this.isBackedOff("linux-amdsmi")) {
|
|
1130
|
+
return null
|
|
1131
|
+
}
|
|
1132
|
+
try {
|
|
1133
|
+
return {
|
|
1134
|
+
provider: "linux-amdsmi",
|
|
1135
|
+
processes: this.getAmdSmiClient().collect(pids),
|
|
1136
|
+
error: null
|
|
1137
|
+
}
|
|
1138
|
+
} catch (error) {
|
|
1139
|
+
this.logProviderFailure("linux-amdsmi", error, pids, "Linux AMD SMI unavailable")
|
|
1140
|
+
this.backoff("linux-amdsmi", 60000)
|
|
1141
|
+
return null
|
|
273
1142
|
}
|
|
274
1143
|
}
|
|
275
1144
|
|
|
276
|
-
async
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
1145
|
+
async collectRocmSmi(pids) {
|
|
1146
|
+
if (this.platform !== "linux" || this.isBackedOff("linux-rocm-smi")) {
|
|
1147
|
+
return null
|
|
1148
|
+
}
|
|
1149
|
+
try {
|
|
1150
|
+
return {
|
|
1151
|
+
provider: "linux-rocm-smi",
|
|
1152
|
+
processes: this.getRocmSmiClient().collect(pids),
|
|
1153
|
+
error: null
|
|
1154
|
+
}
|
|
1155
|
+
} catch (error) {
|
|
1156
|
+
this.logProviderFailure("linux-rocm-smi", error, pids, "Linux ROCm SMI unavailable")
|
|
1157
|
+
this.backoff("linux-rocm-smi", 60000)
|
|
1158
|
+
return null
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
282
1161
|
|
|
1162
|
+
mergeResults(results) {
|
|
283
1163
|
const processes = new Map()
|
|
284
1164
|
const providers = []
|
|
285
1165
|
const errors = []
|
|
@@ -288,9 +1168,56 @@ class GpuSampler {
|
|
|
288
1168
|
if (result.provider) providers.push(result.provider)
|
|
289
1169
|
if (result.error) errors.push({ provider: result.provider, error: result.error })
|
|
290
1170
|
for (const entry of result.processes.values()) {
|
|
291
|
-
|
|
1171
|
+
mergeGpuProcess(processes, entry.pid, entry.usedGpuMemoryBytes)
|
|
292
1172
|
}
|
|
293
1173
|
}
|
|
1174
|
+
return { processes, providers, errors }
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
async collect(pids = null) {
|
|
1178
|
+
if (this.platform === "darwin") {
|
|
1179
|
+
return {
|
|
1180
|
+
available: false,
|
|
1181
|
+
stale: false,
|
|
1182
|
+
collectedAt: Date.now(),
|
|
1183
|
+
providers: [],
|
|
1184
|
+
processes: new Map(),
|
|
1185
|
+
errors: []
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
const results = []
|
|
1190
|
+
if (this.platform === "win32") {
|
|
1191
|
+
const windowsPdh = await this.collectWindowsPdh(pids)
|
|
1192
|
+
if (windowsPdh) results.push(windowsPdh)
|
|
1193
|
+
} else if (this.platform === "linux") {
|
|
1194
|
+
const linuxDrmFdinfo = await this.collectLinuxDrmFdinfo(pids)
|
|
1195
|
+
if (linuxDrmFdinfo) results.push(linuxDrmFdinfo)
|
|
1196
|
+
|
|
1197
|
+
let merged = this.mergeResults(results)
|
|
1198
|
+
const covered = coveredPids(merged.processes)
|
|
1199
|
+
|
|
1200
|
+
if (hasUncoveredTarget(pids, covered)) {
|
|
1201
|
+
const nvml = await this.collectNvml(pids)
|
|
1202
|
+
if (nvml) results.push(nvml)
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
merged = this.mergeResults(results)
|
|
1206
|
+
const afterNvmlCovered = coveredPids(merged.processes)
|
|
1207
|
+
if (hasUncoveredTarget(pids, afterNvmlCovered)) {
|
|
1208
|
+
const amdSmi = await this.collectAmdSmi(pids)
|
|
1209
|
+
if (amdSmi) results.push(amdSmi)
|
|
1210
|
+
}
|
|
1211
|
+
|
|
1212
|
+
merged = this.mergeResults(results)
|
|
1213
|
+
const afterAmdCovered = coveredPids(merged.processes)
|
|
1214
|
+
if (hasUncoveredTarget(pids, afterAmdCovered)) {
|
|
1215
|
+
const rocmSmi = await this.collectRocmSmi(pids)
|
|
1216
|
+
if (rocmSmi) results.push(rocmSmi)
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
const { processes, providers, errors } = this.mergeResults(results)
|
|
294
1221
|
return {
|
|
295
1222
|
available: providers.length > 0 && errors.length < providers.length,
|
|
296
1223
|
stale: false,
|
|
@@ -301,18 +1228,22 @@ class GpuSampler {
|
|
|
301
1228
|
}
|
|
302
1229
|
}
|
|
303
1230
|
|
|
304
|
-
async getSnapshot() {
|
|
1231
|
+
async getSnapshot(pids = null) {
|
|
305
1232
|
const now = Date.now()
|
|
306
|
-
|
|
1233
|
+
const cacheKey = this.platform === "darwin" ? "" : normalizePidSet(pids).join(",")
|
|
1234
|
+
if (this.current && this.currentCacheKey === cacheKey && now - this.current.collectedAt < this.ttlMs) {
|
|
307
1235
|
return this.current
|
|
308
1236
|
}
|
|
309
|
-
if (this.inFlight) {
|
|
1237
|
+
if (this.inFlight && this.inFlightCacheKey === cacheKey) {
|
|
310
1238
|
return this.inFlight
|
|
311
1239
|
}
|
|
312
|
-
this.
|
|
1240
|
+
this.inFlightCacheKey = cacheKey
|
|
1241
|
+
this.inFlight = this.collect(pids).then((snapshot) => {
|
|
313
1242
|
this.current = snapshot
|
|
1243
|
+
this.currentCacheKey = cacheKey
|
|
314
1244
|
return snapshot
|
|
315
1245
|
}).catch((error) => {
|
|
1246
|
+
this.logProviderFailure("gpu", error, pids, "GPU sampling unavailable")
|
|
316
1247
|
if (this.current) {
|
|
317
1248
|
return { ...this.current, stale: true }
|
|
318
1249
|
}
|
|
@@ -326,9 +1257,18 @@ class GpuSampler {
|
|
|
326
1257
|
}
|
|
327
1258
|
}).finally(() => {
|
|
328
1259
|
this.inFlight = null
|
|
1260
|
+
this.inFlightCacheKey = null
|
|
329
1261
|
})
|
|
330
1262
|
return this.inFlight
|
|
331
1263
|
}
|
|
1264
|
+
|
|
1265
|
+
stop() {
|
|
1266
|
+
for (const client of [this.windowsPdhClient, this.nvmlClient, this.amdSmiClient, this.rocmSmiClient]) {
|
|
1267
|
+
if (client && typeof client.stop === "function") {
|
|
1268
|
+
client.stop()
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
}
|
|
332
1272
|
}
|
|
333
1273
|
|
|
334
1274
|
function sumGpuMemory(snapshot, pids) {
|
|
@@ -344,6 +1284,15 @@ function sumGpuMemory(snapshot, pids) {
|
|
|
344
1284
|
|
|
345
1285
|
module.exports = {
|
|
346
1286
|
GpuSampler,
|
|
1287
|
+
WindowsPdhGpuMemoryClient,
|
|
1288
|
+
NvmlGpuMemoryClient,
|
|
1289
|
+
AmdSmiGpuMemoryClient,
|
|
1290
|
+
RocmSmiGpuMemoryClient,
|
|
347
1291
|
parseMemoryToBytes,
|
|
1292
|
+
decodeWindowsMultiSz,
|
|
1293
|
+
extractPidFromWindowsGpuInstance,
|
|
1294
|
+
collectLinuxDrmFdinfoProcesses,
|
|
1295
|
+
isDedicatedDrmMemoryRegion,
|
|
1296
|
+
parseLinuxDrmFdinfo,
|
|
348
1297
|
sumGpuMemory
|
|
349
1298
|
}
|