@icyfenix-dmla/cli 2026.4.19-947 → 2026.4.21-2145
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/manage.js +6 -70
- package/src/commands/server.js +10 -10
- package/src/index.js +5 -17
- package/src/server/routes/sandbox.js +38 -1
- package/src/server/sandbox.js +158 -0
package/package.json
CHANGED
package/src/commands/manage.js
CHANGED
|
@@ -66,7 +66,7 @@ export async function installImages(types, registry = 'dockerhub') {
|
|
|
66
66
|
|
|
67
67
|
console.log()
|
|
68
68
|
console.log(chalk.green('🎉 镜像安装完成'))
|
|
69
|
-
console.log(chalk.yellow('
|
|
69
|
+
console.log(chalk.yellow('提示: 运行 dmla start 启动服务'))
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
/**
|
|
@@ -105,70 +105,6 @@ async function pullImageWithProgress(imageName) {
|
|
|
105
105
|
})
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
-
/**
|
|
109
|
-
* 更新所有组件
|
|
110
|
-
*/
|
|
111
|
-
export async function updateAll(registry = 'dockerhub') {
|
|
112
|
-
console.log()
|
|
113
|
-
|
|
114
|
-
// 更新 npm 包
|
|
115
|
-
console.log(chalk.bold('📦 更新 npm 包'))
|
|
116
|
-
try {
|
|
117
|
-
console.log(chalk.gray(' 执行 npm update -g @icyfenix-dmla/cli...'))
|
|
118
|
-
execSync('npm update -g @icyfenix-dmla/cli', { stdio: 'inherit' })
|
|
119
|
-
console.log(chalk.green('✅ npm 包已更新'))
|
|
120
|
-
} catch (error) {
|
|
121
|
-
console.log(chalk.yellow('⚠️ npm 包更新失败或已是最新版本'))
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
console.log()
|
|
125
|
-
|
|
126
|
-
// 检查并更新镜像
|
|
127
|
-
console.log(chalk.bold('🖼️ 检查 Docker 镜像更新'))
|
|
128
|
-
const registryUrl = getRegistryUrl(registry)
|
|
129
|
-
|
|
130
|
-
for (const type of ['cpu', 'gpu']) {
|
|
131
|
-
const remoteImage = `${registryUrl}:${type}`
|
|
132
|
-
const localImage = type === 'gpu' ? CONFIG.imageGpu : CONFIG.imageCpu
|
|
133
|
-
|
|
134
|
-
console.log(chalk.gray(` 检查 ${type.toUpperCase()} 版本...`))
|
|
135
|
-
|
|
136
|
-
try {
|
|
137
|
-
// 检查本地镜像是否存在
|
|
138
|
-
let localImageInfo = null
|
|
139
|
-
try {
|
|
140
|
-
localImageInfo = await docker.getImage(localImage).inspect()
|
|
141
|
-
} catch {
|
|
142
|
-
// 本地镜像不存在,需要拉取
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
// 拉取最新镜像
|
|
146
|
-
console.log(chalk.gray(` 拉取最新 ${type.toUpperCase()} 镜像...`))
|
|
147
|
-
await pullImageWithProgress(remoteImage)
|
|
148
|
-
|
|
149
|
-
// 获取拉取的镜像信息
|
|
150
|
-
const remoteImageInfo = await docker.getImage(remoteImage).inspect()
|
|
151
|
-
|
|
152
|
-
// 比较镜像 ID
|
|
153
|
-
if (localImageInfo && localImageInfo.Id === remoteImageInfo.Id) {
|
|
154
|
-
console.log(chalk.green(`✅ ${type.toUpperCase()} 镜像已是最新版本`))
|
|
155
|
-
} else {
|
|
156
|
-
// Tag 为本地名称
|
|
157
|
-
console.log(chalk.gray(` 重命名为 ${localImage}...`))
|
|
158
|
-
const image = docker.getImage(remoteImage)
|
|
159
|
-
await image.tag({ repo: CONFIG.imageName, tag: type })
|
|
160
|
-
|
|
161
|
-
console.log(chalk.green(`✅ ${type.toUpperCase()} 镜像已更新`))
|
|
162
|
-
}
|
|
163
|
-
} catch (error) {
|
|
164
|
-
console.log(chalk.yellow(`⚠️ ${type.toUpperCase()} 镜像更新失败: ${error.message}`))
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
console.log()
|
|
169
|
-
console.log(chalk.green('🎉 更新完成'))
|
|
170
|
-
}
|
|
171
|
-
|
|
172
108
|
/**
|
|
173
109
|
* 环境诊断
|
|
174
110
|
*/
|
|
@@ -201,7 +137,7 @@ export async function runDoctor() {
|
|
|
201
137
|
// ───────────────────────────────────────────────────────────
|
|
202
138
|
// 镜像检查
|
|
203
139
|
// ───────────────────────────────────────────────────────────
|
|
204
|
-
console.log(chalk.bold('
|
|
140
|
+
console.log(chalk.bold('Docker 镜像'))
|
|
205
141
|
|
|
206
142
|
const cpuImage = CONFIG.imageCpu
|
|
207
143
|
const gpuImage = CONFIG.imageGpu
|
|
@@ -234,7 +170,7 @@ export async function runDoctor() {
|
|
|
234
170
|
// ───────────────────────────────────────────────────────────
|
|
235
171
|
// GPU 检查
|
|
236
172
|
// ───────────────────────────────────────────────────────────
|
|
237
|
-
console.log(chalk.bold('
|
|
173
|
+
console.log(chalk.bold('GPU 驱动'))
|
|
238
174
|
|
|
239
175
|
try {
|
|
240
176
|
const output = execSync('nvidia-smi -L', { timeout: 5000, encoding: 'utf8' })
|
|
@@ -261,7 +197,7 @@ export async function runDoctor() {
|
|
|
261
197
|
// ───────────────────────────────────────────────────────────
|
|
262
198
|
// 端口检查
|
|
263
199
|
// ───────────────────────────────────────────────────────────
|
|
264
|
-
console.log(chalk.bold('
|
|
200
|
+
console.log(chalk.bold('端口可用性'))
|
|
265
201
|
|
|
266
202
|
const port = CONFIG.defaultPort
|
|
267
203
|
const portAvailable = await checkPortAvailable(port)
|
|
@@ -310,11 +246,11 @@ export async function runDoctor() {
|
|
|
310
246
|
console.log(chalk.red(` ${i + 1}. ${issue}`))
|
|
311
247
|
})
|
|
312
248
|
console.log()
|
|
313
|
-
console.log(chalk.yellow('
|
|
249
|
+
console.log(chalk.yellow('请根据上述提示解决问题后再次运行 dmla doctor'))
|
|
314
250
|
} else {
|
|
315
251
|
console.log(chalk.bold.green('✅ 所有检查通过,环境正常'))
|
|
316
252
|
console.log()
|
|
317
|
-
console.log(chalk.gray('
|
|
253
|
+
console.log(chalk.gray('运行 dmla start 启动服务'))
|
|
318
254
|
}
|
|
319
255
|
}
|
|
320
256
|
|
package/src/commands/server.js
CHANGED
|
@@ -198,7 +198,7 @@ export async function startServerSync(port, useGpu = false) {
|
|
|
198
198
|
const portAvailable = await checkPortAvailable(port)
|
|
199
199
|
if (!portAvailable) {
|
|
200
200
|
console.log(chalk.red(`❌ 端口 ${port} 已被占用`))
|
|
201
|
-
console.log(chalk.yellow('
|
|
201
|
+
console.log(chalk.yellow('提示: 使用 --port 选项指定其他端口'))
|
|
202
202
|
return
|
|
203
203
|
}
|
|
204
204
|
|
|
@@ -206,7 +206,7 @@ export async function startServerSync(port, useGpu = false) {
|
|
|
206
206
|
const imageResolution = await resolveImageType(useGpu)
|
|
207
207
|
if (!imageResolution.imageType) {
|
|
208
208
|
console.log(chalk.red('❌ 无可用镜像'))
|
|
209
|
-
console.log(chalk.yellow('
|
|
209
|
+
console.log(chalk.yellow('提示: 运行 dmla install 安装镜像'))
|
|
210
210
|
return
|
|
211
211
|
}
|
|
212
212
|
const resolvedUseGpu = imageResolution.imageType === 'gpu'
|
|
@@ -222,7 +222,7 @@ export async function startServerSync(port, useGpu = false) {
|
|
|
222
222
|
const actualServerPath = findServerPath()
|
|
223
223
|
if (!actualServerPath) {
|
|
224
224
|
console.log(chalk.red('❌ 找不到服务入口文件'))
|
|
225
|
-
console.log(chalk.yellow('
|
|
225
|
+
console.log(chalk.yellow('提示: 确保正确安装了 @icyfenix-dmla/cli'))
|
|
226
226
|
return
|
|
227
227
|
}
|
|
228
228
|
|
|
@@ -256,7 +256,7 @@ export async function startServer(port, useGpu = false) {
|
|
|
256
256
|
const portAvailable = await checkPortAvailable(port)
|
|
257
257
|
if (!portAvailable) {
|
|
258
258
|
console.log(chalk.red(`❌ 端口 ${port} 已被占用`))
|
|
259
|
-
console.log(chalk.yellow('
|
|
259
|
+
console.log(chalk.yellow('提示: 使用 --port 选项指定其他端口'))
|
|
260
260
|
return
|
|
261
261
|
}
|
|
262
262
|
|
|
@@ -264,7 +264,7 @@ export async function startServer(port, useGpu = false) {
|
|
|
264
264
|
const imageResolution = await resolveImageType(useGpu)
|
|
265
265
|
if (!imageResolution.imageType) {
|
|
266
266
|
console.log(chalk.red('❌ 无可用镜像'))
|
|
267
|
-
console.log(chalk.yellow('
|
|
267
|
+
console.log(chalk.yellow('提示: 运行 dmla install 安装镜像'))
|
|
268
268
|
return
|
|
269
269
|
}
|
|
270
270
|
const resolvedUseGpu = imageResolution.imageType === 'gpu'
|
|
@@ -285,7 +285,7 @@ export async function startServer(port, useGpu = false) {
|
|
|
285
285
|
|
|
286
286
|
if (!actualServerPath) {
|
|
287
287
|
console.log(chalk.red('❌ 找不到服务入口文件'))
|
|
288
|
-
console.log(chalk.yellow('
|
|
288
|
+
console.log(chalk.yellow('提示: 确保正确安装了 @icyfenix-dmla/cli'))
|
|
289
289
|
return
|
|
290
290
|
}
|
|
291
291
|
|
|
@@ -445,7 +445,7 @@ export async function getStatus() {
|
|
|
445
445
|
console.log()
|
|
446
446
|
|
|
447
447
|
// 检查 npm 包版本
|
|
448
|
-
console.log(chalk.bold('
|
|
448
|
+
console.log(chalk.bold('npm 包版本'))
|
|
449
449
|
try {
|
|
450
450
|
// __dirname 是 src/commands,需要向上两级到包根目录
|
|
451
451
|
const pkgPath = path.resolve(__dirname, '../../package.json')
|
|
@@ -458,7 +458,7 @@ export async function getStatus() {
|
|
|
458
458
|
console.log()
|
|
459
459
|
|
|
460
460
|
// 检查镜像
|
|
461
|
-
console.log(chalk.bold('
|
|
461
|
+
console.log(chalk.bold('Docker 镜像'))
|
|
462
462
|
const cpuExists = await checkImageExists('cpu')
|
|
463
463
|
const gpuExists = await checkImageExists('gpu')
|
|
464
464
|
console.log(chalk.gray(` CPU: ${cpuExists ? chalk.green('已安装') : chalk.red('未安装')}`))
|
|
@@ -467,7 +467,7 @@ export async function getStatus() {
|
|
|
467
467
|
console.log()
|
|
468
468
|
|
|
469
469
|
// 检查 GPU
|
|
470
|
-
console.log(chalk.bold('
|
|
470
|
+
console.log(chalk.bold('GPU 状态'))
|
|
471
471
|
const gpuAvailable = await checkGPUAvailable()
|
|
472
472
|
if (gpuAvailable) {
|
|
473
473
|
console.log(chalk.green(' GPU 可用'))
|
|
@@ -485,7 +485,7 @@ export async function getStatus() {
|
|
|
485
485
|
console.log()
|
|
486
486
|
|
|
487
487
|
// 检查服务
|
|
488
|
-
console.log(chalk.bold('
|
|
488
|
+
console.log(chalk.bold('服务状态'))
|
|
489
489
|
const running = await checkServiceRunning(CONFIG.defaultPort)
|
|
490
490
|
if (running) {
|
|
491
491
|
console.log(chalk.green(` 服务运行中 (端口 ${CONFIG.defaultPort})`))
|
package/src/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import path from 'path'
|
|
|
8
8
|
import { fileURLToPath } from 'url'
|
|
9
9
|
import fs from 'fs'
|
|
10
10
|
import { startServer, startServerSync, stopServer, getStatus } from './commands/server.js'
|
|
11
|
-
import {
|
|
11
|
+
import { runDoctor } from './commands/manage.js'
|
|
12
12
|
import { runInstallTUI } from '@icyfenix-dmla/install'
|
|
13
13
|
|
|
14
14
|
// 从 package.json 读取版本号
|
|
@@ -98,7 +98,7 @@ program
|
|
|
98
98
|
const useGpu = options.gpu
|
|
99
99
|
const sync = options.sync
|
|
100
100
|
|
|
101
|
-
console.log(chalk.blue('
|
|
101
|
+
console.log(chalk.blue('启动 DMLA 沙箱服务...'))
|
|
102
102
|
console.log(chalk.gray(` 端口: ${port}`))
|
|
103
103
|
console.log(chalk.gray(` 请求类型: ${useGpu ? 'GPU' : '自动选择'}`))
|
|
104
104
|
if (sync) {
|
|
@@ -119,7 +119,7 @@ program
|
|
|
119
119
|
.command('stop')
|
|
120
120
|
.description('停止运行中的沙箱服务')
|
|
121
121
|
.action(async () => {
|
|
122
|
-
console.log(chalk.blue('
|
|
122
|
+
console.log(chalk.blue('停止 DMLA 沙箱服务...'))
|
|
123
123
|
await stopServer()
|
|
124
124
|
})
|
|
125
125
|
|
|
@@ -130,7 +130,7 @@ program
|
|
|
130
130
|
.command('status')
|
|
131
131
|
.description('查看服务状态')
|
|
132
132
|
.action(async () => {
|
|
133
|
-
console.log(chalk.blue('
|
|
133
|
+
console.log(chalk.blue('DMLA 沙箱服务状态'))
|
|
134
134
|
await getStatus()
|
|
135
135
|
})
|
|
136
136
|
|
|
@@ -144,18 +144,6 @@ program
|
|
|
144
144
|
await runInstallTUI()
|
|
145
145
|
})
|
|
146
146
|
|
|
147
|
-
// ─────────────────────────────────────────────────────────────
|
|
148
|
-
// update 命令
|
|
149
|
-
// ─────────────────────────────────────────────────────────────
|
|
150
|
-
program
|
|
151
|
-
.command('update')
|
|
152
|
-
.description('更新 npm 包和 Docker 镜像')
|
|
153
|
-
.option('-r, --registry <type>', '镜像仓库 (dockerhub/acr)', 'dockerhub')
|
|
154
|
-
.action(async (options) => {
|
|
155
|
-
console.log(chalk.blue('🔄 更新 DMLA...'))
|
|
156
|
-
await updateAll(options.registry)
|
|
157
|
-
})
|
|
158
|
-
|
|
159
147
|
// ─────────────────────────────────────────────────────────────
|
|
160
148
|
// doctor 命令
|
|
161
149
|
// ─────────────────────────────────────────────────────────────
|
|
@@ -163,7 +151,7 @@ program
|
|
|
163
151
|
.command('doctor')
|
|
164
152
|
.description('诊断安装环境')
|
|
165
153
|
.action(async () => {
|
|
166
|
-
console.log(chalk.blue('
|
|
154
|
+
console.log(chalk.blue('DMLA 环境诊断'))
|
|
167
155
|
await runDoctor()
|
|
168
156
|
})
|
|
169
157
|
|
|
package/src/server/routes/sandbox.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* 沙箱 API 路由
|
|
3
3
|
*/
|
|
4
4
|
import { Router } from 'express'
|
|
5
|
-
import sandbox, { runPythonCode, checkImageExists, checkGPUAvailable } from '../sandbox.js'
|
|
5
|
+
import sandbox, { runPythonCode, checkImageExists, checkGPUAvailable, checkCUDACompatibility } from '../sandbox.js'
|
|
6
6
|
|
|
7
7
|
const { SANDBOX_CONFIG } = sandbox
|
|
8
8
|
|
|
@@ -128,4 +128,41 @@ router.get('/gpu', async (req, res) => {
|
|
|
128
128
|
}
|
|
129
129
|
})
|
|
130
130
|
|
|
131
|
+
/**
|
|
132
|
+
* CUDA 兼容性检查
|
|
133
|
+
* 返回详细的 CUDA 环境诊断信息
|
|
134
|
+
*/
|
|
135
|
+
router.get('/cuda-compat', async (req, res) => {
|
|
136
|
+
try {
|
|
137
|
+
const imageGpuExists = await checkImageExists(true)
|
|
138
|
+
|
|
139
|
+
if (!imageGpuExists) {
|
|
140
|
+
return res.json({
|
|
141
|
+
status: 'error',
|
|
142
|
+
message: 'GPU 镜像未安装',
|
|
143
|
+
compatible: false,
|
|
144
|
+
suggestion: '请运行 npm run build:sandbox:gpu 或 dmla install --gpu'
|
|
145
|
+
})
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const compatResult = await checkCUDACompatibility()
|
|
149
|
+
|
|
150
|
+
res.json({
|
|
151
|
+
status: compatResult.compatible ? 'ok' : 'error',
|
|
152
|
+
compatible: compatResult.compatible,
|
|
153
|
+
details: compatResult.details,
|
|
154
|
+
issues: compatResult.issues,
|
|
155
|
+
message: compatResult.compatible
|
|
156
|
+
? 'CUDA 环境完全兼容,GPU 加速可用'
|
|
157
|
+
: 'CUDA 环境不兼容,请使用 CPU 模式或重新构建镜像'
|
|
158
|
+
})
|
|
159
|
+
} catch (error) {
|
|
160
|
+
res.status(500).json({
|
|
161
|
+
status: 'error',
|
|
162
|
+
compatible: false,
|
|
163
|
+
error: error.message
|
|
164
|
+
})
|
|
165
|
+
}
|
|
166
|
+
})
|
|
167
|
+
|
|
131
168
|
export default router
|
package/src/server/sandbox.js
CHANGED
|
@@ -154,6 +154,116 @@ export async function checkGPUAvailable() {
|
|
|
154
154
|
}
|
|
155
155
|
}
|
|
156
156
|
|
|
157
|
+
/**
|
|
158
|
+
* 检查 CUDA 兼容性
|
|
159
|
+
* 在 GPU 镜像中运行简单的 CUDA 操作测试,验证 PyTorch 与 GPU 兼容
|
|
160
|
+
* @returns {Promise<{compatible: boolean, issues: string[], details: object}>}
|
|
161
|
+
*/
|
|
162
|
+
export async function checkCUDACompatibility() {
|
|
163
|
+
let container = null
|
|
164
|
+
|
|
165
|
+
const testCode = `
|
|
166
|
+
import torch
|
|
167
|
+
import json
|
|
168
|
+
|
|
169
|
+
result = {
|
|
170
|
+
'pytorch_version': torch.__version__,
|
|
171
|
+
'cuda_available': torch.cuda.is_available(),
|
|
172
|
+
'cuda_version': str(torch.version.cuda) if torch.cuda.is_available() else None,
|
|
173
|
+
'device_name': torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
|
|
174
|
+
'compatible': True,
|
|
175
|
+
'test_passed': False,
|
|
176
|
+
'error': None
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
if torch.cuda.is_available():
|
|
180
|
+
try:
|
|
181
|
+
x = torch.randn(100, 100, device='cuda')
|
|
182
|
+
y = x + x
|
|
183
|
+
torch.cuda.synchronize()
|
|
184
|
+
result['test_passed'] = True
|
|
185
|
+
except RuntimeError as e:
|
|
186
|
+
result['compatible'] = False
|
|
187
|
+
result['error'] = str(e)
|
|
188
|
+
if 'no kernel image' in str(e) or 'CUDA error' in str(e):
|
|
189
|
+
result['error_type'] = 'compatibility'
|
|
190
|
+
|
|
191
|
+
print(json.dumps(result))
|
|
192
|
+
`
|
|
193
|
+
|
|
194
|
+
try {
|
|
195
|
+
container = await docker.createContainer({
|
|
196
|
+
Image: SANDBOX_CONFIG.imageGpu,
|
|
197
|
+
Cmd: ['python3', '-c', testCode],
|
|
198
|
+
HostConfig: {
|
|
199
|
+
DeviceRequests: [{
|
|
200
|
+
Driver: 'nvidia',
|
|
201
|
+
Count: -1,
|
|
202
|
+
Capabilities: [['gpu']]
|
|
203
|
+
}],
|
|
204
|
+
AutoRemove: false
|
|
205
|
+
},
|
|
206
|
+
Env: ['PYTHONUNBUFFERED=1']
|
|
207
|
+
})
|
|
208
|
+
|
|
209
|
+
await container.start()
|
|
210
|
+
await container.wait()
|
|
211
|
+
|
|
212
|
+
const logs = await container.logs({
|
|
213
|
+
stdout: true,
|
|
214
|
+
stderr: true
|
|
215
|
+
})
|
|
216
|
+
|
|
217
|
+
const { stdout, stderr } = parseDockerLogsSeparate(logs)
|
|
218
|
+
|
|
219
|
+
// 尝试解析 JSON 输出
|
|
220
|
+
const jsonStart = stdout.indexOf('{')
|
|
221
|
+
if (jsonStart !== -1) {
|
|
222
|
+
try {
|
|
223
|
+
const result = JSON.parse(stdout.substring(jsonStart))
|
|
224
|
+
return {
|
|
225
|
+
compatible: result.compatible && result.test_passed,
|
|
226
|
+
issues: result.error ? [result.error] : [],
|
|
227
|
+
details: result
|
|
228
|
+
}
|
|
229
|
+
} catch {
|
|
230
|
+
// JSON 解析失败
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// 如果无法解析,检查 stderr 是否有 CUDA 错误
|
|
235
|
+
if (stderr.includes('no kernel image') || stderr.includes('CUDA error')) {
|
|
236
|
+
return {
|
|
237
|
+
compatible: false,
|
|
238
|
+
issues: [stderr],
|
|
239
|
+
details: { raw_output: stderr }
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// 默认返回未知状态
|
|
244
|
+
return {
|
|
245
|
+
compatible: true, // 假设兼容,让实际执行来验证
|
|
246
|
+
issues: [],
|
|
247
|
+
details: { stdout, stderr }
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
} catch (error) {
|
|
251
|
+
return {
|
|
252
|
+
compatible: false,
|
|
253
|
+
issues: [error.message],
|
|
254
|
+
details: { error: error.message }
|
|
255
|
+
}
|
|
256
|
+
} finally {
|
|
257
|
+
if (container) {
|
|
258
|
+
try {
|
|
259
|
+
await container.remove({ force: true })
|
|
260
|
+
} catch {
|
|
261
|
+
// 忽略清理错误
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
157
267
|
/**
|
|
158
268
|
* 执行 Python 代码
|
|
159
269
|
* 使用 IPython Kernel 执行代码,支持富输出(图片、文本、错误等)
|
|
@@ -171,6 +281,53 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null)
|
|
|
171
281
|
const image = imageOverride || (useGpu ? SANDBOX_CONFIG.imageGpu : SANDBOX_CONFIG.imageCpu)
|
|
172
282
|
log(`Using image: ${image}`)
|
|
173
283
|
|
|
284
|
+
// GPU 兼容性预检查
|
|
285
|
+
if (useGpu) {
|
|
286
|
+
log('GPU mode: running CUDA compatibility pre-check...')
|
|
287
|
+
const compatResult = await checkCUDACompatibility()
|
|
288
|
+
log(`CUDA compatibility check result: ${JSON.stringify(compatResult)}`)
|
|
289
|
+
|
|
290
|
+
if (!compatResult.compatible) {
|
|
291
|
+
log('CUDA compatibility check failed')
|
|
292
|
+
const executionTime = (Date.now() - startTime) / 1000
|
|
293
|
+
|
|
294
|
+
// 构建详细的错误信息
|
|
295
|
+
const errorDetails = compatResult.details || {}
|
|
296
|
+
const errorType = errorDetails.error_type || 'unknown'
|
|
297
|
+
|
|
298
|
+
let errorMessage = 'CUDA 兼容性错误:PyTorch CUDA 版本与您的 GPU 不兼容\n\n'
|
|
299
|
+
|
|
300
|
+
if (errorType === 'compatibility' || compatResult.issues.some(i => i.includes('no kernel image'))) {
|
|
301
|
+
errorMessage += `诊断详情:\n`
|
|
302
|
+
errorMessage += `- PyTorch 版本: ${errorDetails.pytorch_version || '未知'}\n`
|
|
303
|
+
errorMessage += `- CUDA 版本: ${errorDetails.cuda_version || '未知'}\n`
|
|
304
|
+
errorMessage += `- GPU 设备: ${errorDetails.device_name || '未知'}\n`
|
|
305
|
+
errorMessage += `- 错误类型: CUDA kernel 不兼容\n\n`
|
|
306
|
+
errorMessage += `解决方案:\n`
|
|
307
|
+
errorMessage += `1. 使用 CPU 模式运行代码(在前端选择 "Run on CPU")\n`
|
|
308
|
+
errorMessage += `2. 在代码开头添加: device = torch.device('cpu')\n`
|
|
309
|
+
errorMessage += `3. 重新构建兼容的 Docker 镜像(修改 Dockerfile.sandbox 使用 CUDA 12.x)\n\n`
|
|
310
|
+
errorMessage += `更多诊断信息请运行: dmla doctor`
|
|
311
|
+
} else {
|
|
312
|
+
errorMessage += `错误详情: ${compatResult.issues.join('\n')}\n\n`
|
|
313
|
+
errorMessage += `建议使用 CPU 模式运行代码。`
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
return {
|
|
317
|
+
success: false,
|
|
318
|
+
outputs: [{
|
|
319
|
+
type: 'error',
|
|
320
|
+
ename: 'CUDACompatError',
|
|
321
|
+
evalue: 'CUDA 兼容性错误',
|
|
322
|
+
traceback: [errorMessage]
|
|
323
|
+
}],
|
|
324
|
+
executionTime,
|
|
325
|
+
gpuUsed: false
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
log('CUDA compatibility check passed')
|
|
329
|
+
}
|
|
330
|
+
|
|
174
331
|
// 创建容器配置 - 使用 kernel_runner.py 执行代码
|
|
175
332
|
const containerConfig = {
|
|
176
333
|
Image: image,
|
|
@@ -484,6 +641,7 @@ export async function pullImage(useGpu = false) {
|
|
|
484
641
|
export default {
|
|
485
642
|
runPythonCode,
|
|
486
643
|
checkGPUAvailable,
|
|
644
|
+
checkCUDACompatibility,
|
|
487
645
|
checkImageExists,
|
|
488
646
|
pullImage,
|
|
489
647
|
SANDBOX_CONFIG
|