@icyfenix-dmla/cli 2026.5.2-7 → 2026.5.3-821
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -6
- package/scripts/build.js +44 -11
- package/shared_modules/__init__.py +10 -0
- package/shared_modules/bayesian/__init__.py +6 -0
- package/shared_modules/bayesian/bayesian_network.py +105 -0
- package/shared_modules/bayesian/gaussian_mixture_model.py +141 -0
- package/shared_modules/bayesian/gaussian_mixturemodel.py +141 -0
- package/shared_modules/bayesian/multinomial_naive_bayes.py +74 -0
- package/shared_modules/bayesian/simple_bayesian_network.py +99 -0
- package/shared_modules/bayesian/simple_bayesiannetwork.py +99 -0
- package/shared_modules/cnn/__init__.py +5 -0
- package/shared_modules/cnn/alex_net.py +65 -0
- package/shared_modules/cnn/alexnet.py +65 -0
- package/shared_modules/cnn/t_e_r_m1.py +65 -0
- package/shared_modules/cnn/tiny_image_net_dataset.py +67 -0
- package/shared_modules/cnn/tiny_imagenet_dataset.py +67 -0
- package/shared_modules/cnn/tiny_imagenetdataset.py +67 -0
- package/shared_modules/cnn/tinyimagenetdataset.py +67 -0
- package/shared_modules/linear/__init__.py +6 -0
- package/shared_modules/linear/lasso_regression.py +93 -0
- package/shared_modules/linear/logistic_regression.py +78 -0
- package/shared_modules/linear/naive_bayes.py +141 -0
- package/shared_modules/linear/ridge_regression.py +58 -0
- package/shared_modules/neural/__init__.py +4 -0
- package/shared_modules/neural/perceptron.py +80 -0
- package/shared_modules/svm/__init__.py +5 -0
- package/shared_modules/svm/kernel_s_v_m.py +98 -0
- package/shared_modules/svm/kernel_svm.py +98 -0
- package/shared_modules/svm/simple_s_v_m.py +111 -0
- package/shared_modules/svm/simple_svm.py +111 -0
- package/shared_modules/tree/__init__.py +6 -0
- package/shared_modules/tree/ada_boost.py +77 -0
- package/shared_modules/tree/decision_tree_classifier.py +235 -0
- package/shared_modules/tree/decision_treeclassifier.py +235 -0
- package/shared_modules/tree/random_forest_classifier.py +88 -0
- package/shared_modules/tree/random_forestclassifier.py +88 -0
- package/shared_modules/unsupervised/__init__.py +5 -0
- package/shared_modules/unsupervised/k_means.py +127 -0
- package/shared_modules/unsupervised/kmeans.py +127 -0
- package/shared_modules/unsupervised/p_c_a.py +111 -0
- package/shared_modules/unsupervised/pca.py +111 -0
- package/src/commands/data.js +823 -0
- package/src/commands/server.js +209 -4
- package/src/index.js +23 -2
- package/src/server/routes/sandbox.js +70 -3
- package/src/server/sandbox.js +87 -11
- package/version.json +4 -0
|
@@ -0,0 +1,823 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 数据管理命令
|
|
3
|
+
* 提供数据卷挂载、数据集下载、数据管理等功能的 TUI 界面
|
|
4
|
+
*/
|
|
5
|
+
import chalk from 'chalk'
|
|
6
|
+
import pkg from 'enquirer'
|
|
7
|
+
const { prompt } = pkg
|
|
8
|
+
import fs from 'fs'
|
|
9
|
+
import path from 'path'
|
|
10
|
+
import os from 'os'
|
|
11
|
+
import { spawn } from 'child_process'
|
|
12
|
+
import { execSync } from 'child_process'
|
|
13
|
+
import AdmZip from 'adm-zip'
|
|
14
|
+
|
|
15
|
+
// Paths of the per-user configuration directory and the config file inside it
const DMLA_CONFIG_DIR = path.join(os.homedir(), '.dmla')
const DMLA_CONFIG_FILE = path.join(DMLA_CONFIG_DIR, 'config.json')

// Default data directory, used when the config does not provide a data volume path
const DEFAULT_DATA_DIR = path.join(os.homedir(), 'dmla-data')
|
|
21
|
+
|
|
22
|
+
// Dataset catalogue (hosted on the ModelScope mirror in mainland China for faster downloads)
const DATASETS = [
  {
    id: 'tiny-imagenet-200',
    name: 'Tiny ImageNet 200',
    url: 'https://www.modelscope.cn/datasets/icyfenix/Tiny_ImageNet_200.git',
    size: '247MB',
    format: 'git',
    targetDir: 'datasets/tiny-imagenet-200',
    source: 'ModelScope (icyfenix)',
    // Zip file inside the cloned repository that has to be extracted after git clone
    zipFile: 'tiny-imagenet-200.zip',
    // Top-level directory inside the zip (its contents are moved up one level after extraction)
    zipInnerDir: 'tiny-imagenet-200'
  },
  {
    id: 'cifar-10',
    name: 'CIFAR-10',
    url: 'https://www.modelscope.cn/datasets/icyfenix/CIFAR_10.git',
    size: '163MB',
    format: 'git',
    targetDir: 'datasets/cifar-10',
    source: 'ModelScope (icyfenix)'
  },
  {
    id: 'mnist',
    name: 'MNIST',
    url: 'https://www.modelscope.cn/datasets/icyfenix/MNIST.git',
    size: '11MB',
    format: 'git',
    targetDir: 'datasets/mnist',
    source: 'ModelScope (icyfenix)'
  }
]
|
|
56
|
+
|
|
57
|
+
/**
 * Check whether a value thrown by a prompt represents a user cancellation
 * (ESC or Ctrl+C). enquirer may reject with an empty string or with a
 * message containing 'cancel'.
 *
 * @param {*} error - Value thrown or rejected by a prompt.
 * @returns {boolean} True when the value looks like a cancellation.
 */
function isUserCancel(error) {
  // A null/undefined rejection carries no message at all; treat it like the
  // empty-string rejection instead of throwing a TypeError on property access.
  const message = error?.message
  if (!message) {
    return true
  }
  // A truthy non-string message cannot be searched, so it is not a cancel marker.
  return typeof message === 'string' && message.includes('cancel')
}
|
|
66
|
+
|
|
67
|
+
/**
 * Print the data-management banner: a blank line, the title, an underline
 * and a trailing blank line.
 */
function showBanner() {
  const title = chalk.cyan('DMLA 数据管理')
  const underline = chalk.cyan('================')

  console.log()
  console.log(title)
  console.log(underline)
  console.log()
}
|
|
76
|
+
|
|
77
|
+
/**
 * Read the persisted configuration.
 *
 * @returns {object} The parsed content of the config file, or
 *   `{ dataVolumePath: DEFAULT_DATA_DIR }` when the file is missing, cannot be
 *   read or parsed, or does not hold a JSON object. Read failures are reported
 *   as a warning on the console and never thrown.
 */
function readConfig() {
  try {
    if (fs.existsSync(DMLA_CONFIG_FILE)) {
      const parsed = JSON.parse(fs.readFileSync(DMLA_CONFIG_FILE, 'utf8'))
      // JSON.parse also accepts `null`, scalars and arrays. Callers read and
      // assign properties on the result, so only a plain object is usable.
      if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new TypeError('config root is not a JSON object')
      }
      return parsed
    }
  } catch (error) {
    console.log(chalk.yellow(`警告: 配置文件读取失败: ${error.message}`))
  }
  return { dataVolumePath: DEFAULT_DATA_DIR }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Persist the configuration to the config file, stamped with the write time
 * in a `lastModified` field. Failures are reported on the console and not
 * rethrown.
 *
 * @param {object} config - Configuration to save; the object itself is not modified.
 */
function writeConfig(config) {
  try {
    // Make sure the configuration directory exists
    if (!fs.existsSync(DMLA_CONFIG_DIR)) {
      fs.mkdirSync(DMLA_CONFIG_DIR, { recursive: true })
    }

    // Stamp a copy so the caller's object is left untouched
    const payload = { ...config, lastModified: new Date().toISOString() }
    fs.writeFileSync(DMLA_CONFIG_FILE, JSON.stringify(payload, null, 2))
  } catch (error) {
    console.log(chalk.red(`配置文件写入失败: ${error.message}`))
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Resolve the data volume path.
 *
 * @returns {string} The `dataVolumePath` stored in the configuration, or the
 *   default data directory when that value is missing or falsy.
 */
function getDataVolumePath() {
  const { dataVolumePath } = readConfig()
  if (dataVolumePath) {
    return dataVolumePath
  }
  return DEFAULT_DATA_DIR
}
|
|
116
|
+
|
|
117
|
+
/**
 * Create the standard sub-directory layout under a data directory.
 * Paths that already exist are left untouched.
 *
 * @param {string} dataPath - Root of the data directory.
 */
function ensureDataDirStructure(dataPath) {
  const layout = [
    'datasets',
    'datasets/custom',
    'models',
    'models/alexnet/checkpoints',
    'models/alexnet/final',
    'models/vgg',
    'models/resnet',
    'models/gan',
    'models/llm',
    'models/pretrained',
    'outputs',
    'outputs/training_logs',
    'outputs/visualizations',
    'outputs/exports',
    'cache',
    'cache/downloads',
    'cache/preprocessing',
    'cache/torch_hub'
  ]

  layout
    .map((relative) => path.join(dataPath, relative))
    .forEach((absolute) => {
      if (fs.existsSync(absolute)) {
        return
      }
      fs.mkdirSync(absolute, { recursive: true })
    })
}
|
|
149
|
+
|
|
150
|
+
/**
 * Collect summary statistics for a data directory.
 *
 * @param {string} dataPath - Root of the data directory.
 * @returns {{datasets: number, models: number, totalSize: number}}
 *   `datasets` is the number of directories directly under `datasets/`
 *   (excluding `custom`); `models` is the number of `.pth`, `.pt` and `.onnx`
 *   files anywhere under `models/`; `totalSize` is never computed here and
 *   stays 0. Entries that cannot be read are skipped; this function does not throw.
 */
function getDirectoryStats(dataPath) {
  const stats = {
    datasets: 0,
    models: 0,
    totalSize: 0
  }
  const modelExtensions = ['.pth', '.pt', '.onnx']

  // statSync follows symlinks and throws on dangling links or permission
  // errors. Failing per entry keeps one bad entry from zeroing every counter.
  const statOrNull = (target) => {
    try {
      return fs.statSync(target)
    } catch {
      return null
    }
  }

  // A missing or unreadable directory simply contributes no entries
  const listEntries = (dir) => {
    try {
      return fs.readdirSync(dir)
    } catch {
      return []
    }
  }

  const countModelFiles = (dir) => {
    let count = 0
    for (const item of listEntries(dir)) {
      const fullPath = path.join(dir, item)
      const stat = statOrNull(fullPath)
      if (stat === null) {
        continue
      }
      if (stat.isDirectory()) {
        count += countModelFiles(fullPath)
      } else if (modelExtensions.some((ext) => item.endsWith(ext))) {
        count++
      }
    }
    return count
  }

  try {
    // Count downloaded datasets
    const datasetsPath = path.join(dataPath, 'datasets')
    stats.datasets = listEntries(datasetsPath).filter((entry) => {
      if (entry === 'custom') {
        return false
      }
      return statOrNull(path.join(datasetsPath, entry))?.isDirectory() === true
    }).length

    // Count model files
    stats.models = countModelFiles(path.join(dataPath, 'models'))
  } catch {
    // Statistics are best-effort (e.g. a non-string dataPath): keep what was counted
  }

  return stats
}
|
|
196
|
+
|
|
197
|
+
/**
 * Tell whether a catalogue dataset is present on disk.
 *
 * @param {string} dataPath - Root of the data directory.
 * @param {string} datasetId - `id` of an entry in DATASETS.
 * @returns {boolean} True when the dataset's target path exists under
 *   `dataPath`; false for an unknown id.
 */
function isDatasetDownloaded(dataPath, datasetId) {
  const entry = DATASETS.find(({ id }) => id === datasetId)
  return entry ? fs.existsSync(path.join(dataPath, entry.targetDir)) : false
}
|
|
207
|
+
|
|
208
|
+
/**
 * Print the current status and show the main menu.
 *
 * @param {string} dataPath - Current data volume path, shown in the header.
 * @returns {Promise<number>} 1-based position of the chosen menu entry
 *   (0 when the answer matches no entry).
 */
async function showMainMenu(dataPath) {
  const { datasets, models } = getDirectoryStats(dataPath)

  const statusLines = [
    `当前挂载路径: ${dataPath}`,
    `数据集: ${datasets} 个已下载`,
    `模型: ${models} 个已保存`
  ]
  for (const line of statusLines) {
    console.log(chalk.gray(line))
  }
  console.log()
  console.log(chalk.gray('------------------------------------'))
  console.log()

  // Menu labels in display order; the returned action number is position + 1
  const labels = [
    `挂载路径设置 ${chalk.gray(`[当前: ${dataPath}]`)}`,
    '下载数据集',
    '查看数据集列表',
    '清空数据内容',
    '删除数据卷',
    '退出'
  ]

  const answer = await prompt({
    type: 'select',
    name: 'action',
    message: '选择操作',
    choices: [...labels]
  })

  // The prompt answers with the chosen label; map it back to its position
  return labels.indexOf(answer.action) + 1
}
|
|
241
|
+
|
|
242
|
+
/**
 * Interactive mount-path setup: ask for a new data path, optionally create
 * it, build the directory layout and save the path to the configuration.
 */
async function mountPath() {
  const currentPath = getDataVolumePath()

  console.log()
  console.log(chalk.bold('挂载路径设置'))
  console.log(chalk.gray(`当前路径: ${currentPath}`))
  console.log()

  const answer = await prompt({
    type: 'input',
    name: 'newPath',
    message: '输入新的挂载路径 (留空保持当前)',
    initial: currentPath
  })

  const requested = (answer.newPath || '').trim()
  if (requested === '') {
    console.log(chalk.yellow('路径未修改'))
    return
  }

  const resolvedPath = path.resolve(requested)
  const alreadyThere = fs.existsSync(resolvedPath)

  // Ask before creating a path that does not exist yet
  if (!alreadyThere) {
    const confirmation = await prompt({
      type: 'confirm',
      name: 'create',
      message: `路径 ${resolvedPath} 不存在,是否创建?`,
      initial: true
    })

    if (!confirmation.create) {
      console.log(chalk.yellow('操作已取消'))
      return
    }

    fs.mkdirSync(resolvedPath, { recursive: true })
  }

  // Build the full directory layout, then persist the new path
  ensureDataDirStructure(resolvedPath)
  writeConfig({ dataVolumePath: resolvedPath })

  console.log(chalk.green(`挂载路径已更新: ${resolvedPath}`))
  console.log(chalk.yellow('提示: 需要重启沙箱服务才能生效 (dmla stop && dmla start)'))
}
|
|
293
|
+
|
|
294
|
+
/**
 * Empty the data directory after confirmation: everything inside
 * `datasets`, `models`, `outputs` and `cache` is deleted, then the standard
 * directory layout is recreated.
 */
async function clearData() {
  const dataPath = getDataVolumePath()

  console.log()
  console.log(chalk.bold('清空数据内容'))
  console.log(chalk.red('警告: 此操作将删除所有数据集、模型和输出文件!'))
  console.log()

  const answer = await prompt({
    type: 'confirm',
    name: 'confirm',
    message: '确认清空数据内容?',
    initial: false
  })

  if (!answer.confirm) {
    console.log(chalk.yellow('操作已取消'))
    return
  }

  // Remove the content of each top-level directory but keep the directory itself
  for (const section of ['datasets', 'models', 'outputs', 'cache']) {
    const sectionPath = path.join(dataPath, section)
    if (!fs.existsSync(sectionPath)) {
      continue
    }
    fs.readdirSync(sectionPath).forEach((entry) => {
      fs.rmSync(path.join(sectionPath, entry), { recursive: true, force: true })
    })
  }

  // Recreate the directory layout
  ensureDataDirStructure(dataPath)

  console.log(chalk.green('数据内容已清空'))
}
|
|
336
|
+
|
|
337
|
+
/**
 * Delete the whole data volume after confirmation and reset the stored
 * data volume path to null.
 */
async function removeData() {
  const dataPath = getDataVolumePath()

  console.log()
  console.log(chalk.bold('删除数据卷'))
  console.log(chalk.red('警告: 此操作将删除整个数据目录和所有数据!'))
  console.log()

  const answer = await prompt({
    type: 'confirm',
    name: 'confirm',
    message: '确认删除数据卷?',
    initial: false
  })

  if (answer.confirm) {
    // Delete the whole directory
    if (fs.existsSync(dataPath)) {
      fs.rmSync(dataPath, { recursive: true, force: true })
    }

    // Clear the configured path
    writeConfig({ dataVolumePath: null })

    console.log(chalk.green('数据卷已删除'))
  } else {
    console.log(chalk.yellow('操作已取消'))
  }
}
|
|
370
|
+
|
|
371
|
+
/**
 * Print every catalogue dataset with its download status and size.
 */
function listDatasets() {
  const dataPath = getDataVolumePath()

  console.log()
  console.log(chalk.bold('已下载的数据集'))
  console.log()

  DATASETS.forEach(({ id, name, size }) => {
    const badge = isDatasetDownloaded(dataPath, id)
      ? chalk.green('[已下载]')
      : chalk.gray('[未下载]')
    console.log(`${badge} ${name} (${size})`)
  })

  console.log()
}
|
|
389
|
+
|
|
390
|
+
/**
 * Dataset download sub-menu: let the user pick datasets from the catalogue
 * and download each selected one in turn.
 *
 * Returns early (with a console message) when the data directory does not
 * exist, when Git is not available, when nothing is selected, or when the
 * prompt is cancelled. Any other error is rethrown.
 */
async function downloadDatasets() {
  const dataPath = getDataVolumePath()

  console.log()
  console.log(chalk.bold('下载数据集'))
  console.log()

  // The data directory must exist before anything can be downloaded into it
  if (!fs.existsSync(dataPath)) {
    console.log(chalk.yellow(`数据目录不存在: ${dataPath}`))
    console.log(chalk.yellow('请先设置挂载路径'))
    return
  }

  // Check that Git is available
  try {
    execSync('git --version', { stdio: 'pipe' })
  } catch {
    console.log(chalk.red('❌ Git 未安装'))
    console.log(chalk.yellow('下载数据集需要 Git,请先安装: https://git-scm.com/downloads'))
    return
  }

  // Build the choice list; each choice is named after its index in DATASETS
  const choices = DATASETS.map((dataset, index) => {
    const downloaded = isDatasetDownloaded(dataPath, dataset.id)

    let message = `${dataset.name} (${dataset.size})`
    if (downloaded) {
      message += ' [已下载]'
    }

    return {
      name: index.toString(),
      message,
      disabled: downloaded
    }
  })

  // Usage hint
  console.log(chalk.gray('操作: 上下键移动,空格勾选/取消,回车确认,ESC 返回'))
  console.log()

  try {
    const { selected } = await prompt({
      type: 'multiselect',
      name: 'selected',
      message: '选择要下载的数据集',
      choices,
      hint: '空格选择,回车确认下载',
      warn: '已下载'
    })

    if (!selected || selected.length === 0) {
      console.log(chalk.yellow('未选择任何数据集'))
      return
    }

    // Download the selected datasets one after another; the answer is treated
    // as a list of choice names, i.e. stringified indexes into DATASETS
    for (const indexStr of selected) {
      // Always parse with an explicit radix
      const dataset = DATASETS[Number.parseInt(indexStr, 10)]

      // An answer that does not map to a catalogue entry is skipped rather
      // than crashing on `undefined.id`
      if (!dataset) {
        console.log(chalk.yellow(`未知的数据集选项: ${indexStr}`))
        continue
      }

      console.log()
      console.log(chalk.cyan(`────────────────────────────────────`))

      // Skip datasets that are already present
      if (isDatasetDownloaded(dataPath, dataset.id)) {
        console.log(chalk.yellow(`${dataset.name} 已下载,跳过`))
        continue
      }

      await downloadDataset(dataPath, dataset)
    }

    console.log()
    console.log(chalk.cyan(`────────────────────────────────────`))
    console.log(chalk.green('所有选中的数据集已处理完成'))
  } catch (error) {
    // The user pressed ESC or Ctrl+C
    if (isUserCancel(error)) {
      console.log(chalk.gray('返回上一级'))
      return
    }
    throw error
  }
}
|
|
480
|
+
|
|
481
|
+
/**
 * Run a command with inherited stdio and wait for it to finish.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments, passed without going through a shell.
 * @param {string} label - Name used in the error message on a non-zero exit.
 * @returns {Promise<void>} Resolves on exit code 0; rejects with
 *   `<label> exited with code <code>` otherwise, or with the spawn error.
 */
function runInherited(command, args, label) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: 'inherit' })

    child.on('close', (code) => {
      if (code === 0) {
        resolve()
      } else {
        reject(new Error(`${label} exited with code ${code}`))
      }
    })

    child.on('error', reject)
  })
}

/**
 * Extract the zip shipped inside a cloned dataset repository and move the
 * content of its inner directory into the dataset directory. Extraction is
 * best-effort: failures are reported on the console and the zip is kept so
 * it can be extracted by hand.
 *
 * @param {string} targetDir - Directory the dataset was cloned into.
 * @param {object} dataset - Catalogue entry with `zipFile` and optional `zipInnerDir`.
 */
function extractBundledZip(targetDir, dataset) {
  const zipPath = path.join(targetDir, dataset.zipFile)
  if (!fs.existsSync(zipPath)) {
    return
  }

  console.log()
  console.log(chalk.gray(`解压 ${dataset.zipFile}...`))

  // Extract into a scratch directory first
  const tempDir = path.join(targetDir, '_extract_temp')

  try {
    const zip = new AdmZip(zipPath)
    zip.extractAllTo(tempDir, true)

    // Directory whose content is moved up into the dataset directory
    const innerDir = dataset.zipInnerDir
      ? path.join(tempDir, dataset.zipInnerDir)
      : tempDir

    if (!fs.existsSync(innerDir)) {
      console.log(chalk.yellow(`  ⚠ zip 内部目录 ${dataset.zipInnerDir} 不存在`))
      return
    }

    for (const item of fs.readdirSync(innerDir)) {
      const destPath = path.join(targetDir, item)

      // Skip anything that already exists, unless it is the zip file itself
      if (fs.existsSync(destPath) && item !== dataset.zipFile) {
        continue
      }

      fs.cpSync(path.join(innerDir, item), destPath, { recursive: true, force: true })
    }

    // The archive is only removed once its content has been copied
    fs.rmSync(zipPath, { force: true })

    console.log(chalk.green('解压完成'))
  } catch (err) {
    console.log(chalk.red(`解压失败: ${err.message}`))
    console.log(chalk.yellow(`请手动解压: ${zipPath}`))
  } finally {
    // Always drop the scratch directory, including on the failure paths above
    fs.rmSync(tempDir, { recursive: true, force: true })
  }
}

/**
 * Legacy download path (kept for compatibility): fetch an archive with curl
 * into the download cache and extract it.
 *
 * @param {string} dataPath - Root of the data directory.
 * @param {object} dataset - Catalogue entry; `format` is 'zip' or 'tar.gz'.
 * @param {string} targetDir - Directory a zip archive is extracted into.
 * @returns {Promise<void>} Rejects when the download or the extraction fails.
 */
async function downloadArchive(dataPath, dataset, targetDir) {
  const cacheDir = path.join(dataPath, 'cache', 'downloads')
  const downloadFile = path.join(cacheDir, `${dataset.id}.${dataset.format}`)

  if (!fs.existsSync(cacheDir)) {
    fs.mkdirSync(cacheDir, { recursive: true })
  }

  console.log(chalk.gray('开始下载...'))
  console.log()

  try {
    await runInherited('curl', ['-L', '-o', downloadFile, '--progress-bar', dataset.url], 'curl')

    console.log()
    console.log(chalk.green('下载完成'))
    console.log()

    // Extract
    console.log(chalk.gray('正在解压...'))

    if (dataset.format === 'zip') {
      try {
        const zip = new AdmZip(downloadFile)
        zip.extractAllTo(targetDir, true) // overwrite = true
        console.log(chalk.green('解压完成'))
      } catch (err) {
        console.log(chalk.red(`解压失败: ${err.message}`))
        throw err
      }
    } else if (dataset.format === 'tar.gz') {
      // adm-zip cannot read tar.gz, so the system tar is used. Arguments are
      // passed as an array (no shell) so quotes or `$` in paths cannot break the command.
      await runInherited('tar', ['-xzf', downloadFile, '-C', path.join(dataPath, 'datasets')], 'tar')
    }
  } finally {
    // Remove the cached download whether or not the steps above succeeded
    fs.rmSync(downloadFile, { force: true })
  }
}

/**
 * Download a single dataset into the data directory and record it in the
 * configuration's `installedDatasets` list. Failures are reported on the
 * console and not rethrown.
 *
 * @param {string} dataPath - Root of the data directory.
 * @param {object} dataset - Entry of the DATASETS catalogue.
 * @returns {Promise<void>}
 */
async function downloadDataset(dataPath, dataset) {
  console.log()
  console.log(chalk.bold(`下载 ${dataset.name}...`))
  console.log(chalk.gray(`来源: ${dataset.source || 'ModelScope'}`))
  console.log(chalk.gray(`URL: ${dataset.url}`))

  const targetDir = path.join(dataPath, dataset.targetDir)

  console.log(chalk.gray(`目标: ${targetDir}`))
  console.log()

  // Make sure the parent of the target directory exists
  const parentDir = path.dirname(targetDir)
  if (!fs.existsSync(parentDir)) {
    fs.mkdirSync(parentDir, { recursive: true })
  }

  try {
    if (dataset.format === 'git') {
      // ModelScope datasets are fetched with git clone
      console.log(chalk.gray('开始 git clone...'))
      console.log()

      // Enable git lfs first (if needed)
      try {
        execSync('git lfs install', { stdio: 'pipe' })
      } catch {
        // git lfs may not be installed, but most datasets do not need it
      }

      await runInherited('git', ['clone', dataset.url, targetDir], 'git clone')

      console.log()
      console.log(chalk.green('下载完成'))

      // Extract the zip shipped inside the repository, if the entry names one
      if (dataset.zipFile) {
        extractBundledZip(targetDir, dataset)
      }
    } else {
      await downloadArchive(dataPath, dataset, targetDir)
    }

    console.log()
    console.log(chalk.green(`数据集已保存到 ${targetDir}`))

    // Record the dataset in the configuration
    const config = readConfig()
    // Tolerate a hand-edited config whose installedDatasets is not a list
    if (!Array.isArray(config.installedDatasets)) {
      config.installedDatasets = []
    }
    if (!config.installedDatasets.includes(dataset.id)) {
      config.installedDatasets.push(dataset.id)
    }
    writeConfig(config)
  } catch (error) {
    console.log()
    console.log(chalk.red(`下载失败: ${error.message}`))
    console.log(chalk.yellow('您可以稍后重试'))
  }
}
|
|
666
|
+
|
|
667
|
+
/**
 * Run the interactive data-management TUI: show the banner, offer to create
 * a missing data directory, then loop over the main menu until the user
 * chooses to exit or cancels the menu prompt.
 */
export async function runDataTUI() {
  showBanner()

  let dataPath = getDataVolumePath()

  // Make sure the configuration directory exists
  if (!fs.existsSync(DMLA_CONFIG_DIR)) {
    fs.mkdirSync(DMLA_CONFIG_DIR, { recursive: true })
  }

  // Offer to create the data directory when it is missing
  if (!fs.existsSync(dataPath)) {
    console.log(chalk.yellow(`数据目录不存在: ${dataPath}`))
    console.log()

    try {
      const answer = await prompt({
        type: 'confirm',
        name: 'create',
        message: '是否创建数据目录?',
        initial: true
      })

      if (answer.create) {
        ensureDataDirStructure(dataPath)
        console.log(chalk.green(`数据目录已创建: ${dataPath}`))
      }
    } catch (error) {
      if (!isUserCancel(error)) {
        throw error
      }
      console.log(chalk.gray('已退出数据管理'))
      console.log()
      return
    }
  }

  // Menu entries that prompt the user; cancelling one returns to the main menu
  const promptingActions = new Map([
    [1, mountPath],
    [2, downloadDatasets],
    [4, clearData],
    [5, removeData]
  ])

  const announceExit = () => {
    console.log()
    console.log(chalk.gray('已退出数据管理'))
    console.log()
  }

  // Main loop
  while (true) {
    console.log()
    try {
      const action = await showMainMenu(dataPath)

      if (action === 6) {
        announceExit()
        return
      }

      if (action === 3) {
        listDatasets()
      } else if (promptingActions.has(action)) {
        try {
          await promptingActions.get(action)()
        } catch (error) {
          if (!isUserCancel(error)) {
            throw error
          }
          console.log(chalk.gray('返回主菜单'))
        }
      }

      // Refresh the path (an action may have changed it)
      dataPath = getDataVolumePath()
    } catch (error) {
      // Cancelling the main menu (ESC) exits the TUI
      if (!isUserCancel(error)) {
        throw error
      }
      announceExit()
      return
    }
  }
}
|
|
782
|
+
|
|
783
|
+
/**
 * CLI entry point (non-TUI mode).
 *
 * @param {string} [subCommand] - 'path', 'mount', 'clear', 'remove' or
 *   'download'; anything else starts the interactive TUI.
 * @param {{path?: string}} [options] - Command options; `path` is read by 'mount'.
 * @returns {Promise<void>}
 */
export async function runDataCommand(subCommand, options) {
  const dataPath = getDataVolumePath()

  switch (subCommand) {
    case 'path':
      console.log(dataPath)
      break
    case 'mount': {
      // `options` may be omitted by the caller; treat that like a missing path
      const requested = options?.path
      if (requested) {
        const resolvedPath = path.resolve(requested)
        ensureDataDirStructure(resolvedPath)
        writeConfig({ dataVolumePath: resolvedPath })
        console.log(chalk.green(`挂载路径已设置: ${resolvedPath}`))
      } else {
        console.log(chalk.yellow('请指定路径: dmla data mount <path>'))
      }
      break
    }
    case 'clear':
    case 'remove':
      // Both commands ask for confirmation. Cancelling that prompt (ESC or
      // Ctrl+C) rejects, so handle it here the same way the TUI does instead
      // of letting the rejection escape to the caller.
      try {
        await (subCommand === 'clear' ? clearData() : removeData())
      } catch (error) {
        if (!isUserCancel(error)) {
          throw error
        }
        console.log(chalk.yellow('操作已取消'))
      }
      break
    case 'download':
      await downloadDatasets()
      break
    default:
      // No sub-command: enter the TUI
      await runDataTUI()
  }
}
|
|
817
|
+
|
|
818
|
+
// Default export: the module's entry points, the path resolver and the dataset catalogue
const dataCommandApi = {
  runDataTUI,
  runDataCommand,
  getDataVolumePath,
  DATASETS
}

export default dataCommandApi
|