@icyfenix-dmla/cli 2026.5.3-2128 → 2026.5.3-2346

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icyfenix-dmla/cli",
3
- "version": "2026.5.3-2128",
3
+ "version": "2026.5.3-2346",
4
4
  "description": "DMLA 沙箱服务命令行工具",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -18,12 +18,61 @@ DMLA 进度报告模块
18
18
 
19
19
  import json
20
20
  import time
21
+ import sys
22
+ import threading
23
+ import queue
21
24
  from pathlib import Path
22
25
  from typing import Optional
23
26
 
24
27
  # 进度文件路径
25
28
  PROGRESS_FILE = Path('/workspace/progress.json')
26
29
 
30
+ # stderr 异步写入队列(避免管道阻塞)
31
+ _stderr_queue: queue.Queue = queue.Queue()
32
+ _stderr_thread: Optional[threading.Thread] = None
33
+
34
+
35
+ def _start_stderr_writer():
36
+ """启动后台 stderr 写入线程(daemon 线程,随主线程退出)"""
37
+ global _stderr_thread
38
+ if _stderr_thread is None or not _stderr_thread.is_alive():
39
+ _stderr_thread = threading.Thread(target=_stderr_writer_loop, daemon=True)
40
+ _stderr_thread.start()
41
+
42
+
43
+ def _stderr_writer_loop():
44
+ """后台线程循环:从队列读取数据并写入 stderr"""
45
+ while True:
46
+ try:
47
+ item = _stderr_queue.get(timeout=1.0)
48
+ if item is None: # 停止信号
49
+ break
50
+ sys.stderr.write(item)
51
+ sys.stderr.flush()
52
+ except queue.Empty:
53
+ continue # 队列空,继续等待
54
+ except Exception:
55
+ pass # 写入失败,忽略(不影响主线程)
56
+
57
+
58
+ def _write_stderr_async(data: str):
59
+ """
60
+ 异步写入 stderr(非阻塞)
61
+
62
+ 将数据放入队列,由后台线程处理写入。
63
+ 如果队列积压过多(>100 条),则丢弃旧数据,避免内存爆炸。
64
+ """
65
+ _start_stderr_writer()
66
+
67
+ # 如果队列积压过多,清空部分旧数据
68
+ while _stderr_queue.qsize() > 100:
69
+ try:
70
+ _stderr_queue.get_nowait()
71
+ except queue.Empty:
72
+ break
73
+
74
+ _stderr_queue.put(data)
75
+
27
76
 
28
77
  class ProgressReporter:
29
78
  """
@@ -214,23 +263,23 @@ class ProgressReporter:
214
263
  if extra_data:
215
264
  data["extra_data"] = extra_data
216
265
 
217
- # 1. stderr 输出(用于流式 HTTP 响应,与 stdout 分开避免合并)
218
- # Jupyter kernel 会将 stdout 和 stderr 分到不同的 stream 消息
219
- try:
220
- import sys
221
- sys.stderr.write(json.dumps(data, ensure_ascii=False) + '\n')
222
- sys.stderr.flush()
223
- except Exception as e:
224
- # stderr 输出失败不影响训练,仅打印警告
225
- print(f"Warning: Failed to output progress to stderr: {e}")
226
-
227
- # 2. 文件写入(作为降级/备用方案)
266
+ # 1. 文件写入(优先,确保进度数据持久化)
267
+ # 文件写入是可靠的,不受管道阻塞影响
228
268
  try:
229
269
  PROGRESS_FILE.write_text(json.dumps(data, ensure_ascii=False))
230
270
  except Exception as e:
231
271
  # 写入失败不影响训练,仅打印警告
232
272
  print(f"Warning: Failed to write progress file: {e}")
233
273
 
274
+ # 2. stderr 异步输出(用于流式 HTTP 响应)
275
+ # 使用后台线程异步写入,避免管道阻塞主线程
276
+ # Windows Docker 环境下管道缓冲区满时会导致阻塞
277
+ try:
278
+ _write_stderr_async(json.dumps(data, ensure_ascii=False) + '\n')
279
+ except Exception as e:
280
+ # stderr 输出失败不影响训练
281
+ print(f"Warning: Failed to output progress to stderr: {e}")
282
+
234
283
 
235
284
  def get_progress() -> Optional[dict]:
236
285
  """
@@ -146,7 +146,8 @@ const SANDBOX_CONFIG = {
146
146
  imageCpu: 'dmla-sandbox:cpu',
147
147
  imageGpu: 'dmla-sandbox:gpu',
148
148
  timeout: 60000, // 60 秒超时
149
- memory: 4 * 1024 * 1024 * 1024 // 4GB 内存
149
+ memoryCpu: 4 * 1024 * 1024 * 1024, // CPU 容器 4GB 内存限制
150
+ memoryGpu: 0 // GPU 容器不限制内存(让 GPU 显存独立管理)
150
151
  }
151
152
 
152
153
  // DMLA 配置文件路径
@@ -480,11 +481,14 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
480
481
 
481
482
  // 创建容器配置 - 使用 kernel_runner.py 执行代码
482
483
  const timeoutSeconds = actualTimeout === null ? 86400 : actualTimeout // unlimited 使用 24 小时
484
+
485
+ // GPU 容器不限制内存,CPU 容器限制 4GB
486
+ const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
487
+
483
488
  const containerConfig = {
484
489
  Image: image,
485
490
  Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds)],
486
491
  HostConfig: {
487
- Memory: SANDBOX_CONFIG.memory,
488
492
  AutoRemove: false // 手动移除以获取日志
489
493
  },
490
494
  // matplotlib 使用 IPython Kernel 的 inline 后端,自动发送 display_data
@@ -496,6 +500,11 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
496
500
  ].filter(e => e) // 过滤空字符串
497
501
  }
498
502
 
503
+ // 仅对 CPU 容器设置内存限制(GPU 容器不限制)
504
+ if (memoryLimit > 0) {
505
+ containerConfig.HostConfig.Memory = memoryLimit
506
+ }
507
+
499
508
  log('Container config created')
500
509
 
501
510
  // Volume Mount 配置
@@ -772,11 +781,14 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
772
781
 
773
782
  // 创建容器配置
774
783
  const timeoutSeconds = actualTimeout === null ? 86400 : actualTimeout
784
+
785
+ // GPU 容器不限制内存,CPU 容器限制 4GB
786
+ const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
787
+
775
788
  const containerConfig = {
776
789
  Image: image,
777
790
  Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream'],
778
791
  HostConfig: {
779
- Memory: SANDBOX_CONFIG.memory,
780
792
  AutoRemove: false
781
793
  },
782
794
  Env: [
@@ -786,6 +798,11 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
786
798
  ].filter(e => e)
787
799
  }
788
800
 
801
+ // 仅对 CPU 容器设置内存限制(GPU 容器不限制)
802
+ if (memoryLimit > 0) {
803
+ containerConfig.HostConfig.Memory = memoryLimit
804
+ }
805
+
789
806
  log('Container config created for streaming')
790
807
 
791
808
  // Volume Mount 配置(与 runPythonCode 相同)
package/version.json CHANGED
@@ -1,4 +1,4 @@
1
1
  {
2
- "buildTime": "2026-05-03T13:29:41.242Z",
3
- "cliVersion": "2026.5.3-2128"
2
+ "buildTime": "2026-05-03T15:46:55.655Z",
3
+ "cliVersion": "2026.5.3-2346"
4
4
  }