npm - @icyfenix-dmla/cli - Versions diffs - 2026.5.3-2128 → 2026.5.3-2346 - Mend

@icyfenix-dmla/cli 2026.5.3-2128 → 2026.5.3-2346

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/server/dmla_progress.py +60 -11
package/src/server/sandbox.js +20 -3
package/version.json +2 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@icyfenix-dmla/cli",
-  "version": "2026.5.3-2128",
+  "version": "2026.5.3-2346",
   "description": "DMLA 沙箱服务命令行工具",
   "type": "module",
   "main": "src/index.js",

package/src/server/dmla_progress.py CHANGED Viewed

@@ -18,12 +18,61 @@ DMLA 进度报告模块
 import json
 import time
+import sys
+import threading
+import queue
 from pathlib import Path
 from typing import Optional
 # 进度文件路径
 PROGRESS_FILE = Path('/workspace/progress.json')
+# stderr 异步写入队列（避免管道阻塞）
+_stderr_queue: queue.Queue = queue.Queue()
+_stderr_thread: Optional[threading.Thread] = None
+def _start_stderr_writer():
+    """启动后台 stderr 写入线程（daemon 线程，随主线程退出）"""
+    global _stderr_thread
+    if _stderr_thread is None or not _stderr_thread.is_alive():
+        _stderr_thread = threading.Thread(target=_stderr_writer_loop, daemon=True)
+        _stderr_thread.start()
+def _stderr_writer_loop():
+    """后台线程循环：从队列读取数据并写入 stderr"""
+    while True:
+        try:
+            item = _stderr_queue.get(timeout=1.0)
+            if item is None:  # 停止信号
+                break
+            sys.stderr.write(item)
+            sys.stderr.flush()
+        except queue.Empty:
+            continue  # 队列空，继续等待
+        except Exception:
+            pass  # 写入失败，忽略（不影响主线程）
+def _write_stderr_async(data: str):
+    """
+    异步写入 stderr（非阻塞）
+    将数据放入队列，由后台线程处理写入。
+    如果队列积压过多（>100 条），则丢弃旧数据，避免内存爆炸。
+    """
+    _start_stderr_writer()
+    # 如果队列积压过多，清空部分旧数据
+    while _stderr_queue.qsize() > 100:
+        try:
+            _stderr_queue.get_nowait()
+        except queue.Empty:
+            break
+    _stderr_queue.put(data)
 class ProgressReporter:
     """
@@ -214,23 +263,23 @@ class ProgressReporter:
         if extra_data:
             data["extra_data"] = extra_data
-        # 1. stderr 输出（用于流式 HTTP 响应，与 stdout 分开避免合并）
-        # Jupyter kernel 会将 stdout 和 stderr 分到不同的 stream 消息
-        try:
-            import sys
-            sys.stderr.write(json.dumps(data, ensure_ascii=False) + '\n')
-            sys.stderr.flush()
-        except Exception as e:
-            # stderr 输出失败不影响训练，仅打印警告
-            print(f"Warning: Failed to output progress to stderr: {e}")
-        # 2. 文件写入（作为降级/备用方案）
+        # 1. 文件写入（优先，确保进度数据持久化）
+        # 文件写入是可靠的，不受管道阻塞影响
         try:
             PROGRESS_FILE.write_text(json.dumps(data, ensure_ascii=False))
         except Exception as e:
             # 写入失败不影响训练，仅打印警告
             print(f"Warning: Failed to write progress file: {e}")
+        # 2. stderr 异步输出（用于流式 HTTP 响应）
+        # 使用后台线程异步写入，避免管道阻塞主线程
+        # Windows Docker 环境下管道缓冲区满时会导致阻塞
+        try:
+            _write_stderr_async(json.dumps(data, ensure_ascii=False) + '\n')
+        except Exception as e:
+            # stderr 输出失败不影响训练
+            print(f"Warning: Failed to output progress to stderr: {e}")
 def get_progress() -> Optional[dict]:
     """

package/src/server/sandbox.js CHANGED Viewed

@@ -146,7 +146,8 @@ const SANDBOX_CONFIG = {
   imageCpu: 'dmla-sandbox:cpu',
   imageGpu: 'dmla-sandbox:gpu',
   timeout: 60000,           // 60 秒超时
-  memory: 4 * 1024 * 1024 * 1024  // 4GB 内存
+  memoryCpu: 4 * 1024 * 1024 * 1024,  // CPU 容器 4GB 内存限制
+  memoryGpu: 0   // GPU 容器不限制内存（让 GPU 显存独立管理）
 }
 // DMLA 配置文件路径
@@ -480,11 +481,14 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
   // 创建容器配置 - 使用 kernel_runner.py 执行代码
   const timeoutSeconds = actualTimeout === null ? 86400 : actualTimeout  // unlimited 使用 24 小时
+  // GPU 容器不限制内存，CPU 容器限制 4GB
+  const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
   const containerConfig = {
     Image: image,
     Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds)],
     HostConfig: {
-      Memory: SANDBOX_CONFIG.memory,
       AutoRemove: false  // 手动移除以获取日志
     },
     // matplotlib 使用 IPython Kernel 的 inline 后端，自动发送 display_data
@@ -496,6 +500,11 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
     ].filter(e => e)  // 过滤空字符串
   }
+  // 仅对 CPU 容器设置内存限制（GPU 容器不限制）
+  if (memoryLimit > 0) {
+    containerConfig.HostConfig.Memory = memoryLimit
+  }
   log('Container config created')
   // Volume Mount 配置
@@ -772,11 +781,14 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
   // 创建容器配置
   const timeoutSeconds = actualTimeout === null ? 86400 : actualTimeout
+  // GPU 容器不限制内存，CPU 容器限制 4GB
+  const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
   const containerConfig = {
     Image: image,
     Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream'],
     HostConfig: {
-      Memory: SANDBOX_CONFIG.memory,
       AutoRemove: false
     },
     Env: [
@@ -786,6 +798,11 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
     ].filter(e => e)
   }
+  // 仅对 CPU 容器设置内存限制（GPU 容器不限制）
+  if (memoryLimit > 0) {
+    containerConfig.HostConfig.Memory = memoryLimit
+  }
   log('Container config created for streaming')
   // Volume Mount 配置（与 runPythonCode 相同）

package/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "buildTime": "2026-05-03T13:29:41.242Z",
-  "cliVersion": "2026.5.3-2128"
+  "buildTime": "2026-05-03T15:46:55.655Z",
+  "cliVersion": "2026.5.3-2346"
 }