npm - @icyfenix-dmla/cli - Versions diffs - 2026.5.25-736 → 2026.5.29-2018 - Mend

@icyfenix-dmla/cli 2026.5.25-736 → 2026.5.29-2018

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/package.json +1 -1
package/scripts/build.js +1 -1
package/shared/llm/__init__.py +2 -1
package/shared/llm/mini_mind_config.py +7 -4
package/shared/llm/reward_model.py +48 -0
package/shared/llm/sftdataset.py +16 -4
package/src/commands/data.js +1 -1
package/src/server/chat-manager.cjs +181 -0
package/src/server/index.js +4 -0
package/src/server/kernel_runner.py +267 -1
package/src/server/native_executor.js +61 -7
package/src/server/routes/chat.js +35 -0
package/src/server/routes/native.js +2 -2
package/src/server/routes/sandbox.js +3 -3
package/src/server/sandbox.js +69 -7
package/version.json +2 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@icyfenix-dmla/cli",
-  "version": "2026.5.25-736",
+  "version": "2026.5.29-2018",
   "description": "DMLA 沙箱服务命令行工具",
   "type": "module",
   "main": "src/index.js",

package/scripts/build.js CHANGED Viewed

@@ -53,7 +53,7 @@ function copyDir(src, dest, filter = null) {
 console.log('\n📋 复制服务器代码...')
 console.log(`   源目录: ${localServerSrc}`)
 console.log(`   目标目录: ${cliServerDest}`)
-copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js') || name.endsWith('.py'))
+copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js') || name.endsWith('.cjs') || name.endsWith('.py'))
 // 复制共享模块（复制所有 .py 文件和 __init__.py）
 console.log('\n📋 复制共享模块...')

package/shared/llm/__init__.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # LLM 模块
 from .mini_mind_config import MiniMindConfig, RMSNorm, Attention, FeedForward, MiniMindBlock, MiniMindModel, MiniMindForCausalLM, precompute_freqs_cis, apply_rotary_pos_emb, repeat_kv
 from .pretrain_dataset import PretrainDataset
+from .reward_model import RewardModel
 from .sftdataset import SFTDataset, pre_processing_chat
-__all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'SFTDataset', 'pre_processing_chat']
+__all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'RewardModel', 'SFTDataset', 'pre_processing_chat']

package/shared/llm/mini_mind_config.py CHANGED Viewed

@@ -12,7 +12,7 @@ from transformers.modeling_outputs import MoeCausalLMOutputWithPast
 from typing import Optional, Tuple, List, Dict
 class MiniMindConfig(PretrainedConfig):
-    """MiniMind 模型配置"""
+    """模型配置"""
     model_type = "minimind"
     def __init__(self, hidden_size=768, num_hidden_layers=8, use_moe=False, **kwargs):
         super().__init__(**kwargs)
@@ -182,7 +182,7 @@ class MiniMindBlock(nn.Module):
 class MiniMindModel(nn.Module):
-    """MiniMind 主体：词嵌入 + 多层 Transformer + 最终归一化"""
+    """模型主体：词嵌入 + 多层 Transformer + 最终归一化"""
     def __init__(self, config):
         super().__init__()
         self.config = config
@@ -228,7 +228,7 @@ class MiniMindModel(nn.Module):
 class MiniMindForCausalLM(PreTrainedModel, GenerationMixin):
-    """MiniMind 因果语言模型：用于预训练和推理"""
+    """因果语言模型：用于预训练和推理"""
     config_class = MiniMindConfig
     _tied_weights_keys = {"lm_head.weight": "model.embed_tokens.weight"}
     def __init__(self, config=None):
@@ -251,7 +251,10 @@ class MiniMindForCausalLM(PreTrainedModel, GenerationMixin):
         return MoeCausalLMOutputWithPast(loss=loss, aux_loss=aux_loss, logits=logits, past_key_values=past_key_values, hidden_states=hidden_states)
     @torch.inference_mode()
-    def generate(self, inputs=None, attention_mask=None, max_new_tokens=512, temperature=0.85, top_p=0.85, top_k=50, eos_token_id=2, streamer=None, use_cache=True, num_return_sequences=1, do_sample=True, repetition_penalty=1.0, **kwargs):
+    def generate(self, inputs=None, attention_mask=None, max_new_tokens=512,
+                 temperature=0.85, top_p=0.85, top_k=50, eos_token_id=2,
+                 streamer=None, use_cache=True, num_return_sequences=1,
+                 do_sample=True, repetition_penalty=1.0, **kwargs):
         """自回归生成：逐 token 采样，支持 top-k、top-p、重复惩罚"""
         input_ids = kwargs.pop("input_ids", inputs).repeat(num_return_sequences, 1)
         attention_mask = attention_mask.repeat(num_return_sequences, 1) if attention_mask is not None else None

package/shared/llm/reward_model.py ADDED Viewed

@@ -0,0 +1,48 @@
+# RewardModel 定义
+# 从文档自动提取生成
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class RewardModel(nn.Module):
+    """
+    Simplified reward model.
+
+    A Transformer encoder extracts features from a token sequence and a linear
+    reward head maps the hidden state of the final position to one scalar
+    score per sequence.
+
+    Args:
+        vocab_size: Size of the token embedding table.
+        d_model: Embedding / hidden dimension.
+        nhead: Number of attention heads.
+        num_layers: Number of Transformer encoder layers.
+        max_len: Number of positions in the learned positional table. Defaults
+            to 512, the value that was previously hard-coded, so existing
+            callers and checkpoints keep the same parameter shapes.
+    """
+    def __init__(self, vocab_size=1000, d_model=128, nhead=4, num_layers=2, max_len=512):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, d_model)
+        # Learned positional table with a small random initialisation.
+        self.pos_encoding = nn.Parameter(torch.randn(1, max_len, d_model) * 0.01)
+        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, d_model * 4, batch_first=True)
+        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
+        # Reward head: maps the encoded features to a single scalar.
+        self.reward_head = nn.Linear(d_model, 1)
+    def forward(self, input_ids):
+        """
+        Score a batch of token sequences.
+
+        Args:
+            input_ids: Integer tensor of shape (batch, seq_len) holding the
+                prompt + answer tokens.
+
+        Returns:
+            Tensor of shape (batch,) with one reward score per sequence.
+
+        Raises:
+            ValueError: If seq_len exceeds the positional table length.
+
+        Steps:
+            1. Token embedding plus positional encoding.
+            2. Transformer encoding.
+            3. Hidden state of the last position -> linear reward head.
+
+        NOTE(review): the last position is used unconditionally; if batches
+        are right-padded this reads a padding position — confirm how callers
+        batch their inputs.
+        """
+        seq_len = input_ids.size(1)
+        max_positions = self.pos_encoding.size(1)
+        if seq_len > max_positions:
+            # Fail early with a clear message instead of an opaque
+            # shape-mismatch error from the broadcasted addition below.
+            raise ValueError(
+                f"sequence length {seq_len} exceeds the positional table size {max_positions}"
+            )
+        x = self.embedding(input_ids) + self.pos_encoding[:, :seq_len, :]
+        x = self.transformer(x)
+        # Hidden state of the final token.
+        last_hidden = x[:, -1, :]  # (batch, d_model)
+        reward = self.reward_head(last_hidden).squeeze(-1)  # (batch,)
+        return reward

package/shared/llm/sftdataset.py CHANGED Viewed

@@ -6,6 +6,7 @@ import os
 import random
 import torch
 from datasets import load_dataset, Features, Value
+from datasets import logging as datasets_logging
 from torch.utils.data import Dataset
 class SFTDataset(Dataset):
@@ -17,17 +18,32 @@ class SFTDataset(Dataset):
     - 标签掩码：仅 assistant 回答部分参与 loss，其余标记为 -100
     - 使用 apply_chat_template 将对话转为 ChatML 格式
     """
+    # MiniMind 使用 ChatML 格式：<|im_start|>role\ncontent<|im_end|>\n
+    # tokenizer 本身未内置 chat_template，需手动设置
+    CHATML_TEMPLATE = (
+        "{% for message in messages %}<|im_start|>{{ message.role }}\n"
+        "{{ message.content }}<|im_end|>\n"
+        "{% endfor %}"
+        "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+    )
     def __init__(self, jsonl_path, tokenizer, max_length=768):
         super().__init__()
         os.environ["TOKENIZERS_PARALLELISM"] = "false"
         self.tokenizer = tokenizer
+        # MiniMind tokenizer 未内置 chat_template，需手动设置 ChatML 格式
+        if not tokenizer.chat_template:
+            tokenizer.chat_template = self.CHATML_TEMPLATE
         self.max_length = max_length
         features = Features({
             'conversations': [{'role': Value('string'), 'content': Value('string'),
                               'reasoning_content': Value('string'), 'tools': Value('string'),
                               'tool_calls': Value('string')}]
         })
+        # 抑制 load_dataset 的 "Generating train split" 进度输出
+        datasets_logging.set_verbosity_error()
         self.samples = load_dataset('json', data_files=jsonl_path, split='train', features=features)
+        datasets_logging.set_verbosity_warning()
         # 预计算 assistant 回答的起止标记 ID
         self.bos_id = tokenizer(f'{tokenizer.bos_token}assistant\n', add_special_tokens=False).input_ids
         self.eos_id = tokenizer(f'{tokenizer.eos_token}\n', add_special_tokens=False).input_ids
@@ -91,15 +107,11 @@ def pre_processing_chat(conversations, add_system_ratio=0.2):
     SYSTEM_PROMPTS = [
         "你是一个知识丰富的AI，尽力为用户提供准确的信息。",
-        "你是minimind，一个小巧但有用的语言模型。",
         "你是一个专业的AI助手，请提供有价值的回答。",
-        "你是minimind，请尽力帮助用户解决问题。",
         "你是一个可靠的AI，请给出准确的回答。",
         "You are a helpful AI assistant.",
-        "You are minimind, a lightweight intelligent assistant.",
         "You are a friendly chatbot. Please answer the user's questions carefully.",
         "You are a knowledgeable AI. Try your best to provide accurate information.",
-        "You are minimind, a small but useful language model."
     ]
     # 概率性添加 system
     if conversations[0].get('role') != 'system':

package/src/commands/data.js CHANGED Viewed

@@ -83,7 +83,7 @@ const DATASETS = [
     id: 'minimind-sft',
     name: 'MiniMind SFT (LLM监督微调语料)',
     url: 'https://www.modelscope.cn/datasets/icyfenix/Minimind_SFT.git',
-    size: '~500MB',
+    size: '~1.7GB',
     format: 'git',
     targetDir: 'datasets/minimind-sft',
     source: 'ModelScope (icyfenix)'

package/src/server/chat-manager.cjs ADDED Viewed

@@ -0,0 +1,181 @@
+const EventEmitter = require('events');
+/**
+ * Tracks the single long-running "chat" sandbox process and turns its
+ * JSON Lines stdout protocol into promise-based request/response calls.
+ *
+ * Emits:
+ *   'ready'   (boolean) whenever setReady() is called
+ *   'cleared' ()        whenever clear() is called
+ */
+class ChatManager extends EventEmitter {
+  constructor() {
+    super();
+    this.session = null;
+    this._pendingResponse = null;
+    this._responseBuffer = '';
+    this._pendingTimeout = null;
+    // The stdout stream and the exact listener function attached to it.
+    // removeListener() only works with the same function reference that was
+    // passed to on(), so both are remembered here.
+    this._stdout = null;
+    this._stdoutListener = null;
+  }
+  /**
+   * Register a chat sandbox session.
+   * @param {'docker'|'native'} type - Sandbox type
+   * @param {object} options
+   * @param {object} [options.container] - Docker container instance
+   * @param {object} [options.process] - Child process instance
+   * @param {object} options.stdin - Writable stdin stream
+   */
+  register(type, { container, process: proc, stdin }) {
+    // Stop listening to a previously registered process so that its output
+    // cannot leak into the new session.
+    this._detachStdout();
+    this.session = { type, container, process: proc, stdin, ready: false };
+    this._responseBuffer = '';
+    // Native mode: parse messages straight from the child's stdout.
+    // Docker mode: output arrives through handleDockerStream(), so nothing
+    // is bound here.
+    const stdout = type === 'native' ? proc.stdout : null;
+    if (stdout) {
+      const listener = (data) => this._handleStdout(data);
+      this._stdout = stdout;
+      this._stdoutListener = listener;
+      stdout.on('data', listener);
+    }
+  }
+  /**
+   * Mark the chat sandbox as ready (or not) and emit 'ready'.
+   * @param {boolean} ready
+   */
+  setReady(ready) {
+    if (this.session) {
+      this.session.ready = ready;
+    }
+    this.emit('ready', ready);
+  }
+  /**
+   * Report the chat service status.
+   * @returns {{ready: boolean, message: string}}
+   */
+  getStatus() {
+    if (!this.session) {
+      return { ready: false, message: '对话服务未启动' };
+    }
+    return {
+      ready: this.session.ready,
+      message: this.session.ready ? '对话服务就绪' : '模型加载中...'
+    };
+  }
+  /**
+   * Send one chat message to the sandbox.
+   * @param {string} message - User message
+   * @returns {Promise<string>} The reply; rejects when the service is not
+   *   ready, stdin is unavailable, the sandbox reports an error, the session
+   *   is cleared, or no result arrives within 60 seconds.
+   */
+  async send(message) {
+    if (!this.session || !this.session.ready) {
+      throw new Error('对话服务未就绪');
+    }
+    if (!this.session.stdin) {
+      throw new Error('沙箱 stdin 不可用');
+    }
+    // JSON.stringify yields a quoted, escaped literal that is embedded in
+    // the Python call below.
+    const escapedMessage = JSON.stringify(message);
+    const code = `print(chat(${escapedMessage}))`;
+    return new Promise((resolve, reject) => {
+      const pending = { resolve, reject, buffer: '' };
+      this._pendingResponse = pending;
+      const cmd = JSON.stringify({ action: 'execute', code });
+      this.session.stdin.write(cmd + '\n');
+      // Timeout guard (60 seconds). Only drop the pending slot when it still
+      // belongs to this request, so a stale timer from an earlier send()
+      // cannot cancel a newer request.
+      this._pendingTimeout = setTimeout(() => {
+        if (this._pendingResponse === pending) {
+          this._pendingResponse = null;
+          this._pendingTimeout = null;
+        }
+        reject(new Error('推理超时'));
+      }, 60000);
+    });
+  }
+  /**
+   * Consume a chunk of sandbox stdout: split it into lines, dispatch JSON
+   * lines to _handleMessage(), and append non-JSON lines to the pending
+   * response buffer. A trailing partial line is kept for the next chunk.
+   */
+  _handleStdout(data) {
+    const text = data.toString();
+    this._responseBuffer += text;
+    const lines = this._responseBuffer.split('\n');
+    this._responseBuffer = lines.pop();
+    for (const line of lines) {
+      if (!line.trim()) continue;
+      try {
+        const msg = JSON.parse(line);
+        this._handleMessage(msg);
+      } catch {
+        if (this._pendingResponse) {
+          this._pendingResponse.buffer += line + '\n';
+        }
+      }
+    }
+  }
+  /**
+   * Handle one parsed JSON message from the sandbox.
+   */
+  _handleMessage(msg) {
+    switch (msg.type) {
+      case 'idle':
+        this.setReady(true);
+        break;
+      case 'pong':
+        break;
+      case 'stream':
+        if (this._pendingResponse) {
+          const content = msg.content || msg.text || '';
+          if (content) {
+            this._pendingResponse.buffer += content;
+          }
+        }
+        break;
+      case 'result':
+      case 'execute_result':
+        if (this._pendingResponse) {
+          clearTimeout(this._pendingTimeout);
+          this._pendingTimeout = null;
+          // Prefer an explicit content field; otherwise use what was
+          // accumulated from stream messages and plain-text lines.
+          const result = msg.content || this._pendingResponse.buffer.trim();
+          this._pendingResponse.resolve(result);
+          this._pendingResponse = null;
+        }
+        break;
+      case 'error':
+        if (this._pendingResponse) {
+          clearTimeout(this._pendingTimeout);
+          this._pendingTimeout = null;
+          this._pendingResponse.reject(new Error(msg.content || msg.message || '推理出错'));
+          this._pendingResponse = null;
+        }
+        break;
+    }
+  }
+  /**
+   * Feed Docker-mode stream output into the same parser used for native
+   * stdout.
+   */
+  handleDockerStream(data) {
+    this._handleStdout(data);
+  }
+  /**
+   * Remove the stdout listener installed by register(), if any.
+   */
+  _detachStdout() {
+    if (this._stdout && this._stdoutListener) {
+      this._stdout.removeListener('data', this._stdoutListener);
+    }
+    this._stdout = null;
+    this._stdoutListener = null;
+  }
+  /**
+   * Drop the current session: reject any in-flight request, stop listening
+   * to the process stdout, reset buffers and emit 'cleared'.
+   */
+  clear() {
+    if (this._pendingResponse) {
+      clearTimeout(this._pendingTimeout);
+      this._pendingTimeout = null;
+      this._pendingResponse.reject(new Error('沙箱已停止'));
+      this._pendingResponse = null;
+    }
+    // Detach using the stored listener reference so a cleared session
+    // really stops receiving data.
+    this._detachStdout();
+    this.session = null;
+    this._responseBuffer = '';
+    this.emit('cleared');
+  }
+}
+// 单例导出
+module.exports = new ChatManager();

package/src/server/index.js CHANGED Viewed

@@ -12,6 +12,7 @@ import { fileURLToPath } from 'url'
 import { resolve } from 'path'
 import sandboxRouter from './routes/sandbox.js'
 import nativeRouter from './routes/native.js'
+import chatRouter from './routes/chat.js'
 import { cleanupAllContainers } from './sandbox.js'
 import { cleanupAllProcesses } from './native_executor.js'
 import { checkNativeEnvironment, getDataPath } from './native_env_check.js'
@@ -72,6 +73,9 @@ if (isNativeMode) {
   app.use('/api/sandbox', sandboxRouter)
 }
+// 对话 API
+app.use('/api/chat', chatRouter)
 // 错误处理
 app.use((err, req, res, next) => {
   log(`Error: ${err.message}`)

package/src/server/kernel_runner.py CHANGED Viewed

@@ -483,6 +483,164 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
                 log_debug(f'Error shutting down kernel: {e}')
+def _collect_kernel_outputs(kc, timeout, stream=False):
+    """
+    Drain IOPub messages for the current execution until the kernel reports
+    an idle status, the deadline passes, or the kernel stops.
+
+    Args:
+        kc: A started KernelClient.
+        timeout: Execution timeout in seconds; 0 means wait without a
+            deadline.
+        stream: When True each output is written immediately with
+            output_json() and the returned list stays empty; when False the
+            outputs are accumulated and returned.
+
+    Returns:
+        An (outputs, timed_out, has_error) tuple.
+    """
+    import queue  # local import: only needed for the Empty poll signal
+    deadline = time.time() + timeout if timeout > 0 else float('inf')
+    outputs = []
+    timed_out = False
+    has_error = False
+
+    def emit(item):
+        # Route one output record according to the streaming mode.
+        if stream:
+            output_json(item)
+        else:
+            outputs.append(item)
+
+    while True:
+        remaining = deadline - time.time()
+        if timeout > 0 and remaining <= 0:
+            timed_out = True
+            break
+        try:
+            msg = kc.get_iopub_msg(timeout=max(1, remaining) if timeout > 0 else 2)
+        except queue.Empty:
+            # A quiet poll window is not the end of the execution.
+            if timeout > 0:
+                # The deadline check at the top of the loop decides whether
+                # this is a timeout.
+                continue
+            if kc.is_alive():
+                # No deadline requested: keep waiting while the kernel runs.
+                continue
+            has_error = True
+            emit({
+                'type': 'error',
+                'ename': 'KernelError',
+                'evalue': 'Kernel is no longer running',
+                'traceback': ['Kernel is no longer running']
+            })
+            break
+        except Exception:
+            # Best-effort: any other failure while reading ends collection,
+            # flagged as a timeout only when the deadline has passed.
+            if timeout > 0 and time.time() >= deadline:
+                timed_out = True
+            break
+        msg_type = msg['header']['msg_type']
+        content = msg['content']
+        if msg_type == 'status':
+            if content.get('execution_state') == 'idle':
+                break
+            continue
+        if msg_type == 'stream':
+            emit({
+                'type': 'stream',
+                'name': content.get('name', 'stdout'),
+                'text': content.get('text', '')
+            })
+        elif msg_type == 'display_data':
+            emit({
+                'type': 'display_data',
+                'data': content.get('data', {}),
+                'metadata': content.get('metadata', {})
+            })
+        elif msg_type == 'execute_result':
+            emit({
+                'type': 'execute_result',
+                'data': content.get('data', {}),
+                'metadata': content.get('metadata', {}),
+                'execution_count': content.get('execution_count')
+            })
+        elif msg_type == 'error':
+            error_output = {
+                'type': 'error',
+                'ename': content.get('ename', 'UnknownError'),
+                'evalue': content.get('evalue', ''),
+                'traceback': content.get('traceback', [])
+            }
+            if is_cuda_compat_error(content.get('evalue', '')):
+                error_output = enrich_cuda_error(error_output)
+            has_error = True
+            emit(error_output)
+    return outputs, timed_out, has_error
+def _execute_and_output(kc, code, timeout=0):
+    """
+    Run code on an already-started kernel in non-streaming mode and print a
+    single JSON result line to stdout.
+
+    Args:
+        kc: A started KernelClient.
+        code: Python source to execute.
+        timeout: Execution timeout in seconds; a value that is not greater
+            than 0 falls back to DEFAULT_TIMEOUT.
+    """
+    started_at = time.time()
+    if timeout > 0:
+        effective_timeout = timeout
+    else:
+        effective_timeout = DEFAULT_TIMEOUT
+    kc.execute(code, allow_stdin=False)
+    collected, timed_out, has_error = _collect_kernel_outputs(kc, effective_timeout, stream=False)
+    elapsed = time.time() - started_at
+    if not timed_out:
+        payload = {
+            'success': not has_error,
+            'outputs': collected,
+            'executionTime': round(elapsed, 3)
+        }
+    else:
+        # On timeout the collected outputs are replaced by one error record.
+        timeout_record = {
+            'type': 'error',
+            'ename': 'TimeoutError',
+            'evalue': f'Execution timed out after {effective_timeout} seconds',
+            'traceback': [f'Execution timed out after {effective_timeout} seconds']
+        }
+        payload = {
+            'success': False,
+            'outputs': [timeout_record],
+            'executionTime': round(elapsed, 3)
+        }
+    print(json.dumps(payload, ensure_ascii=False))
+    sys.stdout.flush()
+def _stream_execute(kc, code, timeout=0):
+    """
+    Run code on an already-started kernel in streaming mode: each kernel
+    output is written to stdout as it arrives, followed by a final 'result'
+    message.
+
+    Args:
+        kc: A started KernelClient.
+        code: Python source to execute.
+        timeout: Execution timeout in seconds; a value that is not greater
+            than 0 falls back to DEFAULT_TIMEOUT.
+    """
+    started_at = time.time()
+    if timeout > 0:
+        effective_timeout = timeout
+    else:
+        effective_timeout = DEFAULT_TIMEOUT
+    kc.execute(code, allow_stdin=False)
+    collected, timed_out, has_error = _collect_kernel_outputs(kc, effective_timeout, stream=True)
+    elapsed = time.time() - started_at
+    if not timed_out:
+        output_json({'type': 'result', 'success': not has_error,
+                     'outputs': collected,
+                     'executionTime': round(elapsed, 3)})
+        return
+    # Timed out: report the timeout as an error message, then the result.
+    output_json({'type': 'error', 'ename': 'TimeoutError',
+                 'evalue': f'Execution timed out after {effective_timeout} seconds',
+                 'traceback': [f'Execution timed out after {effective_timeout} seconds']})
+    output_json({'type': 'result', 'success': False,
+                 'executionTime': round(elapsed, 3)})
 def check_cuda_compatibility():
     """
     快速检查 CUDA 兼容性
@@ -545,6 +703,8 @@ def main():
     parser.add_argument('--timeout', type=int, default=DEFAULT_TIMEOUT, help='执行超时时间（秒）')
     parser.add_argument('--check-cuda', action='store_true', help='仅检查 CUDA 兼容性')
     parser.add_argument('--stream', action='store_true', help='启用流式输出模式（实时输出每个消息）')
+    parser.add_argument("--serve", action="store_true",
+                        help="长运行模式：初始代码执行后 Kernel 保持运行，从 stdin 接收后续指令")
     args = parser.parse_args()
@@ -569,7 +729,7 @@ def main():
             return
     elif args.code:
         code = args.code
-    else:
+    elif not args.serve:
         result = {
             'success': False,
             'outputs': [{
@@ -589,6 +749,112 @@ def main():
         print(json.dumps(result, ensure_ascii=False))
         return
+    # serve 模式：自行管理 Kernel 生命周期，执行初始代码后进入 stdin 监听循环
+    if args.serve:
+        from jupyter_client import KernelManager
+        km = KernelManager()
+        suppress_stdout()
+        km.start_kernel()
+        kc = km.client()
+        kc.start_channels()
+        kc.wait_for_ready(timeout=30)
+        restore_stdout()
+        # 注入全局变量、数据路径和 sys.path 配置
+        python_path_env = os.environ.get('PYTHONPATH', '')
+        path_separator = ';' if os.name == 'nt' else ':'
+        python_path_entries = [p for p in python_path_env.split(path_separator) if p]
+        setup_code = '''
+import os
+import sys
+DATA_DIR = os.environ.get('DMLA_DATA_PATH', '/data')
+# 将 PYTHONPATH 中的路径注入 sys.path（IPython kernel 可能不会自动继承）
+_python_path_entries = ''' + repr(python_path_entries) + '''
+for _p in _python_path_entries:
+    if _p not in sys.path:
+        sys.path.insert(0, _p)
+# 配置 matplotlib inline 后端（在用户 import matplotlib 之前设置）
+import matplotlib
+matplotlib.use('module://matplotlib_inline.backend_inline')
+'''
+        kc.execute(setup_code, allow_stdin=False)
+        # 等待 setup 执行完成
+        setup_start = time.time()
+        while True:
+            if time.time() - setup_start > 5:
+                break
+            try:
+                msg = kc.get_iopub_msg(timeout=2)
+                msg_type = msg['header']['msg_type']
+                if msg_type == 'status' and msg['content'].get('execution_state') == 'idle':
+                    break
+            except Exception:
+                break
+        # 执行初始代码
+        if code:
+            if args.stream:
+                _stream_execute(kc, code, args.timeout)
+            else:
+                _execute_and_output(kc, code, args.timeout)
+        # serve 模式：进入 stdin 监听循环
+        def output_message(msg_type, content):
+            """输出 JSON Lines 消息到 stdout"""
+            msg = {"type": msg_type}
+            if content is not None:
+                msg["content"] = content
+            sys.stdout.write(json.dumps(msg, ensure_ascii=False) + "\n")
+            sys.stdout.flush()
+        output_message("idle", "kernel ready")
+        while True:
+            try:
+                line = sys.stdin.readline()
+                if not line:
+                    break
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    cmd = json.loads(line)
+                except json.JSONDecodeError:
+                    output_message("error", f"无效的 JSON 指令: {line}")
+                    continue
+                action = cmd.get("action")
+                if action == "ping":
+                    output_message("pong", None)
+                elif action == "execute":
+                    exec_code = cmd.get("code", "")
+                    if args.stream:
+                        _stream_execute(kc, exec_code, cmd.get("timeout", 0))
+                    else:
+                        _execute_and_output(kc, exec_code, cmd.get("timeout", 0))
+                    output_message("idle", "kernel ready")
+                else:
+                    output_message("error", f"未知指令: {action}")
+            except Exception as e:
+                output_message("error", str(e))
+                break
+        # stdin 关闭或出错，清理退出
+        kc.stop_channels()
+        try:
+            kc.shutdown_kernel()
+        except Exception:
+            pass
+        try:
+            km.shutdown_kernel(now=True)
+        except Exception:
+            pass
+        return
     result = run_code(code, args.timeout, stream=args.stream)
     # 非流式模式：输出 JSON 结果到 stdout

package/src/server/native_executor.js CHANGED Viewed

@@ -10,6 +10,7 @@ import os from 'os'
 import fs from 'fs'
 import chalk from 'chalk'
 import { getCachedEnvironment, getKernelRunnerPath, getSharedModulesPath, getServerPythonPath, getDataPath, getProgressPath, getPythonCommand, detectPythonCommand } from './native_env_check.js'
+import chatManager from './chat-manager.cjs'
 const __filename = fileURLToPath(import.meta.url)
 const __dirname = path.dirname(__filename)
@@ -232,6 +233,14 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
     registerProcess(executionId, proc)
+    // 检测是否为对话沙箱
+    if (procArgs.includes('--serve')) {
+      chatManager.register('native', {
+        process: proc,
+        stdin: proc.stdin
+      })
+    }
     // 设置超时
     const timeoutPromise = new Promise((_, reject) => {
       timeoutId = setTimeout(() => {
@@ -259,6 +268,9 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
     const execPromise = new Promise((resolve, reject) => {
       proc.on('close', (code) => {
         log(`Process exited with code ${code}`)
+        if (chatManager.session) {
+          chatManager.clear()
+        }
         if (timeoutId) clearTimeout(timeoutId)
         resolve({ stdout, stderr, exitCode: code })
       })
@@ -378,8 +390,9 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
  * @param {boolean} useGpu - 是否请求 GPU
  * @param {object} res - Express 响应对象
  * @param {number|null} timeoutOverride - 超时时间（秒）
+ * @param {string|null} mode - 执行模式（'chat' 启用对话模式）
  */
-export async function runPythonCodeStreamingNative(code, useGpu = false, res, timeoutOverride = null) {
+export async function runPythonCodeStreamingNative(code, useGpu = false, res, timeoutOverride = null, mode = null) {
   const startTime = Date.now()
   const executionId = generateExecutionId()
@@ -474,6 +487,7 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
       '--timeout', String(timeoutSeconds),
       '--stream'
     ]
+    if (mode === 'chat') procArgs.push('--serve')
   } else {
     // Linux/macOS 直接传递代码参数
     procArgs = [
@@ -482,6 +496,7 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
       '--timeout', String(timeoutSeconds),
       '--stream'
     ]
+    if (mode === 'chat') procArgs.push('--serve')
   }
   try {
@@ -489,6 +504,14 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
     registerProcess(executionId, proc)
+    // 检测是否为对话沙箱
+    if (procArgs.includes('--serve')) {
+      chatManager.register('native', {
+        process: proc,
+        stdin: proc.stdin
+      })
+    }
     // 输出运行状态
     res.write(JSON.stringify({
       type: 'status',
@@ -514,6 +537,19 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
         // kernel_runner.py 输出的已经是 JSON 格式，直接转发
         if (line.trim().startsWith('{')) {
           res.write(line + '\n')
+          // chat 模式：检测 idle 消息，注册 ChatManager
+          if (mode === 'chat' && !chatManager.session) {
+            try {
+              const msg = JSON.parse(line)
+              if (msg.type === 'idle') {
+                chatManager.register('native', {
+                  process: proc,
+                  stdin: proc.stdin
+                })
+                log('ChatManager registered for Native chat sandbox')
+              }
+            } catch {}
+          }
         } else {
           // 非 JSON 内容包装为 stream 消息
           res.write(JSON.stringify({
@@ -562,10 +598,25 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
       }) + '\n')
     })
-    // 等待进程完成
-    await new Promise((resolve) => {
-      proc.on('close', resolve)
-    })
+    // 等待进程完成（chat 模式下进程持续运行，不等待 close）
+    if (mode === 'chat') {
+      // chat 模式：进程持续运行，HTTP 流保持打开
+      // 当进程意外退出时清理
+      proc.on('close', () => {
+        if (chatManager.session) {
+          chatManager.clear()
+        }
+      })
+    } else {
+      await new Promise((resolve) => {
+        proc.on('close', () => {
+          if (chatManager.session) {
+            chatManager.clear()
+          }
+          resolve()
+        })
+      })
+    }
     // 处理缓冲区剩余内容
     if (stdoutBuffer.trim()) {
@@ -613,8 +664,11 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
         log(`Failed to clean up temp file: ${e.message}`)
       }
     }
-    res.end()
-    log('Streaming response ended')
+    // chat 模式下不关闭 HTTP 响应，进程持续运行
+    if (mode !== 'chat') {
+      res.end()
+      log('Streaming response ended')
+    }
   }
 }

package/src/server/routes/chat.js ADDED Viewed

@@ -0,0 +1,35 @@
+import { Router } from 'express'
+import chatManager from '../chat-manager.cjs'
+const router = Router()
+/**
+ * Report the chat service status.
+ * GET /api/chat/status
+ */
+router.get('/status', (req, res) => {
+  const status = chatManager.getStatus()
+  res.json(status)
+})
+/**
+ * Send one chat message and return the reply.
+ * POST /api/chat/send
+ * Body: { message: string }
+ * Responds 400 when the message is missing, empty or not a string, 503 when
+ * the chat service is not ready, and 500 for any other failure.
+ */
+router.post('/send', async (req, res) => {
+  const { message: text } = req.body
+  const isUsable = typeof text === 'string' && text !== ''
+  if (!isUsable) {
+    return res.status(400).json({ error: '消息不能为空' })
+  }
+  try {
+    const reply = await chatManager.send(text)
+    res.json({ response: reply })
+  } catch (err) {
+    // The "not ready" error maps to 503; everything else is a 500.
+    const notReady = err.message === '对话服务未就绪'
+    res.status(notReady ? 503 : 500).json({ error: err.message })
+  }
+})
+export default router

package/src/server/routes/native.js CHANGED Viewed

@@ -115,7 +115,7 @@ router.post('/run', async (req, res) => {
  * 响应: JSON Lines 流式输出
  */
 router.post('/stream', async (req, res) => {
-  const { code, useGpu = false, timeout = null } = req.body
+  const { code, useGpu = false, timeout = null, mode = null } = req.body
   // 验证请求
   if (!code || typeof code !== 'string') {
@@ -155,7 +155,7 @@ router.post('/stream', async (req, res) => {
     }
     // 流式执行
-    await runPythonCodeStreamingNative(code, useGpu, res, timeout)
+    await runPythonCodeStreamingNative(code, useGpu, res, timeout, mode)
   } catch (error) {
     console.error('[Native Sandbox Stream] Error:', error)

package/src/server/routes/sandbox.js CHANGED Viewed

@@ -124,11 +124,11 @@ router.post('/run', async (req, res) => {
 /**
  * 流式执行代码
  * POST /api/sandbox/stream
- * Body: { code: string, useGpu?: boolean, timeout?: number|null }
+ * Body: { code: string, useGpu?: boolean, timeout?: number|null, mode?: string }
  * 响应: JSON Lines 流式输出
  */
 router.post('/stream', async (req, res) => {
-  const { code, useGpu = false, timeout = null } = req.body
+  const { code, useGpu = false, timeout = null, mode = null } = req.body
   // 验证请求
   if (!code || typeof code !== 'string') {
@@ -193,7 +193,7 @@ router.post('/stream', async (req, res) => {
     }
     // 流式执行代码
-    await runPythonCodeStreaming(code, actualUseGpu, res, actualImage, timeout)
+    await runPythonCodeStreaming(code, actualUseGpu, res, actualImage, timeout, mode)
   } catch (error) {
     console.error('Sandbox stream error:', error)

package/src/server/sandbox.js CHANGED Viewed

@@ -7,6 +7,7 @@ import path from 'path'
 import { fileURLToPath } from 'url'
 import fs from 'fs'
 import os from 'os'
+import chatManager from './chat-manager.cjs'
 const __filename = fileURLToPath(import.meta.url)
 const __dirname = path.dirname(__filename)
@@ -71,6 +72,7 @@ export async function cleanupAllContainers() {
   }
   activeContainers.clear()
+  chatManager.clear()
   log(`All containers cleaned up: ${count}`)
   return count
 }
@@ -102,6 +104,7 @@ export async function abortExecution(executionId = null) {
     }
   }
+  chatManager.clear()
   return { success: true, stopped }
 }
@@ -496,7 +499,8 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
     Env: [
       'PYTHONUNBUFFERED=1',
       'PYTHONPATH=/workspace',
-      actualTimeout === null ? 'DMLA_NO_TIMEOUT=1' : ''
+      actualTimeout === null ? 'DMLA_NO_TIMEOUT=1' : '',
+      `DMLA_DATA_PATH=${getDataVolumePath() || '/data'}`
     ].filter(e => e)  // 过滤空字符串
   }
@@ -558,6 +562,12 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
     containerConfig.HostConfig.Binds = binds
   }
+  // 将宿主机 shared 目录路径注入到容器环境变量，供环境检查代码读取
+  const sharedMountInfo = (useMount && sharedModulesPath && fs.existsSync(sharedModulesPath))
+    ? `host_path=${sharedModulesPath},mounted=true`
+    : 'mounted=false'
+  containerConfig.Env.push(`DMLA_SHARED_INFO=${sharedMountInfo}`)
   if (!PROJECT_ROOT) {
     console.log('[Sandbox] 独立安装模式，无 Volume Mount')
   }
@@ -727,7 +737,7 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
  * @param {number|null} timeoutOverride - 可选，超时时间（秒）
  * @returns {Promise<void>}
  */
-export async function runPythonCodeStreaming(code, useGpu = false, res, imageOverride = null, timeoutOverride = null) {
+export async function runPythonCodeStreaming(code, useGpu = false, res, imageOverride = null, timeoutOverride = null, mode = null) {
   const startTime = Date.now()
   // 生成唯一执行 ID
@@ -785,9 +795,17 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
   // GPU 容器不限制内存，CPU 容器限制 4GB
   const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
+  // 构建命令参数
+  const cmdArgs = ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream']
+  if (mode === 'chat') {
+    cmdArgs.push('--serve')
+  }
   const containerConfig = {
     Image: image,
-    Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream'],
+    Cmd: cmdArgs,
+    OpenStdin: true,
+    StdinOnce: false,
     HostConfig: {
       AutoRemove: false
     },
@@ -838,6 +856,12 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
     containerConfig.HostConfig.Binds = binds
   }
+  // 将宿主机 shared 目录路径注入到容器环境变量
+  const sharedMountInfo = (useMount && sharedModulesPath && fs.existsSync(sharedModulesPath))
+    ? `host_path=${sharedModulesPath},mounted=true`
+    : 'mounted=false'
+  containerConfig.Env.push(`DMLA_SHARED_INFO=${sharedMountInfo}`)
   // GPU 配置
   if (useGpu) {
     containerConfig.HostConfig.DeviceRequests = [{
@@ -872,6 +896,24 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
     await container.start()
     log('Container started')
+    // chat 模式：获取容器的 attach 流（支持 stdin 写入）
+    if (mode === 'chat') {
+      log('Chat mode detected, preparing stdin stream...')
+      const attachStream = await container.attach({
+        hijack: true,
+        stdin: true,
+        stream: true,
+        stdout: true,
+        stderr: true
+      })
+      // 先注册 stdin，等 idle 消息时设置 ready
+      chatManager.register('docker', {
+        container,
+        stdin: attachStream
+      })
+      log('ChatManager registered for Docker sandbox (stdin ready, waiting for idle)')
+    }
     // 输出运行状态消息
     const runningMsg = {
       type: 'status',
@@ -940,6 +982,16 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
               if (isJsonComplete(text)) {
                 log(`Forwarding complete JSON message: ${text.length} bytes`)
                 res.write(text + '\n')
+                // chat 模式：检测 idle 消息，设置 ChatManager 就绪
+                if (mode === 'chat') {
+                  try {
+                    const msg = JSON.parse(text)
+                    if (msg.type === 'idle') {
+                      chatManager.setReady(true)
+                      log('ChatManager ready (idle message received)')
+                    }
+                  } catch {}
+                }
               } else {
                 // JSON 不完整，存入缓冲等待后续帧
                 log(`JSON message incomplete, buffering: ${text.length} bytes`)
@@ -1029,10 +1081,14 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
       res.write(JSON.stringify(errorMsg) + '\n')
     })
-    // 等待容器结束
-    log('Waiting for container to finish...')
-    await container.wait()
-    log('Container finished')
+    // 等待容器结束（chat 模式下不等待，容器持续运行）
+    if (mode !== 'chat') {
+      log('Waiting for container to finish...')
+      await container.wait()
+      log('Container finished')
+    } else {
+      log('Chat mode: container will keep running, not waiting for finish')
+    }
     // 等待日志流结束（带超时保护）
     await new Promise((resolve) => {
@@ -1086,6 +1142,12 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
     // 从活跃列表移除
     unregisterContainer(executionId)
+    // chat 模式下保持容器运行和 HTTP 流打开
+    if (mode === 'chat') {
+      log('Chat mode: keeping container alive and HTTP stream open')
+      return
+    }
     // 清理容器
     log('Cleaning up container...')
     if (container) {

package/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "buildTime": "2026-05-24T23:36:43.904Z",
-  "cliVersion": "2026.5.25-736"
+  "buildTime": "2026-05-29T12:19:03.854Z",
+  "cliVersion": "2026.5.29-2018"
 }