@icyfenix-dmla/cli 2026.5.25-736 → 2026.5.29-2149

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icyfenix-dmla/cli",
3
- "version": "2026.5.25-736",
3
+ "version": "2026.5.29-2149",
4
4
  "description": "DMLA 沙箱服务命令行工具",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/scripts/build.js CHANGED
@@ -53,7 +53,7 @@ function copyDir(src, dest, filter = null) {
53
53
  console.log('\n📋 复制服务器代码...')
54
54
  console.log(` 源目录: ${localServerSrc}`)
55
55
  console.log(` 目标目录: ${cliServerDest}`)
56
- copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js') || name.endsWith('.py'))
56
+ copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js') || name.endsWith('.cjs') || name.endsWith('.py'))
57
57
 
58
58
  // 复制共享模块(复制所有 .py 文件和 __init__.py)
59
59
  console.log('\n📋 复制共享模块...')
@@ -1,6 +1,7 @@
1
1
  # LLM 模块
2
2
  from .mini_mind_config import MiniMindConfig, RMSNorm, Attention, FeedForward, MiniMindBlock, MiniMindModel, MiniMindForCausalLM, precompute_freqs_cis, apply_rotary_pos_emb, repeat_kv
3
3
  from .pretrain_dataset import PretrainDataset
4
+ from .reward_model import RewardModel
4
5
  from .sftdataset import SFTDataset, pre_processing_chat
5
6
 
6
- __all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'SFTDataset', 'pre_processing_chat']
7
+ __all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'RewardModel', 'SFTDataset', 'pre_processing_chat']
@@ -12,7 +12,7 @@ from transformers.modeling_outputs import MoeCausalLMOutputWithPast
12
12
  from typing import Optional, Tuple, List, Dict
13
13
 
14
14
  class MiniMindConfig(PretrainedConfig):
15
- """MiniMind 模型配置"""
15
+ """模型配置"""
16
16
  model_type = "minimind"
17
17
  def __init__(self, hidden_size=768, num_hidden_layers=8, use_moe=False, **kwargs):
18
18
  super().__init__(**kwargs)
@@ -182,7 +182,7 @@ class MiniMindBlock(nn.Module):
182
182
 
183
183
 
184
184
  class MiniMindModel(nn.Module):
185
- """MiniMind 主体:词嵌入 + 多层 Transformer + 最终归一化"""
185
+ """模型主体:词嵌入 + 多层 Transformer + 最终归一化"""
186
186
  def __init__(self, config):
187
187
  super().__init__()
188
188
  self.config = config
@@ -228,7 +228,7 @@ class MiniMindModel(nn.Module):
228
228
 
229
229
 
230
230
  class MiniMindForCausalLM(PreTrainedModel, GenerationMixin):
231
- """MiniMind 因果语言模型:用于预训练和推理"""
231
+ """因果语言模型:用于预训练和推理"""
232
232
  config_class = MiniMindConfig
233
233
  _tied_weights_keys = {"lm_head.weight": "model.embed_tokens.weight"}
234
234
  def __init__(self, config=None):
@@ -251,7 +251,10 @@ class MiniMindForCausalLM(PreTrainedModel, GenerationMixin):
251
251
  return MoeCausalLMOutputWithPast(loss=loss, aux_loss=aux_loss, logits=logits, past_key_values=past_key_values, hidden_states=hidden_states)
252
252
 
253
253
  @torch.inference_mode()
254
- def generate(self, inputs=None, attention_mask=None, max_new_tokens=512, temperature=0.85, top_p=0.85, top_k=50, eos_token_id=2, streamer=None, use_cache=True, num_return_sequences=1, do_sample=True, repetition_penalty=1.0, **kwargs):
254
+ def generate(self, inputs=None, attention_mask=None, max_new_tokens=512,
255
+ temperature=0.85, top_p=0.85, top_k=50, eos_token_id=2,
256
+ streamer=None, use_cache=True, num_return_sequences=1,
257
+ do_sample=True, repetition_penalty=1.0, **kwargs):
255
258
  """自回归生成:逐 token 采样,支持 top-k、top-p、重复惩罚"""
256
259
  input_ids = kwargs.pop("input_ids", inputs).repeat(num_return_sequences, 1)
257
260
  attention_mask = attention_mask.repeat(num_return_sequences, 1) if attention_mask is not None else None
@@ -0,0 +1,48 @@
1
+ # RewardModel 定义
2
+ # 从文档自动提取生成
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+
8
class RewardModel(nn.Module):
    """
    Simplified reward model.

    A Transformer encoder turns the token sequence into hidden states, and a
    linear reward head maps the hidden state at the final position to one
    scalar score per sequence.

    Args:
        vocab_size: Size of the token vocabulary.
        d_model: Embedding / hidden dimension.
        nhead: Number of attention heads.
        num_layers: Number of Transformer encoder layers.
        max_len: Number of positions in the learned positional table.
            ``forward`` rejects sequences longer than this.
    """

    def __init__(self, vocab_size=1000, d_model=128, nhead=4, num_layers=2, max_len=512):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        # Learned positional table, initialised with small random values.
        self.pos_encoding = nn.Parameter(torch.randn(1, max_len, d_model) * 0.01)

        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, d_model * 4, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)

        # Reward head: maps the pooled hidden state to a scalar reward.
        self.reward_head = nn.Linear(d_model, 1)

    def forward(self, input_ids):
        """
        Score a batch of token sequences.

        Args:
            input_ids: Integer tensor of shape (batch, seq_len) holding the
                prompt + response tokens.

        Returns:
            Tensor of shape (batch,) with one scalar reward per sequence.

        Raises:
            ValueError: If ``seq_len`` exceeds the positional table length.
        """
        seq_len = input_ids.size(1)
        max_len = self.pos_encoding.size(1)
        if seq_len > max_len:
            # Without this check the addition below fails with an opaque
            # broadcast error because the positional slice is shorter.
            raise ValueError(
                f"Sequence length {seq_len} exceeds the maximum supported length {max_len}"
            )

        # 1. Token embedding + learned positional encoding.
        x = self.embedding(input_ids) + self.pos_encoding[:, :seq_len, :]
        # 2. Transformer encoding.
        x = self.transformer(x)

        # 3. Hidden state at the last position -> scalar reward.
        # NOTE(review): the last position is used as-is; if batches are
        # right-padded this reads a padding position - confirm with callers.
        last_hidden = x[:, -1, :]  # (batch, d_model)
        reward = self.reward_head(last_hidden).squeeze(-1)  # (batch,)
        return reward
@@ -6,6 +6,7 @@ import os
6
6
  import random
7
7
  import torch
8
8
  from datasets import load_dataset, Features, Value
9
+ from datasets import logging as datasets_logging
9
10
  from torch.utils.data import Dataset
10
11
 
11
12
  class SFTDataset(Dataset):
@@ -17,17 +18,32 @@ class SFTDataset(Dataset):
17
18
  - 标签掩码:仅 assistant 回答部分参与 loss,其余标记为 -100
18
19
  - 使用 apply_chat_template 将对话转为 ChatML 格式
19
20
  """
21
+ # MiniMind 使用 ChatML 格式:<|im_start|>role\ncontent<|im_end|>\n
22
+ # tokenizer 本身未内置 chat_template,需手动设置
23
+ CHATML_TEMPLATE = (
24
+ "{% for message in messages %}<|im_start|>{{ message.role }}\n"
25
+ "{{ message.content }}<|im_end|>\n"
26
+ "{% endfor %}"
27
+ "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
28
+ )
29
+
20
30
  def __init__(self, jsonl_path, tokenizer, max_length=768):
21
31
  super().__init__()
22
32
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
23
33
  self.tokenizer = tokenizer
34
+ # MiniMind tokenizer 未内置 chat_template,需手动设置 ChatML 格式
35
+ if not tokenizer.chat_template:
36
+ tokenizer.chat_template = self.CHATML_TEMPLATE
24
37
  self.max_length = max_length
25
38
  features = Features({
26
39
  'conversations': [{'role': Value('string'), 'content': Value('string'),
27
40
  'reasoning_content': Value('string'), 'tools': Value('string'),
28
41
  'tool_calls': Value('string')}]
29
42
  })
43
+ # 抑制 load_dataset 的 "Generating train split" 进度输出
44
+ datasets_logging.set_verbosity_error()
30
45
  self.samples = load_dataset('json', data_files=jsonl_path, split='train', features=features)
46
+ datasets_logging.set_verbosity_warning()
31
47
  # 预计算 assistant 回答的起止标记 ID
32
48
  self.bos_id = tokenizer(f'{tokenizer.bos_token}assistant\n', add_special_tokens=False).input_ids
33
49
  self.eos_id = tokenizer(f'{tokenizer.eos_token}\n', add_special_tokens=False).input_ids
@@ -37,6 +53,9 @@ class SFTDataset(Dataset):
37
53
 
38
54
  def create_chat_prompt(self, conversations):
39
55
  """将对话列表应用 chat template 转为文本"""
56
+ # DataLoader 多 worker 场景下 tokenizer.chat_template 可能丢失,需防御性设置
57
+ if not self.tokenizer.chat_template:
58
+ self.tokenizer.chat_template = self.CHATML_TEMPLATE
40
59
  messages = []
41
60
  tools = None
42
61
  for message in conversations:
@@ -91,15 +110,11 @@ def pre_processing_chat(conversations, add_system_ratio=0.2):
91
110
 
92
111
  SYSTEM_PROMPTS = [
93
112
  "你是一个知识丰富的AI,尽力为用户提供准确的信息。",
94
- "你是minimind,一个小巧但有用的语言模型。",
95
113
  "你是一个专业的AI助手,请提供有价值的回答。",
96
- "你是minimind,请尽力帮助用户解决问题。",
97
114
  "你是一个可靠的AI,请给出准确的回答。",
98
115
  "You are a helpful AI assistant.",
99
- "You are minimind, a lightweight intelligent assistant.",
100
116
  "You are a friendly chatbot. Please answer the user's questions carefully.",
101
117
  "You are a knowledgeable AI. Try your best to provide accurate information.",
102
- "You are minimind, a small but useful language model."
103
118
  ]
104
119
  # 概率性添加 system
105
120
  if conversations[0].get('role') != 'system':
@@ -83,7 +83,7 @@ const DATASETS = [
83
83
  id: 'minimind-sft',
84
84
  name: 'MiniMind SFT (LLM监督微调语料)',
85
85
  url: 'https://www.modelscope.cn/datasets/icyfenix/Minimind_SFT.git',
86
- size: '~500MB',
86
+ size: '~90MB',
87
87
  format: 'git',
88
88
  targetDir: 'datasets/minimind-sft',
89
89
  source: 'ModelScope (icyfenix)'
@@ -0,0 +1,181 @@
1
+ const EventEmitter = require('events');
2
+
3
class ChatManager extends EventEmitter {
  /**
   * Tracks a single chat sandbox session and relays one request at a time
   * over the sandbox's stdin/stdout using JSON Lines messages.
   *
   * Emits `ready` (with a boolean) from setReady() and `cleared` from clear().
   */
  constructor() {
    super();
    this.session = null;
    this._pendingResponse = null;
    this._responseBuffer = '';
    this._pendingTimeout = null;
    // { stream, listener } for the stdout handler attached in register(),
    // kept so that the exact same function can be detached later.
    this._stdoutListener = null;
  }

  /**
   * Register a chat sandbox session, replacing any previous one.
   * @param {'docker'|'native'} type - Sandbox type
   * @param {object} options
   * @param {object} [options.container] - Docker container instance
   * @param {object} [options.process] - Child process instance
   * @param {object} options.stdin - Writable stdin stream
   */
  register(type, { container, process: proc, stdin }) {
    // Drop the listener of a previously registered session so it does not
    // keep feeding data into this manager.
    this._detachStdout();

    this.session = { type, container, process: proc, stdin, ready: false };
    this._responseBuffer = '';

    // Only native sessions are bound here; Docker output is pushed in
    // through handleDockerStream().
    const stdout = type === 'native' && proc ? proc.stdout : null;
    if (stdout) {
      const listener = (data) => this._handleStdout(data);
      stdout.on('data', listener);
      this._stdoutListener = { stream: stdout, listener };
    }
  }

  /**
   * Detach the stdout listener installed by register(), if any.
   */
  _detachStdout() {
    if (this._stdoutListener) {
      const { stream, listener } = this._stdoutListener;
      stream.removeListener('data', listener);
      this._stdoutListener = null;
    }
  }

  /**
   * Mark the chat sandbox as ready (or not) and emit `ready`.
   * @param {boolean} ready
   */
  setReady(ready) {
    if (this.session) {
      this.session.ready = ready;
    }
    this.emit('ready', ready);
  }

  /**
   * Report the chat service status.
   * @returns {{ready: boolean, message: string}}
   */
  getStatus() {
    if (!this.session) {
      return { ready: false, message: '对话服务未启动' };
    }
    return {
      ready: this.session.ready,
      message: this.session.ready ? '对话服务就绪' : '模型加载中...'
    };
  }

  /**
   * Send a chat message to the sandbox.
   *
   * Rejects when the service is not ready, stdin is unavailable, another
   * request is still in flight, the write fails, or no terminal message
   * arrives within 60 seconds.
   *
   * @param {string} message - User message
   * @returns {Promise<string>} Model reply
   */
  async send(message) {
    if (!this.session || !this.session.ready) {
      throw new Error('对话服务未就绪');
    }
    if (!this.session.stdin) {
      throw new Error('沙箱 stdin 不可用');
    }
    // Only one pending slot exists; starting a second request would
    // overwrite the first one's resolver and timeout handle.
    if (this._pendingResponse) {
      throw new Error('已有对话请求正在处理');
    }

    // The JSON string literal is embedded as the Python string argument.
    // NOTE(review): assumes every JSON string literal is also a valid Python
    // literal for the inputs seen in practice - confirm for unusual Unicode.
    const escapedMessage = JSON.stringify(message);
    const code = `print(chat(${escapedMessage}))`;
    const cmd = JSON.stringify({ action: 'execute', code });

    return new Promise((resolve, reject) => {
      const pending = { resolve, reject, buffer: '' };
      this._pendingResponse = pending;

      try {
        this.session.stdin.write(cmd + '\n');
      } catch (err) {
        // Do not leave a stale pending request behind a failed write.
        this._pendingResponse = null;
        reject(err);
        return;
      }

      // Timeout guard (60 seconds).
      this._pendingTimeout = setTimeout(() => {
        // Only clear the slot if it still belongs to this request.
        if (this._pendingResponse === pending) {
          this._pendingResponse = null;
          this._pendingTimeout = null;
        }
        reject(new Error('推理超时'));
      }, 60000);
    });
  }

  /**
   * Consume a chunk of sandbox stdout: split it into lines, dispatch JSON
   * lines to _handleMessage and append non-JSON lines to the pending reply.
   * @param {Buffer|string} data
   */
  _handleStdout(data) {
    this._responseBuffer += data.toString();

    const lines = this._responseBuffer.split('\n');
    // The last element is an incomplete line; keep it for the next chunk.
    this._responseBuffer = lines.pop();

    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        const msg = JSON.parse(line);
        this._handleMessage(msg);
      } catch {
        if (this._pendingResponse) {
          this._pendingResponse.buffer += line + '\n';
        }
      }
    }
  }

  /**
   * Handle a single parsed JSON message from the sandbox.
   * @param {object} msg
   */
  _handleMessage(msg) {
    switch (msg.type) {
      case 'idle':
        this.setReady(true);
        break;

      case 'pong':
        break;

      case 'stream':
        if (this._pendingResponse) {
          const content = msg.content || msg.text || '';
          if (content) {
            this._pendingResponse.buffer += content;
          }
        }
        break;

      case 'result':
      case 'execute_result':
        if (this._pendingResponse) {
          clearTimeout(this._pendingTimeout);
          this._pendingTimeout = null;
          const result = msg.content || this._pendingResponse.buffer.trim();
          this._pendingResponse.resolve(result);
          this._pendingResponse = null;
        }
        break;

      case 'error':
        if (this._pendingResponse) {
          clearTimeout(this._pendingTimeout);
          this._pendingTimeout = null;
          // Kernel error messages carry the text in `evalue`.
          this._pendingResponse.reject(
            new Error(msg.content || msg.message || msg.evalue || '推理出错')
          );
          this._pendingResponse = null;
        }
        break;
    }
  }

  /**
   * Feed Docker-mode stream output into the same parser (called by sandbox.js).
   * @param {Buffer|string} data
   */
  handleDockerStream(data) {
    this._handleStdout(data);
  }

  /**
   * Tear down the current session: reject any in-flight request, detach the
   * stdout listener, reset buffers and emit `cleared`.
   */
  clear() {
    if (this._pendingResponse) {
      clearTimeout(this._pendingTimeout);
      this._pendingTimeout = null;
      this._pendingResponse.reject(new Error('沙箱已停止'));
      this._pendingResponse = null;
    }
    // Remove the exact listener that register() attached, so a cleared
    // session stops receiving data.
    this._detachStdout();
    this.session = null;
    this._responseBuffer = '';
    this.emit('cleared');
  }
}
179
+
180
+ // 单例导出
181
+ module.exports = new ChatManager();
@@ -12,6 +12,7 @@ import { fileURLToPath } from 'url'
12
12
  import { resolve } from 'path'
13
13
  import sandboxRouter from './routes/sandbox.js'
14
14
  import nativeRouter from './routes/native.js'
15
+ import chatRouter from './routes/chat.js'
15
16
  import { cleanupAllContainers } from './sandbox.js'
16
17
  import { cleanupAllProcesses } from './native_executor.js'
17
18
  import { checkNativeEnvironment, getDataPath } from './native_env_check.js'
@@ -72,6 +73,9 @@ if (isNativeMode) {
72
73
  app.use('/api/sandbox', sandboxRouter)
73
74
  }
74
75
 
76
+ // 对话 API
77
+ app.use('/api/chat', chatRouter)
78
+
75
79
  // 错误处理
76
80
  app.use((err, req, res, next) => {
77
81
  log(`Error: ${err.message}`)
@@ -306,10 +306,45 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
306
306
 
307
307
  # 处理不同类型的输出
308
308
  if msg_type == 'stream':
309
+ stream_name = content.get('name', 'stdout')
310
+ stream_text = content.get('text', '')
311
+
312
+ # 从 stderr 中提取 ProgressReporter 的 progress JSON,
313
+ # 作为独立的 progress 类型消息发送,避免与普通 stderr 输出混合
314
+ if stream_name == 'stderr':
315
+ progress_lines = []
316
+ other_lines = []
317
+ for line in stream_text.split('\n'):
318
+ if line.startswith('{"type": "progress"') or line.startswith('{"type":"progress"'):
319
+ progress_lines.append(line)
320
+ else:
321
+ other_lines.append(line)
322
+
323
+ # 将 progress JSON 作为独立消息发送(字段展开到顶层,与前端 progress case 匹配)
324
+ for pline in progress_lines:
325
+ if not pline.strip():
326
+ continue
327
+ try:
328
+ import json as _json
329
+ progress_data = _json.loads(pline)
330
+ progress_data['type'] = 'progress'
331
+ if stream:
332
+ output_json(progress_data)
333
+ else:
334
+ outputs.append(progress_data)
335
+ except Exception:
336
+ # JSON 解析失败,作为普通文本处理
337
+ other_lines.append(pline)
338
+
339
+ # 剩余 stderr 内容正常传递
340
+ stream_text = '\n'.join(other_lines)
341
+ if not stream_text.strip():
342
+ continue
343
+
309
344
  stream_output = {
310
345
  'type': 'stream',
311
- 'name': content.get('name', 'stdout'),
312
- 'text': content.get('text', '')
346
+ 'name': stream_name,
347
+ 'text': stream_text
313
348
  }
314
349
 
315
350
  if stream:
@@ -317,7 +352,7 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
317
352
  output_json(stream_output)
318
353
  else:
319
354
  outputs.append(stream_output)
320
- log_debug(f'Stream output: {content.get("name")} len={len(content.get("text", ""))}')
355
+ log_debug(f'Stream output: {stream_name} len={len(stream_text)}')
321
356
 
322
357
  elif msg_type == 'display_data':
323
358
  display_output = {
@@ -483,6 +518,164 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
483
518
  log_debug(f'Error shutting down kernel: {e}')
484
519
 
485
520
 
521
+ def _collect_kernel_outputs(kc, timeout, stream=False):
522
+ """
523
+ 从 Kernel 收集执行输出,复用 run_code 中的输出处理逻辑。
524
+
525
+ Args:
526
+ kc: 已启动的 KernelClient
527
+ timeout: 执行超时时间(秒),0 表示不超时
528
+ stream: 是否启用流式输出
529
+
530
+ Returns:
531
+ (outputs, timed_out, has_error) 元组
532
+ """
533
+ deadline = time.time() + timeout if timeout > 0 else float('inf')
534
+ outputs = []
535
+ timed_out = False
536
+ has_error = False
537
+
538
+ while True:
539
+ remaining = deadline - time.time()
540
+ if timeout > 0 and remaining <= 0:
541
+ timed_out = True
542
+ break
543
+
544
+ try:
545
+ msg = kc.get_iopub_msg(timeout=max(1, remaining) if timeout > 0 else 2)
546
+ except Exception:
547
+ if timeout > 0 and time.time() >= deadline:
548
+ timed_out = True
549
+ break
550
+
551
+ msg_type = msg['header']['msg_type']
552
+ content = msg['content']
553
+
554
+ if msg_type == 'status':
555
+ if content.get('execution_state') == 'idle':
556
+ break
557
+ continue
558
+
559
+ if msg_type == 'stream':
560
+ stream_output = {
561
+ 'type': 'stream',
562
+ 'name': content.get('name', 'stdout'),
563
+ 'text': content.get('text', '')
564
+ }
565
+ if stream:
566
+ output_json(stream_output)
567
+ else:
568
+ outputs.append(stream_output)
569
+
570
+ elif msg_type == 'display_data':
571
+ display_output = {
572
+ 'type': 'display_data',
573
+ 'data': content.get('data', {}),
574
+ 'metadata': content.get('metadata', {})
575
+ }
576
+ if stream:
577
+ output_json(display_output)
578
+ else:
579
+ outputs.append(display_output)
580
+
581
+ elif msg_type == 'execute_result':
582
+ result_output = {
583
+ 'type': 'execute_result',
584
+ 'data': content.get('data', {}),
585
+ 'metadata': content.get('metadata', {}),
586
+ 'execution_count': content.get('execution_count')
587
+ }
588
+ if stream:
589
+ output_json(result_output)
590
+ else:
591
+ outputs.append(result_output)
592
+
593
+ elif msg_type == 'error':
594
+ error_output = {
595
+ 'type': 'error',
596
+ 'ename': content.get('ename', 'UnknownError'),
597
+ 'evalue': content.get('evalue', ''),
598
+ 'traceback': content.get('traceback', [])
599
+ }
600
+ if is_cuda_compat_error(content.get('evalue', '')):
601
+ error_output = enrich_cuda_error(error_output)
602
+ has_error = True
603
+ if stream:
604
+ output_json(error_output)
605
+ else:
606
+ outputs.append(error_output)
607
+
608
+ return outputs, timed_out, has_error
609
+
610
+
611
def _execute_and_output(kc, code, timeout=0):
    """
    Run code on an existing kernel (non-streaming) and print one JSON result.

    Args:
        kc: A started KernelClient.
        code: Python source to execute.
        timeout: Timeout in seconds; values <= 0 fall back to DEFAULT_TIMEOUT.
    """
    started_at = time.time()
    if timeout > 0:
        actual_timeout = timeout
    else:
        actual_timeout = DEFAULT_TIMEOUT

    kc.execute(code, allow_stdin=False)
    collected, hit_deadline, failed = _collect_kernel_outputs(kc, actual_timeout, stream=False)

    elapsed = round(time.time() - started_at, 3)

    if hit_deadline:
        # Replace whatever was collected with a single synthetic timeout error.
        timeout_text = f'Execution timed out after {actual_timeout} seconds'
        payload = {
            'success': False,
            'outputs': [{
                'type': 'error',
                'ename': 'TimeoutError',
                'evalue': timeout_text,
                'traceback': [timeout_text]
            }],
            'executionTime': elapsed
        }
    else:
        payload = {
            'success': not failed,
            'outputs': collected,
            'executionTime': elapsed
        }

    # Single JSON line on stdout, flushed so the parent process sees it at once.
    print(json.dumps(payload, ensure_ascii=False))
    sys.stdout.flush()
648
+
649
+
650
def _stream_execute(kc, code, timeout=0):
    """
    Run code on an existing kernel in streaming mode.

    Each kernel output is emitted as it arrives, followed by a final
    ``result`` message.

    Args:
        kc: A started KernelClient.
        code: Python source to execute.
        timeout: Timeout in seconds; values <= 0 fall back to DEFAULT_TIMEOUT.
    """
    started_at = time.time()
    if timeout > 0:
        actual_timeout = timeout
    else:
        actual_timeout = DEFAULT_TIMEOUT

    kc.execute(code, allow_stdin=False)
    collected, hit_deadline, failed = _collect_kernel_outputs(kc, actual_timeout, stream=True)

    elapsed = round(time.time() - started_at, 3)

    if hit_deadline:
        # Report the timeout as an error message, then close with a failed result.
        timeout_text = f'Execution timed out after {actual_timeout} seconds'
        output_json({'type': 'error', 'ename': 'TimeoutError',
                     'evalue': timeout_text,
                     'traceback': [timeout_text]})
        output_json({'type': 'result', 'success': False,
                     'executionTime': elapsed})
        return

    output_json({'type': 'result', 'success': not failed,
                 'outputs': collected,
                 'executionTime': elapsed})
677
+
678
+
486
679
  def check_cuda_compatibility():
487
680
  """
488
681
  快速检查 CUDA 兼容性
@@ -545,6 +738,8 @@ def main():
545
738
  parser.add_argument('--timeout', type=int, default=DEFAULT_TIMEOUT, help='执行超时时间(秒)')
546
739
  parser.add_argument('--check-cuda', action='store_true', help='仅检查 CUDA 兼容性')
547
740
  parser.add_argument('--stream', action='store_true', help='启用流式输出模式(实时输出每个消息)')
741
+ parser.add_argument("--serve", action="store_true",
742
+ help="长运行模式:初始代码执行后 Kernel 保持运行,从 stdin 接收后续指令")
548
743
 
549
744
  args = parser.parse_args()
550
745
 
@@ -569,7 +764,7 @@ def main():
569
764
  return
570
765
  elif args.code:
571
766
  code = args.code
572
- else:
767
+ elif not args.serve:
573
768
  result = {
574
769
  'success': False,
575
770
  'outputs': [{
@@ -589,6 +784,112 @@ def main():
589
784
  print(json.dumps(result, ensure_ascii=False))
590
785
  return
591
786
 
787
+ # serve 模式:自行管理 Kernel 生命周期,执行初始代码后进入 stdin 监听循环
788
+ if args.serve:
789
+ from jupyter_client import KernelManager
790
+
791
+ km = KernelManager()
792
+ suppress_stdout()
793
+ km.start_kernel()
794
+ kc = km.client()
795
+ kc.start_channels()
796
+ kc.wait_for_ready(timeout=30)
797
+ restore_stdout()
798
+
799
+ # 注入全局变量、数据路径和 sys.path 配置
800
+ python_path_env = os.environ.get('PYTHONPATH', '')
801
+ path_separator = ';' if os.name == 'nt' else ':'
802
+ python_path_entries = [p for p in python_path_env.split(path_separator) if p]
803
+ setup_code = '''
804
+ import os
805
+ import sys
806
+
807
+ DATA_DIR = os.environ.get('DMLA_DATA_PATH', '/data')
808
+
809
+ # 将 PYTHONPATH 中的路径注入 sys.path(IPython kernel 可能不会自动继承)
810
+ _python_path_entries = ''' + repr(python_path_entries) + '''
811
+ for _p in _python_path_entries:
812
+ if _p not in sys.path:
813
+ sys.path.insert(0, _p)
814
+
815
+ # 配置 matplotlib inline 后端(在用户 import matplotlib 之前设置)
816
+ import matplotlib
817
+ matplotlib.use('module://matplotlib_inline.backend_inline')
818
+ '''
819
+ kc.execute(setup_code, allow_stdin=False)
820
+ # 等待 setup 执行完成
821
+ setup_start = time.time()
822
+ while True:
823
+ if time.time() - setup_start > 5:
824
+ break
825
+ try:
826
+ msg = kc.get_iopub_msg(timeout=2)
827
+ msg_type = msg['header']['msg_type']
828
+ if msg_type == 'status' and msg['content'].get('execution_state') == 'idle':
829
+ break
830
+ except Exception:
831
+ break
832
+
833
+ # 执行初始代码
834
+ if code:
835
+ if args.stream:
836
+ _stream_execute(kc, code, args.timeout)
837
+ else:
838
+ _execute_and_output(kc, code, args.timeout)
839
+
840
+ # serve 模式:进入 stdin 监听循环
841
+ def output_message(msg_type, content):
842
+ """输出 JSON Lines 消息到 stdout"""
843
+ msg = {"type": msg_type}
844
+ if content is not None:
845
+ msg["content"] = content
846
+ sys.stdout.write(json.dumps(msg, ensure_ascii=False) + "\n")
847
+ sys.stdout.flush()
848
+
849
+ output_message("idle", "kernel ready")
850
+
851
+ while True:
852
+ try:
853
+ line = sys.stdin.readline()
854
+ if not line:
855
+ break
856
+ line = line.strip()
857
+ if not line:
858
+ continue
859
+ try:
860
+ cmd = json.loads(line)
861
+ except json.JSONDecodeError:
862
+ output_message("error", f"无效的 JSON 指令: {line}")
863
+ continue
864
+
865
+ action = cmd.get("action")
866
+ if action == "ping":
867
+ output_message("pong", None)
868
+ elif action == "execute":
869
+ exec_code = cmd.get("code", "")
870
+ if args.stream:
871
+ _stream_execute(kc, exec_code, cmd.get("timeout", 0))
872
+ else:
873
+ _execute_and_output(kc, exec_code, cmd.get("timeout", 0))
874
+ output_message("idle", "kernel ready")
875
+ else:
876
+ output_message("error", f"未知指令: {action}")
877
+ except Exception as e:
878
+ output_message("error", str(e))
879
+ break
880
+
881
+ # stdin 关闭或出错,清理退出
882
+ kc.stop_channels()
883
+ try:
884
+ kc.shutdown_kernel()
885
+ except Exception:
886
+ pass
887
+ try:
888
+ km.shutdown_kernel(now=True)
889
+ except Exception:
890
+ pass
891
+ return
892
+
592
893
  result = run_code(code, args.timeout, stream=args.stream)
593
894
 
594
895
  # 非流式模式:输出 JSON 结果到 stdout
@@ -10,6 +10,7 @@ import os from 'os'
10
10
  import fs from 'fs'
11
11
  import chalk from 'chalk'
12
12
  import { getCachedEnvironment, getKernelRunnerPath, getSharedModulesPath, getServerPythonPath, getDataPath, getProgressPath, getPythonCommand, detectPythonCommand } from './native_env_check.js'
13
+ import chatManager from './chat-manager.cjs'
13
14
 
14
15
  const __filename = fileURLToPath(import.meta.url)
15
16
  const __dirname = path.dirname(__filename)
@@ -232,6 +233,14 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
232
233
 
233
234
  registerProcess(executionId, proc)
234
235
 
236
+ // 检测是否为对话沙箱
237
+ if (procArgs.includes('--serve')) {
238
+ chatManager.register('native', {
239
+ process: proc,
240
+ stdin: proc.stdin
241
+ })
242
+ }
243
+
235
244
  // 设置超时
236
245
  const timeoutPromise = new Promise((_, reject) => {
237
246
  timeoutId = setTimeout(() => {
@@ -259,6 +268,9 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
259
268
  const execPromise = new Promise((resolve, reject) => {
260
269
  proc.on('close', (code) => {
261
270
  log(`Process exited with code ${code}`)
271
+ if (chatManager.session) {
272
+ chatManager.clear()
273
+ }
262
274
  if (timeoutId) clearTimeout(timeoutId)
263
275
  resolve({ stdout, stderr, exitCode: code })
264
276
  })
@@ -378,8 +390,9 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
378
390
  * @param {boolean} useGpu - 是否请求 GPU
379
391
  * @param {object} res - Express 响应对象
380
392
  * @param {number|null} timeoutOverride - 超时时间(秒)
393
+ * @param {string|null} mode - 执行模式('chat' 启用对话模式)
381
394
  */
382
- export async function runPythonCodeStreamingNative(code, useGpu = false, res, timeoutOverride = null) {
395
+ export async function runPythonCodeStreamingNative(code, useGpu = false, res, timeoutOverride = null, mode = null) {
383
396
  const startTime = Date.now()
384
397
  const executionId = generateExecutionId()
385
398
 
@@ -474,6 +487,7 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
474
487
  '--timeout', String(timeoutSeconds),
475
488
  '--stream'
476
489
  ]
490
+ if (mode === 'chat') procArgs.push('--serve')
477
491
  } else {
478
492
  // Linux/macOS 直接传递代码参数
479
493
  procArgs = [
@@ -482,6 +496,7 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
482
496
  '--timeout', String(timeoutSeconds),
483
497
  '--stream'
484
498
  ]
499
+ if (mode === 'chat') procArgs.push('--serve')
485
500
  }
486
501
 
487
502
  try {
@@ -489,6 +504,14 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
489
504
 
490
505
  registerProcess(executionId, proc)
491
506
 
507
+ // 检测是否为对话沙箱
508
+ if (procArgs.includes('--serve')) {
509
+ chatManager.register('native', {
510
+ process: proc,
511
+ stdin: proc.stdin
512
+ })
513
+ }
514
+
492
515
  // 输出运行状态
493
516
  res.write(JSON.stringify({
494
517
  type: 'status',
@@ -514,6 +537,19 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
514
537
  // kernel_runner.py 输出的已经是 JSON 格式,直接转发
515
538
  if (line.trim().startsWith('{')) {
516
539
  res.write(line + '\n')
540
+ // chat 模式:检测 idle 消息,注册 ChatManager
541
+ if (mode === 'chat' && !chatManager.session) {
542
+ try {
543
+ const msg = JSON.parse(line)
544
+ if (msg.type === 'idle') {
545
+ chatManager.register('native', {
546
+ process: proc,
547
+ stdin: proc.stdin
548
+ })
549
+ log('ChatManager registered for Native chat sandbox')
550
+ }
551
+ } catch {}
552
+ }
517
553
  } else {
518
554
  // 非 JSON 内容包装为 stream 消息
519
555
  res.write(JSON.stringify({
@@ -562,10 +598,25 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
562
598
  }) + '\n')
563
599
  })
564
600
 
565
- // 等待进程完成
566
- await new Promise((resolve) => {
567
- proc.on('close', resolve)
568
- })
601
+ // 等待进程完成(chat 模式下进程持续运行,不等待 close)
602
+ if (mode === 'chat') {
603
+ // chat 模式:进程持续运行,HTTP 流保持打开
604
+ // 当进程意外退出时清理
605
+ proc.on('close', () => {
606
+ if (chatManager.session) {
607
+ chatManager.clear()
608
+ }
609
+ })
610
+ } else {
611
+ await new Promise((resolve) => {
612
+ proc.on('close', () => {
613
+ if (chatManager.session) {
614
+ chatManager.clear()
615
+ }
616
+ resolve()
617
+ })
618
+ })
619
+ }
569
620
 
570
621
  // 处理缓冲区剩余内容
571
622
  if (stdoutBuffer.trim()) {
@@ -613,8 +664,11 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
613
664
  log(`Failed to clean up temp file: ${e.message}`)
614
665
  }
615
666
  }
616
- res.end()
617
- log('Streaming response ended')
667
+ // chat 模式下不关闭 HTTP 响应,进程持续运行
668
+ if (mode !== 'chat') {
669
+ res.end()
670
+ log('Streaming response ended')
671
+ }
618
672
  }
619
673
  }
620
674
 
@@ -0,0 +1,35 @@
1
+ import { Router } from 'express'
2
+ import chatManager from '../chat-manager.cjs'
3
+
4
+ const router = Router()
5
+
6
+ /**
7
+ * 查询对话服务状态
8
+ * GET /api/chat/status
9
+ */
10
router.get('/status', (req, res) => {
  // Reply with the chat manager's current status object as JSON.
  const status = chatManager.getStatus()
  res.json(status)
})
13
+
14
+ /**
15
+ * 发送对话消息
16
+ * POST /api/chat/send
17
+ * Body: { message: string }
18
+ */
19
router.post('/send', async (req, res) => {
  // req.body is undefined when no body parser handled the request; fall back
  // to an empty object so a missing body yields the 400 below instead of a
  // TypeError thrown inside this async handler.
  const { message } = req.body || {}

  if (!message || typeof message !== 'string') {
    return res.status(400).json({ error: '消息不能为空' })
  }

  try {
    const response = await chatManager.send(message)
    res.json({ response })
  } catch (err) {
    // "Service not ready" maps to 503; every other failure is a 500.
    const status = err.message === '对话服务未就绪' ? 503 : 500
    res.status(status).json({ error: err.message })
  }
})
34
+
35
+ export default router
@@ -115,7 +115,7 @@ router.post('/run', async (req, res) => {
115
115
  * 响应: JSON Lines 流式输出
116
116
  */
117
117
  router.post('/stream', async (req, res) => {
118
- const { code, useGpu = false, timeout = null } = req.body
118
+ const { code, useGpu = false, timeout = null, mode = null } = req.body
119
119
 
120
120
  // 验证请求
121
121
  if (!code || typeof code !== 'string') {
@@ -155,7 +155,7 @@ router.post('/stream', async (req, res) => {
155
155
  }
156
156
 
157
157
  // 流式执行
158
- await runPythonCodeStreamingNative(code, useGpu, res, timeout)
158
+ await runPythonCodeStreamingNative(code, useGpu, res, timeout, mode)
159
159
 
160
160
  } catch (error) {
161
161
  console.error('[Native Sandbox Stream] Error:', error)
@@ -124,11 +124,11 @@ router.post('/run', async (req, res) => {
124
124
  /**
125
125
  * 流式执行代码
126
126
  * POST /api/sandbox/stream
127
- * Body: { code: string, useGpu?: boolean, timeout?: number|null }
127
+ * Body: { code: string, useGpu?: boolean, timeout?: number|null, mode?: string }
128
128
  * 响应: JSON Lines 流式输出
129
129
  */
130
130
  router.post('/stream', async (req, res) => {
131
- const { code, useGpu = false, timeout = null } = req.body
131
+ const { code, useGpu = false, timeout = null, mode = null } = req.body
132
132
 
133
133
  // 验证请求
134
134
  if (!code || typeof code !== 'string') {
@@ -193,7 +193,7 @@ router.post('/stream', async (req, res) => {
193
193
  }
194
194
 
195
195
  // 流式执行代码
196
- await runPythonCodeStreaming(code, actualUseGpu, res, actualImage, timeout)
196
+ await runPythonCodeStreaming(code, actualUseGpu, res, actualImage, timeout, mode)
197
197
 
198
198
  } catch (error) {
199
199
  console.error('Sandbox stream error:', error)
@@ -7,6 +7,7 @@ import path from 'path'
7
7
  import { fileURLToPath } from 'url'
8
8
  import fs from 'fs'
9
9
  import os from 'os'
10
+ import chatManager from './chat-manager.cjs'
10
11
 
11
12
  const __filename = fileURLToPath(import.meta.url)
12
13
  const __dirname = path.dirname(__filename)
@@ -71,6 +72,7 @@ export async function cleanupAllContainers() {
71
72
  }
72
73
 
73
74
  activeContainers.clear()
75
+ chatManager.clear()
74
76
  log(`All containers cleaned up: ${count}`)
75
77
  return count
76
78
  }
@@ -102,6 +104,7 @@ export async function abortExecution(executionId = null) {
102
104
  }
103
105
  }
104
106
 
107
+ chatManager.clear()
105
108
  return { success: true, stopped }
106
109
  }
107
110
 
@@ -496,7 +499,8 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
496
499
  Env: [
497
500
  'PYTHONUNBUFFERED=1',
498
501
  'PYTHONPATH=/workspace',
499
- actualTimeout === null ? 'DMLA_NO_TIMEOUT=1' : ''
502
+ actualTimeout === null ? 'DMLA_NO_TIMEOUT=1' : '',
503
+ `DMLA_DATA_PATH=${getDataVolumePath() || '/data'}`
500
504
  ].filter(e => e) // 过滤空字符串
501
505
  }
502
506
 
@@ -558,6 +562,12 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
558
562
  containerConfig.HostConfig.Binds = binds
559
563
  }
560
564
 
565
+ // 将宿主机 shared 目录路径注入到容器环境变量,供环境检查代码读取
566
+ const sharedMountInfo = (useMount && sharedModulesPath && fs.existsSync(sharedModulesPath))
567
+ ? `host_path=${sharedModulesPath},mounted=true`
568
+ : 'mounted=false'
569
+ containerConfig.Env.push(`DMLA_SHARED_INFO=${sharedMountInfo}`)
570
+
561
571
  if (!PROJECT_ROOT) {
562
572
  console.log('[Sandbox] 独立安装模式,无 Volume Mount')
563
573
  }
@@ -727,7 +737,7 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
727
737
  * @param {number|null} timeoutOverride - 可选,超时时间(秒)
728
738
  * @returns {Promise<void>}
729
739
  */
730
- export async function runPythonCodeStreaming(code, useGpu = false, res, imageOverride = null, timeoutOverride = null) {
740
+ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOverride = null, timeoutOverride = null, mode = null) {
731
741
  const startTime = Date.now()
732
742
 
733
743
  // 生成唯一执行 ID
@@ -785,9 +795,17 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
785
795
  // GPU 容器不限制内存,CPU 容器限制 4GB
786
796
  const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
787
797
 
798
+ // 构建命令参数
799
+ const cmdArgs = ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream']
800
+ if (mode === 'chat') {
801
+ cmdArgs.push('--serve')
802
+ }
803
+
788
804
  const containerConfig = {
789
805
  Image: image,
790
- Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream'],
806
+ Cmd: cmdArgs,
807
+ OpenStdin: true,
808
+ StdinOnce: false,
791
809
  HostConfig: {
792
810
  AutoRemove: false
793
811
  },
@@ -838,6 +856,12 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
838
856
  containerConfig.HostConfig.Binds = binds
839
857
  }
840
858
 
859
+ // 将宿主机 shared 目录路径注入到容器环境变量
860
+ const sharedMountInfo = (useMount && sharedModulesPath && fs.existsSync(sharedModulesPath))
861
+ ? `host_path=${sharedModulesPath},mounted=true`
862
+ : 'mounted=false'
863
+ containerConfig.Env.push(`DMLA_SHARED_INFO=${sharedMountInfo}`)
864
+
841
865
  // GPU 配置
842
866
  if (useGpu) {
843
867
  containerConfig.HostConfig.DeviceRequests = [{
@@ -872,6 +896,24 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
872
896
  await container.start()
873
897
  log('Container started')
874
898
 
899
+ // chat 模式:获取容器的 attach 流(支持 stdin 写入)
900
+ if (mode === 'chat') {
901
+ log('Chat mode detected, preparing stdin stream...')
902
+ const attachStream = await container.attach({
903
+ hijack: true,
904
+ stdin: true,
905
+ stream: true,
906
+ stdout: true,
907
+ stderr: true
908
+ })
909
+ // 先注册 stdin,等 idle 消息时设置 ready
910
+ chatManager.register('docker', {
911
+ container,
912
+ stdin: attachStream
913
+ })
914
+ log('ChatManager registered for Docker sandbox (stdin ready, waiting for idle)')
915
+ }
916
+
875
917
  // 输出运行状态消息
876
918
  const runningMsg = {
877
919
  type: 'status',
@@ -940,6 +982,17 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
940
982
  if (isJsonComplete(text)) {
941
983
  log(`Forwarding complete JSON message: ${text.length} bytes`)
942
984
  res.write(text + '\n')
985
+ // chat 模式:将消息转发给 ChatManager 处理
986
+ if (mode === 'chat') {
987
+ try {
988
+ const msg = JSON.parse(text)
989
+ chatManager.handleDockerStream(Buffer.from(text + '\n'))
990
+ if (msg.type === 'idle') {
991
+ chatManager.setReady(true)
992
+ log('ChatManager ready (idle message received)')
993
+ }
994
+ } catch {}
995
+ }
943
996
  } else {
944
997
  // JSON 不完整,存入缓冲等待后续帧
945
998
  log(`JSON message incomplete, buffering: ${text.length} bytes`)
@@ -1029,10 +1082,14 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
1029
1082
  res.write(JSON.stringify(errorMsg) + '\n')
1030
1083
  })
1031
1084
 
1032
- // 等待容器结束
1033
- log('Waiting for container to finish...')
1034
- await container.wait()
1035
- log('Container finished')
1085
+ // 等待容器结束(chat 模式下不等待,容器持续运行)
1086
+ if (mode !== 'chat') {
1087
+ log('Waiting for container to finish...')
1088
+ await container.wait()
1089
+ log('Container finished')
1090
+ } else {
1091
+ log('Chat mode: container will keep running, not waiting for finish')
1092
+ }
1036
1093
 
1037
1094
  // 等待日志流结束(带超时保护)
1038
1095
  await new Promise((resolve) => {
@@ -1086,6 +1143,12 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
1086
1143
  // 从活跃列表移除
1087
1144
  unregisterContainer(executionId)
1088
1145
 
1146
+ // chat 模式下保持容器运行和 HTTP 流打开
1147
+ if (mode === 'chat') {
1148
+ log('Chat mode: keeping container alive and HTTP stream open')
1149
+ return
1150
+ }
1151
+
1089
1152
  // 清理容器
1090
1153
  log('Cleaning up container...')
1091
1154
  if (container) {
package/version.json CHANGED
@@ -1,4 +1,4 @@
1
1
  {
2
- "buildTime": "2026-05-24T23:36:43.904Z",
3
- "cliVersion": "2026.5.25-736"
2
+ "buildTime": "2026-05-29T13:50:24.219Z",
3
+ "cliVersion": "2026.5.29-2149"
4
4
  }