@icyfenix-dmla/cli 2026.5.25-736 → 2026.5.29-2018

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icyfenix-dmla/cli",
3
- "version": "2026.5.25-736",
3
+ "version": "2026.5.29-2018",
4
4
  "description": "DMLA 沙箱服务命令行工具",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/scripts/build.js CHANGED
@@ -53,7 +53,7 @@ function copyDir(src, dest, filter = null) {
53
53
  console.log('\n📋 复制服务器代码...')
54
54
  console.log(` 源目录: ${localServerSrc}`)
55
55
  console.log(` 目标目录: ${cliServerDest}`)
56
- copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js') || name.endsWith('.py'))
56
+ copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js') || name.endsWith('.cjs') || name.endsWith('.py'))
57
57
 
58
58
  // 复制共享模块(复制所有 .py 文件和 __init__.py)
59
59
  console.log('\n📋 复制共享模块...')
@@ -1,6 +1,7 @@
1
1
  # LLM 模块
2
2
  from .mini_mind_config import MiniMindConfig, RMSNorm, Attention, FeedForward, MiniMindBlock, MiniMindModel, MiniMindForCausalLM, precompute_freqs_cis, apply_rotary_pos_emb, repeat_kv
3
3
  from .pretrain_dataset import PretrainDataset
4
+ from .reward_model import RewardModel
4
5
  from .sftdataset import SFTDataset, pre_processing_chat
5
6
 
6
- __all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'SFTDataset', 'pre_processing_chat']
7
+ __all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'RewardModel', 'SFTDataset', 'pre_processing_chat']
@@ -12,7 +12,7 @@ from transformers.modeling_outputs import MoeCausalLMOutputWithPast
12
12
  from typing import Optional, Tuple, List, Dict
13
13
 
14
14
  class MiniMindConfig(PretrainedConfig):
15
- """MiniMind 模型配置"""
15
+ """模型配置"""
16
16
  model_type = "minimind"
17
17
  def __init__(self, hidden_size=768, num_hidden_layers=8, use_moe=False, **kwargs):
18
18
  super().__init__(**kwargs)
@@ -182,7 +182,7 @@ class MiniMindBlock(nn.Module):
182
182
 
183
183
 
184
184
  class MiniMindModel(nn.Module):
185
- """MiniMind 主体:词嵌入 + 多层 Transformer + 最终归一化"""
185
+ """模型主体:词嵌入 + 多层 Transformer + 最终归一化"""
186
186
  def __init__(self, config):
187
187
  super().__init__()
188
188
  self.config = config
@@ -228,7 +228,7 @@ class MiniMindModel(nn.Module):
228
228
 
229
229
 
230
230
  class MiniMindForCausalLM(PreTrainedModel, GenerationMixin):
231
- """MiniMind 因果语言模型:用于预训练和推理"""
231
+ """因果语言模型:用于预训练和推理"""
232
232
  config_class = MiniMindConfig
233
233
  _tied_weights_keys = {"lm_head.weight": "model.embed_tokens.weight"}
234
234
  def __init__(self, config=None):
@@ -251,7 +251,10 @@ class MiniMindForCausalLM(PreTrainedModel, GenerationMixin):
251
251
  return MoeCausalLMOutputWithPast(loss=loss, aux_loss=aux_loss, logits=logits, past_key_values=past_key_values, hidden_states=hidden_states)
252
252
 
253
253
  @torch.inference_mode()
254
- def generate(self, inputs=None, attention_mask=None, max_new_tokens=512, temperature=0.85, top_p=0.85, top_k=50, eos_token_id=2, streamer=None, use_cache=True, num_return_sequences=1, do_sample=True, repetition_penalty=1.0, **kwargs):
254
+ def generate(self, inputs=None, attention_mask=None, max_new_tokens=512,
255
+ temperature=0.85, top_p=0.85, top_k=50, eos_token_id=2,
256
+ streamer=None, use_cache=True, num_return_sequences=1,
257
+ do_sample=True, repetition_penalty=1.0, **kwargs):
255
258
  """自回归生成:逐 token 采样,支持 top-k、top-p、重复惩罚"""
256
259
  input_ids = kwargs.pop("input_ids", inputs).repeat(num_return_sequences, 1)
257
260
  attention_mask = attention_mask.repeat(num_return_sequences, 1) if attention_mask is not None else None
@@ -0,0 +1,48 @@
1
+ # RewardModel 定义
2
+ # 从文档自动提取生成
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+
8
class RewardModel(nn.Module):
    """Simplified reward model: a Transformer encoder plus a scalar reward head.

    Token ids are embedded, summed with a learned positional table, passed
    through a Transformer encoder, and the hidden state at the last position
    is projected to one score per sequence.

    Args:
        vocab_size: Size of the token vocabulary.
        d_model: Embedding / hidden dimension.
        nhead: Number of attention heads.
        num_layers: Number of Transformer encoder layers.
        max_len: Number of positions in the learned positional table, i.e.
            the longest sequence ``forward`` accepts. Defaults to 512, the
            value that was previously hard-coded.

    Raises:
        ValueError: If ``max_len`` is not positive.
    """

    def __init__(self, vocab_size=1000, d_model=128, nhead=4, num_layers=2, max_len=512):
        super().__init__()
        if max_len <= 0:
            raise ValueError(f"max_len must be positive, got {max_len}")

        self.embedding = nn.Embedding(vocab_size, d_model)
        # Learned positional table, initialised with small random values.
        self.pos_encoding = nn.Parameter(torch.randn(1, max_len, d_model) * 0.01)

        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, d_model * 4, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)

        # Reward head: maps the pooled hidden state to a single scalar.
        self.reward_head = nn.Linear(d_model, 1)

    def forward(self, input_ids):
        """Score a batch of token sequences.

        Args:
            input_ids: Integer tensor of shape (batch, seq_len).

        Returns:
            Tensor of shape (batch,) holding one reward per sequence.

        Raises:
            ValueError: If ``seq_len`` is 0 or exceeds the positional table.

        NOTE(review): the score is read from position -1, so right-padded
        batches are scored on a padding token; callers presumably pass
        unpadded or left-padded input -- confirm.
        """
        seq_len = input_ids.size(1)
        max_len = self.pos_encoding.size(1)
        # Without this check an over-long input fails later with an opaque
        # broadcasting error, and an empty one with an IndexError.
        if not 0 < seq_len <= max_len:
            raise ValueError(
                f"sequence length {seq_len} is outside the supported range 1..{max_len}"
            )

        x = self.embedding(input_ids) + self.pos_encoding[:, :seq_len, :]
        x = self.transformer(x)

        last_hidden = x[:, -1, :]  # (batch, d_model)
        reward = self.reward_head(last_hidden).squeeze(-1)  # (batch,)
        return reward
@@ -6,6 +6,7 @@ import os
6
6
  import random
7
7
  import torch
8
8
  from datasets import load_dataset, Features, Value
9
+ from datasets import logging as datasets_logging
9
10
  from torch.utils.data import Dataset
10
11
 
11
12
  class SFTDataset(Dataset):
@@ -17,17 +18,32 @@ class SFTDataset(Dataset):
17
18
  - 标签掩码:仅 assistant 回答部分参与 loss,其余标记为 -100
18
19
  - 使用 apply_chat_template 将对话转为 ChatML 格式
19
20
  """
21
+ # MiniMind 使用 ChatML 格式:<|im_start|>role\ncontent<|im_end|>\n
22
+ # tokenizer 本身未内置 chat_template,需手动设置
23
+ CHATML_TEMPLATE = (
24
+ "{% for message in messages %}<|im_start|>{{ message.role }}\n"
25
+ "{{ message.content }}<|im_end|>\n"
26
+ "{% endfor %}"
27
+ "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
28
+ )
29
+
20
30
  def __init__(self, jsonl_path, tokenizer, max_length=768):
21
31
  super().__init__()
22
32
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
23
33
  self.tokenizer = tokenizer
34
+ # MiniMind tokenizer 未内置 chat_template,需手动设置 ChatML 格式
35
+ if not tokenizer.chat_template:
36
+ tokenizer.chat_template = self.CHATML_TEMPLATE
24
37
  self.max_length = max_length
25
38
  features = Features({
26
39
  'conversations': [{'role': Value('string'), 'content': Value('string'),
27
40
  'reasoning_content': Value('string'), 'tools': Value('string'),
28
41
  'tool_calls': Value('string')}]
29
42
  })
43
+ # 抑制 load_dataset 的 "Generating train split" 进度输出
44
+ datasets_logging.set_verbosity_error()
30
45
  self.samples = load_dataset('json', data_files=jsonl_path, split='train', features=features)
46
+ datasets_logging.set_verbosity_warning()
31
47
  # 预计算 assistant 回答的起止标记 ID
32
48
  self.bos_id = tokenizer(f'{tokenizer.bos_token}assistant\n', add_special_tokens=False).input_ids
33
49
  self.eos_id = tokenizer(f'{tokenizer.eos_token}\n', add_special_tokens=False).input_ids
@@ -91,15 +107,11 @@ def pre_processing_chat(conversations, add_system_ratio=0.2):
91
107
 
92
108
  SYSTEM_PROMPTS = [
93
109
  "你是一个知识丰富的AI,尽力为用户提供准确的信息。",
94
- "你是minimind,一个小巧但有用的语言模型。",
95
110
  "你是一个专业的AI助手,请提供有价值的回答。",
96
- "你是minimind,请尽力帮助用户解决问题。",
97
111
  "你是一个可靠的AI,请给出准确的回答。",
98
112
  "You are a helpful AI assistant.",
99
- "You are minimind, a lightweight intelligent assistant.",
100
113
  "You are a friendly chatbot. Please answer the user's questions carefully.",
101
114
  "You are a knowledgeable AI. Try your best to provide accurate information.",
102
- "You are minimind, a small but useful language model."
103
115
  ]
104
116
  # 概率性添加 system
105
117
  if conversations[0].get('role') != 'system':
@@ -83,7 +83,7 @@ const DATASETS = [
83
83
  id: 'minimind-sft',
84
84
  name: 'MiniMind SFT (LLM监督微调语料)',
85
85
  url: 'https://www.modelscope.cn/datasets/icyfenix/Minimind_SFT.git',
86
- size: '~500MB',
86
+ size: '~1.7GB',
87
87
  format: 'git',
88
88
  targetDir: 'datasets/minimind-sft',
89
89
  source: 'ModelScope (icyfenix)'
@@ -0,0 +1,181 @@
1
+ const EventEmitter = require('events');
2
+
3
class ChatManager extends EventEmitter {
  constructor() {
    super();
    this.session = null;
    this._pendingResponse = null;
    this._responseBuffer = '';
    this._pendingTimeout = null;
    // Created once so the very same function reference is handed to both
    // stdout.on() and stdout.removeListener(); removing a different
    // function object would silently leave the listener attached.
    this._onStdoutData = (data) => this._handleStdout(data);
  }

  /**
   * Register a chat sandbox session.
   * @param {'docker'|'native'} type - Sandbox type
   * @param {object} options
   * @param {object} [options.container] - Docker container instance
   * @param {object} [options.process] - Child process instance
   * @param {object} options.stdin - Writable stdin stream
   */
  register(type, { container, process: proc, stdin }) {
    // Drop the listener of any previous native session so re-registering
    // never processes the same chunk twice.
    this._detachStdout();

    this.session = { type, container, process: proc, stdin, ready: false };
    this._responseBuffer = '';

    // Only native sessions are wired to stdout here; Docker output is fed
    // in through handleDockerStream().
    const stdout = type === 'native' && proc ? proc.stdout : null;
    if (stdout) {
      stdout.on('data', this._onStdoutData);
    }
  }

  /**
   * Mark the chat sandbox as ready (or not) and emit a 'ready' event.
   */
  setReady(ready) {
    if (this.session) {
      this.session.ready = ready;
    }
    this.emit('ready', ready);
  }

  /**
   * Report the chat service status.
   */
  getStatus() {
    if (!this.session) {
      return { ready: false, message: '对话服务未启动' };
    }
    return {
      ready: this.session.ready,
      message: this.session.ready ? '对话服务就绪' : '模型加载中...'
    };
  }

  /**
   * Send one chat message and wait for the reply.
   *
   * Only one request may be in flight: the sandbox protocol carries no
   * request id, so replies cannot be matched to overlapping requests.
   *
   * @param {string} message - User message
   * @returns {Promise<string>} Model reply
   */
  async send(message) {
    if (!this.session || !this.session.ready) {
      throw new Error('对话服务未就绪');
    }
    if (!this.session.stdin) {
      throw new Error('沙箱 stdin 不可用');
    }
    if (this._pendingResponse) {
      // Overwriting the pending entry would strand the earlier caller and
      // let its timer cancel this request.
      throw new Error('上一条消息仍在处理中');
    }

    // JSON string escaping yields a literal that Python also accepts.
    const escapedMessage = JSON.stringify(message);
    const code = `print(chat(${escapedMessage}))`;
    const cmd = JSON.stringify({ action: 'execute', code });
    const stdin = this.session.stdin;

    return new Promise((resolve, reject) => {
      const pending = { resolve, reject, buffer: '' };
      this._pendingResponse = pending;

      // Timeout guard (60 seconds). Only clear the shared slot when it still
      // belongs to this request.
      this._pendingTimeout = setTimeout(() => {
        if (this._pendingResponse === pending) {
          this._pendingResponse = null;
          this._pendingTimeout = null;
        }
        reject(new Error('推理超时'));
      }, 60000);

      try {
        stdin.write(cmd + '\n');
      } catch (err) {
        // A synchronous write failure must not leave the timer armed.
        if (this._pendingResponse === pending) {
          this._takePending();
        }
        reject(err);
      }
    });
  }

  /**
   * Consume a chunk of sandbox output: split it into lines and dispatch
   * each JSON object line as a protocol message.
   */
  _handleStdout(data) {
    this._responseBuffer += data.toString();

    const lines = this._responseBuffer.split('\n');
    // The last element is an incomplete line (or ''); keep it for next time.
    this._responseBuffer = lines.pop();

    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        const msg = JSON.parse(line);
        // A bare JSON scalar such as `42` or `"ok"` parses fine but is not a
        // protocol message; treat it as plain reply text instead of
        // dropping it.
        if (msg === null || typeof msg !== 'object' || Array.isArray(msg)) {
          throw new TypeError('not a protocol message');
        }
        this._handleMessage(msg);
      } catch {
        if (this._pendingResponse) {
          this._pendingResponse.buffer += line + '\n';
        }
      }
    }
  }

  /**
   * Handle a single parsed JSON message.
   */
  _handleMessage(msg) {
    switch (msg.type) {
      case 'idle':
        this.setReady(true);
        break;

      case 'pong':
        break;

      case 'stream':
        if (this._pendingResponse) {
          const content = msg.content || msg.text || '';
          if (content) {
            this._pendingResponse.buffer += content;
          }
        }
        break;

      case 'result':
      case 'execute_result': {
        const pending = this._takePending();
        if (pending) {
          pending.resolve(msg.content || pending.buffer.trim());
        }
        break;
      }

      case 'error': {
        const pending = this._takePending();
        if (pending) {
          // Kernel errors carry the text in `evalue`.
          pending.reject(new Error(msg.content || msg.message || msg.evalue || '推理出错'));
        }
        break;
      }
    }
  }

  /**
   * Feed Docker-mode output into the same line parser (called by sandbox.js).
   */
  handleDockerStream(data) {
    this._handleStdout(data);
  }

  /**
   * Tear down the current session, rejecting any in-flight request.
   */
  clear() {
    const pending = this._takePending();
    if (pending) {
      pending.reject(new Error('沙箱已停止'));
    }
    // Stop a cleared native session from feeding data into this manager.
    this._detachStdout();
    this.session = null;
    this._responseBuffer = '';
    this.emit('cleared');
  }

  /**
   * Detach and return the in-flight request (or null), cancelling its timer.
   */
  _takePending() {
    const pending = this._pendingResponse;
    if (!pending) return null;
    clearTimeout(this._pendingTimeout);
    this._pendingTimeout = null;
    this._pendingResponse = null;
    return pending;
  }

  /**
   * Remove the stdout listener installed by register() for a native session.
   */
  _detachStdout() {
    const current = this.session;
    if (current && current.type === 'native' && current.process && current.process.stdout) {
      current.process.stdout.removeListener('data', this._onStdoutData);
    }
  }
}
179
+
180
+ // 单例导出
181
+ module.exports = new ChatManager();
@@ -12,6 +12,7 @@ import { fileURLToPath } from 'url'
12
12
  import { resolve } from 'path'
13
13
  import sandboxRouter from './routes/sandbox.js'
14
14
  import nativeRouter from './routes/native.js'
15
+ import chatRouter from './routes/chat.js'
15
16
  import { cleanupAllContainers } from './sandbox.js'
16
17
  import { cleanupAllProcesses } from './native_executor.js'
17
18
  import { checkNativeEnvironment, getDataPath } from './native_env_check.js'
@@ -72,6 +73,9 @@ if (isNativeMode) {
72
73
  app.use('/api/sandbox', sandboxRouter)
73
74
  }
74
75
 
76
+ // 对话 API
77
+ app.use('/api/chat', chatRouter)
78
+
75
79
  // 错误处理
76
80
  app.use((err, req, res, next) => {
77
81
  log(`Error: ${err.message}`)
@@ -483,6 +483,164 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
483
483
  log_debug(f'Error shutting down kernel: {e}')
484
484
 
485
485
 
486
def _collect_kernel_outputs(kc, timeout, stream=False):
    """Drain IOPub messages for the running execution until the kernel is idle.

    Args:
        kc: A started KernelClient on which ``execute`` was already called.
        timeout: Overall deadline in seconds; a value <= 0 disables the
            deadline.
        stream: When true, every output is emitted immediately through
            ``output_json`` and nothing is accumulated; otherwise outputs
            are collected into the returned list.

    Returns:
        A ``(outputs, timed_out, has_error)`` tuple. ``outputs`` is empty in
        stream mode, ``timed_out`` is true when the deadline passed before
        the kernel reported idle, and ``has_error`` is true when at least
        one ``error`` message was seen.
    """
    deadline = time.time() + timeout if timeout > 0 else float('inf')
    outputs = []
    timed_out = False
    has_error = False

    def emit(item):
        # Single place that decides between live streaming and collecting.
        if stream:
            output_json(item)
        else:
            outputs.append(item)

    while True:
        remaining = deadline - time.time()
        if timeout > 0 and remaining <= 0:
            timed_out = True
            break

        try:
            msg = kc.get_iopub_msg(timeout=max(1, remaining) if timeout > 0 else 2)
        except Exception:
            # Presumably queue.Empty from the poll timing out; kept broad to
            # match the previous behavior.
            if timeout > 0 and time.time() >= deadline:
                timed_out = True
                break
            # Nothing was received, so poll again. Falling through would read
            # `msg` while it is unbound (first iteration) or still holds the
            # previous message, which duplicated that output.
            continue

        msg_type = msg['header']['msg_type']
        content = msg['content']

        if msg_type == 'status':
            # An idle status marks the end of the execution.
            if content.get('execution_state') == 'idle':
                break
            continue

        if msg_type == 'stream':
            emit({
                'type': 'stream',
                'name': content.get('name', 'stdout'),
                'text': content.get('text', '')
            })

        elif msg_type == 'display_data':
            emit({
                'type': 'display_data',
                'data': content.get('data', {}),
                'metadata': content.get('metadata', {})
            })

        elif msg_type == 'execute_result':
            emit({
                'type': 'execute_result',
                'data': content.get('data', {}),
                'metadata': content.get('metadata', {}),
                'execution_count': content.get('execution_count')
            })

        elif msg_type == 'error':
            error_output = {
                'type': 'error',
                'ename': content.get('ename', 'UnknownError'),
                'evalue': content.get('evalue', ''),
                'traceback': content.get('traceback', [])
            }
            if is_cuda_compat_error(content.get('evalue', '')):
                error_output = enrich_cuda_error(error_output)
            has_error = True
            emit(error_output)

    return outputs, timed_out, has_error
574
+
575
+
576
def _execute_and_output(kc, code, timeout=0):
    """Run ``code`` on an existing kernel and print one JSON result line.

    Non-streaming variant: outputs are collected first and written to stdout
    as a single JSON document once the execution ends or times out.

    Args:
        kc: A started KernelClient.
        code: Python source to execute.
        timeout: Deadline in seconds; a value <= 0 falls back to
            ``DEFAULT_TIMEOUT``.
    """
    began = time.time()
    if timeout > 0:
        limit = timeout
    else:
        limit = DEFAULT_TIMEOUT

    kc.execute(code, allow_stdin=False)
    collected, expired, failed = _collect_kernel_outputs(kc, limit, stream=False)

    elapsed = round(time.time() - began, 3)

    if not expired:
        payload = {
            'success': not failed,
            'outputs': collected,
            'executionTime': elapsed
        }
    else:
        # Anything collected before the deadline is replaced by one
        # synthetic TimeoutError entry.
        notice = f'Execution timed out after {limit} seconds'
        payload = {
            'success': False,
            'outputs': [{
                'type': 'error',
                'ename': 'TimeoutError',
                'evalue': notice,
                'traceback': [notice]
            }],
            'executionTime': elapsed
        }

    print(json.dumps(payload, ensure_ascii=False), flush=True)
614
+
615
def _stream_execute(kc, code, timeout=0):
    """Run ``code`` on an existing kernel, emitting each message as it arrives.

    Streaming variant: individual outputs are written by the collector while
    the code runs, and a final ``result`` message closes the execution.

    Args:
        kc: A started KernelClient.
        code: Python source to execute.
        timeout: Deadline in seconds; a value <= 0 falls back to
            ``DEFAULT_TIMEOUT``.
    """
    began = time.time()
    if timeout > 0:
        limit = timeout
    else:
        limit = DEFAULT_TIMEOUT

    kc.execute(code, allow_stdin=False)
    collected, expired, failed = _collect_kernel_outputs(kc, limit, stream=True)

    elapsed = round(time.time() - began, 3)

    if not expired:
        output_json({'type': 'result', 'success': not failed,
                     'outputs': collected,
                     'executionTime': elapsed})
        return

    # Deadline passed: report a synthetic TimeoutError, then a failed result.
    notice = f'Execution timed out after {limit} seconds'
    output_json({'type': 'error', 'ename': 'TimeoutError',
                 'evalue': notice,
                 'traceback': [notice]})
    output_json({'type': 'result', 'success': False,
                 'executionTime': elapsed})
642
+
643
+
486
644
  def check_cuda_compatibility():
487
645
  """
488
646
  快速检查 CUDA 兼容性
@@ -545,6 +703,8 @@ def main():
545
703
  parser.add_argument('--timeout', type=int, default=DEFAULT_TIMEOUT, help='执行超时时间(秒)')
546
704
  parser.add_argument('--check-cuda', action='store_true', help='仅检查 CUDA 兼容性')
547
705
  parser.add_argument('--stream', action='store_true', help='启用流式输出模式(实时输出每个消息)')
706
+ parser.add_argument("--serve", action="store_true",
707
+ help="长运行模式:初始代码执行后 Kernel 保持运行,从 stdin 接收后续指令")
548
708
 
549
709
  args = parser.parse_args()
550
710
 
@@ -569,7 +729,7 @@ def main():
569
729
  return
570
730
  elif args.code:
571
731
  code = args.code
572
- else:
732
+ elif not args.serve:
573
733
  result = {
574
734
  'success': False,
575
735
  'outputs': [{
@@ -589,6 +749,112 @@ def main():
589
749
  print(json.dumps(result, ensure_ascii=False))
590
750
  return
591
751
 
752
+ # serve 模式:自行管理 Kernel 生命周期,执行初始代码后进入 stdin 监听循环
753
+ if args.serve:
754
+ from jupyter_client import KernelManager
755
+
756
+ km = KernelManager()
757
+ suppress_stdout()
758
+ km.start_kernel()
759
+ kc = km.client()
760
+ kc.start_channels()
761
+ kc.wait_for_ready(timeout=30)
762
+ restore_stdout()
763
+
764
+ # 注入全局变量、数据路径和 sys.path 配置
765
+ python_path_env = os.environ.get('PYTHONPATH', '')
766
+ path_separator = ';' if os.name == 'nt' else ':'
767
+ python_path_entries = [p for p in python_path_env.split(path_separator) if p]
768
+ setup_code = '''
769
+ import os
770
+ import sys
771
+
772
+ DATA_DIR = os.environ.get('DMLA_DATA_PATH', '/data')
773
+
774
+ # 将 PYTHONPATH 中的路径注入 sys.path(IPython kernel 可能不会自动继承)
775
+ _python_path_entries = ''' + repr(python_path_entries) + '''
776
+ for _p in _python_path_entries:
777
+ if _p not in sys.path:
778
+ sys.path.insert(0, _p)
779
+
780
+ # 配置 matplotlib inline 后端(在用户 import matplotlib 之前设置)
781
+ import matplotlib
782
+ matplotlib.use('module://matplotlib_inline.backend_inline')
783
+ '''
784
+ kc.execute(setup_code, allow_stdin=False)
785
+ # 等待 setup 执行完成
786
+ setup_start = time.time()
787
+ while True:
788
+ if time.time() - setup_start > 5:
789
+ break
790
+ try:
791
+ msg = kc.get_iopub_msg(timeout=2)
792
+ msg_type = msg['header']['msg_type']
793
+ if msg_type == 'status' and msg['content'].get('execution_state') == 'idle':
794
+ break
795
+ except Exception:
796
+ break
797
+
798
+ # 执行初始代码
799
+ if code:
800
+ if args.stream:
801
+ _stream_execute(kc, code, args.timeout)
802
+ else:
803
+ _execute_and_output(kc, code, args.timeout)
804
+
805
+ # serve 模式:进入 stdin 监听循环
806
+ def output_message(msg_type, content):
807
+ """输出 JSON Lines 消息到 stdout"""
808
+ msg = {"type": msg_type}
809
+ if content is not None:
810
+ msg["content"] = content
811
+ sys.stdout.write(json.dumps(msg, ensure_ascii=False) + "\n")
812
+ sys.stdout.flush()
813
+
814
+ output_message("idle", "kernel ready")
815
+
816
+ while True:
817
+ try:
818
+ line = sys.stdin.readline()
819
+ if not line:
820
+ break
821
+ line = line.strip()
822
+ if not line:
823
+ continue
824
+ try:
825
+ cmd = json.loads(line)
826
+ except json.JSONDecodeError:
827
+ output_message("error", f"无效的 JSON 指令: {line}")
828
+ continue
829
+
830
+ action = cmd.get("action")
831
+ if action == "ping":
832
+ output_message("pong", None)
833
+ elif action == "execute":
834
+ exec_code = cmd.get("code", "")
835
+ if args.stream:
836
+ _stream_execute(kc, exec_code, cmd.get("timeout", 0))
837
+ else:
838
+ _execute_and_output(kc, exec_code, cmd.get("timeout", 0))
839
+ output_message("idle", "kernel ready")
840
+ else:
841
+ output_message("error", f"未知指令: {action}")
842
+ except Exception as e:
843
+ output_message("error", str(e))
844
+ break
845
+
846
+ # stdin 关闭或出错,清理退出
847
+ kc.stop_channels()
848
+ try:
849
+ kc.shutdown_kernel()
850
+ except Exception:
851
+ pass
852
+ try:
853
+ km.shutdown_kernel(now=True)
854
+ except Exception:
855
+ pass
856
+ return
857
+
592
858
  result = run_code(code, args.timeout, stream=args.stream)
593
859
 
594
860
  # 非流式模式:输出 JSON 结果到 stdout
@@ -10,6 +10,7 @@ import os from 'os'
10
10
  import fs from 'fs'
11
11
  import chalk from 'chalk'
12
12
  import { getCachedEnvironment, getKernelRunnerPath, getSharedModulesPath, getServerPythonPath, getDataPath, getProgressPath, getPythonCommand, detectPythonCommand } from './native_env_check.js'
13
+ import chatManager from './chat-manager.cjs'
13
14
 
14
15
  const __filename = fileURLToPath(import.meta.url)
15
16
  const __dirname = path.dirname(__filename)
@@ -232,6 +233,14 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
232
233
 
233
234
  registerProcess(executionId, proc)
234
235
 
236
+ // 检测是否为对话沙箱
237
+ if (procArgs.includes('--serve')) {
238
+ chatManager.register('native', {
239
+ process: proc,
240
+ stdin: proc.stdin
241
+ })
242
+ }
243
+
235
244
  // 设置超时
236
245
  const timeoutPromise = new Promise((_, reject) => {
237
246
  timeoutId = setTimeout(() => {
@@ -259,6 +268,9 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
259
268
  const execPromise = new Promise((resolve, reject) => {
260
269
  proc.on('close', (code) => {
261
270
  log(`Process exited with code ${code}`)
271
+ if (chatManager.session) {
272
+ chatManager.clear()
273
+ }
262
274
  if (timeoutId) clearTimeout(timeoutId)
263
275
  resolve({ stdout, stderr, exitCode: code })
264
276
  })
@@ -378,8 +390,9 @@ export async function runPythonCodeNative(code, useGpu = false, timeoutOverride
378
390
  * @param {boolean} useGpu - 是否请求 GPU
379
391
  * @param {object} res - Express 响应对象
380
392
  * @param {number|null} timeoutOverride - 超时时间(秒)
393
+ * @param {string|null} mode - 执行模式('chat' 启用对话模式)
381
394
  */
382
- export async function runPythonCodeStreamingNative(code, useGpu = false, res, timeoutOverride = null) {
395
+ export async function runPythonCodeStreamingNative(code, useGpu = false, res, timeoutOverride = null, mode = null) {
383
396
  const startTime = Date.now()
384
397
  const executionId = generateExecutionId()
385
398
 
@@ -474,6 +487,7 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
474
487
  '--timeout', String(timeoutSeconds),
475
488
  '--stream'
476
489
  ]
490
+ if (mode === 'chat') procArgs.push('--serve')
477
491
  } else {
478
492
  // Linux/macOS 直接传递代码参数
479
493
  procArgs = [
@@ -482,6 +496,7 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
482
496
  '--timeout', String(timeoutSeconds),
483
497
  '--stream'
484
498
  ]
499
+ if (mode === 'chat') procArgs.push('--serve')
485
500
  }
486
501
 
487
502
  try {
@@ -489,6 +504,14 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
489
504
 
490
505
  registerProcess(executionId, proc)
491
506
 
507
+ // 检测是否为对话沙箱
508
+ if (procArgs.includes('--serve')) {
509
+ chatManager.register('native', {
510
+ process: proc,
511
+ stdin: proc.stdin
512
+ })
513
+ }
514
+
492
515
  // 输出运行状态
493
516
  res.write(JSON.stringify({
494
517
  type: 'status',
@@ -514,6 +537,19 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
514
537
  // kernel_runner.py 输出的已经是 JSON 格式,直接转发
515
538
  if (line.trim().startsWith('{')) {
516
539
  res.write(line + '\n')
540
+ // chat 模式:检测 idle 消息,注册 ChatManager
541
+ if (mode === 'chat' && !chatManager.session) {
542
+ try {
543
+ const msg = JSON.parse(line)
544
+ if (msg.type === 'idle') {
545
+ chatManager.register('native', {
546
+ process: proc,
547
+ stdin: proc.stdin
548
+ })
549
+ log('ChatManager registered for Native chat sandbox')
550
+ }
551
+ } catch {}
552
+ }
517
553
  } else {
518
554
  // 非 JSON 内容包装为 stream 消息
519
555
  res.write(JSON.stringify({
@@ -562,10 +598,25 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
562
598
  }) + '\n')
563
599
  })
564
600
 
565
- // 等待进程完成
566
- await new Promise((resolve) => {
567
- proc.on('close', resolve)
568
- })
601
+ // 等待进程完成(chat 模式下进程持续运行,不等待 close)
602
+ if (mode === 'chat') {
603
+ // chat 模式:进程持续运行,HTTP 流保持打开
604
+ // 当进程意外退出时清理
605
+ proc.on('close', () => {
606
+ if (chatManager.session) {
607
+ chatManager.clear()
608
+ }
609
+ })
610
+ } else {
611
+ await new Promise((resolve) => {
612
+ proc.on('close', () => {
613
+ if (chatManager.session) {
614
+ chatManager.clear()
615
+ }
616
+ resolve()
617
+ })
618
+ })
619
+ }
569
620
 
570
621
  // 处理缓冲区剩余内容
571
622
  if (stdoutBuffer.trim()) {
@@ -613,8 +664,11 @@ export async function runPythonCodeStreamingNative(code, useGpu = false, res, ti
613
664
  log(`Failed to clean up temp file: ${e.message}`)
614
665
  }
615
666
  }
616
- res.end()
617
- log('Streaming response ended')
667
+ // chat 模式下不关闭 HTTP 响应,进程持续运行
668
+ if (mode !== 'chat') {
669
+ res.end()
670
+ log('Streaming response ended')
671
+ }
618
672
  }
619
673
  }
620
674
 
@@ -0,0 +1,35 @@
1
+ import { Router } from 'express'
2
+ import chatManager from '../chat-manager.cjs'
3
+
4
const router = Router()

/**
 * Report the chat service status.
 * GET /api/chat/status
 */
router.get('/status', (req, res) => {
  res.json(chatManager.getStatus())
})

/**
 * Send a chat message and return the model reply.
 * POST /api/chat/send
 * Body: { message: string }
 */
router.post('/send', async (req, res) => {
  // req.body is undefined when no body parser handled the request.
  // Destructuring it directly would throw inside this async handler and
  // surface as an unhandled rejection instead of a 400.
  const { message } = req.body || {}

  if (!message || typeof message !== 'string') {
    return res.status(400).json({ error: '消息不能为空' })
  }

  try {
    const response = await chatManager.send(message)
    res.json({ response })
  } catch (err) {
    // "Service not ready" is a temporary condition, so it maps to 503.
    const status = err.message === '对话服务未就绪' ? 503 : 500
    res.status(status).json({ error: err.message })
  }
})

export default router
@@ -115,7 +115,7 @@ router.post('/run', async (req, res) => {
115
115
  * 响应: JSON Lines 流式输出
116
116
  */
117
117
  router.post('/stream', async (req, res) => {
118
- const { code, useGpu = false, timeout = null } = req.body
118
+ const { code, useGpu = false, timeout = null, mode = null } = req.body
119
119
 
120
120
  // 验证请求
121
121
  if (!code || typeof code !== 'string') {
@@ -155,7 +155,7 @@ router.post('/stream', async (req, res) => {
155
155
  }
156
156
 
157
157
  // 流式执行
158
- await runPythonCodeStreamingNative(code, useGpu, res, timeout)
158
+ await runPythonCodeStreamingNative(code, useGpu, res, timeout, mode)
159
159
 
160
160
  } catch (error) {
161
161
  console.error('[Native Sandbox Stream] Error:', error)
@@ -124,11 +124,11 @@ router.post('/run', async (req, res) => {
124
124
  /**
125
125
  * 流式执行代码
126
126
  * POST /api/sandbox/stream
127
- * Body: { code: string, useGpu?: boolean, timeout?: number|null }
127
+ * Body: { code: string, useGpu?: boolean, timeout?: number|null, mode?: string }
128
128
  * 响应: JSON Lines 流式输出
129
129
  */
130
130
  router.post('/stream', async (req, res) => {
131
- const { code, useGpu = false, timeout = null } = req.body
131
+ const { code, useGpu = false, timeout = null, mode = null } = req.body
132
132
 
133
133
  // 验证请求
134
134
  if (!code || typeof code !== 'string') {
@@ -193,7 +193,7 @@ router.post('/stream', async (req, res) => {
193
193
  }
194
194
 
195
195
  // 流式执行代码
196
- await runPythonCodeStreaming(code, actualUseGpu, res, actualImage, timeout)
196
+ await runPythonCodeStreaming(code, actualUseGpu, res, actualImage, timeout, mode)
197
197
 
198
198
  } catch (error) {
199
199
  console.error('Sandbox stream error:', error)
@@ -7,6 +7,7 @@ import path from 'path'
7
7
  import { fileURLToPath } from 'url'
8
8
  import fs from 'fs'
9
9
  import os from 'os'
10
+ import chatManager from './chat-manager.cjs'
10
11
 
11
12
  const __filename = fileURLToPath(import.meta.url)
12
13
  const __dirname = path.dirname(__filename)
@@ -71,6 +72,7 @@ export async function cleanupAllContainers() {
71
72
  }
72
73
 
73
74
  activeContainers.clear()
75
+ chatManager.clear()
74
76
  log(`All containers cleaned up: ${count}`)
75
77
  return count
76
78
  }
@@ -102,6 +104,7 @@ export async function abortExecution(executionId = null) {
102
104
  }
103
105
  }
104
106
 
107
+ chatManager.clear()
105
108
  return { success: true, stopped }
106
109
  }
107
110
 
@@ -496,7 +499,8 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
496
499
  Env: [
497
500
  'PYTHONUNBUFFERED=1',
498
501
  'PYTHONPATH=/workspace',
499
- actualTimeout === null ? 'DMLA_NO_TIMEOUT=1' : ''
502
+ actualTimeout === null ? 'DMLA_NO_TIMEOUT=1' : '',
503
+ `DMLA_DATA_PATH=${getDataVolumePath() || '/data'}`
500
504
  ].filter(e => e) // 过滤空字符串
501
505
  }
502
506
 
@@ -558,6 +562,12 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
558
562
  containerConfig.HostConfig.Binds = binds
559
563
  }
560
564
 
565
+ // 将宿主机 shared 目录路径注入到容器环境变量,供环境检查代码读取
566
+ const sharedMountInfo = (useMount && sharedModulesPath && fs.existsSync(sharedModulesPath))
567
+ ? `host_path=${sharedModulesPath},mounted=true`
568
+ : 'mounted=false'
569
+ containerConfig.Env.push(`DMLA_SHARED_INFO=${sharedMountInfo}`)
570
+
561
571
  if (!PROJECT_ROOT) {
562
572
  console.log('[Sandbox] 独立安装模式,无 Volume Mount')
563
573
  }
@@ -727,7 +737,7 @@ export async function runPythonCode(code, useGpu = false, imageOverride = null,
727
737
  * @param {number|null} timeoutOverride - 可选,超时时间(秒)
728
738
  * @returns {Promise<void>}
729
739
  */
730
- export async function runPythonCodeStreaming(code, useGpu = false, res, imageOverride = null, timeoutOverride = null) {
740
+ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOverride = null, timeoutOverride = null, mode = null) {
731
741
  const startTime = Date.now()
732
742
 
733
743
  // 生成唯一执行 ID
@@ -785,9 +795,17 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
785
795
  // GPU 容器不限制内存,CPU 容器限制 4GB
786
796
  const memoryLimit = useGpu ? SANDBOX_CONFIG.memoryGpu : SANDBOX_CONFIG.memoryCpu
787
797
 
798
+ // 构建命令参数
799
+ const cmdArgs = ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream']
800
+ if (mode === 'chat') {
801
+ cmdArgs.push('--serve')
802
+ }
803
+
788
804
  const containerConfig = {
789
805
  Image: image,
790
- Cmd: ['python3', '/workspace/kernel_runner.py', '--code', code, '--timeout', String(timeoutSeconds), '--stream'],
806
+ Cmd: cmdArgs,
807
+ OpenStdin: true,
808
+ StdinOnce: false,
791
809
  HostConfig: {
792
810
  AutoRemove: false
793
811
  },
@@ -838,6 +856,12 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
838
856
  containerConfig.HostConfig.Binds = binds
839
857
  }
840
858
 
859
+ // 将宿主机 shared 目录路径注入到容器环境变量
860
+ const sharedMountInfo = (useMount && sharedModulesPath && fs.existsSync(sharedModulesPath))
861
+ ? `host_path=${sharedModulesPath},mounted=true`
862
+ : 'mounted=false'
863
+ containerConfig.Env.push(`DMLA_SHARED_INFO=${sharedMountInfo}`)
864
+
841
865
  // GPU 配置
842
866
  if (useGpu) {
843
867
  containerConfig.HostConfig.DeviceRequests = [{
@@ -872,6 +896,24 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
872
896
  await container.start()
873
897
  log('Container started')
874
898
 
899
+ // chat 模式:获取容器的 attach 流(支持 stdin 写入)
900
+ if (mode === 'chat') {
901
+ log('Chat mode detected, preparing stdin stream...')
902
+ const attachStream = await container.attach({
903
+ hijack: true,
904
+ stdin: true,
905
+ stream: true,
906
+ stdout: true,
907
+ stderr: true
908
+ })
909
+ // 先注册 stdin,等 idle 消息时设置 ready
910
+ chatManager.register('docker', {
911
+ container,
912
+ stdin: attachStream
913
+ })
914
+ log('ChatManager registered for Docker sandbox (stdin ready, waiting for idle)')
915
+ }
916
+
875
917
  // 输出运行状态消息
876
918
  const runningMsg = {
877
919
  type: 'status',
@@ -940,6 +982,16 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
940
982
  if (isJsonComplete(text)) {
941
983
  log(`Forwarding complete JSON message: ${text.length} bytes`)
942
984
  res.write(text + '\n')
985
+ // chat 模式:检测 idle 消息,设置 ChatManager 就绪
986
+ if (mode === 'chat') {
987
+ try {
988
+ const msg = JSON.parse(text)
989
+ if (msg.type === 'idle') {
990
+ chatManager.setReady(true)
991
+ log('ChatManager ready (idle message received)')
992
+ }
993
+ } catch {}
994
+ }
943
995
  } else {
944
996
  // JSON 不完整,存入缓冲等待后续帧
945
997
  log(`JSON message incomplete, buffering: ${text.length} bytes`)
@@ -1029,10 +1081,14 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
1029
1081
  res.write(JSON.stringify(errorMsg) + '\n')
1030
1082
  })
1031
1083
 
1032
- // 等待容器结束
1033
- log('Waiting for container to finish...')
1034
- await container.wait()
1035
- log('Container finished')
1084
+ // 等待容器结束(chat 模式下不等待,容器持续运行)
1085
+ if (mode !== 'chat') {
1086
+ log('Waiting for container to finish...')
1087
+ await container.wait()
1088
+ log('Container finished')
1089
+ } else {
1090
+ log('Chat mode: container will keep running, not waiting for finish')
1091
+ }
1036
1092
 
1037
1093
  // 等待日志流结束(带超时保护)
1038
1094
  await new Promise((resolve) => {
@@ -1086,6 +1142,12 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
1086
1142
  // 从活跃列表移除
1087
1143
  unregisterContainer(executionId)
1088
1144
 
1145
+ // chat 模式下保持容器运行和 HTTP 流打开
1146
+ if (mode === 'chat') {
1147
+ log('Chat mode: keeping container alive and HTTP stream open')
1148
+ return
1149
+ }
1150
+
1089
1151
  // 清理容器
1090
1152
  log('Cleaning up container...')
1091
1153
  if (container) {
package/version.json CHANGED
@@ -1,4 +1,4 @@
1
1
  {
2
- "buildTime": "2026-05-24T23:36:43.904Z",
3
- "cliVersion": "2026.5.25-736"
2
+ "buildTime": "2026-05-29T12:19:03.854Z",
3
+ "cliVersion": "2026.5.29-2018"
4
4
  }