@icyfenix-dmla/cli 2026.5.29-2018 → 2026.6.5-1204

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icyfenix-dmla/cli",
3
- "version": "2026.5.29-2018",
3
+ "version": "2026.6.5-1204",
4
4
  "description": "DMLA 沙箱服务命令行工具",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -1,7 +1,9 @@
1
1
  # LLM 模块
2
+ from .dpodataset import DPODataset
3
+ from .logits_to_log_probs import logits_to_log_probs, dpo_loss
2
4
  from .mini_mind_config import MiniMindConfig, RMSNorm, Attention, FeedForward, MiniMindBlock, MiniMindModel, MiniMindForCausalLM, precompute_freqs_cis, apply_rotary_pos_emb, repeat_kv
3
5
  from .pretrain_dataset import PretrainDataset
4
6
  from .reward_model import RewardModel
5
7
  from .sftdataset import SFTDataset, pre_processing_chat
6
8
 
7
- __all__ = ['MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'RewardModel', 'SFTDataset', 'pre_processing_chat']
9
+ __all__ = ['DPODataset', 'logits_to_log_probs', 'dpo_loss', 'MiniMindConfig', 'RMSNorm', 'Attention', 'FeedForward', 'MiniMindBlock', 'MiniMindModel', 'MiniMindForCausalLM', 'precompute_freqs_cis', 'apply_rotary_pos_emb', 'repeat_kv', 'PretrainDataset', 'RewardModel', 'SFTDataset', 'pre_processing_chat']
@@ -0,0 +1,108 @@
1
+ # DPODataset 定义
2
+ # 从文档自动提取生成
3
+
4
+ import json
5
+ import os
6
+ import torch
7
+ from datasets import load_dataset, Features, Value
8
+ from datasets import logging as datasets_logging
9
+ from torch.utils.data import Dataset
10
+
11
class DPODataset(Dataset):
    """Preference-pair dataset for DPO training.

    Each JSONL record is expected to have the form
    ``{"chosen": [{role, content}, ...], "rejected": [{role, content}, ...]}``.
    Both conversations are rendered through the tokenizer's chat template,
    tokenized with truncation and padding to ``max_length``, and returned as
    next-token-prediction (input, target) pairs together with a loss mask
    that is 1 on assistant-reply tokens and 0 everywhere else.
    """

    # Fallback ChatML template; installed on the tokenizer only when it does
    # not already carry a chat template.
    CHATML_TEMPLATE = (
        "{% for message in messages %}<|im_start|>{{ message.role }}\n"
        "{{ message.content }}<|im_end|>\n"
        "{% endfor %}"
        "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
    )

    def __init__(self, jsonl_path, tokenizer, max_length=768):
        """Load the preference pairs and precompute the reply delimiters.

        Args:
            jsonl_path: Path(s) handed to ``load_dataset('json', data_files=...)``.
            tokenizer: Tokenizer exposing ``chat_template``, ``bos_token``,
                ``eos_token``, ``pad_token_id``, ``apply_chat_template`` and
                ``__call__``. NOTE: its ``chat_template`` is set in place when
                it is empty.
            max_length: Length every tokenized conversation is truncated /
                padded to.
        """
        super().__init__()
        # NOTE: process-wide side effect on the environment.
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        self.tokenizer = tokenizer
        if not tokenizer.chat_template:
            tokenizer.chat_template = self.CHATML_TEMPLATE
        self.max_length = max_length
        self.padding = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0
        # Token-id sequences that open and close an assistant reply; they are
        # searched for verbatim by generate_loss_mask().
        self.bos_id = tokenizer(f'{tokenizer.bos_token}assistant\n', add_special_tokens=False).input_ids
        self.eos_id = tokenizer(f'{tokenizer.eos_token}\n', add_special_tokens=False).input_ids
        features = Features({
            'chosen': [{'role': Value('string'), 'content': Value('string')}],
            'rejected': [{'role': Value('string'), 'content': Value('string')}]
        })
        # Silence the datasets library while loading, then put back whatever
        # level the caller had - also when loading fails.
        previous_verbosity = datasets_logging.get_verbosity()
        datasets_logging.set_verbosity_error()
        try:
            self.samples = load_dataset('json', data_files=jsonl_path, split='train', features=features)
        finally:
            datasets_logging.set_verbosity(previous_verbosity)

    def __len__(self):
        """Return the number of preference pairs."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return the tokenized chosen/rejected pair at ``index``.

        Returns:
            dict with long tensors ``x_*`` (inputs, last token dropped),
            ``y_*`` (targets, first token dropped) and ``mask_*`` (loss mask
            aligned with ``y_*``) for both ``chosen`` and ``rejected``.
        """
        sample = self.samples[index]
        x_chosen, y_chosen, mask_chosen = self._encode(sample['chosen'])
        x_rejected, y_rejected, mask_rejected = self._encode(sample['rejected'])
        return {
            'x_chosen': x_chosen, 'y_chosen': y_chosen, 'mask_chosen': mask_chosen,
            'x_rejected': x_rejected, 'y_rejected': y_rejected, 'mask_rejected': mask_rejected
        }

    def _encode(self, messages):
        """Render, tokenize and shift one conversation into (x, y, mask) tensors."""
        text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False
        )
        encoding = self.tokenizer(
            text, truncation=True, max_length=self.max_length, padding='max_length'
        )
        input_ids = encoding['input_ids']
        loss_mask = self.generate_loss_mask(input_ids)
        # Next-token prediction alignment: x drops the last token, y drops the
        # first, and the mask follows y so only reply targets are scored.
        x = torch.tensor(input_ids[:-1], dtype=torch.long)
        y = torch.tensor(input_ids[1:], dtype=torch.long)
        mask = torch.tensor(loss_mask[1:], dtype=torch.long)
        return x, y, mask

    def generate_loss_mask(self, input_ids):
        """Build a 0/1 mask that is 1 only inside assistant replies.

        A reply starts right after an occurrence of ``self.bos_id`` and runs
        through the next occurrence of ``self.eos_id`` (inclusive). A reply
        with no terminator is masked to the end of the sequence.

        Args:
            input_ids: List of token ids.

        Returns:
            List of ints (0 or 1) with the same length as ``input_ids``.
        """
        seq_len = len(input_ids)
        bos_len = len(self.bos_id)
        eos_len = len(self.eos_id)
        # Never mark beyond the real sequence, nor beyond max_length.
        limit = min(seq_len, self.max_length)
        loss_mask = [0] * seq_len
        i = 0
        while i < seq_len:
            if input_ids[i:i + bos_len] != self.bos_id:
                i += 1
                continue
            start = i + bos_len
            end = start
            while end < seq_len and input_ids[end:end + eos_len] != self.eos_id:
                end += 1
            stop = min(end + eos_len, limit)
            for j in range(start, stop):
                loss_mask[j] = 1
            # Resume scanning after the terminator, or stop if none was found.
            i = end + eos_len if end < seq_len else seq_len
        return loss_mask
@@ -0,0 +1,55 @@
1
+ # logits_to_log_probs, dpo_loss 定义
2
+ # 从文档自动提取生成
3
+
4
+ import torch
5
+ import torch.nn.functional as F
6
+
7
def logits_to_log_probs(logits, labels):
    """Return the log-probability the model assigns to each target token.

    The vocabulary axis is taken to be the last one, so both batched
    ``[batch, seq_len, vocab_size]`` and unbatched ``[seq_len, vocab_size]``
    logits are accepted.

    Args:
        logits: Model output whose last dimension is the vocabulary.
        labels: Integer target token ids with the shape of ``logits`` minus
            its last dimension.

    Returns:
        Tensor with the same shape as ``labels`` holding
        ``log_softmax(logits)[..., label]`` for every position.
    """
    log_probs = F.log_softmax(logits, dim=-1)
    # Select, for every position, the entry that belongs to its target id.
    picked = torch.gather(log_probs, dim=-1, index=labels.unsqueeze(-1))
    return picked.squeeze(-1)
21
+
22
+
23
def dpo_loss(ref_log_probs, policy_log_probs, mask, beta):
    """Compute the DPO loss for a batch of stacked chosen/rejected pairs.

    The first half of the batch must hold the chosen sequences and the second
    half the matching rejected sequences, in the same order.

    Args:
        ref_log_probs: Per-token log-probs of the reference model,
            shape [batch, seq_len].
        policy_log_probs: Per-token log-probs of the policy model,
            shape [batch, seq_len].
        mask: Loss mask, shape [batch, seq_len]; only positions with a
            non-zero mask contribute.
        beta: DPO temperature scaling the implicit reward difference.

    Returns:
        Scalar tensor: mean of ``-log(sigmoid(beta * logits))`` over the pairs.

    Raises:
        ValueError: If the batch is empty or has an odd number of rows, since
            it then cannot be split into chosen/rejected pairs.
    """
    batch_size = ref_log_probs.shape[0]
    # An uneven split would either broadcast silently or yield NaN.
    if batch_size == 0 or batch_size % 2 != 0:
        raise ValueError(
            f"dpo_loss expects a non-empty batch with an even number of rows "
            f"(chosen half + rejected half), got {batch_size}"
        )
    half = batch_size // 2

    # Sequence-level log-likelihood: sum of the masked token log-probs.
    ref_seq = (ref_log_probs * mask).sum(dim=1)
    policy_seq = (policy_log_probs * mask).sum(dim=1)

    chosen_ref, rejected_ref = ref_seq[:half], ref_seq[half:]
    chosen_policy, rejected_policy = policy_seq[:half], policy_seq[half:]

    # Implicit reward margin of the policy relative to the reference.
    pi_logratios = chosen_policy - rejected_policy
    ref_logratios = chosen_ref - rejected_ref
    logits = pi_logratios - ref_logratios

    # DPO loss = -log(sigmoid(beta * logits))
    loss = -F.logsigmoid(beta * logits)
    return loss.mean()
@@ -11,14 +11,15 @@ from torch.utils.data import Dataset
11
11
 
12
12
  class SFTDataset(Dataset):
13
13
  """
14
- SFT 数据集:将对话数据 tokenize 为 next-token prediction 格式
14
+ SFT 数据集:将对话数据 tokenize 为 ChatML 格式
15
15
 
16
- 与 PretrainDataset 的核心差异:
16
+ 与 PretrainDataset 的主要差异:
17
17
  - 数据格式从 {"text": "..."} 变为 {"conversations": [...]}
18
- - 标签掩码:仅 assistant 回答部分参与 loss,其余标记为 -100
18
+ - 标签掩码:仅 assistant 回答部分参与 loss,其余标记为 -100(PyTorch CrossEntropyLoss 默认忽略 -100 对应的位置)
19
19
  - 使用 apply_chat_template 将对话转为 ChatML 格式
20
+ - SFT 数据集仍然支持工具调用训练,只要将训练集从 sft_t2t_tiny.jsonl 换回带有工具调用样例的 sft_t2t_mini.jsonl 即可
20
21
  """
21
- # MiniMind 使用 ChatML 格式:<|im_start|>role\ncontent<|im_end|>\n
22
+ # 使用 ChatML 格式:<|im_start|>role\ncontent<|im_end|>\n
22
23
  # tokenizer 本身未内置 chat_template,需手动设置
23
24
  CHATML_TEMPLATE = (
24
25
  "{% for message in messages %}<|im_start|>{{ message.role }}\n"
@@ -31,7 +32,7 @@ class SFTDataset(Dataset):
31
32
  super().__init__()
32
33
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
33
34
  self.tokenizer = tokenizer
34
- # MiniMind tokenizer 未内置 chat_template,需手动设置 ChatML 格式
35
+ # Tokenizer 未内置 chat_template,需手动设置 ChatML 格式
35
36
  if not tokenizer.chat_template:
36
37
  tokenizer.chat_template = self.CHATML_TEMPLATE
37
38
  self.max_length = max_length
@@ -44,9 +45,11 @@ class SFTDataset(Dataset):
44
45
  datasets_logging.set_verbosity_error()
45
46
  self.samples = load_dataset('json', data_files=jsonl_path, split='train', features=features)
46
47
  datasets_logging.set_verbosity_warning()
47
- # 预计算 assistant 回答的起止标记 ID
48
- self.bos_id = tokenizer(f'{tokenizer.bos_token}assistant\n', add_special_tokens=False).input_ids
49
- self.eos_id = tokenizer(f'{tokenizer.eos_token}\n', add_special_tokens=False).input_ids
48
+ # 预计算 assistant 回答的起止 token ID
49
+ # <|im_start|>assistant\n 对应的 token ID 序列,用于定位助手回答的起始位置
50
+ self.bos_id = tokenizer(f'{tokenizer.bos_token}assistant\n', add_special_tokens=False).input_ids
51
+ # 即 <|im_end|>\n 对应的 token ID 序列,用于定位助手回答的结束位置
52
+ self.eos_id = tokenizer(f'{tokenizer.eos_token}\n', add_special_tokens=False).input_ids
50
53
 
51
54
  def __len__(self):
52
55
  return len(self.samples)
@@ -94,7 +97,8 @@ class SFTDataset(Dataset):
94
97
  prompt = self.create_chat_prompt(conversations)
95
98
  input_ids = self.tokenizer(prompt).input_ids[:self.max_length]
96
99
  # 填充到固定长度
97
- input_ids += [self.tokenizer.pad_token_id] * (self.max_length - len(input_ids))
100
+ # 右侧填充至固定长度,填充部分的标签已由 generate_labels 设为 -100,不参与 loss
101
+ input_ids += [self.tokenizer.pad_token_id] * (self.max_length - len(input_ids))
98
102
  labels = self.generate_labels(input_ids)
99
103
  return torch.tensor(input_ids, dtype=torch.long), torch.tensor(labels, dtype=torch.long)
100
104
 
@@ -83,10 +83,19 @@ const DATASETS = [
83
83
  id: 'minimind-sft',
84
84
  name: 'MiniMind SFT (LLM监督微调语料)',
85
85
  url: 'https://www.modelscope.cn/datasets/icyfenix/Minimind_SFT.git',
86
- size: '~1.7GB',
86
+ size: '~90MB',
87
87
  format: 'git',
88
88
  targetDir: 'datasets/minimind-sft',
89
89
  source: 'ModelScope (icyfenix)'
90
+ },
91
+ {
92
+ id: 'minimind-alignment',
93
+ name: 'MiniMind Alignment (LLM对齐语料)',
94
+ url: 'https://www.modelscope.cn/datasets/icyfenix/Minimind_Alignment.git',
95
+ size: '~54MB',
96
+ format: 'git',
97
+ targetDir: 'datasets/minimind-alignment',
98
+ source: 'ModelScope (icyfenix)'
90
99
  }
91
100
  ]
92
101
 
@@ -95,9 +104,11 @@ const DATASETS = [
95
104
  * enquirer 可能抛出空字符串错误或包含 'cancel' 的消息
96
105
  */
97
106
  function isUserCancel(error) {
98
- return !error.message ||
107
+ return !error ||
108
+ !error.message ||
99
109
  error.message === '' ||
100
- error.message.includes('cancel')
110
+ error.message.includes('cancel') ||
111
+ error.code === 'ERR_USE_AFTER_CLOSE'
101
112
  }
102
113
 
103
114
  /**
@@ -1049,6 +1060,19 @@ async function downloadDataset(dataPath, dataset) {
1049
1060
  export async function runDataTUI() {
1050
1061
  showBanner()
1051
1062
 
1063
+ // 处理 enquirer 在 Ctrl+C 时抛出的 ERR_USE_AFTER_CLOSE
1064
+ // enquirer 的 cancel() 方法关闭 readline 后又调用 pause(),导致此错误
1065
+ const handleUncaught = (err) => {
1066
+ if (err.code === 'ERR_USE_AFTER_CLOSE') {
1067
+ console.log()
1068
+ console.log(chalk.gray('已退出数据管理'))
1069
+ console.log()
1070
+ process.exit(0)
1071
+ }
1072
+ throw err
1073
+ }
1074
+ process.on('uncaughtException', handleUncaught)
1075
+
1052
1076
  let dataPath = getDataVolumePath()
1053
1077
 
1054
1078
  // 确保配置目录存在
@@ -1152,6 +1176,7 @@ export async function runDataTUI() {
1152
1176
  console.log()
1153
1177
  console.log(chalk.gray('已退出数据管理'))
1154
1178
  console.log()
1179
+ process.off('uncaughtException', handleUncaught)
1155
1180
  return
1156
1181
  }
1157
1182
 
@@ -1163,6 +1188,7 @@ export async function runDataTUI() {
1163
1188
  console.log()
1164
1189
  console.log(chalk.gray('已退出数据管理'))
1165
1190
  console.log()
1191
+ process.off('uncaughtException', handleUncaught)
1166
1192
  return
1167
1193
  }
1168
1194
  throw error
@@ -306,10 +306,45 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
306
306
 
307
307
  # 处理不同类型的输出
308
308
  if msg_type == 'stream':
309
+ stream_name = content.get('name', 'stdout')
310
+ stream_text = content.get('text', '')
311
+
312
+ # 从 stderr 中提取 ProgressReporter 的 progress JSON,
313
+ # 作为独立的 progress 类型消息发送,避免与普通 stderr 输出混合
314
+ if stream_name == 'stderr':
315
+ progress_lines = []
316
+ other_lines = []
317
+ for line in stream_text.split('\n'):
318
+ if line.startswith('{"type": "progress"') or line.startswith('{"type":"progress"'):
319
+ progress_lines.append(line)
320
+ else:
321
+ other_lines.append(line)
322
+
323
+ # 将 progress JSON 作为独立消息发送(字段展开到顶层,与前端 progress case 匹配)
324
+ for pline in progress_lines:
325
+ if not pline.strip():
326
+ continue
327
+ try:
328
+ import json as _json
329
+ progress_data = _json.loads(pline)
330
+ progress_data['type'] = 'progress'
331
+ if stream:
332
+ output_json(progress_data)
333
+ else:
334
+ outputs.append(progress_data)
335
+ except Exception:
336
+ # JSON 解析失败,作为普通文本处理
337
+ other_lines.append(pline)
338
+
339
+ # 剩余 stderr 内容正常传递
340
+ stream_text = '\n'.join(other_lines)
341
+ if not stream_text.strip():
342
+ continue
343
+
309
344
  stream_output = {
310
345
  'type': 'stream',
311
- 'name': content.get('name', 'stdout'),
312
- 'text': content.get('text', '')
346
+ 'name': stream_name,
347
+ 'text': stream_text
313
348
  }
314
349
 
315
350
  if stream:
@@ -317,7 +352,7 @@ matplotlib.use('module://matplotlib_inline.backend_inline')
317
352
  output_json(stream_output)
318
353
  else:
319
354
  outputs.append(stream_output)
320
- log_debug(f'Stream output: {content.get("name")} len={len(content.get("text", ""))}')
355
+ log_debug(f'Stream output: {stream_name} len={len(stream_text)}')
321
356
 
322
357
  elif msg_type == 'display_data':
323
358
  display_output = {
@@ -982,10 +982,11 @@ export async function runPythonCodeStreaming(code, useGpu = false, res, imageOve
982
982
  if (isJsonComplete(text)) {
983
983
  log(`Forwarding complete JSON message: ${text.length} bytes`)
984
984
  res.write(text + '\n')
985
- // chat 模式:检测 idle 消息,设置 ChatManager 就绪
985
+ // chat 模式:将消息转发给 ChatManager 处理
986
986
  if (mode === 'chat') {
987
987
  try {
988
988
  const msg = JSON.parse(text)
989
+ chatManager.handleDockerStream(Buffer.from(text + '\n'))
989
990
  if (msg.type === 'idle') {
990
991
  chatManager.setReady(true)
991
992
  log('ChatManager ready (idle message received)')
package/version.json CHANGED
@@ -1,4 +1,4 @@
1
1
  {
2
- "buildTime": "2026-05-29T12:19:03.854Z",
3
- "cliVersion": "2026.5.29-2018"
2
+ "buildTime": "2026-06-05T12:04:28.715Z",
3
+ "cliVersion": "2026.6.5-1204"
4
4
  }