@huyooo/ai-chat-core 0.2.41 → 0.2.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{events-GWl1-vbT.d.ts → events-7V2drqe4.d.ts} +47 -3
- package/dist/events.d.ts +1 -1
- package/dist/events.js +1 -1
- package/dist/index.d.ts +15 -2
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/src/events.ts +78 -0
- package/src/providers/context-compressor.ts +178 -110
- package/src/providers/model-registry.ts +158 -14
- package/src/providers/orchestrator.ts +126 -7
- package/src/providers/protocols/ark.ts +1 -1
package/src/events.ts
CHANGED
|
@@ -369,6 +369,43 @@ export interface StepEndEvent {
|
|
|
369
369
|
/** 步骤相关事件联合类型 */
|
|
370
370
|
export type StepEvent = StepStartEvent | StepEndEvent;
|
|
371
371
|
|
|
372
|
+
// ==================== 上下文压缩事件 ====================
|
|
373
|
+
|
|
374
|
+
/** Event describing the start of a context compaction. */
export interface CompactStartEvent {
  type: 'compact_start';
  data: {
    /** Estimated token count before compaction. */
    estimatedTokens: number;
    /** Available prompt token budget. */
    budget: number;
    /** Timestamp at which compaction started. */
    startedAt: number;
  };
}
|
|
386
|
+
|
|
387
|
+
/** Event describing the completion of a context compaction. */
export interface CompactEndEvent {
  type: 'compact_end';
  data: {
    /** Whether compaction succeeded. */
    success: boolean;
    /** Estimated token count after compaction. */
    compressedTokens: number;
    /** Number of messages before compaction. */
    originalMessageCount: number;
    /** Number of messages after compaction. */
    compressedMessageCount: number;
    /** Timestamp at which compaction ended. */
    endedAt: number;
    /** Elapsed time in milliseconds. */
    duration: number;
  };
}
|
|
405
|
+
|
|
406
|
+
/** Union of every context-compaction event. */
export type CompactEvent =
  | CompactStartEvent
  | CompactEndEvent;
|
|
408
|
+
|
|
372
409
|
// ==================== Agent 状态事件 ====================
|
|
373
410
|
|
|
374
411
|
/**
|
|
@@ -403,6 +440,7 @@ export type ChatEvent =
|
|
|
403
440
|
| TextEvent
|
|
404
441
|
| StatusEvent
|
|
405
442
|
| StepEvent
|
|
443
|
+
| CompactEvent
|
|
406
444
|
| AgentStatusEvent;
|
|
407
445
|
|
|
408
446
|
/** 事件类型字符串 */
|
|
@@ -416,6 +454,7 @@ export const CHAT_EVENT_TYPES: readonly ChatEventType[] = [
|
|
|
416
454
|
'text_delta',
|
|
417
455
|
'done', 'error', 'abort',
|
|
418
456
|
'step_start', 'step_end',
|
|
457
|
+
'compact_start', 'compact_end',
|
|
419
458
|
'agent_status',
|
|
420
459
|
] as const;
|
|
421
460
|
|
|
@@ -743,6 +782,40 @@ export function createStepEnd(stepNumber: number, startedAt: number): StepEndEve
|
|
|
743
782
|
};
|
|
744
783
|
}
|
|
745
784
|
|
|
785
|
+
/**
 * Builds a `compact_start` event.
 *
 * @param estimatedTokens - Stored as `data.estimatedTokens`.
 * @param budget - Stored as `data.budget`.
 * @returns The event, with `data.startedAt` set to `Date.now()` at call time.
 */
export function createCompactStart(estimatedTokens: number, budget: number): CompactStartEvent {
  const startedAt = Date.now();
  const event: CompactStartEvent = {
    type: 'compact_start',
    data: { estimatedTokens, budget, startedAt },
  };
  return event;
}
|
|
794
|
+
|
|
795
|
+
/**
 * Builds a `compact_end` event.
 *
 * @param success - Stored as `data.success`.
 * @param compressedTokens - Stored as `data.compressedTokens`.
 * @param originalMessageCount - Stored as `data.originalMessageCount`.
 * @param compressedMessageCount - Stored as `data.compressedMessageCount`.
 * @param startedAt - Start timestamp; subtracted from the end timestamp to get `data.duration`.
 * @returns The event, with `data.endedAt` set to `Date.now()` at call time.
 */
export function createCompactEnd(
  success: boolean,
  compressedTokens: number,
  originalMessageCount: number,
  compressedMessageCount: number,
  startedAt: number,
): CompactEndEvent {
  const finishedAt = Date.now();
  const elapsed = finishedAt - startedAt;

  return {
    type: 'compact_end',
    data: {
      success,
      compressedTokens,
      originalMessageCount,
      compressedMessageCount,
      endedAt: finishedAt,
      duration: elapsed,
    },
  };
}
|
|
818
|
+
|
|
746
819
|
// ==================== 类型守卫 ====================
|
|
747
820
|
|
|
748
821
|
/** 检查是否为思考事件 */
|
|
@@ -785,6 +858,11 @@ export function isStepEvent(event: ChatEvent): event is StepEvent {
|
|
|
785
858
|
return event.type.startsWith('step_');
|
|
786
859
|
}
|
|
787
860
|
|
|
861
|
+
/** Type guard: true when the event's `type` string begins with `compact_`. */
export function isCompactEvent(event: ChatEvent): event is CompactEvent {
  const prefix = 'compact_';
  return event.type.slice(0, prefix.length) === prefix;
}
|
|
865
|
+
|
|
788
866
|
/** 检查错误是否可重试 */
|
|
789
867
|
export function isRetryableError(event: ChatEvent): boolean {
|
|
790
868
|
if (event.type !== 'error') return false;
|
|
@@ -1,16 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Context 压缩模块
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* 压缩算法:
|
|
10
|
-
* 1. 保留 system prompt
|
|
11
|
-
* 2. 保留第一条 user 消息(任务描述)
|
|
12
|
-
* 3. 将中间的 assistant/tool 交互压缩为一条摘要
|
|
13
|
-
* 4. 保留最近 N 条消息(工作上下文)
|
|
3
|
+
*
|
|
4
|
+
* 当 prompt token 估算接近模型 context window 时,让当前模型自己总结对话历史,
|
|
5
|
+
* 然后用 summary + 最近几条消息继续对话。
|
|
6
|
+
*
|
|
7
|
+
* 参考 Claude Code / Cursor 的做法:
|
|
8
|
+
* 不机械截断,而是让 AI 生成高质量摘要,保留关键决策和上下文。
|
|
14
9
|
*/
|
|
15
10
|
|
|
16
11
|
import type { StandardMessage } from './types';
|
|
@@ -18,69 +13,86 @@ import { DebugLogger } from '../utils';
|
|
|
18
13
|
|
|
19
14
|
const logger = DebugLogger.module('ContextCompressor');
|
|
20
15
|
|
|
16
|
+
// ==================== Token 估算 ====================
|
|
17
|
+
|
|
18
|
+
/** Average characters per token assumed for non-CJK text. */
const CHARS_PER_TOKEN = 3.2;
/** Fixed per-message token overhead added by estimateMessageTokens. */
const MESSAGE_OVERHEAD_TOKENS = 4;
/**
 * Tokens charged per CJK / full-width character.
 * Heuristic and deliberately conservative: the flat 3.2 chars/token ratio
 * undercounts CJK text by 2x or more, which delays compaction for Chinese
 * conversations. NOTE(review): tune per tokenizer if tighter estimates are needed.
 */
const CJK_TOKENS_PER_CHAR = 1;
/** CJK radicals through unified ideographs, kana, hangul, compatibility ideographs, full-width forms. */
const CJK_CHAR_PATTERN = /[\u2E80-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF\uFF00-\uFFEF]/g;

/**
 * Estimates the token count of a string without running a tokenizer.
 *
 * Non-CJK characters are charged at `1 / CHARS_PER_TOKEN` tokens each, so
 * pure-ASCII input yields `ceil(length / 3.2)`. CJK / full-width characters
 * are charged at `CJK_TOKENS_PER_CHAR` each.
 *
 * @param s - Text to measure; empty or falsy input counts as 0 tokens.
 * @returns The estimated token count, rounded up.
 */
function estimateStringTokens(s: string): number {
  if (!s) return 0;
  const wideChars = s.match(CJK_CHAR_PATTERN)?.length ?? 0;
  const narrowChars = s.length - wideChars;
  return Math.ceil(narrowChars / CHARS_PER_TOKEN + wideChars * CJK_TOKENS_PER_CHAR);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Estimates the token cost of a single message.
 *
 * Sums the fixed per-message overhead, the estimated tokens of `content`,
 * the name and arguments of each tool call plus 10 tokens per call, and
 * 85 tokens per attached image.
 *
 * @param msg - Message to measure.
 * @returns The estimated token count.
 */
export function estimateMessageTokens(msg: StandardMessage): number {
  const contentTokens = estimateStringTokens(msg.content);

  // Each tool call: name + serialized arguments + a flat 10-token allowance.
  let toolCallTokens = 0;
  for (const call of msg.toolCalls ?? []) {
    toolCallTokens += estimateStringTokens(call.name) + estimateStringTokens(call.arguments) + 10;
  }

  // Images are charged a flat 85 tokens each.
  const imageTokens = msg.images ? msg.images.length * 85 : 0;

  return MESSAGE_OVERHEAD_TOKENS + contentTokens + toolCallTokens + imageTokens;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Estimates the token cost of a whole message list.
 *
 * @param messages - Messages to measure.
 * @returns The sum of `estimateMessageTokens` over all messages, plus a fixed base of 3.
 */
export function estimateTotalTokens(messages: StandardMessage[]): number {
  // 3 is a fixed base added once per request — presumably reply priming; confirm.
  return messages.reduce((sum, message) => sum + estimateMessageTokens(message), 3);
}
|
|
52
|
+
|
|
21
53
|
// ==================== 配置 ====================
|
|
22
54
|
|
|
23
|
-
/** 压缩配置 */
|
|
24
55
|
/** Settings that drive context-compaction decisions. */
export interface CompactConfig {
  /** Context window size, in tokens. */
  contextWindowTokens: number;
  /** Maximum output tokens. */
  maxOutputTokens: number;
  /** Usage ratio that triggers compaction; defaults to 0.80. */
  compactThresholdRatio?: number;
  /** Number of most recent messages kept after compaction; defaults to 6. */
  keepRecentMessages?: number;
}
|
|
44
63
|
|
|
45
|
-
const
|
|
46
|
-
const DEFAULT_KEEP_RECENT =
|
|
64
|
+
const DEFAULT_THRESHOLD_RATIO = 0.80;
|
|
65
|
+
const DEFAULT_KEEP_RECENT = 6;
|
|
47
66
|
|
|
48
67
|
// ==================== 核心函数 ====================
|
|
49
68
|
|
|
69
|
+
/**
 * Computes the available prompt token budget.
 *
 * The budget is `floor(contextWindowTokens * ratio) - maxOutputTokens`, where
 * `ratio` is `compactThresholdRatio` or the default when that is null/undefined.
 * The result is not clamped, so it is zero or negative whenever
 * `maxOutputTokens` is at least the scaled window.
 *
 * @param config - Compaction settings.
 * @returns The prompt token budget.
 */
export function getPromptBudget(config: CompactConfig): number {
  const { contextWindowTokens, maxOutputTokens, compactThresholdRatio } = config;
  const usableWindow = Math.floor(
    contextWindowTokens * (compactThresholdRatio ?? DEFAULT_THRESHOLD_RATIO),
  );
  return usableWindow - maxOutputTokens;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Decides whether the message list should be compacted.
 *
 * @param messages - Current conversation messages.
 * @param config - Compaction settings.
 * @returns True when the estimated total tokens strictly exceed the prompt budget.
 */
export function needsCompaction(messages: StandardMessage[], config: CompactConfig): boolean {
  const used = estimateTotalTokens(messages);
  const allowed = getPromptBudget(config);
  return used > allowed;
}
|
|
79
|
+
|
|
50
80
|
/**
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
* @param messages - 消息数组(会被原地修改)
|
|
56
|
-
* @param config - 压缩配置(可选)
|
|
57
|
-
* @returns 是否执行了压缩
|
|
81
|
+
* 构建发给 AI 的总结请求
|
|
82
|
+
*
|
|
83
|
+
* 返回一组消息,发给当前模型让它总结对话历史。
|
|
84
|
+
* 总结完成后调用 applySummary 组装新的消息列表。
|
|
58
85
|
*/
|
|
59
|
-
export function
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
if (totalChars < charThreshold) {
|
|
67
|
-
return false;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
logger.info(`Context 压缩触发: ${totalChars} 字符, ${messages.length} 条消息`);
|
|
71
|
-
|
|
72
|
-
// ---- 找到各段边界 ----
|
|
73
|
-
// [0..systemEnd): system prompt(可能没有)
|
|
74
|
-
// [systemEnd..firstUserEnd): 第一条 user 消息
|
|
75
|
-
// [firstUserEnd..recentStart): 中间的 assistant/tool 交互(压缩目标)
|
|
76
|
-
// [recentStart..end): 最近 N 条消息(保留)
|
|
77
|
-
|
|
86
|
+
export function buildSummarizeRequest(
|
|
87
|
+
messages: StandardMessage[],
|
|
88
|
+
config: CompactConfig,
|
|
89
|
+
): { summarizeMessages: StandardMessage[]; keepMessages: StandardMessage[] } {
|
|
90
|
+
const keepRecent = config.keepRecentMessages ?? DEFAULT_KEEP_RECENT;
|
|
91
|
+
|
|
92
|
+
// 找边界
|
|
78
93
|
let systemEnd = 0;
|
|
79
|
-
if (messages[0]?.role === 'system')
|
|
80
|
-
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
// 找到第一条 user 消息
|
|
94
|
+
if (messages[0]?.role === 'system') systemEnd = 1;
|
|
95
|
+
|
|
84
96
|
let firstUserEnd = systemEnd;
|
|
85
97
|
for (let i = systemEnd; i < messages.length; i++) {
|
|
86
98
|
if (messages[i].role === 'user') {
|
|
@@ -88,62 +100,118 @@ export function compactMessages(messages: StandardMessage[], config?: CompactCon
|
|
|
88
100
|
break;
|
|
89
101
|
}
|
|
90
102
|
}
|
|
91
|
-
|
|
103
|
+
|
|
104
|
+
// 要保留的最近消息
|
|
92
105
|
const recentStart = Math.max(firstUserEnd, messages.length - keepRecent);
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
return false;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
// ---- 生成摘要 ----
|
|
106
|
+
const keepMessages = messages.slice(recentStart);
|
|
107
|
+
|
|
108
|
+
// 要被总结的中间历史
|
|
100
109
|
const middleMessages = messages.slice(firstUserEnd, recentStart);
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
110
|
+
|
|
111
|
+
if (middleMessages.length < 2) {
|
|
112
|
+
// 中间太短,没什么可总结的
|
|
113
|
+
return { summarizeMessages: [], keepMessages: messages.slice(systemEnd) };
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const estimatedTokens = estimateTotalTokens(messages);
|
|
117
|
+
const budget = getPromptBudget(config);
|
|
118
|
+
logger.info(`准备 AI 总结: ~${estimatedTokens} tokens > budget ${budget}, 总结 ${middleMessages.length} 条中间消息, 保留最近 ${keepMessages.length} 条`);
|
|
119
|
+
|
|
120
|
+
// 构建总结请求:把中间历史交给模型
|
|
121
|
+
const summarizeMessages: StandardMessage[] = [
|
|
122
|
+
{
|
|
123
|
+
role: 'system',
|
|
124
|
+
content: SUMMARIZE_SYSTEM_PROMPT,
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
role: 'user',
|
|
128
|
+
content: formatMessagesForSummary(middleMessages),
|
|
129
|
+
},
|
|
108
130
|
];
|
|
109
|
-
|
|
110
|
-
const compressedChars = compressed.reduce((s, m) => s + (m.content?.length ?? 0), 0);
|
|
111
|
-
logger.info(`Context 压缩完成: ${messages.length} → ${compressed.length} 条消息, ${totalChars} → ${compressedChars} 字符`);
|
|
112
|
-
|
|
113
|
-
// 原地替换
|
|
114
|
-
messages.length = 0;
|
|
115
|
-
messages.push(...compressed);
|
|
116
|
-
|
|
117
|
-
return true;
|
|
118
|
-
}
|
|
119
131
|
|
|
120
|
-
|
|
132
|
+
return { summarizeMessages, keepMessages };
|
|
133
|
+
}
|
|
121
134
|
|
|
122
135
|
/**
 * Assembles a new message list from an AI-generated summary.
 *
 * The result is, in order: the original leading system message (if the first
 * message has role `system`), the first `user` message found after it, a
 * `system` message carrying the summary, then every entry of `keepMessages`.
 *
 * @param originalMessages - Message list before compaction; not modified.
 * @param summary - Summary text returned by the model.
 * @param keepMessages - Recent messages to append unchanged.
 * @returns The newly built message list.
 */
export function applySummary(
  originalMessages: StandardMessage[],
  summary: string,
  keepMessages: StandardMessage[],
): StandardMessage[] {
  const head = originalMessages[0];
  const systemPrompt = head?.role === 'system' ? head : null;

  // First user message, searched after the system prompt when there is one.
  const firstUser = originalMessages
    .slice(systemPrompt ? 1 : 0)
    .find((message) => message.role === 'user');

  const rebuilt: StandardMessage[] = [];
  if (systemPrompt) rebuilt.push(systemPrompt);
  if (firstUser) rebuilt.push(firstUser);

  // The AI-generated summary goes in as a system message.
  rebuilt.push({
    role: 'system',
    content: `[对话历史摘要]\n${summary}`,
  });

  // Recent messages follow unchanged.
  rebuilt.push(...keepMessages);

  const tokenEstimate = estimateTotalTokens(rebuilt);
  logger.info(`AI 总结应用完成: ${originalMessages.length} → ${rebuilt.length} 条消息, ~${tokenEstimate} tokens`);

  return rebuilt;
}
|
|
174
|
+
|
|
175
|
+
// ==================== 内部 ====================
|
|
176
|
+
|
|
177
|
+
/**
 * System prompt used for the summarization request. It tells the model to
 * compress the conversation history as a concise bullet list while keeping
 * file changes, key decisions, errors and fixes, and explicit user requirements.
 */
const SUMMARIZE_SYSTEM_PROMPT = [
  '你是一个对话历史压缩助手。请总结以下对话历史,保留所有关键信息:',
  '',
  '要求:',
  '1. 保留所有文件修改记录(哪些文件被创建/修改/删除了,具体改了什么)',
  '2. 保留所有关键决策和结论',
  '3. 保留错误信息和解决方案',
  '4. 保留用户的明确要求和偏好',
  '5. 用简洁的条目列表格式输出',
  '6. 不要遗漏任何可能影响后续工作的信息',
  '',
  '直接输出摘要,不要开头说"以下是摘要"之类的话。',
].join('\n');
|
|
188
|
+
|
|
189
|
+
/**
 * Formats a message list as readable text for the summarization request.
 *
 * Each message becomes a `[label]` header followed by its text; entries are
 * joined with a `---` separator. Tool-call arguments are cut to 200 characters
 * and tool results to 1000 characters. Non-tool messages with neither content
 * nor tool calls are skipped.
 *
 * `system` messages get their own label instead of the tool label, so a
 * summary inserted by an earlier compaction is not presented to the model as
 * tool output. A tool message whose content is missing is treated as empty.
 *
 * @param messages - Messages to format.
 * @returns The formatted text, or an empty string when nothing is emitted.
 */
function formatMessagesForSummary(messages: StandardMessage[]): string {
  const labelFor = (role: string): string => {
    switch (role) {
      case 'assistant':
        return 'AI';
      case 'user':
        return '用户';
      case 'system':
        return '系统';
      default:
        return '工具';
    }
  };

  const parts: string[] = [];

  for (const msg of messages) {
    const role = labelFor(msg.role);

    if (msg.role === 'assistant' && msg.toolCalls && msg.toolCalls.length > 0) {
      const calls = msg.toolCalls.map(tc => {
        const argsPreview = tc.arguments.length > 200
          ? tc.arguments.slice(0, 200) + '...'
          : tc.arguments;
        return ` 调用 ${tc.name}(${argsPreview})`;
      }).join('\n');
      const text = msg.content ? `${msg.content}\n${calls}` : calls;
      parts.push(`[${role}]\n${text}`);
    } else if (msg.role === 'tool') {
      // Keep the first 1000 characters of a tool result; guard against missing content.
      const raw = msg.content ?? '';
      const content = raw.length > 1000
        ? raw.slice(0, 1000) + `... (共 ${raw.length} 字符)`
        : raw;
      parts.push(`[${role}: ${msg.toolName ?? 'unknown'}]\n${content}`);
    } else if (msg.content) {
      parts.push(`[${role}]\n${msg.content}`);
    }
  }

  return parts.join('\n\n---\n\n');
}
|
|
@@ -102,7 +102,8 @@ export const DOUBAO_FAMILY: ModelFamilyConfig = {
|
|
|
102
102
|
supportsNativeSearch: false,
|
|
103
103
|
searchStrategy: 'tavily',
|
|
104
104
|
toolCallFormat: 'responses',
|
|
105
|
-
|
|
105
|
+
// Seed 1.6 最大输出 16K(默认 4K,需手动设置)
|
|
106
|
+
defaultMaxTokens: 16384,
|
|
106
107
|
};
|
|
107
108
|
|
|
108
109
|
/** DeepSeek 家族(联网搜索统一走 web_search_ai/Tavily,与其它模型事件与数据格式一致) */
|
|
@@ -164,12 +165,13 @@ export const CLAUDE_FAMILY: ModelFamilyConfig = {
|
|
|
164
165
|
id: 'claude',
|
|
165
166
|
displayName: 'Claude',
|
|
166
167
|
supportsVision: true,
|
|
167
|
-
supportsThinking: true,
|
|
168
|
+
supportsThinking: true,
|
|
168
169
|
thinkingFormat: 'reasoning',
|
|
169
170
|
supportsNativeSearch: false,
|
|
170
171
|
searchStrategy: 'tavily',
|
|
171
172
|
toolCallFormat: 'openai',
|
|
172
|
-
|
|
173
|
+
// Bedrock fallback 限制 64K output,取 64000 保证各 provider 兼容
|
|
174
|
+
defaultMaxTokens: 64000,
|
|
173
175
|
};
|
|
174
176
|
|
|
175
177
|
/** 家族配置映射 */
|
|
@@ -200,8 +202,10 @@ export interface ModelRegistryEntry {
|
|
|
200
202
|
visible?: boolean;
|
|
201
203
|
/** 是否支持图片理解(优先级高于 family.supportsVision) */
|
|
202
204
|
supportsVision?: boolean;
|
|
203
|
-
/** 上下文窗口大小(如 "256K"
|
|
205
|
+
/** 上下文窗口大小(如 "256K"),用于展示 */
|
|
204
206
|
contextWindow?: string;
|
|
207
|
+
/** 上下文窗口精确 token 数(用于压缩器计算) */
|
|
208
|
+
contextWindowTokens?: number;
|
|
205
209
|
/** 价格信息(数组,分行显示) */
|
|
206
210
|
pricing?: string[];
|
|
207
211
|
}
|
|
@@ -215,25 +219,25 @@ export interface ModelRegistryEntry {
|
|
|
215
219
|
*/
|
|
216
220
|
export const MODEL_REGISTRY: ModelRegistryEntry[] = [
|
|
217
221
|
// 豆包(价格为输入<=32k档,输出价格取决于输出长度)
|
|
218
|
-
{ id: 'doubao-seed-1-6-250615', displayName: '豆包 Seed 1.6', family: 'doubao', protocol: 'ark', visible: true, supportsVision: true, contextWindow: '256K', pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
|
|
219
|
-
{ id: 'doubao-seed-1-8-251215', displayName: '豆包 Seed 1.8', family: 'doubao', protocol: 'ark', contextWindow: '256K', pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
|
|
222
|
+
{ id: 'doubao-seed-1-6-250615', displayName: '豆包 Seed 1.6', family: 'doubao', protocol: 'ark', visible: true, supportsVision: true, contextWindow: '256K', contextWindowTokens: 256_000, pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
|
|
223
|
+
{ id: 'doubao-seed-1-8-251215', displayName: '豆包 Seed 1.8', family: 'doubao', protocol: 'ark', contextWindow: '256K', contextWindowTokens: 256_000, pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
|
|
220
224
|
|
|
221
225
|
// DeepSeek(价格为输入<=32k档)
|
|
222
|
-
{ id: 'deepseek-v3-2-251201', displayName: 'DeepSeek V3.2', family: 'deepseek', protocol: 'deepseek', visible: true, supportsVision: false, contextWindow: '128K', pricing: ['输入 2 元/百万tokens', '输出 3 元/百万tokens'] },
|
|
226
|
+
{ id: 'deepseek-v3-2-251201', displayName: 'DeepSeek V3.2', family: 'deepseek', protocol: 'deepseek', visible: true, supportsVision: false, contextWindow: '128K', contextWindowTokens: 128_000, pricing: ['输入 2 元/百万tokens', '输出 3 元/百万tokens'] },
|
|
223
227
|
|
|
224
|
-
//
|
|
225
|
-
{ id: 'qwen3-vl-plus', displayName: '通义千问 3 VL', family: 'qwen', protocol: 'qwen', visible: true, supportsVision: true, contextWindow: '
|
|
228
|
+
// 通义千问(官方 2025.09 确认 262K context / 32K output)
|
|
229
|
+
{ id: 'qwen3-vl-plus', displayName: '通义千问 3 VL', family: 'qwen', protocol: 'qwen', visible: true, supportsVision: true, contextWindow: '262K', contextWindowTokens: 262_144, pricing: ['输入 1 元/百万tokens', '输出 10 元/百万tokens'] },
|
|
226
230
|
|
|
227
231
|
// Gemini
|
|
228
|
-
{ id: 'gemini-3-pro-preview', displayName: 'Gemini 3 Pro', family: 'gemini', protocol: 'gemini', visible: true, supportsVision: true, contextWindow: '1M', pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
|
|
229
|
-
{ id: 'gemini-2.5-flash-preview-05-20', displayName: 'Gemini 2.5 Flash', family: 'gemini', protocol: 'gemini', contextWindow: '1M', pricing: ['输入 0.15 元/百万tokens', '输出 0.6 元/百万tokens'] },
|
|
230
|
-
{ id: 'gemini-2.5-pro-preview-05-06', displayName: 'Gemini 2.5 Pro', family: 'gemini', protocol: 'gemini', contextWindow: '1M', pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
|
|
232
|
+
{ id: 'gemini-3-pro-preview', displayName: 'Gemini 3 Pro', family: 'gemini', protocol: 'gemini', visible: true, supportsVision: true, contextWindow: '1M', contextWindowTokens: 1_000_000, pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
|
|
233
|
+
{ id: 'gemini-2.5-flash-preview-05-20', displayName: 'Gemini 2.5 Flash', family: 'gemini', protocol: 'gemini', contextWindow: '1M', contextWindowTokens: 1_000_000, pricing: ['输入 0.15 元/百万tokens', '输出 0.6 元/百万tokens'] },
|
|
234
|
+
{ id: 'gemini-2.5-pro-preview-05-06', displayName: 'Gemini 2.5 Pro', family: 'gemini', protocol: 'gemini', contextWindow: '1M', contextWindowTokens: 1_000_000, pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
|
|
231
235
|
|
|
232
236
|
// GPT(OpenRouter,美元价格按约7.2汇率换算)
|
|
233
|
-
{ id: 'openai/gpt-5.2', displayName: 'GPT-5.2', family: 'gpt', protocol: 'openai', visible: true, supportsVision: true, contextWindow: '400K', pricing: ['输入 12.6 元/百万tokens', '输出 100.8 元/百万tokens'] },
|
|
237
|
+
{ id: 'openai/gpt-5.2', displayName: 'GPT-5.2', family: 'gpt', protocol: 'openai', visible: true, supportsVision: true, contextWindow: '400K', contextWindowTokens: 400_000, pricing: ['输入 12.6 元/百万tokens', '输出 100.8 元/百万tokens'] },
|
|
234
238
|
|
|
235
239
|
// Claude(Vercel AI SDK,美元价格按约7.2汇率换算)
|
|
236
|
-
{ id: 'anthropic/claude-opus-4.5', displayName: 'Claude Opus 4.5', family: 'claude', protocol: 'anthropic', visible: true, supportsVision: true, contextWindow: '200K', pricing: ['输入 36 元/百万tokens', '输出 180 元/百万tokens'] },
|
|
240
|
+
{ id: 'anthropic/claude-opus-4.5', displayName: 'Claude Opus 4.5', family: 'claude', protocol: 'anthropic', visible: true, supportsVision: true, contextWindow: '200K', contextWindowTokens: 200_000, pricing: ['输入 36 元/百万tokens', '输出 180 元/百万tokens'] },
|
|
237
241
|
];
|
|
238
242
|
|
|
239
243
|
// ==================== 查询辅助函数 ====================
|
|
@@ -318,3 +322,143 @@ export function getModelSearchStrategy(modelId: string): SearchStrategy {
|
|
|
318
322
|
return family?.searchStrategy ?? 'tavily';
|
|
319
323
|
}
|
|
320
324
|
|
|
325
|
+
// ==================== Context 配置 ====================
|
|
326
|
+
|
|
327
|
+
/** Context sizing for one model, as returned by getModelContextConfig. */
export interface ModelContextConfig {
  /** Context window size, in tokens. */
  contextWindowTokens: number;
  /** Maximum output tokens. */
  maxOutputTokens: number;
}
|
|
331
|
+
|
|
332
|
+
/**
 * Returns a model's context configuration, for use by the compressor.
 *
 * The model must be registered, its family must be defined, and both
 * `contextWindowTokens` (registry entry) and `defaultMaxTokens` (family) must
 * be set to a truthy value.
 *
 * @param modelId - Model identifier looked up via `getModelEntry`.
 * @returns The context window from the entry and the max output tokens from its family.
 * @throws Error when any of the requirements above is not met.
 */
export function getModelContextConfig(modelId: string): ModelContextConfig {
  const entry = getModelEntry(modelId);
  if (!entry) {
    throw new Error(`模型 ${modelId} 未在 MODEL_REGISTRY 中注册`);
  }

  const family = MODEL_FAMILIES[entry.family];
  if (!family) {
    throw new Error(`模型 ${modelId} 的家族 ${entry.family} 未定义`);
  }

  const { contextWindowTokens } = entry;
  if (!contextWindowTokens) {
    throw new Error(`模型 ${modelId} 缺少 contextWindowTokens 配置`);
  }

  const { defaultMaxTokens: maxOutputTokens } = family;
  if (!maxOutputTokens) {
    throw new Error(`模型家族 ${entry.family} 缺少 defaultMaxTokens 配置`);
  }

  return { contextWindowTokens, maxOutputTokens };
}
|
|
356
|
+
|
|
357
|
+
/**
 * Parses a display string such as "256K" or "1M" into a token count.
 *
 * Uses decimal multipliers (K = 1_000, M = 1_000_000). Surrounding whitespace
 * and the unit's case are ignored. The numeric part must be a single
 * well-formed decimal, so inputs like "1.2.3K" are rejected rather than
 * silently read as 1.2K. The result is rounded to an integer to remove
 * floating-point residue (1.005 * 1000 is 1004.9999999999999).
 *
 * @param s - Context window label.
 * @returns The token count, or undefined when the string does not match.
 */
function parseContextWindowString(s: string): number | undefined {
  const match = /^(\d+\.?\d*|\.\d+)\s*([KM])$/i.exec(s.trim());
  if (!match) return undefined;
  const multiplier = match[2].toUpperCase() === 'M' ? 1_000_000 : 1_000;
  return Math.round(parseFloat(match[1]) * multiplier);
}
|
|
365
|
+
|
|
366
|
+
// ==================== 配置校验 ====================
|
|
367
|
+
|
|
368
|
+
/** One finding reported by validateModelConfigs. */
export interface ConfigValidationError {
  /** Id of the registry entry the finding applies to. */
  modelId: string;
  /** Name of the configuration field concerned. */
  field: string;
  /** Human-readable description of the problem. */
  message: string;
  /** Severity of the finding. */
  severity: 'error' | 'warning';
}
|
|
374
|
+
|
|
375
|
+
/**
 * Validates every entry of MODEL_REGISTRY against its family configuration.
 *
 * Checks performed per entry:
 * 1. The family exists (otherwise the remaining checks are skipped).
 * 2. `contextWindowTokens` is declared. Reported as an error, because
 *    `getModelContextConfig` throws without it and there is no fallback.
 * 3. The `contextWindow` label agrees with `contextWindowTokens`. The label is
 *    a rounded display value, so it matches when the token count is within
 *    0.5% of either the decimal reading (K = 1000) or the binary reading
 *    (K = 1024). For example "262K" matches 262_144.
 * 4. `defaultMaxTokens`, when set, is greater than 0.
 * 5. `defaultMaxTokens` does not exceed `contextWindowTokens`.
 * 6. `defaultMaxTokens` does not exceed the known hard output limit of the
 *    entry's protocol.
 *
 * @returns All findings; an empty array when the configuration is clean.
 */
export function validateModelConfigs(): ConfigValidationError[] {
  const errors: ConfigValidationError[] = [];

  // Known hard maxOutputTokens limits per provider protocol.
  const providerOutputLimits: Record<string, number> = {
    anthropic: 64_000, // Bedrock fallback limit
    gemini: 65_536,
    ark: 16_384, // Doubao Seed series: 16K max output
    deepseek: 64_000, // V3.2 reasoner: 64K max
    qwen: 32_768,
  };

  for (const entry of MODEL_REGISTRY) {
    const family = MODEL_FAMILIES[entry.family];
    if (!family) {
      errors.push({ modelId: entry.id, field: 'family', message: `未找到家族配置: ${entry.family}`, severity: 'error' });
      continue;
    }

    // contextWindowTokens is required: getModelContextConfig throws without it.
    if (!entry.contextWindowTokens) {
      errors.push({
        modelId: entry.id,
        field: 'contextWindowTokens',
        message: '缺少 contextWindowTokens,getModelContextConfig 将抛错,压缩器无法工作',
        severity: 'error',
      });
    }

    // The display label must agree with the numeric value, allowing for rounding.
    if (entry.contextWindow && entry.contextWindowTokens) {
      const parsed = parseContextWindowString(entry.contextWindow);
      if (parsed) {
        const tokens = entry.contextWindowTokens;
        const isMega = /m\s*$/i.test(entry.contextWindow);
        // Same label read with binary units (K = 1024, M = 1024 * 1024).
        const binaryReading = isMega
          ? (parsed / 1_000_000) * 1_048_576
          : (parsed / 1_000) * 1_024;
        const tolerance = tokens * 0.005;
        const consistent = [parsed, binaryReading].some(
          (expected) => Math.abs(expected - tokens) <= tolerance,
        );
        if (!consistent) {
          errors.push({
            modelId: entry.id,
            field: 'contextWindow',
            message: `contextWindow "${entry.contextWindow}" (${parsed}) 与 contextWindowTokens (${entry.contextWindowTokens}) 不一致`,
            severity: 'error',
          });
        }
      }
    }

    // defaultMaxTokens checks apply only when the family declares a value.
    const maxTokens = family.defaultMaxTokens;
    if (maxTokens !== undefined) {
      if (maxTokens <= 0) {
        errors.push({
          modelId: entry.id,
          field: 'defaultMaxTokens',
          message: `defaultMaxTokens (${maxTokens}) 必须大于 0`,
          severity: 'error',
        });
      }

      // Output limit may not exceed the context window.
      if (entry.contextWindowTokens && maxTokens > entry.contextWindowTokens) {
        errors.push({
          modelId: entry.id,
          field: 'defaultMaxTokens',
          message: `defaultMaxTokens (${maxTokens}) 超过 contextWindow (${entry.contextWindowTokens})`,
          severity: 'error',
        });
      }

      // Output limit may not exceed the provider's hard limit, when one is known.
      const providerLimit = providerOutputLimits[entry.protocol];
      if (providerLimit && maxTokens > providerLimit) {
        errors.push({
          modelId: entry.id,
          field: 'defaultMaxTokens',
          message: `defaultMaxTokens (${maxTokens}) 超过 ${entry.protocol} 的硬限制 (${providerLimit})`,
          severity: 'error',
        });
      }
    }
  }

  return errors;
}
|
|
464
|
+
|