npm - @huyooo/ai-chat-core - Versions diffs - 0.2.41 → 0.2.42 - Mend

@huyooo/ai-chat-core 0.2.41 → 0.2.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/{events-GWl1-vbT.d.ts → events-7V2drqe4.d.ts} +47 -3
package/dist/events.d.ts +1 -1
package/dist/events.js +1 -1
package/dist/index.d.ts +15 -2
package/dist/index.js +1 -1
package/package.json +1 -1
package/src/events.ts +78 -0
package/src/providers/context-compressor.ts +178 -110
package/src/providers/model-registry.ts +158 -14
package/src/providers/orchestrator.ts +126 -7
package/src/providers/protocols/ark.ts +1 -1

package/src/events.ts CHANGED Viewed

@@ -369,6 +369,43 @@ export interface StepEndEvent {
 /** 步骤相关事件联合类型 */
 export type StepEvent = StepStartEvent | StepEndEvent;
+// ==================== Context compaction events ====================
+/** Event emitted when context compaction starts. */
+export interface CompactStartEvent {
+  type: 'compact_start';
+  data: {
+    /** Estimated token count before compaction. */
+    estimatedTokens: number;
+    /** Available prompt token budget. */
+    budget: number;
+    /** Start timestamp (createCompactStart fills this with Date.now(), i.e. epoch milliseconds). */
+    startedAt: number;
+  };
+}
+/** Event emitted when context compaction finishes. */
+export interface CompactEndEvent {
+  type: 'compact_end';
+  data: {
+    /** Whether compaction succeeded. */
+    success: boolean;
+    /** Estimated token count after compaction. */
+    compressedTokens: number;
+    /** Number of messages before compaction. */
+    originalMessageCount: number;
+    /** Number of messages after compaction. */
+    compressedMessageCount: number;
+    /** End timestamp (createCompactEnd fills this with Date.now()). */
+    endedAt: number;
+    /** Elapsed time in milliseconds. */
+    duration: number;
+  };
+}
+/** Union of the context compaction events. */
+export type CompactEvent = CompactStartEvent | CompactEndEvent;
 // ==================== Agent 状态事件 ====================
 /**
@@ -403,6 +440,7 @@ export type ChatEvent =
   | TextEvent
   | StatusEvent
   | StepEvent
+  | CompactEvent
   | AgentStatusEvent;
 /** 事件类型字符串 */
@@ -416,6 +454,7 @@ export const CHAT_EVENT_TYPES: readonly ChatEventType[] = [
   'text_delta',
   'done', 'error', 'abort',
   'step_start', 'step_end',
+  'compact_start', 'compact_end',
   'agent_status',
 ] as const;
@@ -743,6 +782,40 @@ export function createStepEnd(stepNumber: number, startedAt: number): StepEndEve
   };
 }
+/**
+ * Create a context-compaction start event.
+ *
+ * @param estimatedTokens - Estimated token count before compaction.
+ * @param budget - Available prompt token budget.
+ * @returns A `compact_start` event whose `startedAt` is the current `Date.now()` value.
+ */
+export function createCompactStart(estimatedTokens: number, budget: number): CompactStartEvent {
+  return {
+    type: 'compact_start',
+    data: { estimatedTokens, budget, startedAt: Date.now() },
+  };
+}
+/**
+ * Create a context-compaction end event.
+ *
+ * @param success - Whether compaction succeeded.
+ * @param compressedTokens - Estimated token count after compaction.
+ * @param originalMessageCount - Number of messages before compaction.
+ * @param compressedMessageCount - Number of messages after compaction.
+ * @param startedAt - Start timestamp; only used to derive `duration`, it is not stored on the event.
+ * @returns A `compact_end` event with `endedAt = Date.now()` and `duration = endedAt - startedAt`.
+ */
+export function createCompactEnd(
+  success: boolean,
+  compressedTokens: number,
+  originalMessageCount: number,
+  compressedMessageCount: number,
+  startedAt: number,
+): CompactEndEvent {
+  const endedAt = Date.now();
+  return {
+    type: 'compact_end',
+    data: {
+      success,
+      compressedTokens,
+      originalMessageCount,
+      compressedMessageCount,
+      endedAt,
+      duration: endedAt - startedAt,
+    },
+  };
+}
 // ==================== 类型守卫 ====================
 /** 检查是否为思考事件 */
@@ -785,6 +858,11 @@ export function isStepEvent(event: ChatEvent): event is StepEvent {
   return event.type.startsWith('step_');
 }
+/**
+ * Check whether an event is a context compaction event.
+ *
+ * Matches on the `compact_` type prefix rather than on an exact list of event types.
+ */
+export function isCompactEvent(event: ChatEvent): event is CompactEvent {
+  return event.type.startsWith('compact_');
+}
 /** 检查错误是否可重试 */
 export function isRetryableError(event: ChatEvent): boolean {
   if (event.type !== 'error') return false;

package/src/providers/context-compressor.ts CHANGED Viewed

@@ -1,16 +1,11 @@
 /**
  * Context 压缩模块
- *
- * 当消息历史过长时自动压缩，避免超出模型 context window。
- *
- * 策略参考 Claude Code（92% context window 触发摘要）和
- * OpenAI Codex（auto_compact_limit 触发压缩）。
- *
- * 压缩算法：
- * 1. 保留 system prompt
- * 2. 保留第一条 user 消息（任务描述）
- * 3. 将中间的 assistant/tool 交互压缩为一条摘要
- * 4. 保留最近 N 条消息（工作上下文）
+ *
+ * 当 prompt token 估算接近模型 context window 时，让当前模型自己总结对话历史，
+ * 然后用 summary + 最近几条消息继续对话。
+ *
+ * 参考 Claude Code / Cursor 的做法：
+ * 不机械截断，而是让 AI 生成高质量摘要，保留关键决策和上下文。
  */
 import type { StandardMessage } from './types';
@@ -18,69 +13,86 @@ import { DebugLogger } from '../utils';
 const logger = DebugLogger.module('ContextCompressor');
+// ==================== Token estimation ====================
+// Heuristic divisor: estimated tokens = ceil(string length / CHARS_PER_TOKEN).
+// NOTE(review): the basis for 3.2 is not shown here; CJK-heavy text may tokenize denser — confirm.
+const CHARS_PER_TOKEN = 3.2;
+// Flat per-message overhead that estimateMessageTokens starts from.
+const MESSAGE_OVERHEAD_TOKENS = 4;
+/**
+ * Estimate the token count of a string from its length.
+ *
+ * Returns 0 for any falsy input (including the empty string); otherwise
+ * `ceil(s.length / CHARS_PER_TOKEN)`, where `s.length` counts UTF-16 code units.
+ */
+function estimateStringTokens(s: string): number {
+  if (!s) return 0;
+  return Math.ceil(s.length / CHARS_PER_TOKEN);
+}
+/**
+ * Estimate the token count of a single message.
+ *
+ * Sums the fixed per-message overhead, the estimated tokens of `content`,
+ * and, for each tool call, the estimated tokens of its name and arguments plus a flat 10.
+ * Each image adds a flat 85.
+ * NOTE(review): the 10 and 85 constants look like per-tool-call / per-image allowances — confirm their source.
+ *
+ * @param msg - Message to estimate.
+ * @returns Estimated token count (always at least MESSAGE_OVERHEAD_TOKENS).
+ */
+export function estimateMessageTokens(msg: StandardMessage): number {
+  let tokens = MESSAGE_OVERHEAD_TOKENS;
+  tokens += estimateStringTokens(msg.content);
+  if (msg.toolCalls) {
+    for (const tc of msg.toolCalls) {
+      tokens += estimateStringTokens(tc.name);
+      tokens += estimateStringTokens(tc.arguments);
+      tokens += 10;
+    }
+  }
+  if (msg.images) {
+    tokens += msg.images.length * 85;
+  }
+  return tokens;
+}
+/**
+ * Estimate the total token count of a message list.
+ *
+ * Starts from a fixed base of 3 and adds estimateMessageTokens for every message,
+ * so an empty list yields 3.
+ */
+export function estimateTotalTokens(messages: StandardMessage[]): number {
+  let total = 3;
+  for (const msg of messages) {
+    total += estimateMessageTokens(msg);
+  }
+  return total;
+}
 // ==================== 配置 ====================
-/** 压缩配置 */
 export interface CompactConfig {
-  /**
-   * 字符数阈值，超过此值触发压缩
-   *
-   * 使用字符数而非 token 数（粗略估计：1 token ≈ 3-4 中文字符 / 4 英文字符）。
-   * 80K 字符 ≈ 20K-27K tokens，约为最小 context window (128K tokens) 的 15-20%。
-   *
-   * @default 80_000
-   */
-  charThreshold?: number;
-  /**
-   * 压缩后保留的最近消息数
-   *
-   * 保留最近的消息对（assistant + tool），确保模型有足够上下文继续工作。
-   *
-   * @default 10
-   */
+  contextWindowTokens: number;
+  maxOutputTokens: number;
+  /** 触发压缩的使用率，默认 0.80 */
+  compactThresholdRatio?: number;
+  /** 压缩后保留的最近消息数，默认 6 */
   keepRecentMessages?: number;
 }
-const DEFAULT_CHAR_THRESHOLD = 80_000;
-const DEFAULT_KEEP_RECENT = 10;
+// Default context-window usage ratio used when compactThresholdRatio is not set.
+const DEFAULT_THRESHOLD_RATIO = 0.80;
+// Default number of most recent messages kept when keepRecentMessages is not set.
+const DEFAULT_KEEP_RECENT = 6;
 // ==================== Core functions ====================
+/**
+ * Compute the available prompt token budget:
+ * `floor(contextWindowTokens * ratio) - maxOutputTokens`.
+ *
+ * NOTE(review): nothing clamps the result, so it is zero or negative whenever
+ * maxOutputTokens >= floor(contextWindowTokens * ratio); needsCompaction would then always be true.
+ */
+export function getPromptBudget(config: CompactConfig): number {
+  const ratio = config.compactThresholdRatio ?? DEFAULT_THRESHOLD_RATIO;
+  return Math.floor(config.contextWindowTokens * ratio) - config.maxOutputTokens;
+}
+/** Report whether compaction is needed: true when the estimated total tokens strictly exceed the prompt budget. */
+export function needsCompaction(messages: StandardMessage[], config: CompactConfig): boolean {
+  return estimateTotalTokens(messages) > getPromptBudget(config);
+}
 /**
- * 压缩消息历史
- *
- * 当消息总字符数超过阈值时，保留首尾、压缩中间部分。
- *
- * @param messages - 消息数组（会被原地修改）
- * @param config - 压缩配置（可选）
- * @returns 是否执行了压缩
+ * 构建发给 AI 的总结请求
+ *
+ * 返回一组消息，发给当前模型让它总结对话历史。
+ * 总结完成后调用 applySummary 组装新的消息列表。
  */
-export function compactMessages(messages: StandardMessage[], config?: CompactConfig): boolean {
-  const charThreshold = config?.charThreshold ?? DEFAULT_CHAR_THRESHOLD;
-  const keepRecent = config?.keepRecentMessages ?? DEFAULT_KEEP_RECENT;
-  // 计算总字符数
-  const totalChars = messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
-  if (totalChars < charThreshold) {
-    return false;
-  }
-  logger.info(`Context 压缩触发: ${totalChars} 字符, ${messages.length} 条消息`);
-  // ---- 找到各段边界 ----
-  // [0..systemEnd):       system prompt（可能没有）
-  // [systemEnd..firstUserEnd): 第一条 user 消息
-  // [firstUserEnd..recentStart): 中间的 assistant/tool 交互（压缩目标）
-  // [recentStart..end):   最近 N 条消息（保留）
+export function buildSummarizeRequest(
+  messages: StandardMessage[],
+  config: CompactConfig,
+): { summarizeMessages: StandardMessage[]; keepMessages: StandardMessage[] } {
+  const keepRecent = config.keepRecentMessages ?? DEFAULT_KEEP_RECENT;
+  // 找边界
   let systemEnd = 0;
-  if (messages[0]?.role === 'system') {
-    systemEnd = 1;
-  }
-  // 找到第一条 user 消息
+  if (messages[0]?.role === 'system') systemEnd = 1;
   let firstUserEnd = systemEnd;
   for (let i = systemEnd; i < messages.length; i++) {
     if (messages[i].role === 'user') {
@@ -88,62 +100,118 @@ export function compactMessages(messages: StandardMessage[], config?: CompactCon
       break;
     }
   }
+  // 要保留的最近消息
   const recentStart = Math.max(firstUserEnd, messages.length - keepRecent);
-  // 中间部分太短，不值得压缩
-  if (recentStart - firstUserEnd < 4) {
-    return false;
-  }
-  // ---- 生成摘要 ----
+  const keepMessages = messages.slice(recentStart);
+  // 要被总结的中间历史
   const middleMessages = messages.slice(firstUserEnd, recentStart);
-  const summary = buildSummary(middleMessages);
-  // ---- 组装压缩后的消息 ----
-  const compressed: StandardMessage[] = [
-    ...messages.slice(0, firstUserEnd),   // system + first user
-    { role: 'system', content: summary }, // 压缩摘要
-    ...messages.slice(recentStart),       // 最近 N 条
+  if (middleMessages.length < 2) {
+    // 中间太短，没什么可总结的
+    return { summarizeMessages: [], keepMessages: messages.slice(systemEnd) };
+  }
+  const estimatedTokens = estimateTotalTokens(messages);
+  const budget = getPromptBudget(config);
+  logger.info(`准备 AI 总结: ~${estimatedTokens} tokens > budget ${budget}, 总结 ${middleMessages.length} 条中间消息, 保留最近 ${keepMessages.length} 条`);
+  // 构建总结请求：把中间历史交给模型
+  const summarizeMessages: StandardMessage[] = [
+    {
+      role: 'system',
+      content: SUMMARIZE_SYSTEM_PROMPT,
+    },
+    {
+      role: 'user',
+      content: formatMessagesForSummary(middleMessages),
+    },
   ];
-  const compressedChars = compressed.reduce((s, m) => s + (m.content?.length ?? 0), 0);
-  logger.info(`Context 压缩完成: ${messages.length} → ${compressed.length} 条消息, ${totalChars} → ${compressedChars} 字符`);
-  // 原地替换
-  messages.length = 0;
-  messages.push(...compressed);
-  return true;
-}
-// ==================== 内部函数 ====================
+  return { summarizeMessages, keepMessages };
+}
 /**
- * 从中间消息中构建压缩摘要
+ * 用 AI 返回的摘要组装新的消息列表
  */
-function buildSummary(middleMessages: StandardMessage[]): string {
-  const toolCallNames: string[] = [];
-  let textPreview = '';
-  for (const msg of middleMessages) {
-    if (msg.role === 'assistant' && msg.toolCalls) {
-      for (const tc of msg.toolCalls) {
-        toolCallNames.push(tc.name);
-      }
+export function applySummary(
+  originalMessages: StandardMessage[],
+  summary: string,
+  keepMessages: StandardMessage[],
+): StandardMessage[] {
+  // 取原始的 system prompt
+  const systemPrompt = originalMessages[0]?.role === 'system' ? originalMessages[0] : null;
+  // 取第一条 user 消息
+  const startIdx = systemPrompt ? 1 : 0;
+  let firstUser: StandardMessage | null = null;
+  for (let i = startIdx; i < originalMessages.length; i++) {
+    if (originalMessages[i].role === 'user') {
+      firstUser = originalMessages[i];
+      break;
     }
-    if (msg.role === 'assistant' && msg.content) {
-      textPreview += msg.content.slice(0, 200) + '\n';
+  }
+  const result: StandardMessage[] = [];
+  if (systemPrompt) result.push(systemPrompt);
+  if (firstUser) result.push(firstUser);
+  // 插入 AI 生成的摘要
+  result.push({
+    role: 'system',
+    content: `[对话历史摘要]\n${summary}`,
+  });
+  // 拼上最近保留的消息
+  result.push(...keepMessages);
+  const tokens = estimateTotalTokens(result);
+  logger.info(`AI 总结应用完成: ${originalMessages.length} → ${result.length} 条消息, ~${tokens} tokens`);
+  return result;
+}
+// ==================== Internal ====================
+// System prompt (in Chinese) used as the first message of the summarize request built by buildSummarizeRequest.
+const SUMMARIZE_SYSTEM_PROMPT = `你是一个对话历史压缩助手。请总结以下对话历史，保留所有关键信息：
+要求：
+1. 保留所有文件修改记录（哪些文件被创建/修改/删除了，具体改了什么）
+2. 保留所有关键决策和结论
+3. 保留错误信息和解决方案
+4. 保留用户的明确要求和偏好
+5. 用简洁的条目列表格式输出
+6. 不要遗漏任何可能影响后续工作的信息
+直接输出摘要，不要开头说"以下是摘要"之类的话。`;
+/** 把消息列表格式化为可读文本，供总结用 */
+function formatMessagesForSummary(messages: StandardMessage[]): string {
+  const parts: string[] = [];
+  for (const msg of messages) {
+    const role = msg.role === 'assistant' ? 'AI' : msg.role === 'user' ? '用户' : '工具';
+    if (msg.role === 'assistant' && msg.toolCalls && msg.toolCalls.length > 0) {
+      const calls = msg.toolCalls.map(tc => {
+        const argsPreview = tc.arguments.length > 200
+          ? tc.arguments.slice(0, 200) + '...'
+          : tc.arguments;
+        return `  调用 ${tc.name}(${argsPreview})`;
+      }).join('\n');
+      const text = msg.content ? `${msg.content}\n${calls}` : calls;
+      parts.push(`[${role}]\n${text}`);
+    } else if (msg.role === 'tool') {
+      // tool result 保留前 1000 字符（总结模型能看到足够信息）
+      const content = msg.content.length > 1000
+        ? msg.content.slice(0, 1000) + `... (共 ${msg.content.length} 字符)`
+        : msg.content;
+      parts.push(`[${role}: ${msg.toolName ?? 'unknown'}]\n${content}`);
+    } else if (msg.content) {
+      parts.push(`[${role}]\n${msg.content}`);
     }
   }
-  return [
-    `[上下文压缩] 以下是之前 ${middleMessages.length} 条消息的摘要：`,
-    toolCallNames.length > 0
-      ? `- 执行了 ${toolCallNames.length} 次工具调用: ${[...new Set(toolCallNames)].join(', ')}`
-      : '',
-    textPreview
-      ? `- AI 回复摘要: ${textPreview.slice(0, 500)}`
-      : '',
-  ].filter(Boolean).join('\n');
+  return parts.join('\n\n---\n\n');
 }

package/src/providers/model-registry.ts CHANGED Viewed

@@ -102,7 +102,8 @@ export const DOUBAO_FAMILY: ModelFamilyConfig = {
   supportsNativeSearch: false,
   searchStrategy: 'tavily',
   toolCallFormat: 'responses',
-  defaultMaxTokens: 32768,
+  // Seed 1.6 最大输出 16K（默认 4K，需手动设置）
+  defaultMaxTokens: 16384,
 };
 /** DeepSeek 家族（联网搜索统一走 web_search_ai/Tavily，与其它模型事件与数据格式一致） */
@@ -164,12 +165,13 @@ export const CLAUDE_FAMILY: ModelFamilyConfig = {
   id: 'claude',
   displayName: 'Claude',
   supportsVision: true,
-  supportsThinking: true, // 通过 Vercel AI SDK 支持 extended thinking
+  supportsThinking: true,
   thinkingFormat: 'reasoning',
   supportsNativeSearch: false,
   searchStrategy: 'tavily',
   toolCallFormat: 'openai',
-  defaultMaxTokens: 200000,
+  // Bedrock fallback 限制 64K output，取 64000 保证各 provider 兼容
+  defaultMaxTokens: 64000,
 };
 /** 家族配置映射 */
@@ -200,8 +202,10 @@ export interface ModelRegistryEntry {
   visible?: boolean;
   /** 是否支持图片理解（优先级高于 family.supportsVision） */
   supportsVision?: boolean;
-  /** 上下文窗口大小（如 "256K"） */
+  /** 上下文窗口大小（如 "256K"），用于展示 */
   contextWindow?: string;
+  /** 上下文窗口精确 token 数（用于压缩器计算） */
+  contextWindowTokens?: number;
   /** 价格信息（数组，分行显示） */
   pricing?: string[];
 }
@@ -215,25 +219,25 @@ export interface ModelRegistryEntry {
  */
 export const MODEL_REGISTRY: ModelRegistryEntry[] = [
   // 豆包（价格为输入<=32k档，输出价格取决于输出长度）
-  { id: 'doubao-seed-1-6-250615', displayName: '豆包 Seed 1.6', family: 'doubao', protocol: 'ark', visible: true, supportsVision: true, contextWindow: '256K', pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
-  { id: 'doubao-seed-1-8-251215', displayName: '豆包 Seed 1.8', family: 'doubao', protocol: 'ark', contextWindow: '256K', pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
+  { id: 'doubao-seed-1-6-250615', displayName: '豆包 Seed 1.6', family: 'doubao', protocol: 'ark', visible: true, supportsVision: true, contextWindow: '256K', contextWindowTokens: 256_000, pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
+  { id: 'doubao-seed-1-8-251215', displayName: '豆包 Seed 1.8', family: 'doubao', protocol: 'ark', contextWindow: '256K', contextWindowTokens: 256_000, pricing: ['输入 0.8 元/百万tokens', '输出 2-8 元/百万tokens'] },
   // DeepSeek（价格为输入<=32k档）
-  { id: 'deepseek-v3-2-251201', displayName: 'DeepSeek V3.2', family: 'deepseek', protocol: 'deepseek', visible: true, supportsVision: false, contextWindow: '128K', pricing: ['输入 2 元/百万tokens', '输出 3 元/百万tokens'] },
+  { id: 'deepseek-v3-2-251201', displayName: 'DeepSeek V3.2', family: 'deepseek', protocol: 'deepseek', visible: true, supportsVision: false, contextWindow: '128K', contextWindowTokens: 128_000, pricing: ['输入 2 元/百万tokens', '输出 3 元/百万tokens'] },
-  // 通义千问
-  { id: 'qwen3-vl-plus', displayName: '通义千问 3 VL', family: 'qwen', protocol: 'qwen', visible: true, supportsVision: true, contextWindow: '128K', pricing: ['输入 1 元/百万tokens', '输出 10 元/百万tokens'] },
+  // 通义千问（官方 2025.09 确认 262K context / 32K output）
+  { id: 'qwen3-vl-plus', displayName: '通义千问 3 VL', family: 'qwen', protocol: 'qwen', visible: true, supportsVision: true, contextWindow: '262K', contextWindowTokens: 262_144, pricing: ['输入 1 元/百万tokens', '输出 10 元/百万tokens'] },
   // Gemini
-  { id: 'gemini-3-pro-preview', displayName: 'Gemini 3 Pro', family: 'gemini', protocol: 'gemini', visible: true, supportsVision: true, contextWindow: '1M', pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
-  { id: 'gemini-2.5-flash-preview-05-20', displayName: 'Gemini 2.5 Flash', family: 'gemini', protocol: 'gemini', contextWindow: '1M', pricing: ['输入 0.15 元/百万tokens', '输出 0.6 元/百万tokens'] },
-  { id: 'gemini-2.5-pro-preview-05-06', displayName: 'Gemini 2.5 Pro', family: 'gemini', protocol: 'gemini', contextWindow: '1M', pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
+  { id: 'gemini-3-pro-preview', displayName: 'Gemini 3 Pro', family: 'gemini', protocol: 'gemini', visible: true, supportsVision: true, contextWindow: '1M', contextWindowTokens: 1_000_000, pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
+  { id: 'gemini-2.5-flash-preview-05-20', displayName: 'Gemini 2.5 Flash', family: 'gemini', protocol: 'gemini', contextWindow: '1M', contextWindowTokens: 1_000_000, pricing: ['输入 0.15 元/百万tokens', '输出 0.6 元/百万tokens'] },
+  { id: 'gemini-2.5-pro-preview-05-06', displayName: 'Gemini 2.5 Pro', family: 'gemini', protocol: 'gemini', contextWindow: '1M', contextWindowTokens: 1_000_000, pricing: ['输入 1.25 元/百万tokens', '输出 10 元/百万tokens'] },
   // GPT（OpenRouter，美元价格按约7.2汇率换算）
-  { id: 'openai/gpt-5.2', displayName: 'GPT-5.2', family: 'gpt', protocol: 'openai', visible: true, supportsVision: true, contextWindow: '400K', pricing: ['输入 12.6 元/百万tokens', '输出 100.8 元/百万tokens'] },
+  { id: 'openai/gpt-5.2', displayName: 'GPT-5.2', family: 'gpt', protocol: 'openai', visible: true, supportsVision: true, contextWindow: '400K', contextWindowTokens: 400_000, pricing: ['输入 12.6 元/百万tokens', '输出 100.8 元/百万tokens'] },
   // Claude（Vercel AI SDK，美元价格按约7.2汇率换算）
-  { id: 'anthropic/claude-opus-4.5', displayName: 'Claude Opus 4.5', family: 'claude', protocol: 'anthropic', visible: true, supportsVision: true, contextWindow: '200K', pricing: ['输入 36 元/百万tokens', '输出 180 元/百万tokens'] },
+  { id: 'anthropic/claude-opus-4.5', displayName: 'Claude Opus 4.5', family: 'claude', protocol: 'anthropic', visible: true, supportsVision: true, contextWindow: '200K', contextWindowTokens: 200_000, pricing: ['输入 36 元/百万tokens', '输出 180 元/百万tokens'] },
 ];
 // ==================== 查询辅助函数 ====================
@@ -318,3 +322,143 @@ export function getModelSearchStrategy(modelId: string): SearchStrategy {
   return family?.searchStrategy ?? 'tavily';
 }
+// ==================== Context configuration ====================
+/** Context sizing returned by getModelContextConfig: context window size and maximum output tokens. */
+export interface ModelContextConfig {
+  contextWindowTokens: number;
+  maxOutputTokens: number;
+}
+/**
+ * Get a model's context configuration (for use by the compressor).
+ *
+ * Every model must be registered and declare contextWindowTokens; otherwise this throws.
+ *
+ * @param modelId - Model id looked up via getModelEntry.
+ * @returns The entry's contextWindowTokens together with its family's defaultMaxTokens as maxOutputTokens.
+ * @throws Error when the model is not registered, its family is not defined,
+ *   or contextWindowTokens / defaultMaxTokens is missing (any falsy value, including 0, counts as missing).
+ */
+export function getModelContextConfig(modelId: string): ModelContextConfig {
+  const entry = getModelEntry(modelId);
+  if (!entry) throw new Error(`模型 ${modelId} 未在 MODEL_REGISTRY 中注册`);
+  const family = MODEL_FAMILIES[entry.family];
+  if (!family) throw new Error(`模型 ${modelId} 的家族 ${entry.family} 未定义`);
+  if (!entry.contextWindowTokens) {
+    throw new Error(`模型 ${modelId} 缺少 contextWindowTokens 配置`);
+  }
+  if (!family.defaultMaxTokens) {
+    throw new Error(`模型家族 ${entry.family} 缺少 defaultMaxTokens 配置`);
+  }
+  return {
+    contextWindowTokens: entry.contextWindowTokens,
+    maxOutputTokens: family.defaultMaxTokens,
+  };
+}
+/**
+ * Parse a "256K" / "1M" style string into a token count.
+ *
+ * Uses decimal multipliers (K = 1_000, M = 1_000_000) and returns undefined when the string
+ * does not match `<number><K|M>` (case-insensitive, optional whitespace before the unit).
+ *
+ * NOTE(review): "262K" parses to 262000, which differs from the 262_144 registered for
+ * qwen3-vl-plus, so validateModelConfigs reports that entry as inconsistent — confirm whether
+ * binary-K values should be tolerated.
+ * NOTE(review): `[\d.]+` also accepts inputs such as "1.2.3" (parseFloat reads 1.2), and
+ * fractional values may not multiply to an exact integer.
+ */
+function parseContextWindowString(s: string): number | undefined {
+  const match = s.match(/^([\d.]+)\s*(K|M)$/i);
+  if (!match) return undefined;
+  const num = parseFloat(match[1]);
+  const unit = match[2].toUpperCase();
+  return unit === 'M' ? num * 1_000_000 : num * 1_000;
+}
+// ==================== Configuration validation ====================
+/** One finding produced by validateModelConfigs: the model id, the offending field, a message, and a severity. */
+export interface ConfigValidationError {
+  modelId: string;
+  field: string;
+  message: string;
+  severity: 'error' | 'warning';
+}
+/**
+ * Validate every model configuration in MODEL_REGISTRY.
+ *
+ * Checks:
+ * 1. defaultMaxTokens must not exceed contextWindow
+ * 2. contextWindowTokens must agree with the contextWindow string
+ * 3. contextWindowTokens must be declared (otherwise the compressor cannot work)
+ * 4. defaultMaxTokens must be greater than 0
+ * 5. Known per-provider hard limits (e.g. Bedrock 64K output)
+ *
+ * @returns All findings; an empty array means no problems were detected.
+ *
+ * NOTE(review): a missing contextWindowTokens is reported only as a 'warning' whose message
+ * mentions falling back to a character threshold, while getModelContextConfig throws in that
+ * case — confirm the intended severity and wording.
+ * NOTE(review): providerOutputLimits is keyed by entry.protocol and has no 'openai' key, so
+ * entries using that protocol skip the hard-limit check.
+ */
+export function validateModelConfigs(): ConfigValidationError[] {
+  const errors: ConfigValidationError[] = [];
+  // Known maxOutputTokens hard limits per provider
+  const providerOutputLimits: Record<string, number> = {
+    anthropic: 64_000,  // Bedrock fallback limit
+    gemini: 65_536,
+    ark: 16_384,        // Doubao Seed series: max output 16K
+    deepseek: 64_000,   // V3.2 reasoner: max 64K
+    qwen: 32_768,
+  };
+  for (const entry of MODEL_REGISTRY) {
+    const family = MODEL_FAMILIES[entry.family];
+    if (!family) {
+      errors.push({ modelId: entry.id, field: 'family', message: `未找到家族配置: ${entry.family}`, severity: 'error' });
+      continue;
+    }
+    // Check that contextWindowTokens is declared
+    if (!entry.contextWindowTokens) {
+      errors.push({
+        modelId: entry.id,
+        field: 'contextWindowTokens',
+        message: '缺少 contextWindowTokens，压缩器将回退到字符数阈值',
+        severity: 'warning',
+      });
+    }
+    // Check that the contextWindow string agrees with the numeric value
+    if (entry.contextWindow && entry.contextWindowTokens) {
+      const parsed = parseContextWindowString(entry.contextWindow);
+      if (parsed && parsed !== entry.contextWindowTokens) {
+        errors.push({
+          modelId: entry.id,
+          field: 'contextWindow',
+          message: `contextWindow "${entry.contextWindow}" (${parsed}) 与 contextWindowTokens (${entry.contextWindowTokens}) 不一致`,
+          severity: 'error',
+        });
+      }
+    }
+    // Check defaultMaxTokens
+    const maxTokens = family.defaultMaxTokens;
+    if (maxTokens !== undefined) {
+      if (maxTokens <= 0) {
+        errors.push({
+          modelId: entry.id,
+          field: 'defaultMaxTokens',
+          message: `defaultMaxTokens (${maxTokens}) 必须大于 0`,
+          severity: 'error',
+        });
+      }
+      // Must not exceed contextWindow
+      if (entry.contextWindowTokens && maxTokens > entry.contextWindowTokens) {
+        errors.push({
+          modelId: entry.id,
+          field: 'defaultMaxTokens',
+          message: `defaultMaxTokens (${maxTokens}) 超过 contextWindow (${entry.contextWindowTokens})`,
+          severity: 'error',
+        });
+      }
+      // Check the provider hard limit
+      const providerLimit = providerOutputLimits[entry.protocol];
+      if (providerLimit && maxTokens > providerLimit) {
+        errors.push({
+          modelId: entry.id,
+          field: 'defaultMaxTokens',
+          message: `defaultMaxTokens (${maxTokens}) 超过 ${entry.protocol} 的硬限制 (${providerLimit})`,
+          severity: 'error',
+        });
+      }
+    }
+  }
+  return errors;
+}