@shareai-lab/kode 1.0.69 → 1.0.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/README.md +205 -72
  2. package/README.zh-CN.md +246 -0
  3. package/cli.js +62 -0
  4. package/package.json +45 -25
  5. package/scripts/postinstall.js +56 -0
  6. package/src/ProjectOnboarding.tsx +180 -0
  7. package/src/Tool.ts +53 -0
  8. package/src/commands/approvedTools.ts +53 -0
  9. package/src/commands/bug.tsx +20 -0
  10. package/src/commands/clear.ts +43 -0
  11. package/src/commands/compact.ts +120 -0
  12. package/src/commands/config.tsx +19 -0
  13. package/src/commands/cost.ts +18 -0
  14. package/src/commands/ctx_viz.ts +209 -0
  15. package/src/commands/doctor.ts +24 -0
  16. package/src/commands/help.tsx +19 -0
  17. package/src/commands/init.ts +37 -0
  18. package/src/commands/listen.ts +42 -0
  19. package/src/commands/login.tsx +51 -0
  20. package/src/commands/logout.tsx +40 -0
  21. package/src/commands/mcp.ts +41 -0
  22. package/src/commands/model.tsx +40 -0
  23. package/src/commands/modelstatus.tsx +20 -0
  24. package/src/commands/onboarding.tsx +34 -0
  25. package/src/commands/pr_comments.ts +59 -0
  26. package/src/commands/refreshCommands.ts +54 -0
  27. package/src/commands/release-notes.ts +34 -0
  28. package/src/commands/resume.tsx +30 -0
  29. package/src/commands/review.ts +49 -0
  30. package/src/commands/terminalSetup.ts +221 -0
  31. package/src/commands.ts +136 -0
  32. package/src/components/ApproveApiKey.tsx +93 -0
  33. package/src/components/AsciiLogo.tsx +13 -0
  34. package/src/components/AutoUpdater.tsx +148 -0
  35. package/src/components/Bug.tsx +367 -0
  36. package/src/components/Config.tsx +289 -0
  37. package/src/components/ConsoleOAuthFlow.tsx +326 -0
  38. package/src/components/Cost.tsx +23 -0
  39. package/src/components/CostThresholdDialog.tsx +46 -0
  40. package/src/components/CustomSelect/option-map.ts +42 -0
  41. package/src/components/CustomSelect/select-option.tsx +52 -0
  42. package/src/components/CustomSelect/select.tsx +143 -0
  43. package/src/components/CustomSelect/use-select-state.ts +414 -0
  44. package/src/components/CustomSelect/use-select.ts +35 -0
  45. package/src/components/FallbackToolUseRejectedMessage.tsx +15 -0
  46. package/src/components/FileEditToolUpdatedMessage.tsx +66 -0
  47. package/src/components/Help.tsx +215 -0
  48. package/src/components/HighlightedCode.tsx +33 -0
  49. package/src/components/InvalidConfigDialog.tsx +113 -0
  50. package/src/components/Link.tsx +32 -0
  51. package/src/components/LogSelector.tsx +86 -0
  52. package/src/components/Logo.tsx +145 -0
  53. package/src/components/MCPServerApprovalDialog.tsx +100 -0
  54. package/src/components/MCPServerDialogCopy.tsx +25 -0
  55. package/src/components/MCPServerMultiselectDialog.tsx +109 -0
  56. package/src/components/Message.tsx +219 -0
  57. package/src/components/MessageResponse.tsx +15 -0
  58. package/src/components/MessageSelector.tsx +211 -0
  59. package/src/components/ModeIndicator.tsx +88 -0
  60. package/src/components/ModelConfig.tsx +301 -0
  61. package/src/components/ModelListManager.tsx +223 -0
  62. package/src/components/ModelSelector.tsx +3208 -0
  63. package/src/components/ModelStatusDisplay.tsx +228 -0
  64. package/src/components/Onboarding.tsx +274 -0
  65. package/src/components/PressEnterToContinue.tsx +11 -0
  66. package/src/components/PromptInput.tsx +710 -0
  67. package/src/components/SentryErrorBoundary.ts +33 -0
  68. package/src/components/Spinner.tsx +129 -0
  69. package/src/components/StructuredDiff.tsx +184 -0
  70. package/src/components/TextInput.tsx +246 -0
  71. package/src/components/TokenWarning.tsx +31 -0
  72. package/src/components/ToolUseLoader.tsx +40 -0
  73. package/src/components/TrustDialog.tsx +106 -0
  74. package/src/components/binary-feedback/BinaryFeedback.tsx +63 -0
  75. package/src/components/binary-feedback/BinaryFeedbackOption.tsx +111 -0
  76. package/src/components/binary-feedback/BinaryFeedbackView.tsx +172 -0
  77. package/src/components/binary-feedback/utils.ts +220 -0
  78. package/src/components/messages/AssistantBashOutputMessage.tsx +22 -0
  79. package/src/components/messages/AssistantLocalCommandOutputMessage.tsx +45 -0
  80. package/src/components/messages/AssistantRedactedThinkingMessage.tsx +19 -0
  81. package/src/components/messages/AssistantTextMessage.tsx +144 -0
  82. package/src/components/messages/AssistantThinkingMessage.tsx +40 -0
  83. package/src/components/messages/AssistantToolUseMessage.tsx +123 -0
  84. package/src/components/messages/UserBashInputMessage.tsx +28 -0
  85. package/src/components/messages/UserCommandMessage.tsx +30 -0
  86. package/src/components/messages/UserKodingInputMessage.tsx +28 -0
  87. package/src/components/messages/UserPromptMessage.tsx +35 -0
  88. package/src/components/messages/UserTextMessage.tsx +39 -0
  89. package/src/components/messages/UserToolResultMessage/UserToolCanceledMessage.tsx +12 -0
  90. package/src/components/messages/UserToolResultMessage/UserToolErrorMessage.tsx +36 -0
  91. package/src/components/messages/UserToolResultMessage/UserToolRejectMessage.tsx +31 -0
  92. package/src/components/messages/UserToolResultMessage/UserToolResultMessage.tsx +57 -0
  93. package/src/components/messages/UserToolResultMessage/UserToolSuccessMessage.tsx +35 -0
  94. package/src/components/messages/UserToolResultMessage/utils.tsx +56 -0
  95. package/src/components/permissions/BashPermissionRequest/BashPermissionRequest.tsx +121 -0
  96. package/src/components/permissions/FallbackPermissionRequest.tsx +155 -0
  97. package/src/components/permissions/FileEditPermissionRequest/FileEditPermissionRequest.tsx +182 -0
  98. package/src/components/permissions/FileEditPermissionRequest/FileEditToolDiff.tsx +75 -0
  99. package/src/components/permissions/FileWritePermissionRequest/FileWritePermissionRequest.tsx +164 -0
  100. package/src/components/permissions/FileWritePermissionRequest/FileWriteToolDiff.tsx +81 -0
  101. package/src/components/permissions/FilesystemPermissionRequest/FilesystemPermissionRequest.tsx +242 -0
  102. package/src/components/permissions/PermissionRequest.tsx +103 -0
  103. package/src/components/permissions/PermissionRequestTitle.tsx +69 -0
  104. package/src/components/permissions/hooks.ts +44 -0
  105. package/src/components/permissions/toolUseOptions.ts +59 -0
  106. package/src/components/permissions/utils.ts +23 -0
  107. package/src/constants/betas.ts +5 -0
  108. package/src/constants/claude-asterisk-ascii-art.tsx +238 -0
  109. package/src/constants/figures.ts +4 -0
  110. package/src/constants/keys.ts +3 -0
  111. package/src/constants/macros.ts +6 -0
  112. package/src/constants/models.ts +935 -0
  113. package/src/constants/oauth.ts +18 -0
  114. package/src/constants/product.ts +17 -0
  115. package/src/constants/prompts.ts +177 -0
  116. package/src/constants/releaseNotes.ts +7 -0
  117. package/src/context/PermissionContext.tsx +149 -0
  118. package/src/context.ts +278 -0
  119. package/src/cost-tracker.ts +84 -0
  120. package/src/entrypoints/cli.tsx +1498 -0
  121. package/src/entrypoints/mcp.ts +176 -0
  122. package/src/history.ts +25 -0
  123. package/src/hooks/useApiKeyVerification.ts +59 -0
  124. package/src/hooks/useArrowKeyHistory.ts +55 -0
  125. package/src/hooks/useCanUseTool.ts +138 -0
  126. package/src/hooks/useCancelRequest.ts +39 -0
  127. package/src/hooks/useDoublePress.ts +42 -0
  128. package/src/hooks/useExitOnCtrlCD.ts +31 -0
  129. package/src/hooks/useInterval.ts +25 -0
  130. package/src/hooks/useLogMessages.ts +16 -0
  131. package/src/hooks/useLogStartupTime.ts +12 -0
  132. package/src/hooks/useNotifyAfterTimeout.ts +65 -0
  133. package/src/hooks/usePermissionRequestLogging.ts +44 -0
  134. package/src/hooks/useSlashCommandTypeahead.ts +137 -0
  135. package/src/hooks/useTerminalSize.ts +49 -0
  136. package/src/hooks/useTextInput.ts +315 -0
  137. package/src/messages.ts +37 -0
  138. package/src/permissions.ts +268 -0
  139. package/src/query.ts +704 -0
  140. package/src/screens/ConfigureNpmPrefix.tsx +197 -0
  141. package/src/screens/Doctor.tsx +219 -0
  142. package/src/screens/LogList.tsx +68 -0
  143. package/src/screens/REPL.tsx +792 -0
  144. package/src/screens/ResumeConversation.tsx +68 -0
  145. package/src/services/browserMocks.ts +66 -0
  146. package/src/services/claude.ts +1947 -0
  147. package/src/services/customCommands.ts +683 -0
  148. package/src/services/fileFreshness.ts +377 -0
  149. package/src/services/mcpClient.ts +564 -0
  150. package/src/services/mcpServerApproval.tsx +50 -0
  151. package/src/services/notifier.ts +40 -0
  152. package/src/services/oauth.ts +357 -0
  153. package/src/services/openai.ts +796 -0
  154. package/src/services/sentry.ts +3 -0
  155. package/src/services/statsig.ts +171 -0
  156. package/src/services/statsigStorage.ts +86 -0
  157. package/src/services/systemReminder.ts +406 -0
  158. package/src/services/vcr.ts +161 -0
  159. package/src/tools/ArchitectTool/ArchitectTool.tsx +122 -0
  160. package/src/tools/ArchitectTool/prompt.ts +15 -0
  161. package/src/tools/AskExpertModelTool/AskExpertModelTool.tsx +505 -0
  162. package/src/tools/BashTool/BashTool.tsx +270 -0
  163. package/src/tools/BashTool/BashToolResultMessage.tsx +38 -0
  164. package/src/tools/BashTool/OutputLine.tsx +48 -0
  165. package/src/tools/BashTool/prompt.ts +174 -0
  166. package/src/tools/BashTool/utils.ts +56 -0
  167. package/src/tools/FileEditTool/FileEditTool.tsx +316 -0
  168. package/src/tools/FileEditTool/prompt.ts +51 -0
  169. package/src/tools/FileEditTool/utils.ts +58 -0
  170. package/src/tools/FileReadTool/FileReadTool.tsx +371 -0
  171. package/src/tools/FileReadTool/prompt.ts +7 -0
  172. package/src/tools/FileWriteTool/FileWriteTool.tsx +297 -0
  173. package/src/tools/FileWriteTool/prompt.ts +10 -0
  174. package/src/tools/GlobTool/GlobTool.tsx +119 -0
  175. package/src/tools/GlobTool/prompt.ts +8 -0
  176. package/src/tools/GrepTool/GrepTool.tsx +147 -0
  177. package/src/tools/GrepTool/prompt.ts +11 -0
  178. package/src/tools/MCPTool/MCPTool.tsx +106 -0
  179. package/src/tools/MCPTool/prompt.ts +3 -0
  180. package/src/tools/MemoryReadTool/MemoryReadTool.tsx +127 -0
  181. package/src/tools/MemoryReadTool/prompt.ts +3 -0
  182. package/src/tools/MemoryWriteTool/MemoryWriteTool.tsx +89 -0
  183. package/src/tools/MemoryWriteTool/prompt.ts +3 -0
  184. package/src/tools/MultiEditTool/MultiEditTool.tsx +366 -0
  185. package/src/tools/MultiEditTool/prompt.ts +45 -0
  186. package/src/tools/NotebookEditTool/NotebookEditTool.tsx +298 -0
  187. package/src/tools/NotebookEditTool/prompt.ts +3 -0
  188. package/src/tools/NotebookReadTool/NotebookReadTool.tsx +266 -0
  189. package/src/tools/NotebookReadTool/prompt.ts +3 -0
  190. package/src/tools/StickerRequestTool/StickerRequestTool.tsx +93 -0
  191. package/src/tools/StickerRequestTool/prompt.ts +19 -0
  192. package/src/tools/TaskTool/TaskTool.tsx +382 -0
  193. package/src/tools/TaskTool/constants.ts +1 -0
  194. package/src/tools/TaskTool/prompt.ts +56 -0
  195. package/src/tools/ThinkTool/ThinkTool.tsx +56 -0
  196. package/src/tools/ThinkTool/prompt.ts +12 -0
  197. package/src/tools/TodoWriteTool/TodoWriteTool.tsx +289 -0
  198. package/src/tools/TodoWriteTool/prompt.ts +63 -0
  199. package/src/tools/lsTool/lsTool.tsx +269 -0
  200. package/src/tools/lsTool/prompt.ts +2 -0
  201. package/src/tools.ts +63 -0
  202. package/src/types/PermissionMode.ts +120 -0
  203. package/src/types/RequestContext.ts +72 -0
  204. package/src/utils/Cursor.ts +436 -0
  205. package/src/utils/PersistentShell.ts +373 -0
  206. package/src/utils/agentStorage.ts +97 -0
  207. package/src/utils/array.ts +3 -0
  208. package/src/utils/ask.tsx +98 -0
  209. package/src/utils/auth.ts +13 -0
  210. package/src/utils/autoCompactCore.ts +223 -0
  211. package/src/utils/autoUpdater.ts +318 -0
  212. package/src/utils/betas.ts +20 -0
  213. package/src/utils/browser.ts +14 -0
  214. package/src/utils/cleanup.ts +72 -0
  215. package/src/utils/commands.ts +261 -0
  216. package/src/utils/config.ts +771 -0
  217. package/src/utils/conversationRecovery.ts +54 -0
  218. package/src/utils/debugLogger.ts +1123 -0
  219. package/src/utils/diff.ts +42 -0
  220. package/src/utils/env.ts +57 -0
  221. package/src/utils/errors.ts +21 -0
  222. package/src/utils/exampleCommands.ts +108 -0
  223. package/src/utils/execFileNoThrow.ts +51 -0
  224. package/src/utils/expertChatStorage.ts +136 -0
  225. package/src/utils/file.ts +402 -0
  226. package/src/utils/fileRecoveryCore.ts +71 -0
  227. package/src/utils/format.tsx +44 -0
  228. package/src/utils/generators.ts +62 -0
  229. package/src/utils/git.ts +92 -0
  230. package/src/utils/globalLogger.ts +77 -0
  231. package/src/utils/http.ts +10 -0
  232. package/src/utils/imagePaste.ts +38 -0
  233. package/src/utils/json.ts +13 -0
  234. package/src/utils/log.ts +382 -0
  235. package/src/utils/markdown.ts +213 -0
  236. package/src/utils/messageContextManager.ts +289 -0
  237. package/src/utils/messages.tsx +938 -0
  238. package/src/utils/model.ts +836 -0
  239. package/src/utils/permissions/filesystem.ts +118 -0
  240. package/src/utils/ripgrep.ts +167 -0
  241. package/src/utils/sessionState.ts +49 -0
  242. package/src/utils/state.ts +25 -0
  243. package/src/utils/style.ts +29 -0
  244. package/src/utils/terminal.ts +49 -0
  245. package/src/utils/theme.ts +122 -0
  246. package/src/utils/thinking.ts +144 -0
  247. package/src/utils/todoStorage.ts +431 -0
  248. package/src/utils/tokens.ts +43 -0
  249. package/src/utils/toolExecutionController.ts +163 -0
  250. package/src/utils/unaryLogging.ts +26 -0
  251. package/src/utils/user.ts +37 -0
  252. package/src/utils/validate.ts +165 -0
  253. package/cli.mjs +0 -1803
package/src/services/claude.ts
@@ -0,0 +1,1947 @@
+ import '@anthropic-ai/sdk/shims/node'
+ import Anthropic, { APIConnectionError, APIError } from '@anthropic-ai/sdk'
+ import { AnthropicBedrock } from '@anthropic-ai/bedrock-sdk'
+ import { AnthropicVertex } from '@anthropic-ai/vertex-sdk'
+ import type { BetaUsage } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
+ import chalk from 'chalk'
+ import { createHash, randomUUID } from 'crypto'
+ import 'dotenv/config'
+
+ import { addToTotalCost } from '../cost-tracker'
+ import models from '../constants/models'
+ import type { AssistantMessage, UserMessage } from '../query'
+ import { Tool } from '../Tool'
+ import {
+   getAnthropicApiKey,
+   getOrCreateUserID,
+   getGlobalConfig,
+ } from '../utils/config'
+ import { getProjectDocs } from '../context'
+ import { logError, SESSION_ID } from '../utils/log'
+ import { USER_AGENT } from '../utils/http'
+ import {
+   createAssistantAPIErrorMessage,
+   normalizeContentFromAPI,
+ } from '../utils/messages'
+ import { countTokens } from '../utils/tokens'
+ import { logEvent } from './statsig'
+ import { withVCR } from './vcr'
+ import {
+   debug as debugLogger,
+   markPhase,
+   getCurrentRequest,
+   logLLMInteraction,
+   logSystemPromptConstruction,
+   logErrorWithDiagnosis,
+ } from '../utils/debugLogger'
+ import {
+   MessageContextManager,
+   createRetentionStrategy,
+ } from '../utils/messageContextManager'
+ import { getModelManager } from '../utils/model'
+ import { zodToJsonSchema } from 'zod-to-json-schema'
+ import type { BetaMessageStream } from '@anthropic-ai/sdk/lib/BetaMessageStream.mjs'
+ import type {
+   Message as APIMessage,
+   MessageParam,
+   TextBlockParam,
+ } from '@anthropic-ai/sdk/resources/index.mjs'
+ import { USE_BEDROCK, USE_VERTEX } from '../utils/model'
+ import { getCLISyspromptPrefix } from '../constants/prompts'
+ import { getVertexRegionForModel } from '../utils/model'
+ import OpenAI from 'openai'
+ import type { ChatCompletionStream } from 'openai/lib/ChatCompletionStream'
+ import { ContentBlock } from '@anthropic-ai/sdk/resources/messages/messages'
+ import { nanoid } from 'nanoid'
+ import { getCompletion, getCompletionWithProfile } from './openai'
+ import { getReasoningEffort } from '../utils/thinking'
+ import { generateSystemReminders } from './systemReminder'
+
+ // Helper function to extract model configuration for debug logging
+ function getModelConfigForDebug(model: string): {
+   modelName: string
+   provider: string
+   apiKeyStatus: 'configured' | 'missing' | 'invalid'
+   baseURL?: string
+   maxTokens?: number
+   reasoningEffort?: string
+   isStream?: boolean
+   temperature?: number
+ } {
+   const config = getGlobalConfig()
+   const modelManager = getModelManager()
+
+   // 🔧 Fix: Use ModelManager to get the actual current model profile
+   const modelProfile = modelManager.getModel('main')
+
+   let apiKeyStatus: 'configured' | 'missing' | 'invalid' = 'missing'
+   let baseURL: string | undefined
+   let maxTokens: number | undefined
+   let reasoningEffort: string | undefined
+
+   // 🔧 Fix: Use ModelProfile configuration exclusively
+   if (modelProfile) {
+     apiKeyStatus = modelProfile.apiKey ? 'configured' : 'missing'
+     baseURL = modelProfile.baseURL
+     maxTokens = modelProfile.maxTokens
+     reasoningEffort = modelProfile.reasoningEffort
+   } else {
+     // 🚨 No ModelProfile available - this should not happen in modern system
+     apiKeyStatus = 'missing'
+     maxTokens = undefined
+     reasoningEffort = undefined
+   }
+
+   return {
+     modelName: model,
+     provider: modelProfile?.provider || config.primaryProvider || 'anthropic',
+     apiKeyStatus,
+     baseURL,
+     maxTokens,
+     reasoningEffort,
+     isStream: config.stream || false,
+     temperature: MAIN_QUERY_TEMPERATURE,
+   }
+ }
+
+ // KodeContext manager - synchronous caching and access for project docs
+ class KodeContextManager {
+   private static instance: KodeContextManager
+   private projectDocsCache: string = ''
+   private cacheInitialized: boolean = false
+   private initPromise: Promise<void> | null = null
+
+   private constructor() {}
+
+   public static getInstance(): KodeContextManager {
+     if (!KodeContextManager.instance) {
+       KodeContextManager.instance = new KodeContextManager()
+     }
+     return KodeContextManager.instance
+   }
+
+   public async initialize(): Promise<void> {
+     if (this.cacheInitialized) return
+
+     if (this.initPromise) {
+       return this.initPromise
+     }
+
+     this.initPromise = this.loadProjectDocs()
+     await this.initPromise
+   }
+
+   private async loadProjectDocs(): Promise<void> {
+     try {
+       const projectDocs = await getProjectDocs()
+       this.projectDocsCache = projectDocs || ''
+       this.cacheInitialized = true
+
+       // Log the load result in debug mode
+       if (process.env.NODE_ENV === 'development') {
+         console.log(
+           `[KodeContext] Loaded ${this.projectDocsCache.length} characters from project docs`,
+         )
+       }
+     } catch (error) {
+       console.warn('[KodeContext] Failed to load project docs:', error)
+       this.projectDocsCache = ''
+       this.cacheInitialized = true
+     }
+   }
+
+   public getKodeContext(): string {
+     if (!this.cacheInitialized) {
+       // Not initialized yet: kick off async initialization but return an empty string immediately
+       this.initialize().catch(console.warn)
+       return ''
+     }
+     return this.projectDocsCache
+   }
+
+   public async refreshCache(): Promise<void> {
+     this.cacheInitialized = false
+     this.initPromise = null
+     await this.initialize()
+   }
+ }
+
+ // Exported functions kept for backward compatibility
+ const kodeContextManager = KodeContextManager.getInstance()
+
+ // Initialize asynchronously at module load time
+ kodeContextManager.initialize().catch(console.warn)
+
+ export const generateKodeContext = (): string => {
+   return kodeContextManager.getKodeContext()
+ }
+
+ export const refreshKodeContext = async (): Promise<void> => {
+   await kodeContextManager.refreshCache()
+ }
+
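Worth noting about the singleton above: `getKodeContext()` is synchronous, so a cold cache returns an empty string and only kicks off loading in the background. A small illustrative sketch (not part of the diff) of the observable behavior:

```ts
// Illustrative only, not part of the package diff.
const early = generateKodeContext() // '' if loadProjectDocs() hasn't resolved yet
await refreshKodeContext()          // clears the cache and awaits a fresh load
const docs = generateKodeContext()  // now returns the cached project docs
```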
+ interface StreamResponse extends APIMessage {
+   ttftMs?: number
+ }
+
+ export const API_ERROR_MESSAGE_PREFIX = 'API Error'
+ export const PROMPT_TOO_LONG_ERROR_MESSAGE = 'Prompt is too long'
+ export const CREDIT_BALANCE_TOO_LOW_ERROR_MESSAGE = 'Credit balance is too low'
+ export const INVALID_API_KEY_ERROR_MESSAGE =
+   'Invalid API key · Please run /login'
+ export const NO_CONTENT_MESSAGE = '(no content)'
+ const PROMPT_CACHING_ENABLED = !process.env.DISABLE_PROMPT_CACHING
+
+ // @see https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table
+ const HAIKU_COST_PER_MILLION_INPUT_TOKENS = 0.8
+ const HAIKU_COST_PER_MILLION_OUTPUT_TOKENS = 4
+ const HAIKU_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS = 1
+ const HAIKU_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS = 0.08
+
+ const SONNET_COST_PER_MILLION_INPUT_TOKENS = 3
+ const SONNET_COST_PER_MILLION_OUTPUT_TOKENS = 15
+ const SONNET_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS = 3.75
+ const SONNET_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS = 0.3
+
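As a sanity check on the rate constants above, here is a minimal sketch (not from the diff) of how the per-million-token Sonnet rates translate into a dollar figure, assuming an Anthropic-style usage object with cache fields:

```ts
// Hypothetical helper, assuming the usage shape reported by the Anthropic API.
interface UsageLike {
  input_tokens: number
  output_tokens: number
  cache_creation_input_tokens?: number
  cache_read_input_tokens?: number
}

function sonnetCostUSD(usage: UsageLike): number {
  const M = 1_000_000
  return (
    (usage.input_tokens / M) * 3 +                          // input
    (usage.output_tokens / M) * 15 +                        // output
    ((usage.cache_creation_input_tokens ?? 0) / M) * 3.75 + // cache write
    ((usage.cache_read_input_tokens ?? 0) / M) * 0.3        // cache read
  )
}
```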
+ export const MAIN_QUERY_TEMPERATURE = 1 // to get more variation for binary feedback
+
+ function getMetadata() {
+   return {
+     user_id: `${getOrCreateUserID()}_${SESSION_ID}`,
+   }
+ }
+
+ const MAX_RETRIES = process.env.USER_TYPE === 'SWE_BENCH' ? 100 : 10
+ const BASE_DELAY_MS = 500
+
+ interface RetryOptions {
+   maxRetries?: number
+   signal?: AbortSignal
+ }
+
+ // Helper function to create an abortable delay
+ function abortableDelay(delayMs: number, signal?: AbortSignal): Promise<void> {
+   return new Promise((resolve, reject) => {
+     // Check if already aborted
+     if (signal?.aborted) {
+       reject(new Error('Request was aborted'))
+       return
+     }
+
+     const timeoutId = setTimeout(() => {
+       resolve()
+     }, delayMs)
+
+     // If signal is provided, listen for abort event
+     if (signal) {
+       const abortHandler = () => {
+         clearTimeout(timeoutId)
+         reject(new Error('Request was aborted'))
+       }
+       signal.addEventListener('abort', abortHandler, { once: true })
+     }
+   })
+ }
+
+ function getRetryDelay(
+   attempt: number,
+   retryAfterHeader?: string | null,
+ ): number {
+   if (retryAfterHeader) {
+     const seconds = parseInt(retryAfterHeader, 10)
+     if (!isNaN(seconds)) {
+       return seconds * 1000
+     }
+   }
+   return Math.min(BASE_DELAY_MS * Math.pow(2, attempt - 1), 32000) // Max 32s delay
+ }
+
+ function shouldRetry(error: APIError): boolean {
+   // Check for overloaded errors first and only retry for SWE_BENCH
+   if (error.message?.includes('"type":"overloaded_error"')) {
+     return process.env.USER_TYPE === 'SWE_BENCH'
+   }
+
+   // Note this is not a standard header.
+   const shouldRetryHeader = error.headers?.['x-should-retry']
+
+   // If the server explicitly says whether or not to retry, obey.
+   if (shouldRetryHeader === 'true') return true
+   if (shouldRetryHeader === 'false') return false
+
+   if (error instanceof APIConnectionError) {
+     return true
+   }
+
+   if (!error.status) return false
+
+   // Retry on request timeouts.
+   if (error.status === 408) return true
+
+   // Retry on lock timeouts.
+   if (error.status === 409) return true
+
+   // Retry on rate limits.
+   if (error.status === 429) return true
+
+   // Retry internal errors.
+   if (error.status && error.status >= 500) return true
+
+   return false
+ }
+
+ async function withRetry<T>(
+   operation: (attempt: number) => Promise<T>,
+   options: RetryOptions = {},
+ ): Promise<T> {
+   const maxRetries = options.maxRetries ?? MAX_RETRIES
+   let lastError: unknown
+
+   for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
+     try {
+       return await operation(attempt)
+     } catch (error) {
+       lastError = error
+       // Only retry if the error indicates we should
+       if (
+         attempt > maxRetries ||
+         !(error instanceof APIError) ||
+         !shouldRetry(error)
+       ) {
+         throw error
+       }
+       // 🔧 CRITICAL FIX: Check abort signal BEFORE showing retry message
+       if (options.signal?.aborted) {
+         throw new Error('Request cancelled by user')
+       }
+
+       // Get retry-after header if available
+       const retryAfter = error.headers?.['retry-after'] ?? null
+       const delayMs = getRetryDelay(attempt, retryAfter)
+
+       console.log(
+         ` ⎿ ${chalk.red(`API ${error.name} (${error.message}) · Retrying in ${Math.round(delayMs / 1000)} seconds… (attempt ${attempt}/${maxRetries})`)}`,
+       )
+
+       logEvent('tengu_api_retry', {
+         attempt: String(attempt),
+         delayMs: String(delayMs),
+         error: error.message,
+         status: String(error.status),
+         provider: USE_BEDROCK ? 'bedrock' : USE_VERTEX ? 'vertex' : '1p',
+       })
+
+       try {
+         await abortableDelay(delayMs, options.signal)
+       } catch (delayError) {
+         // If aborted during delay, throw the error to stop retrying
+         if (delayError.message === 'Request was aborted') {
+           throw new Error('Request cancelled by user')
+         }
+         throw delayError
+       }
+     }
+   }
+
+   throw lastError
+ }
+
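A hypothetical call site (not from the diff) showing how `withRetry` composes with `abortableDelay`: retryable `APIError`s back off exponentially (capped at 32s, or per the server's retry-after header), while an abort fired during the wait surfaces as "Request cancelled by user":

```ts
// Sketch only; the model name and message are placeholders.
const controller = new AbortController()

const result = await withRetry(
  async attempt => {
    // attempt starts at 1 and increments on each retryable failure
    return getAnthropicClient().messages.create({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 1024,
      messages: [{ role: 'user', content: 'ping' }],
    })
  },
  { maxRetries: 3, signal: controller.signal },
)
```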
+ /**
+  * Fetch available models from Anthropic API
+  */
+ export async function fetchAnthropicModels(
+   baseURL: string,
+   apiKey: string,
+ ): Promise<any[]> {
+   try {
+     // Use provided baseURL or default to official Anthropic API
+     const modelsURL = baseURL
+       ? `${baseURL.replace(/\/+$/, '')}/v1/models`
+       : 'https://api.anthropic.com/v1/models'
+
+     const response = await fetch(modelsURL, {
+       method: 'GET',
+       headers: {
+         'x-api-key': apiKey,
+         'anthropic-version': '2023-06-01',
+         'User-Agent': USER_AGENT,
+       },
+     })
+
+     if (!response.ok) {
+       // Provide user-friendly error messages based on status code
+       if (response.status === 401) {
+         throw new Error(
+           'Invalid API key. Please check your Anthropic API key and try again.',
+         )
+       } else if (response.status === 403) {
+         throw new Error(
+           'API key does not have permission to access models. Please check your API key permissions.',
+         )
+       } else if (response.status === 429) {
+         throw new Error(
+           'Too many requests. Please wait a moment and try again.',
+         )
+       } else if (response.status >= 500) {
+         throw new Error(
+           'Anthropic service is temporarily unavailable. Please try again later.',
+         )
+       } else {
+         throw new Error(
+           `Unable to connect to Anthropic API (${response.status}). Please check your internet connection and API key.`,
+         )
+       }
+     }
+
+     const data = await response.json()
+     return data.data || []
+   } catch (error) {
+     // If it's already our custom error, pass it through
+     if (
+       (error instanceof Error && error.message.includes('API key')) ||
+       (error instanceof Error && error.message.includes('Anthropic'))
+     ) {
+       throw error
+     }
+
+     // For network errors or other issues
+     console.error('Failed to fetch Anthropic models:', error)
+     throw new Error(
+       'Unable to connect to Anthropic API. Please check your internet connection and try again.',
+     )
+   }
+ }
+
+ export async function verifyApiKey(
+   apiKey: string,
+   baseURL?: string,
+   provider?: string,
+ ): Promise<boolean> {
+   if (!apiKey) {
+     return false
+   }
+
+   // For non-Anthropic providers, use OpenAI-compatible verification
+   if (provider && provider !== 'anthropic') {
+     try {
+       const headers: Record<string, string> = {
+         Authorization: `Bearer ${apiKey}`,
+         'Content-Type': 'application/json',
+       }
+
+       // 🔧 Fix: Proper URL construction for verification
+       if (!baseURL) {
+         console.warn(
+           'No baseURL provided for non-Anthropic provider verification',
+         )
+         return false
+       }
+
+       const modelsURL = `${baseURL.replace(/\/+$/, '')}/models`
+
+       const response = await fetch(modelsURL, {
+         method: 'GET',
+         headers,
+       })
+
+       return response.ok
+     } catch (error) {
+       console.warn('API verification failed for non-Anthropic provider:', error)
+       return false
+     }
+   }
+
+   // For Anthropic and Anthropic-compatible APIs
+   const clientConfig: any = {
+     apiKey,
+     dangerouslyAllowBrowser: true,
+     maxRetries: 3,
+     defaultHeaders: {
+       'User-Agent': USER_AGENT,
+     },
+   }
+
+   // Only add baseURL for true Anthropic-compatible APIs
+   if (
+     baseURL &&
+     (provider === 'anthropic' ||
+       provider === 'bigdream' ||
+       provider === 'opendev')
+   ) {
+     clientConfig.baseURL = baseURL
+   }
+
+   const anthropic = new Anthropic(clientConfig)
+
+   try {
+     await withRetry(
+       async () => {
+         const model = 'claude-sonnet-4-20250514'
+         const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
+         await anthropic.messages.create({
+           model,
+           max_tokens: 1000, // Simple test token limit for API verification
+           messages,
+           temperature: 0,
+           metadata: getMetadata(),
+         })
+         return true
+       },
+       { maxRetries: 2 }, // Use fewer retries for API key verification
+     )
+     return true
+   } catch (error) {
+     logError(error)
+     // Check for authentication error
+     if (
+       error instanceof Error &&
+       error.message.includes(
+         '{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}',
+       )
+     ) {
+       return false
+     }
+     throw error
+   }
+ }
+
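A hypothetical wiring of the two helpers above (not from the diff): validate a key first, then list what the account can see. Note that for non-Anthropic providers `verifyApiKey` requires an explicit `baseURL` and probes `GET {baseURL}/models` with a Bearer token instead:

```ts
// Sketch only; the environment variable and the `id` field are assumptions.
const key = process.env.ANTHROPIC_API_KEY ?? ''

if (await verifyApiKey(key, undefined, 'anthropic')) {
  const models = await fetchAnthropicModels('https://api.anthropic.com', key)
  console.log(models.map(m => m.id)) // entries as returned by /v1/models
}
```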
+ function convertAnthropicMessagesToOpenAIMessages(
+   messages: (UserMessage | AssistantMessage)[],
+ ): (
+   | OpenAI.ChatCompletionMessageParam
+   | OpenAI.ChatCompletionToolMessageParam
+ )[] {
+   const openaiMessages: (
+     | OpenAI.ChatCompletionMessageParam
+     | OpenAI.ChatCompletionToolMessageParam
+   )[] = []
+
+   const toolResults: Record<string, OpenAI.ChatCompletionToolMessageParam> = {}
+
+   for (const message of messages) {
+     let contentBlocks = []
+     if (typeof message.message.content === 'string') {
+       contentBlocks = [
+         {
+           type: 'text',
+           text: message.message.content,
+         },
+       ]
+     } else if (!Array.isArray(message.message.content)) {
+       contentBlocks = [message.message.content]
+     } else {
+       contentBlocks = message.message.content
+     }
+
+     for (const block of contentBlocks) {
+       if (block.type === 'text') {
+         openaiMessages.push({
+           role: message.message.role,
+           content: block.text,
+         })
+       } else if (block.type === 'tool_use') {
+         openaiMessages.push({
+           role: 'assistant',
+           content: undefined,
+           tool_calls: [
+             {
+               type: 'function',
+               function: {
+                 name: block.name,
+                 arguments: JSON.stringify(block.input),
+               },
+               id: block.id,
+             },
+           ],
+         })
+       } else if (block.type === 'tool_result') {
+         // Ensure content is always a string for role:tool messages
+         let toolContent = block.content
+         if (typeof toolContent !== 'string') {
+           // Convert content to string if it's not already
+           toolContent = JSON.stringify(toolContent)
+         }
+
+         toolResults[block.tool_use_id] = {
+           role: 'tool',
+           content: toolContent,
+           tool_call_id: block.tool_use_id,
+         }
+       }
+     }
+   }
+
+   const finalMessages: (
+     | OpenAI.ChatCompletionMessageParam
+     | OpenAI.ChatCompletionToolMessageParam
+   )[] = []
+
+   for (const message of openaiMessages) {
+     finalMessages.push(message)
+
+     if ('tool_calls' in message && message.tool_calls) {
+       for (const toolCall of message.tool_calls) {
+         if (toolResults[toolCall.id]) {
+           finalMessages.push(toolResults[toolCall.id])
+         }
+       }
+     }
+   }
+
+   return finalMessages
+ }
+
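A worked example (hypothetical data, not from the diff) of the re-ordering pass above: an assistant `tool_use` block followed by the user's `tool_result` comes out as an OpenAI `tool_calls` message with its `role: 'tool'` reply placed immediately after the matching call:

```ts
const openaiMsgs = convertAnthropicMessagesToOpenAIMessages([
  {
    message: {
      role: 'assistant',
      content: [
        { type: 'tool_use', id: 'call_1', name: 'BashTool', input: { command: 'ls' } },
      ],
    },
  } as any,
  {
    message: {
      role: 'user',
      content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'README.md' }],
    },
  } as any,
])
// → [
//   { role: 'assistant', content: undefined, tool_calls: [{ id: 'call_1', ... }] },
//   { role: 'tool', tool_call_id: 'call_1', content: 'README.md' },
// ]
```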
+ function messageReducer(
+   previous: OpenAI.ChatCompletionMessage,
+   item: OpenAI.ChatCompletionChunk,
+ ): OpenAI.ChatCompletionMessage {
+   const reduce = (acc: any, delta: OpenAI.ChatCompletionChunk.Choice.Delta) => {
+     acc = { ...acc }
+     for (const [key, value] of Object.entries(delta)) {
+       if (acc[key] === undefined || acc[key] === null) {
+         acc[key] = value
+         // OpenAI.Chat.Completions.ChatCompletionMessageToolCall does not have a key, .index
+         if (Array.isArray(acc[key])) {
+           for (const arr of acc[key]) {
+             delete arr.index
+           }
+         }
+       } else if (typeof acc[key] === 'string' && typeof value === 'string') {
+         acc[key] += value
+       } else if (typeof acc[key] === 'number' && typeof value === 'number') {
+         acc[key] = value
+       } else if (Array.isArray(acc[key]) && Array.isArray(value)) {
+         const accArray = acc[key]
+         for (let i = 0; i < value.length; i++) {
+           const { index, ...chunkTool } = value[i]
+           if (index - accArray.length > 1) {
+             throw new Error(
+               `Error: An array has an empty value when tool_calls are constructed. tool_calls: ${accArray}; tool: ${value}`,
+             )
+           }
+           accArray[index] = reduce(accArray[index], chunkTool)
+         }
+       } else if (typeof acc[key] === 'object' && typeof value === 'object') {
+         acc[key] = reduce(acc[key], value)
+       }
+     }
+     return acc
+   }
+
+   const choice = item.choices?.[0]
+   if (!choice) {
+     // chunk contains information about usage and token counts
+     return previous
+   }
+   return reduce(previous, choice.delta) as OpenAI.ChatCompletionMessage
+ }
+ async function handleMessageStream(
+   stream: ChatCompletionStream,
+   signal?: AbortSignal, // 🔧 Add AbortSignal support to stream handler
+ ): Promise<OpenAI.ChatCompletion> {
+   const streamStartTime = Date.now()
+   let ttftMs: number | undefined
+   let chunkCount = 0
+   let errorCount = 0
+
+   debugLogger.api('OPENAI_STREAM_START', {
+     streamStartTime: String(streamStartTime),
+   })
+
+   let message = {} as OpenAI.ChatCompletionMessage
+
+   let id, model, created, object, usage
+   try {
+     for await (const chunk of stream) {
+       // 🔧 CRITICAL FIX: Check abort signal in OpenAI streaming loop
+       if (signal?.aborted) {
+         debugLogger.flow('OPENAI_STREAM_ABORTED', {
+           chunkCount,
+           timestamp: Date.now()
+         })
+         throw new Error('Request was cancelled')
+       }
+
+       chunkCount++
+
+       try {
+         if (!id) {
+           id = chunk.id
+           debugLogger.api('OPENAI_STREAM_ID_RECEIVED', {
+             id,
+             chunkNumber: String(chunkCount),
+           })
+         }
+         if (!model) {
+           model = chunk.model
+           debugLogger.api('OPENAI_STREAM_MODEL_RECEIVED', {
+             model,
+             chunkNumber: String(chunkCount),
+           })
+         }
+         if (!created) {
+           created = chunk.created
+         }
+         if (!object) {
+           object = chunk.object
+         }
+         if (!usage) {
+           usage = chunk.usage
+         }
+
+         message = messageReducer(message, chunk)
+
+         if (chunk?.choices?.[0]?.delta?.content) {
+           if (!ttftMs) {
+             ttftMs = Date.now() - streamStartTime
+             debugLogger.api('OPENAI_STREAM_FIRST_TOKEN', {
+               ttftMs: String(ttftMs),
+               chunkNumber: String(chunkCount),
+             })
+           }
+         }
+       } catch (chunkError) {
+         errorCount++
+         debugLogger.error('OPENAI_STREAM_CHUNK_ERROR', {
+           chunkNumber: String(chunkCount),
+           errorMessage:
+             chunkError instanceof Error
+               ? chunkError.message
+               : String(chunkError),
+           errorType:
+             chunkError instanceof Error
+               ? chunkError.constructor.name
+               : typeof chunkError,
+         })
+         // Continue processing other chunks
+       }
+     }
+
+     debugLogger.api('OPENAI_STREAM_COMPLETE', {
+       totalChunks: String(chunkCount),
+       errorCount: String(errorCount),
+       totalDuration: String(Date.now() - streamStartTime),
+       ttftMs: String(ttftMs || 0),
+       finalMessageId: id || 'undefined',
+     })
+   } catch (streamError) {
+     debugLogger.error('OPENAI_STREAM_FATAL_ERROR', {
+       totalChunks: String(chunkCount),
+       errorCount: String(errorCount),
+       errorMessage:
+         streamError instanceof Error
+           ? streamError.message
+           : String(streamError),
+       errorType:
+         streamError instanceof Error
+           ? streamError.constructor.name
+           : typeof streamError,
+     })
+     throw streamError
+   }
+   return {
+     id,
+     created,
+     model,
+     object,
+     choices: [
+       {
+         index: 0,
+         message,
+         finish_reason: 'stop',
+         logprobs: undefined,
+       },
+     ],
+     usage,
+   }
+ }
+
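To make the reducer's merge rule concrete, a small illustration (hypothetical chunks, not from the diff): string fields concatenate across deltas, numbers take the latest value, and tool-call arrays merge element-wise by `index`:

```ts
let acc = {} as OpenAI.ChatCompletionMessage
acc = messageReducer(acc, {
  choices: [{ delta: { role: 'assistant', content: 'Hel' } }],
} as any)
acc = messageReducer(acc, {
  choices: [{ delta: { content: 'lo' } }],
} as any)
// acc.content === 'Hello'
```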
+ function convertOpenAIResponseToAnthropic(response: OpenAI.ChatCompletion) {
+   let contentBlocks: ContentBlock[] = []
+   const message = response.choices?.[0]?.message
+   if (!message) {
+     logEvent('weird_response', {
+       response: JSON.stringify(response),
+     })
+     return {
+       role: 'assistant',
+       content: [],
+       stop_reason: response.choices?.[0]?.finish_reason,
+       type: 'message',
+       usage: response.usage,
+     }
+   }
+
+   if (message?.tool_calls) {
+     for (const toolCall of message.tool_calls) {
+       const tool = toolCall.function
+       const toolName = tool.name
+       let toolArgs = {}
+       try {
+         toolArgs = JSON.parse(tool.arguments)
+       } catch (e) {
+         // console.log(e)
+       }
+
+       contentBlocks.push({
+         type: 'tool_use',
+         input: toolArgs,
+         name: toolName,
+         id: toolCall.id?.length > 0 ? toolCall.id : nanoid(),
+       })
+     }
+   }
+
+   if ((message as any).reasoning) {
+     contentBlocks.push({
+       type: 'thinking',
+       thinking: (message as any).reasoning,
+       signature: '',
+     })
+   }
+
+   // NOTE: For deepseek api, the key for its returned reasoning process is reasoning_content
+   if ((message as any).reasoning_content) {
+     contentBlocks.push({
+       type: 'thinking',
+       thinking: (message as any).reasoning_content,
+       signature: '',
+     })
+   }
+
+   if (message.content) {
+     contentBlocks.push({
+       type: 'text',
+       text: message?.content,
+       citations: [],
+     })
+   }
+
+   const finalMessage = {
+     role: 'assistant',
+     content: contentBlocks,
+     stop_reason: response.choices?.[0]?.finish_reason,
+     type: 'message',
+     usage: response.usage,
+   }
+
+   return finalMessage
+ }
+
+ let anthropicClient: Anthropic | AnthropicBedrock | AnthropicVertex | null =
+   null
+
+ /**
+  * Get the Anthropic client, creating it if it doesn't exist
+  */
+ export function getAnthropicClient(
+   model?: string,
+ ): Anthropic | AnthropicBedrock | AnthropicVertex {
+   const config = getGlobalConfig()
+   const provider = config.primaryProvider
+
+   // Reset client if provider has changed to ensure correct configuration
+   if (anthropicClient && provider) {
+     // Always recreate client for provider-specific configurations
+     anthropicClient = null
+   }
+
+   if (anthropicClient) {
+     return anthropicClient
+   }
+
+   const region = getVertexRegionForModel(model)
+
+   const defaultHeaders: { [key: string]: string } = {
+     'x-app': 'cli',
+     'User-Agent': USER_AGENT,
+   }
+   if (process.env.ANTHROPIC_AUTH_TOKEN) {
+     defaultHeaders['Authorization'] =
+       `Bearer ${process.env.ANTHROPIC_AUTH_TOKEN}`
+   }
+
+   const ARGS = {
+     defaultHeaders,
+     maxRetries: 0, // Disabled auto-retry in favor of manual implementation
+     timeout: parseInt(process.env.API_TIMEOUT_MS || String(60 * 1000), 10),
+   }
+   if (USE_BEDROCK) {
+     const client = new AnthropicBedrock(ARGS)
+     anthropicClient = client
+     return client
+   }
+   if (USE_VERTEX) {
+     const vertexArgs = {
+       ...ARGS,
+       region: region || process.env.CLOUD_ML_REGION || 'us-east5',
+     }
+     const client = new AnthropicVertex(vertexArgs)
+     anthropicClient = client
+     return client
+   }
+
+   // Get appropriate API key and baseURL from ModelProfile
+   const modelManager = getModelManager()
+   const modelProfile = modelManager.getModel('main')
+
+   let apiKey: string
+   let baseURL: string | undefined
+
+   if (modelProfile) {
+     apiKey = modelProfile.apiKey || ''
+     baseURL = modelProfile.baseURL
+   } else {
+     // Fallback to default anthropic if no ModelProfile
+     apiKey = getAnthropicApiKey()
+     baseURL = undefined
+   }
+
+   if (process.env.USER_TYPE === 'ant' && !apiKey && provider === 'anthropic') {
+     console.error(
+       chalk.red(
+         '[ANT-ONLY] Please set the ANTHROPIC_API_KEY environment variable to use the CLI. To create a new key, go to https://console.anthropic.com/settings/keys.',
+       ),
+     )
+   }
+
+   // Create client with custom baseURL for BigDream/OpenDev
+   // Anthropic SDK will append the appropriate paths (like /v1/messages)
+   const clientConfig = {
+     apiKey,
+     dangerouslyAllowBrowser: true,
+     ...ARGS,
+     ...(baseURL && { baseURL }), // Use baseURL directly, SDK will handle API versioning
+   }
+
+   anthropicClient = new Anthropic(clientConfig)
+   return anthropicClient
+ }
+
+ /**
+  * Reset the Anthropic client to null, forcing a new client to be created on next use
+  */
+ export function resetAnthropicClient(): void {
+   anthropicClient = null
+ }
+
+ /**
+  * Environment variables for different client types:
+  *
+  * Direct API:
+  * - ANTHROPIC_API_KEY: Required for direct API access
+  *
+  * AWS Bedrock:
+  * - AWS credentials configured via aws-sdk defaults
+  *
+  * Vertex AI:
+  * - Model-specific region variables (highest priority):
+  *   - VERTEX_REGION_CLAUDE_3_5_HAIKU: Region for Claude 3.5 Haiku model
+  *   - VERTEX_REGION_CLAUDE_3_5_SONNET: Region for Claude 3.5 Sonnet model
+  *   - VERTEX_REGION_CLAUDE_3_7_SONNET: Region for Claude 3.7 Sonnet model
+  * - CLOUD_ML_REGION: Optional. The default GCP region to use for all models
+  *   if specific model region not specified above
+  * - ANTHROPIC_VERTEX_PROJECT_ID: Required. Your GCP project ID
+  * - Standard GCP credentials configured via google-auth-library
+  *
+  * Priority for determining region:
+  * 1. Hardcoded model-specific environment variables
+  * 2. Global CLOUD_ML_REGION variable
+  * 3. Default region from config
+  * 4. Fallback region (us-east5)
+  */
+
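The region-priority rules documented above boil down to something like the following sketch (illustrative only; the real resolution lives in `getVertexRegionForModel` in `../utils/model`, and the lookup keys here are assumptions):

```ts
function resolveVertexRegion(model: string): string {
  const perModel: Record<string, string | undefined> = {
    'claude-3-5-haiku': process.env.VERTEX_REGION_CLAUDE_3_5_HAIKU,
    'claude-3-5-sonnet': process.env.VERTEX_REGION_CLAUDE_3_5_SONNET,
    'claude-3-7-sonnet': process.env.VERTEX_REGION_CLAUDE_3_7_SONNET,
  }
  return (
    perModel[model] ??             // 1. model-specific override
    process.env.CLOUD_ML_REGION ?? // 2. global region
    'us-east5'                     // 3./4. config default, then fallback
  )
}
```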
+ export function userMessageToMessageParam(
+   message: UserMessage,
+   addCache = false,
+ ): MessageParam {
+   if (addCache) {
+     if (typeof message.message.content === 'string') {
+       return {
+         role: 'user',
+         content: [
+           {
+             type: 'text',
+             text: message.message.content,
+             ...(PROMPT_CACHING_ENABLED
+               ? { cache_control: { type: 'ephemeral' } }
+               : {}),
+           },
+         ],
+       }
+     } else {
+       return {
+         role: 'user',
+         content: message.message.content.map((_, i) => ({
+           ..._,
+           ...(i === message.message.content.length - 1
+             ? PROMPT_CACHING_ENABLED
+               ? { cache_control: { type: 'ephemeral' } }
+               : {}
+             : {}),
+         })),
+       }
+     }
+   }
+   return {
+     role: 'user',
+     content: message.message.content,
+   }
+ }
+
+ export function assistantMessageToMessageParam(
+   message: AssistantMessage,
+   addCache = false,
+ ): MessageParam {
+   if (addCache) {
+     if (typeof message.message.content === 'string') {
+       return {
+         role: 'assistant',
+         content: [
+           {
+             type: 'text',
+             text: message.message.content,
+             ...(PROMPT_CACHING_ENABLED
+               ? { cache_control: { type: 'ephemeral' } }
+               : {}),
+           },
+         ],
+       }
+     } else {
+       return {
+         role: 'assistant',
+         content: message.message.content.map((_, i) => ({
+           ..._,
+           ...(i === message.message.content.length - 1 &&
+           _.type !== 'thinking' &&
+           _.type !== 'redacted_thinking'
+             ? PROMPT_CACHING_ENABLED
+               ? { cache_control: { type: 'ephemeral' } }
+               : {}
+             : {}),
+         })),
+       }
+     }
+   }
+   return {
+     role: 'assistant',
+     content: message.message.content,
+   }
+ }
+
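An illustration (hypothetical message, not from the diff) of where the cache breakpoint lands: with `addCache = true` and prompt caching enabled, only the final content block gains `cache_control`, which is what makes everything before it a stable, cacheable prefix:

```ts
const param = userMessageToMessageParam(
  {
    message: {
      role: 'user',
      content: [
        { type: 'text', text: 'long, stable context...' },
        { type: 'text', text: 'the actual question' },
      ],
    },
  } as any,
  true,
)
// param.content[0] is unchanged;
// param.content[1] carries { cache_control: { type: 'ephemeral' } }
```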
+ function splitSysPromptPrefix(systemPrompt: string[]): string[] {
+   // split out the first block of the system prompt as the "prefix" for API
+   // to match on in https://console.statsig.com/4aF3Ewatb6xPVpCwxb5nA3/dynamic_configs/claude_cli_system_prompt_prefixes
+   const systemPromptFirstBlock = systemPrompt[0] || ''
+   const systemPromptRest = systemPrompt.slice(1)
+   return [systemPromptFirstBlock, systemPromptRest.join('\n')].filter(Boolean)
+ }
+
+ export async function queryLLM(
+   messages: (UserMessage | AssistantMessage)[],
+   systemPrompt: string[],
+   maxThinkingTokens: number,
+   tools: Tool[],
+   signal: AbortSignal,
+   options: {
+     safeMode: boolean
+     model: string | import('../utils/config').ModelPointerType
+     prependCLISysprompt: boolean
+   },
+ ): Promise<AssistantMessage> {
+   // 🔧 Unified model resolution: supports pointers, model IDs, and real model names
+   const modelManager = getModelManager()
+   const modelResolution = modelManager.resolveModelWithInfo(options.model)
+
+   if (!modelResolution.success || !modelResolution.profile) {
+     throw new Error(
+       modelResolution.error || `Failed to resolve model: ${options.model}`,
+     )
+   }
+
+   const modelProfile = modelResolution.profile
+   const resolvedModel = modelProfile.modelName
+
+   debugLogger.api('MODEL_RESOLVED', {
+     inputParam: options.model,
+     resolvedModelName: resolvedModel,
+     provider: modelProfile.provider,
+     isPointer: ['main', 'task', 'reasoning', 'quick'].includes(options.model),
+     requestId: getCurrentRequest()?.id,
+   })
+
+   const currentRequest = getCurrentRequest()
+   debugLogger.api('LLM_REQUEST_START', {
+     messageCount: messages.length,
+     systemPromptLength: systemPrompt.join(' ').length,
+     toolCount: tools.length,
+     model: resolvedModel,
+     originalModelParam: options.model,
+     requestId: currentRequest?.id,
+   })
+
+   markPhase('LLM_CALL')
+
+   try {
+     const result = await withVCR(messages, () =>
+       queryLLMWithPromptCaching(
+         messages,
+         systemPrompt,
+         maxThinkingTokens,
+         tools,
+         signal,
+         { ...options, model: resolvedModel, modelProfile }, // Pass resolved ModelProfile
+       ),
+     )
+
+     debugLogger.api('LLM_REQUEST_SUCCESS', {
+       costUSD: result.costUSD,
+       durationMs: result.durationMs,
+       responseLength: result.message.content?.length || 0,
+       requestId: currentRequest?.id,
+     })
+
+     return result
+   } catch (error) {
+     // Route LLM-related errors through the error-diagnosis system
+     logErrorWithDiagnosis(
+       error,
+       {
+         messageCount: messages.length,
+         systemPromptLength: systemPrompt.join(' ').length,
+         model: options.model,
+         toolCount: tools.length,
+         phase: 'LLM_CALL',
+       },
+       currentRequest?.id,
+     )
+
+     throw error
+   }
+ }
+
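A hypothetical call (not from the diff) showing the pointer-based model selection `queryLLM` accepts: 'main', 'task', 'reasoning', and 'quick' resolve through the ModelManager to a concrete profile before the request is issued:

```ts
const reply = await queryLLM(
  messages,                        // prior (UserMessage | AssistantMessage)[]
  ['You are a coding assistant.'],
  0,                               // maxThinkingTokens: disable thinking
  tools,
  new AbortController().signal,
  { safeMode: false, model: 'main', prependCLISysprompt: true },
)
```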
+ export function formatSystemPromptWithContext(
+   systemPrompt: string[],
+   context: { [k: string]: string },
+   agentId?: string,
+   skipContextReminders = false, // Parameter kept for API compatibility but not used anymore
+ ): { systemPrompt: string[]; reminders: string } {
+   // Build the enhanced system prompt - aligned with official Claude Code's direct-injection approach
+   const enhancedPrompt = [...systemPrompt]
+   let reminders = ''
+
+   // Only process when context exists
+   const hasContext = Object.entries(context).length > 0
+
+   if (hasContext) {
+     // Step 1: Inject Kode context directly into the system prompt - aligned with the official design
+     if (!skipContextReminders) {
+       const kodeContext = generateKodeContext()
+       if (kodeContext) {
+         // Add separators and a heading so project docs stand out in the system prompt
+         enhancedPrompt.push('\n---\n# 项目上下文\n')
+         enhancedPrompt.push(kodeContext)
+         enhancedPrompt.push('\n---\n')
+       }
+     }
+
+     // Step 2: Generate other dynamic reminders and return them to the caller - keeps the existing reminder feature
+     const reminderMessages = generateSystemReminders(hasContext, agentId)
+     if (reminderMessages.length > 0) {
+       reminders = reminderMessages.map(r => r.content).join('\n') + '\n'
+     }
+
+     // Step 3: Append the remaining context to the system prompt
+     enhancedPrompt.push(
+       `\nAs you answer the user's questions, you can use the following context:\n`,
+     )
+
+     // Filter out project docs already handled by the Kode context (avoid duplication)
+     const filteredContext = Object.fromEntries(
+       Object.entries(context).filter(
+         ([key]) => key !== 'projectDocs' && key !== 'userDocs',
+       ),
+     )
+
+     enhancedPrompt.push(
+       ...Object.entries(filteredContext).map(
+         ([key, value]) => `<context name="${key}">${value}</context>`,
+       ),
+     )
+   }
+
+   return { systemPrompt: enhancedPrompt, reminders }
+ }
+
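A quick sketch of what the function above returns for a hypothetical context map (not from the diff): `projectDocs`/`userDocs` are filtered out because the Kode context already injects them, while every other entry is appended as a `<context>` tag; dynamic reminders come back separately for the caller to place:

```ts
const { systemPrompt: sp, reminders } = formatSystemPromptWithContext(
  ['Base prompt'],
  { gitStatus: 'clean', projectDocs: 'handled via Kode context' },
)
// sp ends with: '<context name="gitStatus">clean</context>'
// reminders holds any generateSystemReminders() output, newline-joined
```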
+ async function queryLLMWithPromptCaching(
+   messages: (UserMessage | AssistantMessage)[],
+   systemPrompt: string[],
+   maxThinkingTokens: number,
+   tools: Tool[],
+   signal: AbortSignal,
+   options: {
+     safeMode: boolean
+     model: string
+     prependCLISysprompt: boolean
+     modelProfile?: ModelProfile | null
+   },
+ ): Promise<AssistantMessage> {
+   const config = getGlobalConfig()
+   const modelManager = getModelManager()
+
+   // 🔧 Fix: Use the passed-in ModelProfile rather than the hard-coded 'main' pointer
+   const modelProfile = options.modelProfile || modelManager.getModel('main')
+   let provider: string
+
+   if (modelProfile) {
+     provider = modelProfile.provider || config.primaryProvider || 'anthropic'
+   } else {
+     provider = config.primaryProvider || 'anthropic'
+   }
+
+   // Use native Anthropic SDK for Anthropic and some Anthropic-compatible providers
+   if (
+     provider === 'anthropic' ||
+     provider === 'bigdream' ||
+     provider === 'opendev'
+   ) {
+     return queryAnthropicNative(
+       messages,
+       systemPrompt,
+       maxThinkingTokens,
+       tools,
+       signal,
+       { ...options, modelProfile },
+     )
+   }
+
+   // Use OpenAI-compatible interface for all other providers
+   return queryOpenAI(messages, systemPrompt, maxThinkingTokens, tools, signal, {
+     ...options,
+     modelProfile,
+   })
+ }
+
+ async function queryAnthropicNative(
+   messages: (UserMessage | AssistantMessage)[],
+   systemPrompt: string[],
+   maxThinkingTokens: number,
+   tools: Tool[],
+   signal: AbortSignal,
+   options?: {
+     safeMode: boolean
+     model: string
+     prependCLISysprompt: boolean
+     modelProfile?: ModelProfile | null
+   },
+ ): Promise<AssistantMessage> {
+   const config = getGlobalConfig()
+   const modelManager = getModelManager()
+   const currentRequest = getCurrentRequest()
+
+   // 🔧 Fix: Use the passed-in ModelProfile rather than the hard-coded 'main' pointer
+   const modelProfile = options?.modelProfile || modelManager.getModel('main')
+   let anthropic: Anthropic | AnthropicBedrock | AnthropicVertex
+   let model: string
+   let provider: string
+
+   // 🔍 Debug: Log model configuration details
+   debugLogger.api('MODEL_CONFIG_ANTHROPIC', {
+     modelProfileFound: !!modelProfile,
+     modelProfileId: modelProfile?.modelName,
+     modelProfileName: modelProfile?.name,
+     modelProfileModelName: modelProfile?.modelName,
+     modelProfileProvider: modelProfile?.provider,
+     modelProfileBaseURL: modelProfile?.baseURL,
+     modelProfileApiKeyExists: !!modelProfile?.apiKey,
+     optionsModel: options?.model,
+     requestId: currentRequest?.id,
+   })
+
+   if (modelProfile) {
+     // Use the ModelProfile's full configuration
+     model = modelProfile.modelName
+     provider = modelProfile.provider || config.primaryProvider || 'anthropic'
+
+     // Create a dedicated API client based on the ModelProfile
+     if (
+       modelProfile.provider === 'anthropic' ||
+       modelProfile.provider === 'bigdream' ||
+       modelProfile.provider === 'opendev'
+     ) {
+       const clientConfig: any = {
+         apiKey: modelProfile.apiKey,
+         dangerouslyAllowBrowser: true,
+         maxRetries: 0,
+         timeout: parseInt(process.env.API_TIMEOUT_MS || String(60 * 1000), 10),
+         defaultHeaders: {
+           'x-app': 'cli',
+           'User-Agent': USER_AGENT,
+         },
+       }
+
+       // Use the ModelProfile's baseURL instead of the global config
+       if (modelProfile.baseURL) {
+         clientConfig.baseURL = modelProfile.baseURL
+       }
+
+       anthropic = new Anthropic(clientConfig)
+     } else {
+       // Handling for other providers
+       anthropic = getAnthropicClient(model)
+     }
+   } else {
+     // 🚨 Degraded path: without a valid ModelProfile, throw an error
+     const errorDetails = {
+       modelProfileExists: !!modelProfile,
+       modelProfileModelName: modelProfile?.modelName,
+       requestedModel: options?.model,
+       requestId: currentRequest?.id,
+     }
+     debugLogger.error('ANTHROPIC_FALLBACK_ERROR', errorDetails)
+     throw new Error(
+       `No valid ModelProfile available for Anthropic provider. Please configure model through /model command. Debug: ${JSON.stringify(errorDetails)}`,
+     )
+   }
+
+   // Prepend system prompt block for easy API identification
+   if (options?.prependCLISysprompt) {
+     // Log stats about first block for analyzing prefix matching config
+     const [firstSyspromptBlock] = splitSysPromptPrefix(systemPrompt)
+     logEvent('tengu_sysprompt_block', {
+       snippet: firstSyspromptBlock?.slice(0, 20),
+       length: String(firstSyspromptBlock?.length ?? 0),
+       hash: firstSyspromptBlock
+         ? createHash('sha256').update(firstSyspromptBlock).digest('hex')
+         : '',
+     })
+
+     systemPrompt = [getCLISyspromptPrefix(), ...systemPrompt]
+   }
+
+   const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
+     _ => ({
+       ...(PROMPT_CACHING_ENABLED
+         ? { cache_control: { type: 'ephemeral' } }
+         : {}),
+       text: _,
+       type: 'text',
+     }),
+   )
+
+   const toolSchemas = tools.map(
+     tool =>
+       ({
+         name: tool.name,
+         description: tool.description,
+         input_schema: zodToJsonSchema(tool.inputSchema),
+       }) as Anthropic.Beta.Tools.Tool,
+   )
+
+   const anthropicMessages = addCacheBreakpoints(messages)
+   const startIncludingRetries = Date.now()
+
+   // Log the system prompt construction process
+   logSystemPromptConstruction({
+     basePrompt: systemPrompt.join('\n'),
+     kodeContext: generateKodeContext() || '',
+     reminders: [], // could also be sourced from generateSystemReminders here
+     finalPrompt: systemPrompt.join('\n'),
+   })
+
+   let start = Date.now()
+   let attemptNumber = 0
+   let response
+
+   try {
+     response = await withRetry(async attempt => {
+       attemptNumber = attempt
+       start = Date.now()
+
+       const params: Anthropic.Beta.Messages.MessageCreateParams = {
+         model,
+         max_tokens: getMaxTokensFromProfile(modelProfile),
+         messages: anthropicMessages,
+         system,
+         tools: toolSchemas.length > 0 ? toolSchemas : undefined,
+         tool_choice: toolSchemas.length > 0 ? { type: 'auto' } : undefined,
+       }
+
+       if (maxThinkingTokens > 0) {
+         params.extra_headers = {
+           'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
+         }
+         ;(params as any).thinking = { max_tokens: maxThinkingTokens }
+       }
+
+ // 🔥 REAL-TIME API CALL DEBUG - 使用全局日志系统 (Anthropic Streaming)
1378
+ debugLogger.api('ANTHROPIC_API_CALL_START_STREAMING', {
1379
+ endpoint: modelProfile?.baseURL || 'DEFAULT_ANTHROPIC',
1380
+ model,
1381
+ provider,
1382
+ apiKeyConfigured: !!modelProfile?.apiKey,
1383
+ apiKeyPrefix: modelProfile?.apiKey
1384
+ ? modelProfile.apiKey.substring(0, 8)
1385
+ : null,
1386
+ maxTokens: params.max_tokens,
1387
+ temperature: MAIN_QUERY_TEMPERATURE,
1388
+ messageCount: params.messages?.length || 0,
1389
+ streamMode: true,
1390
+ toolsCount: toolSchemas.length,
1391
+ thinkingTokens: maxThinkingTokens,
1392
+ timestamp: new Date().toISOString(),
1393
+ modelProfileId: modelProfile?.modelName,
1394
+ modelProfileName: modelProfile?.name,
1395
+ })
+
+      if (config.stream) {
+        // 🔧 CRITICAL FIX: Connect AbortSignal to Anthropic API call
+        const stream = await anthropic.beta.messages.create(
+          {
+            ...params,
+            stream: true,
+          },
+          { signal }, // ← CRITICAL: connect the AbortSignal to the API call
+        )
+
+        let finalResponse: Anthropic.Beta.Messages.Message | null = null
+        let messageStartEvent: any = null
+        const contentBlocks: any[] = []
+        let usage: any = null
+        let stopReason: string | null = null
+        let stopSequence: string | null = null
+
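+        // Accumulate the SSE stream into a complete message. Events arrive as
+        // message_start → content_block_start / content_block_delta (per block)
+        // → message_delta (stop reason, usage) → message_stop.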
+        for await (const event of stream) {
+          // 🔧 CRITICAL FIX: Check abort signal in streaming loop
+          if (signal.aborted) {
+            debugLogger.flow('STREAM_ABORTED', {
+              eventType: event.type,
+              timestamp: Date.now(),
+            })
+            throw new Error('Request was cancelled')
+          }
+          if (event.type === 'message_start') {
+            messageStartEvent = event
+            finalResponse = {
+              ...event.message,
+              content: [], // Will be populated from content blocks
+            }
+          } else if (event.type === 'content_block_start') {
+            contentBlocks[event.index] = { ...event.content_block }
+          } else if (event.type === 'content_block_delta') {
+            if (!contentBlocks[event.index]) {
+              contentBlocks[event.index] = {
+                type: event.delta.type === 'text_delta' ? 'text' : 'unknown',
+                text: '',
+              }
+            }
+            if (event.delta.type === 'text_delta') {
+              contentBlocks[event.index].text += event.delta.text
+            }
+          } else if (event.type === 'message_delta') {
+            if (event.delta.stop_reason) stopReason = event.delta.stop_reason
+            if (event.delta.stop_sequence)
+              stopSequence = event.delta.stop_sequence
+            if (event.usage) usage = { ...usage, ...event.usage }
+          } else if (event.type === 'message_stop') {
+            break
+          }
+        }
+
+        if (!finalResponse || !messageStartEvent) {
+          throw new Error('Stream ended without proper message structure')
+        }
+
+        // Construct the final response
+        finalResponse = {
+          ...messageStartEvent.message,
+          content: contentBlocks.filter(Boolean),
+          stop_reason: stopReason,
+          stop_sequence: stopSequence,
+          usage: {
+            ...messageStartEvent.message.usage,
+            ...usage,
+          },
+        }
+
+        return finalResponse
+      } else {
+        // 🔥 REAL-TIME API CALL DEBUG - via the global logging system (Anthropic non-streaming)
+        debugLogger.api('ANTHROPIC_API_CALL_START_NON_STREAMING', {
+          endpoint: modelProfile?.baseURL || 'DEFAULT_ANTHROPIC',
+          model,
+          provider,
+          apiKeyConfigured: !!modelProfile?.apiKey,
+          apiKeyPrefix: modelProfile?.apiKey
+            ? modelProfile.apiKey.substring(0, 8)
+            : null,
+          maxTokens: params.max_tokens,
+          temperature: MAIN_QUERY_TEMPERATURE,
+          messageCount: params.messages?.length || 0,
+          streamMode: false,
+          toolsCount: toolSchemas.length,
+          thinkingTokens: maxThinkingTokens,
+          timestamp: new Date().toISOString(),
+          modelProfileId: modelProfile?.modelName,
+          modelProfileName: modelProfile?.name,
+        })
+
+        // 🔧 CRITICAL FIX: Connect AbortSignal to non-streaming API call
+        return await anthropic.beta.messages.create(params, {
+          signal, // ← CRITICAL: connect the AbortSignal to the API call
+        })
+      }
+    }, { signal }) // 🔧 CRITICAL FIX: pass the AbortSignal to withRetry
+
+    const ttftMs = Date.now() - start
+    const durationMs = Date.now() - startIncludingRetries
+
+    const content = response.content.map((block: ContentBlock) => {
+      if (block.type === 'text') {
+        return {
+          type: 'text' as const,
+          text: block.text,
+        }
+      } else if (block.type === 'tool_use') {
+        return {
+          type: 'tool_use' as const,
+          id: block.id,
+          name: block.name,
+          input: block.input,
+        }
+      }
+      return block
+    })
+
+    const assistantMessage: AssistantMessage = {
+      message: {
+        id: response.id,
+        content,
+        model: response.model,
+        role: 'assistant',
+        stop_reason: response.stop_reason,
+        stop_sequence: response.stop_sequence,
+        type: 'message',
+        usage: response.usage,
+      },
+      type: 'assistant',
+      uuid: nanoid() as UUID,
+      ttftMs,
+      durationMs,
+      costUSD: 0, // Will be calculated below
+    }
+
+    // Log the full LLM interaction for debugging (Anthropic path)
+    // Note: the Anthropic API separates the system prompt from messages; here we
+    // reassemble them into a complete view of the API call
+    const systemMessages = system.map(block => ({
+      role: 'system',
+      content: block.text,
+    }))
+
+    logLLMInteraction({
+      systemPrompt: systemPrompt.join('\n'),
+      messages: [...systemMessages, ...anthropicMessages],
+      response: response,
+      usage: response.usage
+        ? {
+            inputTokens: response.usage.input_tokens,
+            outputTokens: response.usage.output_tokens,
+          }
+        : undefined,
+      timing: {
+        start: start,
+        end: Date.now(),
+      },
+      apiFormat: 'anthropic',
+      modelConfig: getModelConfigForDebug(model),
+    })
+
+    // Calculate cost using native Anthropic usage data. The cost helpers
+    // return USD per token, so token counts are multiplied directly.
+    const inputTokens = response.usage.input_tokens
+    const outputTokens = response.usage.output_tokens
+    const cacheCreationInputTokens =
+      response.usage.cache_creation_input_tokens ?? 0
+    const cacheReadInputTokens = response.usage.cache_read_input_tokens ?? 0
+
+    const costUSD =
+      inputTokens * getModelInputTokenCostUSD(model) +
+      outputTokens * getModelOutputTokenCostUSD(model) +
+      cacheCreationInputTokens * getModelInputTokenCostUSD(model) +
+      cacheReadInputTokens * (getModelInputTokenCostUSD(model) * 0.1) // cache reads are 10% of input cost
+
+    assistantMessage.costUSD = costUSD
+    addToTotalCost(costUSD)
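+    // Worked example: 1,000 input tokens at 0.000003 USD/token plus 500 output
+    // tokens at 0.000015 USD/token → 0.003 + 0.0075 = $0.0105, before cache terms.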
1575
+
1576
+ logEvent('api_response_anthropic_native', {
1577
+ model,
1578
+ input_tokens: inputTokens,
1579
+ output_tokens: outputTokens,
1580
+ cache_creation_input_tokens: cacheCreationInputTokens,
1581
+ cache_read_input_tokens: cacheReadInputTokens,
1582
+ cost_usd: costUSD,
1583
+ duration_ms: durationMs,
1584
+ ttft_ms: ttftMs,
1585
+ attempt_number: attemptNumber,
1586
+ })
1587
+
1588
+ return assistantMessage
1589
+ } catch (error) {
1590
+ return getAssistantMessageFromError(error)
1591
+ }
1592
+ }
+
+function getAssistantMessageFromError(error: unknown): AssistantMessage {
+  if (error instanceof Error && error.message.includes('prompt is too long')) {
+    return createAssistantAPIErrorMessage(PROMPT_TOO_LONG_ERROR_MESSAGE)
+  }
+  if (
+    error instanceof Error &&
+    error.message.includes('Your credit balance is too low')
+  ) {
+    return createAssistantAPIErrorMessage(CREDIT_BALANCE_TOO_LOW_ERROR_MESSAGE)
+  }
+  if (
+    error instanceof Error &&
+    error.message.toLowerCase().includes('x-api-key')
+  ) {
+    return createAssistantAPIErrorMessage(INVALID_API_KEY_ERROR_MESSAGE)
+  }
+  if (error instanceof Error) {
+    if (process.env.NODE_ENV === 'development') {
+      console.log(error)
+    }
+    return createAssistantAPIErrorMessage(
+      `${API_ERROR_MESSAGE_PREFIX}: ${error.message}`,
+    )
+  }
+  return createAssistantAPIErrorMessage(API_ERROR_MESSAGE_PREFIX)
+}
+
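+// Mark a cache breakpoint on the last two messages (index > length - 3) so the
+// stable conversation prefix before them can be served from the prompt cache.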
+function addCacheBreakpoints(
+  messages: (UserMessage | AssistantMessage)[],
+): MessageParam[] {
+  return messages.map((msg, index) => {
+    return msg.type === 'user'
+      ? userMessageToMessageParam(msg, index > messages.length - 3)
+      : assistantMessageToMessageParam(msg, index > messages.length - 3)
+  })
+}
+
+async function queryOpenAI(
+  messages: (UserMessage | AssistantMessage)[],
+  systemPrompt: string[],
+  maxThinkingTokens: number,
+  tools: Tool[],
+  signal: AbortSignal,
+  options?: {
+    safeMode: boolean
+    model: string
+    prependCLISysprompt: boolean
+    modelProfile?: ModelProfile | null
+  },
+): Promise<AssistantMessage> {
+  const config = getGlobalConfig()
+  const modelManager = getModelManager()
+
+  // 🔧 Fix: use the ModelProfile passed in, not the hard-coded 'main' pointer
+  const modelProfile = options?.modelProfile || modelManager.getModel('main')
+  let model: string
+
+  // 🔍 Debug: record the model configuration details
+  const currentRequest = getCurrentRequest()
+  debugLogger.api('MODEL_CONFIG_OPENAI', {
+    modelProfileFound: !!modelProfile,
+    modelProfileId: modelProfile?.modelName,
+    modelProfileName: modelProfile?.name,
+    modelProfileModelName: modelProfile?.modelName,
+    modelProfileProvider: modelProfile?.provider,
+    modelProfileBaseURL: modelProfile?.baseURL,
+    modelProfileApiKeyExists: !!modelProfile?.apiKey,
+    optionsModel: options?.model,
+    requestId: currentRequest?.id,
+  })
+
+  if (modelProfile) {
+    model = modelProfile.modelName
+  } else {
+    model = options?.model || ''
+  }
+  // Prepend system prompt block for easy API identification
+  if (options?.prependCLISysprompt) {
+    // Log stats about first block for analyzing prefix matching config (see https://console.statsig.com/4aF3Ewatb6xPVpCwxb5nA3/dynamic_configs/claude_cli_system_prompt_prefixes)
+    const [firstSyspromptBlock] = splitSysPromptPrefix(systemPrompt)
+    logEvent('tengu_sysprompt_block', {
+      snippet: firstSyspromptBlock?.slice(0, 20),
+      length: String(firstSyspromptBlock?.length ?? 0),
+      hash: firstSyspromptBlock
+        ? createHash('sha256').update(firstSyspromptBlock).digest('hex')
+        : '',
+    })
+
+    systemPrompt = [getCLISyspromptPrefix(), ...systemPrompt]
+  }
+
+  const system: TextBlockParam[] = splitSysPromptPrefix(systemPrompt).map(
+    _ => ({
+      ...(PROMPT_CACHING_ENABLED
+        ? { cache_control: { type: 'ephemeral' } }
+        : {}),
+      text: _,
+      type: 'text',
+    }),
+  )
+
+  const toolSchemas = await Promise.all(
+    tools.map(
+      async _ =>
+        ({
+          type: 'function',
+          function: {
+            name: _.name,
+            description: await _.prompt({
+              safeMode: options?.safeMode,
+            }),
+            // Use tool's JSON schema directly if provided, otherwise convert Zod schema
+            parameters:
+              'inputJSONSchema' in _ && _.inputJSONSchema
+                ? _.inputJSONSchema
+                : zodToJsonSchema(_.inputSchema),
+          },
+        }) as OpenAI.ChatCompletionTool,
+    ),
+  )
+
+  const openaiSystem = system.map(
+    s =>
+      ({
+        role: 'system',
+        content: s.text,
+      }) as OpenAI.ChatCompletionMessageParam,
+  )
+
+  const openaiMessages = convertAnthropicMessagesToOpenAIMessages(messages)
+  const startIncludingRetries = Date.now()
+
+  // Log how the system prompt was constructed (OpenAI path)
+  logSystemPromptConstruction({
+    basePrompt: systemPrompt.join('\n'),
+    kodeContext: generateKodeContext() || '',
+    reminders: [], // could be sourced from generateSystemReminders
+    finalPrompt: systemPrompt.join('\n'),
+  })
+
+  let start = Date.now()
+  let attemptNumber = 0
+  let response
+
+  try {
+    response = await withRetry(async attempt => {
+      attemptNumber = attempt
+      start = Date.now()
+      const opts: OpenAI.ChatCompletionCreateParams = {
+        model,
+        max_tokens: getMaxTokensFromProfile(modelProfile),
+        messages: [...openaiSystem, ...openaiMessages],
+        temperature: MAIN_QUERY_TEMPERATURE,
+      }
+      if (config.stream) {
+        ;(opts as OpenAI.ChatCompletionCreateParams).stream = true
+        opts.stream_options = {
+          include_usage: true,
+        }
+      }
+
+      if (toolSchemas.length > 0) {
+        opts.tools = toolSchemas
+        opts.tool_choice = 'auto'
+      }
+      const reasoningEffort = await getReasoningEffort(modelProfile, messages)
+      if (reasoningEffort) {
+        logEvent('debug_reasoning_effort', {
+          effort: reasoningEffort,
+        })
+        opts.reasoning_effort = reasoningEffort
+      }
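+      // reasoning_effort only applies to reasoning-capable models; for others
+      // getReasoningEffort is expected to return nothing and the field is omitted.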
+
+      // 🔧 Fix: if a ModelProfile is configured, use it directly (looser condition)
+      if (modelProfile && modelProfile.modelName) {
+        debugLogger.api('USING_MODEL_PROFILE_PATH', {
+          modelProfileName: modelProfile.modelName,
+          modelName: modelProfile.modelName,
+          provider: modelProfile.provider,
+          baseURL: modelProfile.baseURL,
+          apiKeyExists: !!modelProfile.apiKey,
+          requestId: currentRequest?.id,
+        })
+
+        // 🔧 CRITICAL FIX: pass the AbortSignal through to the OpenAI calls
+        const s = await getCompletionWithProfile(modelProfile, opts, 0, 10, signal)
+        let finalResponse
+        if (opts.stream) {
+          // 🔧 pass the AbortSignal to the stream handler
+          finalResponse = await handleMessageStream(s as ChatCompletionStream, signal)
+        } else {
+          finalResponse = s
+        }
+
+        const r = convertOpenAIResponseToAnthropic(finalResponse)
+        return r
+      } else {
+        // 🚨 Warning: ModelProfile unavailable; this is the legacy code path
+        debugLogger.api('USING_LEGACY_PATH', {
+          modelProfileExists: !!modelProfile,
+          modelProfileId: modelProfile?.modelName,
+          modelNameExists: !!modelProfile?.modelName,
+          fallbackModel: 'main',
+          actualModel: model,
+          requestId: currentRequest?.id,
+        })
+
+        // 🚨 FALLBACK: with no valid ModelProfile, throw instead of using the legacy system
+        const errorDetails = {
+          modelProfileExists: !!modelProfile,
+          modelProfileId: modelProfile?.modelName,
+          modelNameExists: !!modelProfile?.modelName,
+          requestedModel: model,
+          requestId: currentRequest?.id,
+        }
+        debugLogger.error('NO_VALID_MODEL_PROFILE', errorDetails)
+        throw new Error(
+          `No valid ModelProfile available for model: ${model}. Please configure model through /model command. Debug: ${JSON.stringify(errorDetails)}`,
+        )
+      }
+    }, { signal }) // 🔧 CRITICAL FIX: pass the AbortSignal to withRetry
1813
+ } catch (error) {
1814
+ logError(error)
1815
+ return getAssistantMessageFromError(error)
1816
+ }
1817
+ const durationMs = Date.now() - start
1818
+ const durationMsIncludingRetries = Date.now() - startIncludingRetries
1819
+
1820
+ const inputTokens = response.usage?.prompt_tokens ?? 0
1821
+ const outputTokens = response.usage?.completion_tokens ?? 0
1822
+ const cacheReadInputTokens =
1823
+ response.usage?.prompt_token_details?.cached_tokens ?? 0
1824
+ const cacheCreationInputTokens =
1825
+ response.usage?.prompt_token_details?.cached_tokens ?? 0
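+  // Note: OpenAI usage exposes only cached prompt tokens, so the same
+  // cached_tokens count feeds both cache-read and cache-write cost terms below,
+  // while the returned usage reports cache_creation_input_tokens as 0.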
+  const costUSD =
+    (inputTokens / 1_000_000) * SONNET_COST_PER_MILLION_INPUT_TOKENS +
+    (outputTokens / 1_000_000) * SONNET_COST_PER_MILLION_OUTPUT_TOKENS +
+    (cacheReadInputTokens / 1_000_000) *
+      SONNET_COST_PER_MILLION_PROMPT_CACHE_READ_TOKENS +
+    (cacheCreationInputTokens / 1_000_000) *
+      SONNET_COST_PER_MILLION_PROMPT_CACHE_WRITE_TOKENS
+
+  addToTotalCost(costUSD, durationMsIncludingRetries)
+
+  // Log the full LLM interaction for debugging (OpenAI path)
+  logLLMInteraction({
+    systemPrompt: systemPrompt.join('\n'),
+    messages: [...openaiSystem, ...openaiMessages],
+    response: response,
+    usage: {
+      inputTokens: inputTokens,
+      outputTokens: outputTokens,
+    },
+    timing: {
+      start: start,
+      end: Date.now(),
+    },
+    apiFormat: 'openai',
+    modelConfig: getModelConfigForDebug(model),
+  })
+
+  return {
+    message: {
+      ...response,
+      content: normalizeContentFromAPI(response.content),
+      usage: {
+        input_tokens: inputTokens,
+        output_tokens: outputTokens,
+        cache_read_input_tokens: cacheReadInputTokens,
+        cache_creation_input_tokens: 0,
+      },
+    },
+    costUSD,
+    durationMs,
+    type: 'assistant',
+    uuid: randomUUID(),
+  }
+}
+
+function getMaxTokensFromProfile(modelProfile: any): number {
+  // Use ModelProfile maxTokens or reasonable default
+  return modelProfile?.maxTokens || 8000
+}
+
+function getModelInputTokenCostUSD(model: string): number {
+  // Find the model in the models object
+  for (const providerModels of Object.values(models)) {
+    const modelInfo = providerModels.find((m: any) => m.model === model)
+    if (modelInfo) {
+      return modelInfo.input_cost_per_token || 0
+    }
+  }
+  // Default fallback cost for unknown models
+  return 0.000003 // $3 per million input tokens (Claude 3.5 Sonnet-class pricing)
+}
+
+function getModelOutputTokenCostUSD(model: string): number {
+  // Find the model in the models object
+  for (const providerModels of Object.values(models)) {
+    const modelInfo = providerModels.find((m: any) => m.model === model)
+    if (modelInfo) {
+      return modelInfo.output_cost_per_token || 0
+    }
+  }
+  // Default fallback cost for unknown models
+  return 0.000015 // $15 per million output tokens (Claude 3.5 Sonnet-class pricing)
+}
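+// Both helpers return USD per single token (0.000003 USD/token = $3 per million
+// tokens), which is why callers multiply by raw token counts.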
+
+// New unified query functions for model pointer system
+export async function queryModel(
+  modelPointer: import('../utils/config').ModelPointerType,
+  messages: (UserMessage | AssistantMessage)[],
+  systemPrompt: string[] = [],
+  signal?: AbortSignal,
+): Promise<AssistantMessage> {
+  // Use queryLLM with the pointer directly
+  return queryLLM(
+    messages,
+    systemPrompt,
+    0, // maxThinkingTokens
+    [], // tools
+    signal || new AbortController().signal,
+    {
+      safeMode: false,
+      model: modelPointer,
+      prependCLISysprompt: true,
+    },
+  )
+}
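+// Usage sketch (assumes a 'quick' pointer has been configured via /model):
+//   const reply = await queryModel('quick', messages, ['Be concise.'])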
+
+// Note: Use queryModel(pointer, ...) directly instead of these convenience functions
+
+// Simplified query function using the quick model pointer
+export async function queryQuick({
+  systemPrompt = [],
+  userPrompt,
+  assistantPrompt,
+  enablePromptCaching = false,
+  signal,
+}: {
+  systemPrompt?: string[]
+  userPrompt: string
+  assistantPrompt?: string
+  enablePromptCaching?: boolean
+  signal?: AbortSignal
+}): Promise<AssistantMessage> {
+  const messages = [
+    {
+      message: { role: 'user', content: userPrompt },
+      type: 'user',
+      uuid: randomUUID(),
+    },
+  ] as (UserMessage | AssistantMessage)[]
+
+  return queryModel('quick', messages, systemPrompt, signal)
+}
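+// Usage sketch:
+//   const answer = await queryQuick({ userPrompt: 'Summarize this diff.' })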