npm - @opencow-ai/opencow-agent-sdk - Versions diffs - 0.4.12 → 0.4.13 - Mend

@opencow-ai/opencow-agent-sdk 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/cli.mjs +102 -34
package/dist/client.js +101 -26
package/dist/controller/compact/autoCompact.d.ts +4 -0
package/dist/controller/loop.d.ts +1 -0
package/dist/entrypoints/sdk/runtimeTypes.d.ts +25 -4
package/dist/providers/codex/shim.d.ts +3 -3
package/dist/providers/openai/shim.d.ts +8 -19
package/dist/providers/shared/config.d.ts +4 -3
package/dist/providers/shared/model/maxTokens.d.ts +1 -0
package/dist/providers/shared/routing.d.ts +3 -1
package/dist/query.d.ts +1 -0
package/dist/sdk.js +101 -26
package/dist/types/toolRuntime.d.ts +1 -0
package/package.json +1 -1

package/dist/client.js CHANGED Viewed

@@ -34951,7 +34951,7 @@ function parseReasoningEffort(value) {
   if (!value)
     return;
   const normalized = value.trim().toLowerCase();
-  if (normalized === "low" || normalized === "medium" || normalized === "high" || normalized === "xhigh") {
+  if (normalized === "none" || normalized === "minimal" || normalized === "low" || normalized === "medium" || normalized === "high" || normalized === "xhigh") {
     return normalized;
   }
   return;
@@ -35072,7 +35072,10 @@ function resolveProviderRequest(options) {
     transportOverride: options?.transportOverride
   });
   const resolvedModel = transport === "chat_completions" && isEnvTruthy(getQueryEnvVar("CLAUDE_CODE_USE_GITHUB")) ? normalizeGithubModelsApiModel(requestedModel) : descriptor.baseModel;
-  const reasoning = options?.reasoningEffortOverride ? { effort: options.reasoningEffortOverride } : descriptor.reasoning;
+  const hasReasoningEffortOverride = !!options && Object.prototype.hasOwnProperty.call(options, "reasoningEffortOverride");
+  const rawEnvReasoningEffortOverride = getQueryEnvVar(QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE);
+  const envReasoningEffortOverride = parseReasoningEffort(rawEnvReasoningEffortOverride);
+  const reasoning = hasReasoningEffortOverride ? options?.reasoningEffortOverride ? { effort: options.reasoningEffortOverride } : undefined : rawEnvReasoningEffortOverride === QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR ? undefined : envReasoningEffortOverride ? { effort: envReasoningEffortOverride } : descriptor.reasoning;
   return {
     transport,
     requestedModel,
@@ -35176,7 +35179,7 @@ function resolveCodexApiCredentials(env2 = process.env) {
     originator: "opencow"
   };
 }
-var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1", DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex", DEFAULT_GITHUB_MODELS_API_MODEL = "openai/gpt-4.1", CODEX_ALIAS_MODELS, QUERY_ENV_KEY_TRANSPORT_OVERRIDE = "__OPENCOW_TRANSPORT_OVERRIDE", QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES = "__OPENCOW_PROVIDER_SPECIFIC_OPENAI_RESPONSES", LOCALHOST_HOSTNAMES, warnedCodexAliasOnce = false, MissingProviderModelError;
+var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1", DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex", DEFAULT_GITHUB_MODELS_API_MODEL = "openai/gpt-4.1", CODEX_ALIAS_MODELS, QUERY_ENV_KEY_TRANSPORT_OVERRIDE = "__OPENCOW_TRANSPORT_OVERRIDE", QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE = "__OPENCOW_REASONING_EFFORT_OVERRIDE", QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR = "__OPENCOW_CLEAR_REASONING_EFFORT__", QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES = "__OPENCOW_PROVIDER_SPECIFIC_OPENAI_RESPONSES", LOCALHOST_HOSTNAMES, warnedCodexAliasOnce = false, MissingProviderModelError;
 var init_config2 = __esm(() => {
   init_envUtils();
   init_state2();
@@ -96385,7 +96388,7 @@ function convertChunkUsage(usage) {
   return openaiUsageToAnthropicUsage(usage);
 }
 function toOpenAIChatReasoningEffort(effort) {
-  return effort === "xhigh" ? "high" : effort;
+  return effort;
 }
 function getOpenAIChatProviderCapabilities(model) {
   return {
@@ -96830,7 +96833,14 @@ class OpenAIShimMessages {
     let httpResponse;
     const promise3 = (async () => {
       const overrideTransport = self2.providerOverride?.transport === "anthropic" ? undefined : self2.providerOverride?.transport;
-      const request = resolveProviderRequest({ model: self2.providerOverride?.model ?? params.model, baseUrl: self2.providerOverride?.baseURL, reasoningEffortOverride: self2.reasoningEffort, transportOverride: overrideTransport });
+      const hasProviderReasoningEffortOverride = !!self2.providerOverride && Object.prototype.hasOwnProperty.call(self2.providerOverride, "reasoningEffort");
+      const reasoningEffortOverride = hasProviderReasoningEffortOverride ? self2.providerOverride?.reasoningEffort ?? null : self2.reasoningEffort;
+      const request = resolveProviderRequest({
+        model: self2.providerOverride?.model ?? params.model,
+        baseUrl: self2.providerOverride?.baseURL,
+        ...reasoningEffortOverride !== undefined ? { reasoningEffortOverride } : {},
+        transportOverride: overrideTransport
+      });
       const response = await self2._doRequest(request, params, options);
       httpResponse = response;
       if (params.stream) {
@@ -130220,15 +130230,16 @@ function isMaxTokensCapEnabled() {
 }
 function getMaxOutputTokensForModel(model, opts) {
   const maxOutputTokens = getModelMaxOutputTokens(model);
-  const defaultTokens = isMaxTokensCapEnabled() ? Math.min(maxOutputTokens.default, CAPPED_DEFAULT_MAX_TOKENS) : maxOutputTokens.default;
+  const upperLimit = opts?.upperLimitOverride !== undefined && Number.isFinite(opts.upperLimitOverride) && opts.upperLimitOverride >= 1 ? Math.floor(opts.upperLimitOverride) : maxOutputTokens.upperLimit;
+  const defaultTokens = isMaxTokensCapEnabled() ? Math.min(maxOutputTokens.default, CAPPED_DEFAULT_MAX_TOKENS, upperLimit) : Math.min(maxOutputTokens.default, upperLimit);
   if (opts?.override !== undefined) {
-    if (!Number.isFinite(opts.override) || opts.override < 1 || opts.override > maxOutputTokens.upperLimit) {
-      console.warn(`[opencow] Options.maxOutputTokens=${opts.override} out of range ` + `[1, ${maxOutputTokens.upperLimit}] for model ${model}; clamping.`);
+    if (!Number.isFinite(opts.override) || opts.override < 1 || opts.override > upperLimit) {
+      console.warn(`[opencow] Options.maxOutputTokens=${opts.override} out of range ` + `[1, ${upperLimit}] for model ${model}; clamping.`);
     }
-    const clamped = Math.min(Math.max(1, Math.floor(opts.override)), maxOutputTokens.upperLimit);
+    const clamped = Math.min(Math.max(1, Math.floor(opts.override)), upperLimit);
     return clamped;
   }
-  const result = validateBoundedIntEnvVar("CLAUDE_CODE_MAX_OUTPUT_TOKENS", resolveEnvVar("MAX_OUTPUT_TOKENS"), defaultTokens, maxOutputTokens.upperLimit);
+  const result = validateBoundedIntEnvVar("CLAUDE_CODE_MAX_OUTPUT_TOKENS", resolveEnvVar("MAX_OUTPUT_TOKENS"), defaultTokens, upperLimit);
   return result.effective;
 }
 var init_maxTokens = __esm(() => {
@@ -255970,6 +255981,13 @@ function getDisableExtglobCommand(shellPath) {
   }
   return null;
 }
+function getHostProvidedPathCommand() {
+  const path11 = getHostProvidedEnvVar("PATH");
+  if (!path11) {
+    return null;
+  }
+  return `export PATH=${quote([path11])}`;
+}
 async function createBashShellProvider(shellPath, options2) {
   let currentSandboxTmpDir;
   const snapshotPromise = options2?.skipSnapshot ? Promise.resolve(undefined) : createAndSaveSnapshot(shellPath).catch((error41) => {
@@ -256010,6 +256028,10 @@ async function createBashShellProvider(shellPath, options2) {
         const finalPath = getPlatform() === "windows" ? windowsPathToPosixPath(snapshotFilePath) : snapshotFilePath;
         commandParts.push(`source ${quote([finalPath])} 2>/dev/null || true`);
       }
+      const hostPathCommand = getHostProvidedPathCommand();
+      if (hostPathCommand) {
+        commandParts.push(hostPathCommand);
+      }
       const sessionEnvScript2 = await getSessionEnvironmentScript();
       if (sessionEnvScript2) {
         commandParts.push(sessionEnvScript2);
@@ -281957,7 +281979,11 @@ async function* queryLoop(params, consumedCommandUuids) {
       if (false) {}
       const mediaRecoveryEnabled = reactiveCompact?.isReactiveCompactEnabled() ?? false;
       if (!compactionResult && querySource !== "compact" && querySource !== "session_memory" && !(reactiveCompact?.isReactiveCompactEnabled() && isAutoCompactEnabled()) && !collapseOwnsIt) {
-        const { isAtBlockingLimit } = calculateTokenWarningState(tokenCountWithEstimation(messagesForQuery) - snipTokensFreed, toolUseContext.options.mainLoopModel);
+        const { isAtBlockingLimit } = calculateTokenWarningState(tokenCountWithEstimation(messagesForQuery) - snipTokensFreed, toolUseContext.options.mainLoopModel, {
+          contextWindow: toolUseContext.options.contextWindow,
+          maxOutputTokens: toolUseContext.options.maxOutputTokens,
+          maxOutputTokensLimit: toolUseContext.options.maxOutputTokensLimit
+        });
         if (isAtBlockingLimit) {
           yield createAssistantAPIErrorMessage({
             content: PROMPT_TOO_LONG_ERROR_MESSAGE,
@@ -282000,6 +282026,7 @@ async function* queryLoop(params, consumedCommandUuids) {
                 allowedAgentTypes: toolUseContext.options.agentDefinitions.allowedAgentTypes,
                 hasAppendSystemPrompt: !!toolUseContext.options.appendSystemPrompt,
                 maxOutputTokensOverride,
+                maxOutputTokensLimitOverride: params.maxOutputTokensLimitOverride,
                 fetchOverride: dumpPromptsFetch,
                 mcpTools: appState.mcp.tools,
                 hasPendingMcpServers: appState.mcp.clients.some((c6) => c6.type === "pending"),
@@ -282675,7 +282702,7 @@ function getAnthropicEnvMetadata() {
 function getBuildAgeMinutes() {
   if (false)
     ;
-  const buildTime = new Date("2026-06-24T10:02:32.669Z").getTime();
+  const buildTime = new Date("2026-06-25T12:29:02.938Z").getTime();
   if (isNaN(buildTime))
     return;
   return Math.floor((Date.now() - buildTime) / 60000);
@@ -283257,6 +283284,7 @@ async function runForkedAgent({
       toolUseContext: isolatedToolUseContext,
       querySource,
       maxOutputTokensOverride: maxOutputTokens,
+      maxOutputTokensLimitOverride: isolatedToolUseContext.options.maxOutputTokensLimit,
       maxTurns,
       skipCacheWrite
     })) {
@@ -283495,7 +283523,17 @@ ${formattedSummary}`;
   if (transcriptPath) {
     baseSummary += `
-If you need specific details from before compaction (like exact code snippets, error messages, or content you generated), read the full transcript at: ${transcriptPath}`;
+IMPORTANT — Transcript recovery protocol:
+The full pre-compaction conversation is preserved at: ${transcriptPath}
+When you encounter ANY of these situations, you MUST use the Read tool to search the transcript for the missing context BEFORE responding:
+- You are unsure about a specific detail (file path, function name, error message, code snippet, user preference)
+- The user references something you cannot find in the summary above
+- You need exact code that was previously read or written
+- You are about to make a decision but feel uncertain whether the user already gave guidance on it
+- A tool name, skill name, or configuration was discussed but is not in the summary
+The transcript is a JSONL file. Read its tail (last 500–2000 lines) first for the most recent context; if that doesn't resolve the gap, read earlier sections.
+Do NOT guess or hallucinate details that might have been discussed — read the transcript instead.`;
   }
   if (recentMessagesPreserved) {
     baseSummary += `
@@ -283562,6 +283600,7 @@ Your summary should include the following sections:
 8. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
 9. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's most recent explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests or really old requests that were already completed without confirming with the user first.
                        If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
+10. Uncertain or Incomplete Context: List any items where you are not fully confident in the details — e.g., a tool/skill name you vaguely recall but cannot confirm, a user preference you think was stated but cannot pinpoint, or a decision whose rationale is unclear. Mark each with [NEEDS_TRANSCRIPT_LOOKUP] so the post-compaction assistant knows to read the transcript file for these specific items before acting on them.
 Here's an example of how your output should be structured:
@@ -283612,10 +283651,14 @@ Here's an example of how your output should be structured:
 9. Optional Next Step:
    [Optional Next step to take]
+10. Uncertain or Incomplete Context:
+   - [Item with unclear details] [NEEDS_TRANSCRIPT_LOOKUP]
+   - [...]
 </summary>
 </example>
-Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
+Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
 There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
 <example>
@@ -283643,6 +283686,7 @@ Your summary should include the following sections:
 7. Pending Tasks: Outline any pending tasks from the recent messages.
 8. Current Work: Describe precisely what was being worked on immediately before this summary request.
 9. Optional Next Step: List the next step related to the most recent work. Include direct quotes from the most recent conversation.
+10. Uncertain or Incomplete Context: List any items where details are unclear or potentially missing from the recent messages. Mark each with [NEEDS_TRANSCRIPT_LOOKUP].
 Here's an example of how your output should be structured:
@@ -283683,6 +283727,9 @@ Here's an example of how your output should be structured:
 9. Optional Next Step:
    [Optional Next step to take]
+10. Uncertain or Incomplete Context:
+   - [Item with unclear details] [NEEDS_TRANSCRIPT_LOOKUP]
 </summary>
 </example>
@@ -283703,6 +283750,7 @@ Your summary should include the following sections:
 7. Pending Tasks: Outline any pending tasks.
 8. Work Completed: Describe what was accomplished by the end of this portion.
 9. Context for Continuing Work: Summarize any context, decisions, or state that would be needed to understand and continue the work in subsequent messages.
+10. Uncertain or Incomplete Context: List any items where details are unclear or potentially incomplete. Mark each with [NEEDS_TRANSCRIPT_LOOKUP] so the continuing session knows to look them up in the transcript before acting.
 Here's an example of how your output should be structured:
@@ -283743,6 +283791,9 @@ Here's an example of how your output should be structured:
 9. Context for Continuing Work:
    [Key context, decisions, or state needed to continue the work]
+10. Uncertain or Incomplete Context:
+   - [Item with unclear details] [NEEDS_TRANSCRIPT_LOOKUP]
 </summary>
 </example>
@@ -284194,7 +284245,10 @@ async function streamCompactSummary({
           toolChoice: undefined,
           isNonInteractiveSession: context4.options.isNonInteractiveSession,
           hasAppendSystemPrompt: !!context4.options.appendSystemPrompt,
-          maxOutputTokensOverride: Math.min(COMPACT_MAX_OUTPUT_TOKENS, getMaxOutputTokensForModel(context4.options.mainLoopModel)),
+          maxOutputTokensOverride: Math.min(COMPACT_MAX_OUTPUT_TOKENS, getMaxOutputTokensForModel(context4.options.mainLoopModel, {
+            upperLimitOverride: context4.options.maxOutputTokensLimit
+          })),
+          maxOutputTokensLimitOverride: context4.options.maxOutputTokensLimit,
           querySource: "compact",
           agents: context4.options.agentDefinitions.activeAgents,
           mcpTools: [],
@@ -284928,7 +284982,10 @@ var init_sessionMemoryCompact = __esm(() => {
 // src/controller/compact/autoCompact.ts
 function getEffectiveContextWindowSize(model, opts) {
-  const reservedTokensForSummary = Math.min(getMaxOutputTokensForModel(model, { override: opts?.maxOutputTokens }), MAX_OUTPUT_TOKENS_FOR_SUMMARY);
+  const reservedTokensForSummary = Math.min(getMaxOutputTokensForModel(model, {
+    override: opts?.maxOutputTokens,
+    upperLimitOverride: opts?.maxOutputTokensLimit
+  }), MAX_OUTPUT_TOKENS_FOR_SUMMARY);
   let contextWindow = getContextWindowForModel(model, getSdkBetas(), {
     override: opts?.contextWindow
   });
@@ -285014,7 +285071,8 @@ async function autoCompactIfNeeded(messages, toolUseContext, cacheSafeParams, qu
   const model = toolUseContext.options.mainLoopModel;
   const opts = {
     contextWindow: toolUseContext.options.contextWindow,
-    maxOutputTokens: toolUseContext.options.maxOutputTokens
+    maxOutputTokens: toolUseContext.options.maxOutputTokens,
+    maxOutputTokensLimit: toolUseContext.options.maxOutputTokensLimit
   };
   const shouldCompact = await shouldAutoCompact(messages, model, querySource, snipTokensFreed, opts);
   if (!shouldCompact) {
@@ -293768,7 +293826,10 @@ ${deferredToolList}
         betasParams.push(STRUCTURED_OUTPUTS_BETA_HEADER);
       }
     }
-    const maxOutputTokens2 = retryContext?.maxTokensOverride || options2.maxOutputTokensOverride || getMaxOutputTokensForModel(options2.model);
+    const maxOutputTokens2 = retryContext?.maxTokensOverride || getMaxOutputTokensForModel(options2.model, {
+      override: options2.maxOutputTokensOverride,
+      upperLimitOverride: options2.maxOutputTokensLimitOverride
+    });
     const hasThinking = thinkingConfig.type !== "disabled" && !isEnvTruthy(resolveEnvVar("DISABLE_THINKING"));
     let thinking = undefined;
     if (hasThinking && modelSupportsThinking(options2.model)) {
@@ -299730,8 +299791,10 @@ async function processUserInputBase(input, mode, setToolJSX, context4, pastedCon
       }
     }
   }
+  const processSlashCommand = getProcessSlashCommand();
   if (false) {}
-  const shouldExtractAttachments = !skipAttachments && inputString !== null && (mode !== "prompt" || effectiveSkipSlash || !inputString.startsWith("/"));
+  const shouldFallbackSlashToPrompt = inputString !== null && mode === "prompt" && !effectiveSkipSlash && inputString.startsWith("/") && !processSlashCommand;
+  const shouldExtractAttachments = !skipAttachments && inputString !== null && (mode !== "prompt" || effectiveSkipSlash || !inputString.startsWith("/") || shouldFallbackSlashToPrompt);
   queryCheckpoint("query_attachment_loading_start");
   const attachmentMessages = shouldExtractAttachments ? await toArray2(getAttachmentMessages(inputString, context4, ideSelection ?? null, [], messages, querySource)) : [];
   queryCheckpoint("query_attachment_loading_end");
@@ -299742,11 +299805,10 @@ async function processUserInputBase(input, mode, setToolJSX, context4, pastedCon
     return addImageMetadataMessage(await processBashCommand(inputString, precedingInputBlocks, attachmentMessages, context4, setToolJSX), imageMetadataTexts);
   }
   if (inputString !== null && !effectiveSkipSlash && inputString.startsWith("/")) {
-    const processSlashCommand = getProcessSlashCommand();
-    if (!processSlashCommand)
-      throw new SlashCommandHandlerNotRegisteredError("slash");
-    const slashResult = await processSlashCommand(inputString, precedingInputBlocks, imageContentBlocks, attachmentMessages, context4, setToolJSX, uuid3, isAlreadyProcessing, canUseTool);
-    return addImageMetadataMessage(slashResult, imageMetadataTexts);
+    if (processSlashCommand) {
+      const slashResult = await processSlashCommand(inputString, precedingInputBlocks, imageContentBlocks, attachmentMessages, context4, setToolJSX, uuid3, isAlreadyProcessing, canUseTool);
+      return addImageMetadataMessage(slashResult, imageMetadataTexts);
+    }
   }
   if (inputString !== null && mode === "prompt") {
     const trimmedInput = inputString.trim();
@@ -299993,6 +300055,7 @@ class QueryEngine {
       maxTurns,
       maxBudgetUsd,
       maxOutputTokens,
+      maxOutputTokensLimit,
       contextWindow,
       compact,
       taskBudget,
@@ -300082,6 +300145,7 @@ class QueryEngine {
         theme: resolveThemeSetting(getGlobalConfig().theme),
         maxBudgetUsd,
         maxOutputTokens,
+        maxOutputTokensLimit,
         contextWindow,
         modelProviders: this.config.modelProviders,
         subagentDisallowedTools: this.config.subagentDisallowedTools
@@ -300187,6 +300251,7 @@ class QueryEngine {
         agentDefinitions: { activeAgents: agents, allAgents: [] },
         maxBudgetUsd,
         maxOutputTokens,
+        maxOutputTokensLimit,
         contextWindow,
         modelProviders: this.config.modelProviders,
         subagentDisallowedTools: this.config.subagentDisallowedTools
@@ -300307,6 +300372,7 @@ class QueryEngine {
       querySource: "sdk",
       maxTurns,
       maxOutputTokensOverride: maxOutputTokens,
+      maxOutputTokensLimitOverride: maxOutputTokensLimit,
       compactRequest: compact,
       taskBudget
     })) {
@@ -300677,6 +300743,7 @@ async function* ask({
   maxTurns,
   maxBudgetUsd,
   maxOutputTokens,
+  maxOutputTokensLimit,
   contextWindow,
   compact,
   taskBudget,
@@ -300722,6 +300789,7 @@ async function* ask({
     maxTurns,
     maxBudgetUsd,
     maxOutputTokens,
+    maxOutputTokensLimit,
     contextWindow,
     compact,
     taskBudget,
@@ -329743,14 +329811,20 @@ function normalizeInitialMessages(raw) {
 }
 function optionsWithProviderRoutingEnv(options2) {
   const transport = options2.transport;
+  const reasoningEffort = options2.reasoningEffort;
   const openaiResponses = options2.providerSpecific?.openaiResponses;
-  if (transport === undefined && openaiResponses === undefined) {
+  if (transport === undefined && reasoningEffort === undefined && openaiResponses === undefined) {
     return options2;
   }
   const mergedEnv = { ...options2.env ?? {} };
   if (typeof transport === "string" && transport.length > 0) {
     mergedEnv[QUERY_ENV_KEY_TRANSPORT_OVERRIDE] = transport;
   }
+  if (reasoningEffort === null) {
+    mergedEnv[QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE] = QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR;
+  } else if (typeof reasoningEffort === "string" && reasoningEffort.length > 0) {
+    mergedEnv[QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE] = reasoningEffort;
+  }
   if (openaiResponses && typeof openaiResponses === "object") {
     mergedEnv[QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES] = JSON.stringify(openaiResponses);
   }
@@ -329980,6 +330054,7 @@ function runSdkQueryRuntime(params) {
           maxTurns: options2.maxTurns,
           maxBudgetUsd: options2.maxBudgetUsd,
           maxOutputTokens: options2.maxOutputTokens,
+          maxOutputTokensLimit: options2.maxOutputTokensLimit,
           contextWindow: options2.contextWindow,
           compact: options2.compact,
           taskBudget: options2.taskBudget,
@@ -336057,4 +336132,4 @@ export {
   AbortError2 as AbortError
 };
-//# debugId=C79EE43EDCAB4CD864756E2164756E21
+//# debugId=F19FA9D2F1AEFB9664756E2164756E21

package/dist/controller/compact/autoCompact.d.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { trySessionMemoryCompaction } from './sessionMemoryCompact.js';
 export declare function getEffectiveContextWindowSize(model: string, opts?: {
     contextWindow?: number;
     maxOutputTokens?: number;
+    maxOutputTokensLimit?: number;
 }): number;
 export type AutoCompactTrackingState = {
     compacted: boolean;
@@ -21,10 +22,12 @@ export declare const MANUAL_COMPACT_BUFFER_TOKENS = 3000;
 export declare function getAutoCompactThreshold(model: string, opts?: {
     contextWindow?: number;
     maxOutputTokens?: number;
+    maxOutputTokensLimit?: number;
 }): number;
 export declare function calculateTokenWarningState(tokenUsage: number, model: string, opts?: {
     contextWindow?: number;
     maxOutputTokens?: number;
+    maxOutputTokensLimit?: number;
 }): {
     percentLeft: number;
     isAboveWarningThreshold: boolean;
@@ -36,6 +39,7 @@ export declare function isAutoCompactEnabled(): boolean;
 export declare function shouldAutoCompact(messages: Message[], model: string, querySource?: QuerySource, snipTokensFreed?: number, opts?: {
     contextWindow?: number;
     maxOutputTokens?: number;
+    maxOutputTokensLimit?: number;
 }): Promise<boolean>;
 export declare function autoCompactIfNeeded(messages: Message[], toolUseContext: ToolRuntimeContext, cacheSafeParams: CacheSafeParams, querySource?: QuerySource, tracking?: AutoCompactTrackingState, snipTokensFreed?: number): Promise<{
     wasCompacted: boolean;

package/dist/controller/loop.d.ts CHANGED Viewed

@@ -32,6 +32,7 @@ export type Options = {
     isNonInteractiveSession: boolean;
     extraToolSchemas?: BetaToolUnion[];
     maxOutputTokensOverride?: number;
+    maxOutputTokensLimitOverride?: number;
     fallbackModel?: string;
     onStreamingFallback?: () => void;
     querySource: QuerySource;

package/dist/entrypoints/sdk/runtimeTypes.d.ts CHANGED Viewed

@@ -125,7 +125,7 @@ export type SettingSource = 'user' | 'project' | 'local';
 import type { SdkTool } from '../../capabilities/SdkTool.js';
 import type { LayoutProfile } from '../../session/layout/LayoutProfile.js';
 import type { SdkRule } from '../../session/rules/SdkRule.js';
-import type { ProviderTransport, DeprecatedProviderTransportName } from '../../providers/shared/config.js';
+import type { ProviderTransport, DeprecatedProviderTransportName, ReasoningEffort } from '../../providers/shared/config.js';
 import type { FileHistoryChangeListener, FileHistoryState } from '../../session/fileHistory.js';
 export type Options = {
     cwd?: string;
@@ -148,6 +148,13 @@ export type Options = {
      * values are clamped + warn-logged, never thrown.
      */
     maxOutputTokens?: number;
+    /**
+     * Optional host-authoritative upper bound for `maxOutputTokens`. Use this
+     * when the selected model is a custom gateway/deployment id whose native
+     * output cap is known by the host model catalog but not by the SDK's built-in
+     * model table. When unset, SDK built-in per-model limits are used.
+     */
+    maxOutputTokensLimit?: number;
     /**
      * Per-session context window override (input tokens). Used by autoCompact
      * threshold computation and any other code path that calls
@@ -161,6 +168,20 @@ export type Options = {
      * Values < 10_000 or > 5_000_000 are dropped + warn-logged (table fallback).
      */
     contextWindow?: number;
+    /**
+     * Host-provided default reasoning effort for the selected model. This is
+     * protocol-neutral: OpenAI Chat Completions serializes it as
+     * `reasoning_effort`, while OpenAI Responses serializes it as
+     * `reasoning.effort` and can still merge protocol-specific fields such as
+     * `reasoning.summary`.
+     *
+     * Leave unset to preserve SDK/model defaults. Pass `null` on a per-turn
+     * override to clear inherited or descriptor defaults. Hosts with an
+     * authoritative model catalog should pass that model's default here, and
+     * user-selected effort can override the catalog default before it reaches
+     * the SDK.
+     */
+    reasoningEffort?: ReasoningEffort | null;
     /**
      * 本回合手动压缩上下文（对应 host 的 /compact）。设置后 SDK 复用 auto-compact
      * 机制（isAutoCompact=false + 这些指令）压缩当前消息、发出 system/compact_boundary，
@@ -341,8 +362,8 @@ export type Options = {
      *   - `metadata` — string-keyed map attached to the request
      *   - `responseFormat` — JSON schema enforcement
      *   - `reasoning` — reasoning configuration override. `effort` controls
-     *     how hard the model thinks (minimal/low/medium/high; default model-
-     *     specific). `summary` controls whether human-readable reasoning
+     *     how hard the model thinks (none/minimal/low/medium/high/xhigh;
+     *     default model-specific). `summary` controls whether human-readable reasoning
      *     summary is returned in the SSE stream ('auto'/'concise'/'detailed';
      *     when unset, upstream returns only encrypted reasoning items —
      *     useful for state preservation but invisible in UI). Merges with
@@ -357,7 +378,7 @@ export type Options = {
             metadata?: Record<string, string>;
             responseFormat?: unknown;
             reasoning?: {
-                effort?: 'minimal' | 'low' | 'medium' | 'high';
+                effort?: ReasoningEffort;
                 summary?: 'auto' | 'concise' | 'detailed' | null;
             };
         };

package/dist/providers/codex/shim.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { ResolvedCodexCredentials, ResolvedProviderRequest } from '../../providers/shared/config.js';
+import type { ResolvedCodexCredentials, ResolvedProviderRequest, ReasoningEffort } from '../../providers/shared/config.js';
 export interface AnthropicUsage {
     input_tokens: number;
     output_tokens: number;
@@ -53,7 +53,7 @@ export interface ResponsesProviderSpecific {
      * reasoning (Codex aliases or `?reasoning=high` model suffix) per-key.
      *
      * - `effort`: how hard the model thinks. Without it, model-specific
-     *   default applies (gpt-5 default = 'medium').
+     *   default applies.
      * - `summary`: whether human-readable reasoning summary is streamed via
      *   `response.reasoning_summary_text.delta` events. WITHOUT this set,
      *   the upstream returns only encrypted_content reasoning items —
@@ -62,7 +62,7 @@ export interface ResponsesProviderSpecific {
      *   chain-of-thought text.
      */
     reasoning?: {
-        effort?: 'minimal' | 'low' | 'medium' | 'high';
+        effort?: ReasoningEffort;
         summary?: 'auto' | 'concise' | 'detailed' | null;
     };
 }

package/dist/providers/openai/shim.d.ts CHANGED Viewed

@@ -22,6 +22,7 @@
  */
 import type { ProviderOverride } from '../shared/routing.js';
 import { type AnthropicStreamEvent, type AnthropicUsage, type ShimCreateParams } from '../../providers/codex/shim.js';
+import { type ReasoningEffort } from '../../providers/shared/config.js';
 interface OpenAIMessage {
     role: 'system' | 'user' | 'assistant' | 'tool';
     content?: string | null | Array<{
@@ -98,24 +99,12 @@ export declare function openaiUsageToAnthropicUsage(usage: {
  * responsibilities.
  */
 /**
- * Convert an SDK-internal reasoning-effort tier to the value accepted by
- * OpenAI's chat_completions `reasoning_effort` parameter.
- *
- * Two vocabularies meet here:
- *   - SDK vocab:              `'low' | 'medium' | 'high' | 'xhigh'`
- *                             (`'xhigh'` is the SDK-internal "Max" tier,
- *                             surfaced as "max" in the CLI — see
- *                             `lib/effort.ts`.)
- *   - OpenAI chat wire vocab: `'low' | 'medium' | 'high'`
- *                             (Spec: platform.openai.com/docs/api-reference/chat/create)
- *
- * `'xhigh'` is clamped down to `'high'` rather than rejected: the SDK
- * semantic is "as much reasoning as the provider will give" and `'high'`
- * is the upper bound on this wire. Sending `'xhigh'` raw would 400 on
- * strict proxies. The Responses API (codex) has its own serialisation
- * and does NOT go through this function — see `codex/shim.ts`.
+ * Convert an SDK reasoning-effort tier to the value accepted by OpenAI Chat
+ * Completions `reasoning_effort`. The current OpenAI wire accepts the same
+ * vocabulary as the SDK; model-specific legality is enforced by the host
+ * catalog / upstream provider, not by this transport boundary.
  */
-export declare function toOpenAIChatReasoningEffort(effort: 'low' | 'medium' | 'high' | 'xhigh'): 'low' | 'medium' | 'high';
+export declare function toOpenAIChatReasoningEffort(effort: ReasoningEffort): ReasoningEffort;
 export declare function buildOpenAIRequestBody(params: ShimCreateParams, ctx: {
     resolvedModel: string;
     baseUrl: string;
@@ -133,7 +122,7 @@ export declare function buildOpenAIRequestBody(params: ShimCreateParams, ctx: {
      * transports serialise differently on the wire.
      */
     reasoning?: {
-        effort: 'low' | 'medium' | 'high' | 'xhigh';
+        effort: ReasoningEffort;
     };
 }): Record<string, unknown>;
 export declare function openaiStreamToAnthropic(response: Response, model: string): AsyncGenerator<AnthropicStreamEvent>;
@@ -209,7 +198,7 @@ export declare function createOpenAIShimClient(options: {
     defaultHeaders?: Record<string, string>;
     maxRetries?: number;
     timeout?: number;
-    reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh';
+    reasoningEffort?: ReasoningEffort;
     providerOverride?: ProviderOverride;
 }): unknown;
 export {};

package/dist/providers/shared/config.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@ export declare const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
 export declare const DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex";
 /** Default GitHub Models API model when user selects copilot / github:copilot */
 export declare const DEFAULT_GITHUB_MODELS_API_MODEL = "openai/gpt-4.1";
-type ReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh';
+export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
 /**
  * Wire-level transport selected by `resolveProviderTransport`.
  *
@@ -45,6 +45,8 @@ export type DeprecatedProviderTransportName = 'codex_responses';
  * CLAUDE_CODE_USE_GITHUB, OPENCOW_DEBUG_REASONING).
  */
 export declare const QUERY_ENV_KEY_TRANSPORT_OVERRIDE = "__OPENCOW_TRANSPORT_OVERRIDE";
+export declare const QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE = "__OPENCOW_REASONING_EFFORT_OVERRIDE";
+export declare const QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR = "__OPENCOW_CLEAR_REASONING_EFFORT__";
 export declare const QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES = "__OPENCOW_PROVIDER_SPECIFIC_OPENAI_RESPONSES";
 export type ResolvedProviderRequest = {
     transport: ProviderTransport;
@@ -139,7 +141,7 @@ export declare function resolveProviderRequest(options?: {
     model?: string;
     baseUrl?: string;
     fallbackModel?: string;
-    reasoningEffortOverride?: ReasoningEffort;
+    reasoningEffortOverride?: ReasoningEffort | null;
     /**
      * Optional explicit transport override forwarded to
      * `resolveProviderTransport`. When unset, callers can still rely on the
@@ -181,4 +183,3 @@ export declare function parseChatgptAccountId(token: string | undefined): string
 export declare function resolveOpenAIResponsesCredentials(): ResolvedCodexCredentials;
 export declare function resolveCodexApiCredentials(env?: NodeJS.ProcessEnv): ResolvedCodexCredentials;
 export declare function getReasoningEffortForModel(model: string): ReasoningEffort | undefined;
-export {};

package/dist/providers/shared/model/maxTokens.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
 export declare function getMaxOutputTokensForModel(model: string, opts?: {
     override?: number;
+    upperLimitOverride?: number;
 }): number;

package/dist/providers/shared/routing.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { SettingsJson } from '../../session/settings/types.js';
-import type { ProviderTransport } from './config.js';
+import type { ProviderTransport, ReasoningEffort } from './config.js';
 /**
  * Provider override resolved for a specific agent/model.
  * When present, the API client uses these instead of the session-global
@@ -21,6 +21,8 @@ export interface ProviderOverride {
      * OpenAI shim.
      */
     transport?: ProviderTransport | 'anthropic';
+    /** Default reasoning effort for this model route; null clears session default. */
+    reasoningEffort?: ReasoningEffort | null;
     /** Per-wire extras (e.g. openai-responses reasoning summary config). */
     providerSpecific?: {
         openaiResponses?: Record<string, unknown>;

package/dist/query.d.ts CHANGED Viewed

@@ -19,6 +19,7 @@ export type QueryParams = {
     fallbackModel?: string;
     querySource: QuerySource;
     maxOutputTokensOverride?: number;
+    maxOutputTokensLimitOverride?: number;
     maxTurns?: number;
     skipCacheWrite?: boolean;
     taskBudget?: {