@opencow-ai/opencow-agent-sdk 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -34951,7 +34951,7 @@ function parseReasoningEffort(value) {
34951
34951
  if (!value)
34952
34952
  return;
34953
34953
  const normalized = value.trim().toLowerCase();
34954
- if (normalized === "low" || normalized === "medium" || normalized === "high" || normalized === "xhigh") {
34954
+ if (normalized === "none" || normalized === "minimal" || normalized === "low" || normalized === "medium" || normalized === "high" || normalized === "xhigh") {
34955
34955
  return normalized;
34956
34956
  }
34957
34957
  return;
@@ -35072,7 +35072,10 @@ function resolveProviderRequest(options) {
35072
35072
  transportOverride: options?.transportOverride
35073
35073
  });
35074
35074
  const resolvedModel = transport === "chat_completions" && isEnvTruthy(getQueryEnvVar("CLAUDE_CODE_USE_GITHUB")) ? normalizeGithubModelsApiModel(requestedModel) : descriptor.baseModel;
35075
- const reasoning = options?.reasoningEffortOverride ? { effort: options.reasoningEffortOverride } : descriptor.reasoning;
35075
+ const hasReasoningEffortOverride = !!options && Object.prototype.hasOwnProperty.call(options, "reasoningEffortOverride");
35076
+ const rawEnvReasoningEffortOverride = getQueryEnvVar(QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE);
35077
+ const envReasoningEffortOverride = parseReasoningEffort(rawEnvReasoningEffortOverride);
35078
+ const reasoning = hasReasoningEffortOverride ? options?.reasoningEffortOverride ? { effort: options.reasoningEffortOverride } : undefined : rawEnvReasoningEffortOverride === QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR ? undefined : envReasoningEffortOverride ? { effort: envReasoningEffortOverride } : descriptor.reasoning;
35076
35079
  return {
35077
35080
  transport,
35078
35081
  requestedModel,
@@ -35176,7 +35179,7 @@ function resolveCodexApiCredentials(env2 = process.env) {
35176
35179
  originator: "opencow"
35177
35180
  };
35178
35181
  }
35179
- var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1", DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex", DEFAULT_GITHUB_MODELS_API_MODEL = "openai/gpt-4.1", CODEX_ALIAS_MODELS, QUERY_ENV_KEY_TRANSPORT_OVERRIDE = "__OPENCOW_TRANSPORT_OVERRIDE", QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES = "__OPENCOW_PROVIDER_SPECIFIC_OPENAI_RESPONSES", LOCALHOST_HOSTNAMES, warnedCodexAliasOnce = false, MissingProviderModelError;
35182
+ var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1", DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex", DEFAULT_GITHUB_MODELS_API_MODEL = "openai/gpt-4.1", CODEX_ALIAS_MODELS, QUERY_ENV_KEY_TRANSPORT_OVERRIDE = "__OPENCOW_TRANSPORT_OVERRIDE", QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE = "__OPENCOW_REASONING_EFFORT_OVERRIDE", QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR = "__OPENCOW_CLEAR_REASONING_EFFORT__", QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES = "__OPENCOW_PROVIDER_SPECIFIC_OPENAI_RESPONSES", LOCALHOST_HOSTNAMES, warnedCodexAliasOnce = false, MissingProviderModelError;
35180
35183
  var init_config2 = __esm(() => {
35181
35184
  init_envUtils();
35182
35185
  init_state2();
@@ -96385,7 +96388,7 @@ function convertChunkUsage(usage) {
96385
96388
  return openaiUsageToAnthropicUsage(usage);
96386
96389
  }
96387
96390
  function toOpenAIChatReasoningEffort(effort) {
96388
- return effort === "xhigh" ? "high" : effort;
96391
+ return effort;
96389
96392
  }
96390
96393
  function getOpenAIChatProviderCapabilities(model) {
96391
96394
  return {
@@ -96830,7 +96833,14 @@ class OpenAIShimMessages {
96830
96833
  let httpResponse;
96831
96834
  const promise3 = (async () => {
96832
96835
  const overrideTransport = self2.providerOverride?.transport === "anthropic" ? undefined : self2.providerOverride?.transport;
96833
- const request = resolveProviderRequest({ model: self2.providerOverride?.model ?? params.model, baseUrl: self2.providerOverride?.baseURL, reasoningEffortOverride: self2.reasoningEffort, transportOverride: overrideTransport });
96836
+ const hasProviderReasoningEffortOverride = !!self2.providerOverride && Object.prototype.hasOwnProperty.call(self2.providerOverride, "reasoningEffort");
96837
+ const reasoningEffortOverride = hasProviderReasoningEffortOverride ? self2.providerOverride?.reasoningEffort ?? null : self2.reasoningEffort;
96838
+ const request = resolveProviderRequest({
96839
+ model: self2.providerOverride?.model ?? params.model,
96840
+ baseUrl: self2.providerOverride?.baseURL,
96841
+ ...reasoningEffortOverride !== undefined ? { reasoningEffortOverride } : {},
96842
+ transportOverride: overrideTransport
96843
+ });
96834
96844
  const response = await self2._doRequest(request, params, options);
96835
96845
  httpResponse = response;
96836
96846
  if (params.stream) {
@@ -130220,15 +130230,16 @@ function isMaxTokensCapEnabled() {
130220
130230
  }
130221
130231
  function getMaxOutputTokensForModel(model, opts) {
130222
130232
  const maxOutputTokens = getModelMaxOutputTokens(model);
130223
- const defaultTokens = isMaxTokensCapEnabled() ? Math.min(maxOutputTokens.default, CAPPED_DEFAULT_MAX_TOKENS) : maxOutputTokens.default;
130233
+ const upperLimit = opts?.upperLimitOverride !== undefined && Number.isFinite(opts.upperLimitOverride) && opts.upperLimitOverride >= 1 ? Math.floor(opts.upperLimitOverride) : maxOutputTokens.upperLimit;
130234
+ const defaultTokens = isMaxTokensCapEnabled() ? Math.min(maxOutputTokens.default, CAPPED_DEFAULT_MAX_TOKENS, upperLimit) : Math.min(maxOutputTokens.default, upperLimit);
130224
130235
  if (opts?.override !== undefined) {
130225
- if (!Number.isFinite(opts.override) || opts.override < 1 || opts.override > maxOutputTokens.upperLimit) {
130226
- console.warn(`[opencow] Options.maxOutputTokens=${opts.override} out of range ` + `[1, ${maxOutputTokens.upperLimit}] for model ${model}; clamping.`);
130236
+ if (!Number.isFinite(opts.override) || opts.override < 1 || opts.override > upperLimit) {
130237
+ console.warn(`[opencow] Options.maxOutputTokens=${opts.override} out of range ` + `[1, ${upperLimit}] for model ${model}; clamping.`);
130227
130238
  }
130228
- const clamped = Math.min(Math.max(1, Math.floor(opts.override)), maxOutputTokens.upperLimit);
130239
+ const clamped = Math.min(Math.max(1, Math.floor(opts.override)), upperLimit);
130229
130240
  return clamped;
130230
130241
  }
130231
- const result = validateBoundedIntEnvVar("CLAUDE_CODE_MAX_OUTPUT_TOKENS", resolveEnvVar("MAX_OUTPUT_TOKENS"), defaultTokens, maxOutputTokens.upperLimit);
130242
+ const result = validateBoundedIntEnvVar("CLAUDE_CODE_MAX_OUTPUT_TOKENS", resolveEnvVar("MAX_OUTPUT_TOKENS"), defaultTokens, upperLimit);
130232
130243
  return result.effective;
130233
130244
  }
130234
130245
  var init_maxTokens = __esm(() => {
@@ -255970,6 +255981,13 @@ function getDisableExtglobCommand(shellPath) {
255970
255981
  }
255971
255982
  return null;
255972
255983
  }
255984
+ function getHostProvidedPathCommand() {
255985
+ const path11 = getHostProvidedEnvVar("PATH");
255986
+ if (!path11) {
255987
+ return null;
255988
+ }
255989
+ return `export PATH=${quote([path11])}`;
255990
+ }
255973
255991
  async function createBashShellProvider(shellPath, options2) {
255974
255992
  let currentSandboxTmpDir;
255975
255993
  const snapshotPromise = options2?.skipSnapshot ? Promise.resolve(undefined) : createAndSaveSnapshot(shellPath).catch((error41) => {
@@ -256010,6 +256028,10 @@ async function createBashShellProvider(shellPath, options2) {
256010
256028
  const finalPath = getPlatform() === "windows" ? windowsPathToPosixPath(snapshotFilePath) : snapshotFilePath;
256011
256029
  commandParts.push(`source ${quote([finalPath])} 2>/dev/null || true`);
256012
256030
  }
256031
+ const hostPathCommand = getHostProvidedPathCommand();
256032
+ if (hostPathCommand) {
256033
+ commandParts.push(hostPathCommand);
256034
+ }
256013
256035
  const sessionEnvScript2 = await getSessionEnvironmentScript();
256014
256036
  if (sessionEnvScript2) {
256015
256037
  commandParts.push(sessionEnvScript2);
@@ -281957,7 +281979,11 @@ async function* queryLoop(params, consumedCommandUuids) {
281957
281979
  if (false) {}
281958
281980
  const mediaRecoveryEnabled = reactiveCompact?.isReactiveCompactEnabled() ?? false;
281959
281981
  if (!compactionResult && querySource !== "compact" && querySource !== "session_memory" && !(reactiveCompact?.isReactiveCompactEnabled() && isAutoCompactEnabled()) && !collapseOwnsIt) {
281960
- const { isAtBlockingLimit } = calculateTokenWarningState(tokenCountWithEstimation(messagesForQuery) - snipTokensFreed, toolUseContext.options.mainLoopModel);
281982
+ const { isAtBlockingLimit } = calculateTokenWarningState(tokenCountWithEstimation(messagesForQuery) - snipTokensFreed, toolUseContext.options.mainLoopModel, {
281983
+ contextWindow: toolUseContext.options.contextWindow,
281984
+ maxOutputTokens: toolUseContext.options.maxOutputTokens,
281985
+ maxOutputTokensLimit: toolUseContext.options.maxOutputTokensLimit
281986
+ });
281961
281987
  if (isAtBlockingLimit) {
281962
281988
  yield createAssistantAPIErrorMessage({
281963
281989
  content: PROMPT_TOO_LONG_ERROR_MESSAGE,
@@ -282000,6 +282026,7 @@ async function* queryLoop(params, consumedCommandUuids) {
282000
282026
  allowedAgentTypes: toolUseContext.options.agentDefinitions.allowedAgentTypes,
282001
282027
  hasAppendSystemPrompt: !!toolUseContext.options.appendSystemPrompt,
282002
282028
  maxOutputTokensOverride,
282029
+ maxOutputTokensLimitOverride: params.maxOutputTokensLimitOverride,
282003
282030
  fetchOverride: dumpPromptsFetch,
282004
282031
  mcpTools: appState.mcp.tools,
282005
282032
  hasPendingMcpServers: appState.mcp.clients.some((c6) => c6.type === "pending"),
@@ -282675,7 +282702,7 @@ function getAnthropicEnvMetadata() {
282675
282702
  function getBuildAgeMinutes() {
282676
282703
  if (false)
282677
282704
  ;
282678
- const buildTime = new Date("2026-06-24T10:02:32.669Z").getTime();
282705
+ const buildTime = new Date("2026-06-25T12:29:02.938Z").getTime();
282679
282706
  if (isNaN(buildTime))
282680
282707
  return;
282681
282708
  return Math.floor((Date.now() - buildTime) / 60000);
@@ -283257,6 +283284,7 @@ async function runForkedAgent({
283257
283284
  toolUseContext: isolatedToolUseContext,
283258
283285
  querySource,
283259
283286
  maxOutputTokensOverride: maxOutputTokens,
283287
+ maxOutputTokensLimitOverride: isolatedToolUseContext.options.maxOutputTokensLimit,
283260
283288
  maxTurns,
283261
283289
  skipCacheWrite
283262
283290
  })) {
@@ -283495,7 +283523,17 @@ ${formattedSummary}`;
283495
283523
  if (transcriptPath) {
283496
283524
  baseSummary += `
283497
283525
 
283498
- If you need specific details from before compaction (like exact code snippets, error messages, or content you generated), read the full transcript at: ${transcriptPath}`;
283526
+ IMPORTANT Transcript recovery protocol:
283527
+ The full pre-compaction conversation is preserved at: ${transcriptPath}
283528
+ When you encounter ANY of these situations, you MUST use the Read tool to search the transcript for the missing context BEFORE responding:
283529
+ - You are unsure about a specific detail (file path, function name, error message, code snippet, user preference)
283530
+ - The user references something you cannot find in the summary above
283531
+ - You need exact code that was previously read or written
283532
+ - You are about to make a decision but feel uncertain whether the user already gave guidance on it
283533
+ - A tool name, skill name, or configuration was discussed but is not in the summary
283534
+
283535
+ The transcript is a JSONL file. Read its tail (last 500–2000 lines) first for the most recent context; if that doesn't resolve the gap, read earlier sections.
283536
+ Do NOT guess or hallucinate details that might have been discussed — read the transcript instead.`;
283499
283537
  }
283500
283538
  if (recentMessagesPreserved) {
283501
283539
  baseSummary += `
@@ -283562,6 +283600,7 @@ Your summary should include the following sections:
283562
283600
  8. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
283563
283601
  9. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's most recent explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests or really old requests that were already completed without confirming with the user first.
283564
283602
  If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
283603
+ 10. Uncertain or Incomplete Context: List any items where you are not fully confident in the details — e.g., a tool/skill name you vaguely recall but cannot confirm, a user preference you think was stated but cannot pinpoint, or a decision whose rationale is unclear. Mark each with [NEEDS_TRANSCRIPT_LOOKUP] so the post-compaction assistant knows to read the transcript file for these specific items before acting on them.
283565
283604
 
283566
283605
  Here's an example of how your output should be structured:
283567
283606
 
@@ -283612,10 +283651,14 @@ Here's an example of how your output should be structured:
283612
283651
  9. Optional Next Step:
283613
283652
  [Optional Next step to take]
283614
283653
 
283654
+ 10. Uncertain or Incomplete Context:
283655
+ - [Item with unclear details] [NEEDS_TRANSCRIPT_LOOKUP]
283656
+ - [...]
283657
+
283615
283658
  </summary>
283616
283659
  </example>
283617
283660
 
283618
- Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
283661
+ Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
283619
283662
 
283620
283663
  There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
283621
283664
  <example>
@@ -283643,6 +283686,7 @@ Your summary should include the following sections:
283643
283686
  7. Pending Tasks: Outline any pending tasks from the recent messages.
283644
283687
  8. Current Work: Describe precisely what was being worked on immediately before this summary request.
283645
283688
  9. Optional Next Step: List the next step related to the most recent work. Include direct quotes from the most recent conversation.
283689
+ 10. Uncertain or Incomplete Context: List any items where details are unclear or potentially missing from the recent messages. Mark each with [NEEDS_TRANSCRIPT_LOOKUP].
283646
283690
 
283647
283691
  Here's an example of how your output should be structured:
283648
283692
 
@@ -283683,6 +283727,9 @@ Here's an example of how your output should be structured:
283683
283727
  9. Optional Next Step:
283684
283728
  [Optional Next step to take]
283685
283729
 
283730
+ 10. Uncertain or Incomplete Context:
283731
+ - [Item with unclear details] [NEEDS_TRANSCRIPT_LOOKUP]
283732
+
283686
283733
  </summary>
283687
283734
  </example>
283688
283735
 
@@ -283703,6 +283750,7 @@ Your summary should include the following sections:
283703
283750
  7. Pending Tasks: Outline any pending tasks.
283704
283751
  8. Work Completed: Describe what was accomplished by the end of this portion.
283705
283752
  9. Context for Continuing Work: Summarize any context, decisions, or state that would be needed to understand and continue the work in subsequent messages.
283753
+ 10. Uncertain or Incomplete Context: List any items where details are unclear or potentially incomplete. Mark each with [NEEDS_TRANSCRIPT_LOOKUP] so the continuing session knows to look them up in the transcript before acting.
283706
283754
 
283707
283755
  Here's an example of how your output should be structured:
283708
283756
 
@@ -283743,6 +283791,9 @@ Here's an example of how your output should be structured:
283743
283791
  9. Context for Continuing Work:
283744
283792
  [Key context, decisions, or state needed to continue the work]
283745
283793
 
283794
+ 10. Uncertain or Incomplete Context:
283795
+ - [Item with unclear details] [NEEDS_TRANSCRIPT_LOOKUP]
283796
+
283746
283797
  </summary>
283747
283798
  </example>
283748
283799
 
@@ -284194,7 +284245,10 @@ async function streamCompactSummary({
284194
284245
  toolChoice: undefined,
284195
284246
  isNonInteractiveSession: context4.options.isNonInteractiveSession,
284196
284247
  hasAppendSystemPrompt: !!context4.options.appendSystemPrompt,
284197
- maxOutputTokensOverride: Math.min(COMPACT_MAX_OUTPUT_TOKENS, getMaxOutputTokensForModel(context4.options.mainLoopModel)),
284248
+ maxOutputTokensOverride: Math.min(COMPACT_MAX_OUTPUT_TOKENS, getMaxOutputTokensForModel(context4.options.mainLoopModel, {
284249
+ upperLimitOverride: context4.options.maxOutputTokensLimit
284250
+ })),
284251
+ maxOutputTokensLimitOverride: context4.options.maxOutputTokensLimit,
284198
284252
  querySource: "compact",
284199
284253
  agents: context4.options.agentDefinitions.activeAgents,
284200
284254
  mcpTools: [],
@@ -284928,7 +284982,10 @@ var init_sessionMemoryCompact = __esm(() => {
284928
284982
 
284929
284983
  // src/controller/compact/autoCompact.ts
284930
284984
  function getEffectiveContextWindowSize(model, opts) {
284931
- const reservedTokensForSummary = Math.min(getMaxOutputTokensForModel(model, { override: opts?.maxOutputTokens }), MAX_OUTPUT_TOKENS_FOR_SUMMARY);
284985
+ const reservedTokensForSummary = Math.min(getMaxOutputTokensForModel(model, {
284986
+ override: opts?.maxOutputTokens,
284987
+ upperLimitOverride: opts?.maxOutputTokensLimit
284988
+ }), MAX_OUTPUT_TOKENS_FOR_SUMMARY);
284932
284989
  let contextWindow = getContextWindowForModel(model, getSdkBetas(), {
284933
284990
  override: opts?.contextWindow
284934
284991
  });
@@ -285014,7 +285071,8 @@ async function autoCompactIfNeeded(messages, toolUseContext, cacheSafeParams, qu
285014
285071
  const model = toolUseContext.options.mainLoopModel;
285015
285072
  const opts = {
285016
285073
  contextWindow: toolUseContext.options.contextWindow,
285017
- maxOutputTokens: toolUseContext.options.maxOutputTokens
285074
+ maxOutputTokens: toolUseContext.options.maxOutputTokens,
285075
+ maxOutputTokensLimit: toolUseContext.options.maxOutputTokensLimit
285018
285076
  };
285019
285077
  const shouldCompact = await shouldAutoCompact(messages, model, querySource, snipTokensFreed, opts);
285020
285078
  if (!shouldCompact) {
@@ -293768,7 +293826,10 @@ ${deferredToolList}
293768
293826
  betasParams.push(STRUCTURED_OUTPUTS_BETA_HEADER);
293769
293827
  }
293770
293828
  }
293771
- const maxOutputTokens2 = retryContext?.maxTokensOverride || options2.maxOutputTokensOverride || getMaxOutputTokensForModel(options2.model);
293829
+ const maxOutputTokens2 = retryContext?.maxTokensOverride || getMaxOutputTokensForModel(options2.model, {
293830
+ override: options2.maxOutputTokensOverride,
293831
+ upperLimitOverride: options2.maxOutputTokensLimitOverride
293832
+ });
293772
293833
  const hasThinking = thinkingConfig.type !== "disabled" && !isEnvTruthy(resolveEnvVar("DISABLE_THINKING"));
293773
293834
  let thinking = undefined;
293774
293835
  if (hasThinking && modelSupportsThinking(options2.model)) {
@@ -299730,8 +299791,10 @@ async function processUserInputBase(input, mode, setToolJSX, context4, pastedCon
299730
299791
  }
299731
299792
  }
299732
299793
  }
299794
+ const processSlashCommand = getProcessSlashCommand();
299733
299795
  if (false) {}
299734
- const shouldExtractAttachments = !skipAttachments && inputString !== null && (mode !== "prompt" || effectiveSkipSlash || !inputString.startsWith("/"));
299796
+ const shouldFallbackSlashToPrompt = inputString !== null && mode === "prompt" && !effectiveSkipSlash && inputString.startsWith("/") && !processSlashCommand;
299797
+ const shouldExtractAttachments = !skipAttachments && inputString !== null && (mode !== "prompt" || effectiveSkipSlash || !inputString.startsWith("/") || shouldFallbackSlashToPrompt);
299735
299798
  queryCheckpoint("query_attachment_loading_start");
299736
299799
  const attachmentMessages = shouldExtractAttachments ? await toArray2(getAttachmentMessages(inputString, context4, ideSelection ?? null, [], messages, querySource)) : [];
299737
299800
  queryCheckpoint("query_attachment_loading_end");
@@ -299742,11 +299805,10 @@ async function processUserInputBase(input, mode, setToolJSX, context4, pastedCon
299742
299805
  return addImageMetadataMessage(await processBashCommand(inputString, precedingInputBlocks, attachmentMessages, context4, setToolJSX), imageMetadataTexts);
299743
299806
  }
299744
299807
  if (inputString !== null && !effectiveSkipSlash && inputString.startsWith("/")) {
299745
- const processSlashCommand = getProcessSlashCommand();
299746
- if (!processSlashCommand)
299747
- throw new SlashCommandHandlerNotRegisteredError("slash");
299748
- const slashResult = await processSlashCommand(inputString, precedingInputBlocks, imageContentBlocks, attachmentMessages, context4, setToolJSX, uuid3, isAlreadyProcessing, canUseTool);
299749
- return addImageMetadataMessage(slashResult, imageMetadataTexts);
299808
+ if (processSlashCommand) {
299809
+ const slashResult = await processSlashCommand(inputString, precedingInputBlocks, imageContentBlocks, attachmentMessages, context4, setToolJSX, uuid3, isAlreadyProcessing, canUseTool);
299810
+ return addImageMetadataMessage(slashResult, imageMetadataTexts);
299811
+ }
299750
299812
  }
299751
299813
  if (inputString !== null && mode === "prompt") {
299752
299814
  const trimmedInput = inputString.trim();
@@ -299993,6 +300055,7 @@ class QueryEngine {
299993
300055
  maxTurns,
299994
300056
  maxBudgetUsd,
299995
300057
  maxOutputTokens,
300058
+ maxOutputTokensLimit,
299996
300059
  contextWindow,
299997
300060
  compact,
299998
300061
  taskBudget,
@@ -300082,6 +300145,7 @@ class QueryEngine {
300082
300145
  theme: resolveThemeSetting(getGlobalConfig().theme),
300083
300146
  maxBudgetUsd,
300084
300147
  maxOutputTokens,
300148
+ maxOutputTokensLimit,
300085
300149
  contextWindow,
300086
300150
  modelProviders: this.config.modelProviders,
300087
300151
  subagentDisallowedTools: this.config.subagentDisallowedTools
@@ -300187,6 +300251,7 @@ class QueryEngine {
300187
300251
  agentDefinitions: { activeAgents: agents, allAgents: [] },
300188
300252
  maxBudgetUsd,
300189
300253
  maxOutputTokens,
300254
+ maxOutputTokensLimit,
300190
300255
  contextWindow,
300191
300256
  modelProviders: this.config.modelProviders,
300192
300257
  subagentDisallowedTools: this.config.subagentDisallowedTools
@@ -300307,6 +300372,7 @@ class QueryEngine {
300307
300372
  querySource: "sdk",
300308
300373
  maxTurns,
300309
300374
  maxOutputTokensOverride: maxOutputTokens,
300375
+ maxOutputTokensLimitOverride: maxOutputTokensLimit,
300310
300376
  compactRequest: compact,
300311
300377
  taskBudget
300312
300378
  })) {
@@ -300677,6 +300743,7 @@ async function* ask({
300677
300743
  maxTurns,
300678
300744
  maxBudgetUsd,
300679
300745
  maxOutputTokens,
300746
+ maxOutputTokensLimit,
300680
300747
  contextWindow,
300681
300748
  compact,
300682
300749
  taskBudget,
@@ -300722,6 +300789,7 @@ async function* ask({
300722
300789
  maxTurns,
300723
300790
  maxBudgetUsd,
300724
300791
  maxOutputTokens,
300792
+ maxOutputTokensLimit,
300725
300793
  contextWindow,
300726
300794
  compact,
300727
300795
  taskBudget,
@@ -329743,14 +329811,20 @@ function normalizeInitialMessages(raw) {
329743
329811
  }
329744
329812
  function optionsWithProviderRoutingEnv(options2) {
329745
329813
  const transport = options2.transport;
329814
+ const reasoningEffort = options2.reasoningEffort;
329746
329815
  const openaiResponses = options2.providerSpecific?.openaiResponses;
329747
- if (transport === undefined && openaiResponses === undefined) {
329816
+ if (transport === undefined && reasoningEffort === undefined && openaiResponses === undefined) {
329748
329817
  return options2;
329749
329818
  }
329750
329819
  const mergedEnv = { ...options2.env ?? {} };
329751
329820
  if (typeof transport === "string" && transport.length > 0) {
329752
329821
  mergedEnv[QUERY_ENV_KEY_TRANSPORT_OVERRIDE] = transport;
329753
329822
  }
329823
+ if (reasoningEffort === null) {
329824
+ mergedEnv[QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE] = QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR;
329825
+ } else if (typeof reasoningEffort === "string" && reasoningEffort.length > 0) {
329826
+ mergedEnv[QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE] = reasoningEffort;
329827
+ }
329754
329828
  if (openaiResponses && typeof openaiResponses === "object") {
329755
329829
  mergedEnv[QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES] = JSON.stringify(openaiResponses);
329756
329830
  }
@@ -329980,6 +330054,7 @@ function runSdkQueryRuntime(params) {
329980
330054
  maxTurns: options2.maxTurns,
329981
330055
  maxBudgetUsd: options2.maxBudgetUsd,
329982
330056
  maxOutputTokens: options2.maxOutputTokens,
330057
+ maxOutputTokensLimit: options2.maxOutputTokensLimit,
329983
330058
  contextWindow: options2.contextWindow,
329984
330059
  compact: options2.compact,
329985
330060
  taskBudget: options2.taskBudget,
@@ -336057,4 +336132,4 @@ export {
336057
336132
  AbortError2 as AbortError
336058
336133
  };
336059
336134
 
336060
- //# debugId=C79EE43EDCAB4CD864756E2164756E21
336135
+ //# debugId=F19FA9D2F1AEFB9664756E2164756E21
@@ -7,6 +7,7 @@ import { trySessionMemoryCompaction } from './sessionMemoryCompact.js';
7
7
  export declare function getEffectiveContextWindowSize(model: string, opts?: {
8
8
  contextWindow?: number;
9
9
  maxOutputTokens?: number;
10
+ maxOutputTokensLimit?: number;
10
11
  }): number;
11
12
  export type AutoCompactTrackingState = {
12
13
  compacted: boolean;
@@ -21,10 +22,12 @@ export declare const MANUAL_COMPACT_BUFFER_TOKENS = 3000;
21
22
  export declare function getAutoCompactThreshold(model: string, opts?: {
22
23
  contextWindow?: number;
23
24
  maxOutputTokens?: number;
25
+ maxOutputTokensLimit?: number;
24
26
  }): number;
25
27
  export declare function calculateTokenWarningState(tokenUsage: number, model: string, opts?: {
26
28
  contextWindow?: number;
27
29
  maxOutputTokens?: number;
30
+ maxOutputTokensLimit?: number;
28
31
  }): {
29
32
  percentLeft: number;
30
33
  isAboveWarningThreshold: boolean;
@@ -36,6 +39,7 @@ export declare function isAutoCompactEnabled(): boolean;
36
39
  export declare function shouldAutoCompact(messages: Message[], model: string, querySource?: QuerySource, snipTokensFreed?: number, opts?: {
37
40
  contextWindow?: number;
38
41
  maxOutputTokens?: number;
42
+ maxOutputTokensLimit?: number;
39
43
  }): Promise<boolean>;
40
44
  export declare function autoCompactIfNeeded(messages: Message[], toolUseContext: ToolRuntimeContext, cacheSafeParams: CacheSafeParams, querySource?: QuerySource, tracking?: AutoCompactTrackingState, snipTokensFreed?: number): Promise<{
41
45
  wasCompacted: boolean;
@@ -32,6 +32,7 @@ export type Options = {
32
32
  isNonInteractiveSession: boolean;
33
33
  extraToolSchemas?: BetaToolUnion[];
34
34
  maxOutputTokensOverride?: number;
35
+ maxOutputTokensLimitOverride?: number;
35
36
  fallbackModel?: string;
36
37
  onStreamingFallback?: () => void;
37
38
  querySource: QuerySource;
@@ -125,7 +125,7 @@ export type SettingSource = 'user' | 'project' | 'local';
125
125
  import type { SdkTool } from '../../capabilities/SdkTool.js';
126
126
  import type { LayoutProfile } from '../../session/layout/LayoutProfile.js';
127
127
  import type { SdkRule } from '../../session/rules/SdkRule.js';
128
- import type { ProviderTransport, DeprecatedProviderTransportName } from '../../providers/shared/config.js';
128
+ import type { ProviderTransport, DeprecatedProviderTransportName, ReasoningEffort } from '../../providers/shared/config.js';
129
129
  import type { FileHistoryChangeListener, FileHistoryState } from '../../session/fileHistory.js';
130
130
  export type Options = {
131
131
  cwd?: string;
@@ -148,6 +148,13 @@ export type Options = {
148
148
  * values are clamped + warn-logged, never thrown.
149
149
  */
150
150
  maxOutputTokens?: number;
151
+ /**
152
+ * Optional host-authoritative upper bound for `maxOutputTokens`. Use this
153
+ * when the selected model is a custom gateway/deployment id whose native
154
+ * output cap is known by the host model catalog but not by the SDK's built-in
155
+ * model table. When unset, SDK built-in per-model limits are used.
156
+ */
157
+ maxOutputTokensLimit?: number;
151
158
  /**
152
159
  * Per-session context window override (input tokens). Used by autoCompact
153
160
  * threshold computation and any other code path that calls
@@ -161,6 +168,20 @@ export type Options = {
161
168
  * Values < 10_000 or > 5_000_000 are dropped + warn-logged (table fallback).
162
169
  */
163
170
  contextWindow?: number;
171
+ /**
172
+ * Host-provided default reasoning effort for the selected model. This is
173
+ * protocol-neutral: OpenAI Chat Completions serializes it as
174
+ * `reasoning_effort`, while OpenAI Responses serializes it as
175
+ * `reasoning.effort` and can still merge protocol-specific fields such as
176
+ * `reasoning.summary`.
177
+ *
178
+ * Leave unset to preserve SDK/model defaults. Pass `null` on a per-turn
179
+ * override to clear inherited or descriptor defaults. Hosts with an
180
+ * authoritative model catalog should pass that model's default here, and
181
+ * user-selected effort can override the catalog default before it reaches
182
+ * the SDK.
183
+ */
184
+ reasoningEffort?: ReasoningEffort | null;
164
185
  /**
165
186
  * 本回合手动压缩上下文(对应 host 的 /compact)。设置后 SDK 复用 auto-compact
166
187
  * 机制(isAutoCompact=false + 这些指令)压缩当前消息、发出 system/compact_boundary,
@@ -341,8 +362,8 @@ export type Options = {
341
362
  * - `metadata` — string-keyed map attached to the request
342
363
  * - `responseFormat` — JSON schema enforcement
343
364
  * - `reasoning` — reasoning configuration override. `effort` controls
344
- * how hard the model thinks (minimal/low/medium/high; default model-
345
- * specific). `summary` controls whether human-readable reasoning
365
+ * how hard the model thinks (none/minimal/low/medium/high/xhigh;
366
+ * default model-specific). `summary` controls whether human-readable reasoning
346
367
  * summary is returned in the SSE stream ('auto'/'concise'/'detailed';
347
368
  * when unset, upstream returns only encrypted reasoning items —
348
369
  * useful for state preservation but invisible in UI). Merges with
@@ -357,7 +378,7 @@ export type Options = {
357
378
  metadata?: Record<string, string>;
358
379
  responseFormat?: unknown;
359
380
  reasoning?: {
360
- effort?: 'minimal' | 'low' | 'medium' | 'high';
381
+ effort?: ReasoningEffort;
361
382
  summary?: 'auto' | 'concise' | 'detailed' | null;
362
383
  };
363
384
  };
@@ -1,4 +1,4 @@
1
- import type { ResolvedCodexCredentials, ResolvedProviderRequest } from '../../providers/shared/config.js';
1
+ import type { ResolvedCodexCredentials, ResolvedProviderRequest, ReasoningEffort } from '../../providers/shared/config.js';
2
2
  export interface AnthropicUsage {
3
3
  input_tokens: number;
4
4
  output_tokens: number;
@@ -53,7 +53,7 @@ export interface ResponsesProviderSpecific {
53
53
  * reasoning (Codex aliases or `?reasoning=high` model suffix) per-key.
54
54
  *
55
55
  * - `effort`: how hard the model thinks. Without it, model-specific
56
- * default applies (gpt-5 default = 'medium').
56
+ * default applies.
57
57
  * - `summary`: whether human-readable reasoning summary is streamed via
58
58
  * `response.reasoning_summary_text.delta` events. WITHOUT this set,
59
59
  * the upstream returns only encrypted_content reasoning items —
@@ -62,7 +62,7 @@ export interface ResponsesProviderSpecific {
62
62
  * chain-of-thought text.
63
63
  */
64
64
  reasoning?: {
65
- effort?: 'minimal' | 'low' | 'medium' | 'high';
65
+ effort?: ReasoningEffort;
66
66
  summary?: 'auto' | 'concise' | 'detailed' | null;
67
67
  };
68
68
  }
@@ -22,6 +22,7 @@
22
22
  */
23
23
  import type { ProviderOverride } from '../shared/routing.js';
24
24
  import { type AnthropicStreamEvent, type AnthropicUsage, type ShimCreateParams } from '../../providers/codex/shim.js';
25
+ import { type ReasoningEffort } from '../../providers/shared/config.js';
25
26
  interface OpenAIMessage {
26
27
  role: 'system' | 'user' | 'assistant' | 'tool';
27
28
  content?: string | null | Array<{
@@ -98,24 +99,12 @@ export declare function openaiUsageToAnthropicUsage(usage: {
98
99
  * responsibilities.
99
100
  */
100
101
  /**
101
- * Convert an SDK-internal reasoning-effort tier to the value accepted by
102
- * OpenAI's chat_completions `reasoning_effort` parameter.
103
- *
104
- * Two vocabularies meet here:
105
- * - SDK vocab: `'low' | 'medium' | 'high' | 'xhigh'`
106
- * (`'xhigh'` is the SDK-internal "Max" tier,
107
- * surfaced as "max" in the CLI — see
108
- * `lib/effort.ts`.)
109
- * - OpenAI chat wire vocab: `'low' | 'medium' | 'high'`
110
- * (Spec: platform.openai.com/docs/api-reference/chat/create)
111
- *
112
- * `'xhigh'` is clamped down to `'high'` rather than rejected: the SDK
113
- * semantic is "as much reasoning as the provider will give" and `'high'`
114
- * is the upper bound on this wire. Sending `'xhigh'` raw would 400 on
115
- * strict proxies. The Responses API (codex) has its own serialisation
116
- * and does NOT go through this function — see `codex/shim.ts`.
102
+ * Convert an SDK reasoning-effort tier to the value accepted by OpenAI Chat
103
+ * Completions `reasoning_effort`. The current OpenAI wire accepts the same
104
+ * vocabulary as the SDK; model-specific legality is enforced by the host
105
+ * catalog / upstream provider, not by this transport boundary.
117
106
  */
118
- export declare function toOpenAIChatReasoningEffort(effort: 'low' | 'medium' | 'high' | 'xhigh'): 'low' | 'medium' | 'high';
107
+ export declare function toOpenAIChatReasoningEffort(effort: ReasoningEffort): ReasoningEffort;
119
108
  export declare function buildOpenAIRequestBody(params: ShimCreateParams, ctx: {
120
109
  resolvedModel: string;
121
110
  baseUrl: string;
@@ -133,7 +122,7 @@ export declare function buildOpenAIRequestBody(params: ShimCreateParams, ctx: {
133
122
  * transports serialise differently on the wire.
134
123
  */
135
124
  reasoning?: {
136
- effort: 'low' | 'medium' | 'high' | 'xhigh';
125
+ effort: ReasoningEffort;
137
126
  };
138
127
  }): Record<string, unknown>;
139
128
  export declare function openaiStreamToAnthropic(response: Response, model: string): AsyncGenerator<AnthropicStreamEvent>;
@@ -209,7 +198,7 @@ export declare function createOpenAIShimClient(options: {
209
198
  defaultHeaders?: Record<string, string>;
210
199
  maxRetries?: number;
211
200
  timeout?: number;
212
- reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh';
201
+ reasoningEffort?: ReasoningEffort;
213
202
  providerOverride?: ProviderOverride;
214
203
  }): unknown;
215
204
  export {};
@@ -2,7 +2,7 @@ export declare const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
2
2
  export declare const DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex";
3
3
  /** Default GitHub Models API model when user selects copilot / github:copilot */
4
4
  export declare const DEFAULT_GITHUB_MODELS_API_MODEL = "openai/gpt-4.1";
5
- type ReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh';
5
+ export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
6
6
  /**
7
7
  * Wire-level transport selected by `resolveProviderTransport`.
8
8
  *
@@ -45,6 +45,8 @@ export type DeprecatedProviderTransportName = 'codex_responses';
45
45
  * CLAUDE_CODE_USE_GITHUB, OPENCOW_DEBUG_REASONING).
46
46
  */
47
47
  export declare const QUERY_ENV_KEY_TRANSPORT_OVERRIDE = "__OPENCOW_TRANSPORT_OVERRIDE";
48
+ export declare const QUERY_ENV_KEY_REASONING_EFFORT_OVERRIDE = "__OPENCOW_REASONING_EFFORT_OVERRIDE";
49
+ export declare const QUERY_ENV_VALUE_REASONING_EFFORT_CLEAR = "__OPENCOW_CLEAR_REASONING_EFFORT__";
48
50
  export declare const QUERY_ENV_KEY_PROVIDER_SPECIFIC_OPENAI_RESPONSES = "__OPENCOW_PROVIDER_SPECIFIC_OPENAI_RESPONSES";
49
51
  export type ResolvedProviderRequest = {
50
52
  transport: ProviderTransport;
@@ -139,7 +141,7 @@ export declare function resolveProviderRequest(options?: {
139
141
  model?: string;
140
142
  baseUrl?: string;
141
143
  fallbackModel?: string;
142
- reasoningEffortOverride?: ReasoningEffort;
144
+ reasoningEffortOverride?: ReasoningEffort | null;
143
145
  /**
144
146
  * Optional explicit transport override forwarded to
145
147
  * `resolveProviderTransport`. When unset, callers can still rely on the
@@ -181,4 +183,3 @@ export declare function parseChatgptAccountId(token: string | undefined): string
181
183
  export declare function resolveOpenAIResponsesCredentials(): ResolvedCodexCredentials;
182
184
  export declare function resolveCodexApiCredentials(env?: NodeJS.ProcessEnv): ResolvedCodexCredentials;
183
185
  export declare function getReasoningEffortForModel(model: string): ReasoningEffort | undefined;
184
- export {};
@@ -1,3 +1,4 @@
1
1
  export declare function getMaxOutputTokensForModel(model: string, opts?: {
2
2
  override?: number;
3
+ upperLimitOverride?: number;
3
4
  }): number;
@@ -1,5 +1,5 @@
1
1
  import type { SettingsJson } from '../../session/settings/types.js';
2
- import type { ProviderTransport } from './config.js';
2
+ import type { ProviderTransport, ReasoningEffort } from './config.js';
3
3
  /**
4
4
  * Provider override resolved for a specific agent/model.
5
5
  * When present, the API client uses these instead of the session-global
@@ -21,6 +21,8 @@ export interface ProviderOverride {
21
21
  * OpenAI shim.
22
22
  */
23
23
  transport?: ProviderTransport | 'anthropic';
24
+ /** Default reasoning effort for this model route; null clears session default. */
25
+ reasoningEffort?: ReasoningEffort | null;
24
26
  /** Per-wire extras (e.g. openai-responses reasoning summary config). */
25
27
  providerSpecific?: {
26
28
  openaiResponses?: Record<string, unknown>;
package/dist/query.d.ts CHANGED
@@ -19,6 +19,7 @@ export type QueryParams = {
19
19
  fallbackModel?: string;
20
20
  querySource: QuerySource;
21
21
  maxOutputTokensOverride?: number;
22
+ maxOutputTokensLimitOverride?: number;
22
23
  maxTurns?: number;
23
24
  skipCacheWrite?: boolean;
24
25
  taskBudget?: {