npm - opencode-anthropic-fix - Versions diffs - 0.1.2 → 0.1.4 - Mend

opencode-anthropic-fix 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/index.mjs CHANGED Viewed

@@ -3041,7 +3041,15 @@ export async function AnthropicAuthPlugin({ client, project, directory, worktree
                     sessionMetrics.lastQuota.inputTokens = maxUtilization;
                   }
-                  if (maxUtilization > 0.8) {
+                  // Proactive account management is gated on config. When
+                  // account_management.proactive_disabled is true (default),
+                  // we never apply penalties on a 200 OK response — those
+                  // penalties were locking out single-account users whose
+                  // server-side quota was still in `allowed_warning` state.
+                  // The reactive 429 path below is unaffected.
+                  const proactiveDisabled = config.account_management?.proactive_disabled !== false;
+                  if (!proactiveDisabled && maxUtilization > 0.8) {
                     const penalty = Math.round((maxUtilization - 0.8) * 50); // 0-10 points
                     accountManager.applyUtilizationPenalty(account, penalty);
                     debugLog("high rate limit utilization", {
@@ -3052,7 +3060,7 @@ export async function AnthropicAuthPlugin({ client, project, directory, worktree
                     });
                   }
-                  if (anySurpassed) {
+                  if (!proactiveDisabled && anySurpassed) {
                     accountManager.applySurpassedThreshold(account, surpassedResetAt);
                     debugLog("rate limit threshold surpassed", {
                       accountIndex: account.index,
@@ -3070,8 +3078,10 @@ export async function AnthropicAuthPlugin({ client, project, directory, worktree
                   }
                   // Predictive rate limit avoidance: switch account BEFORE hitting 429
-                  // Parse reset timestamps to compute time-weighted risk
-                  if (maxUtilization > 0.6 && accountManager.getAccountCount() > 1) {
+                  // Parse reset timestamps to compute time-weighted risk.
+                  // Gated on proactive_disabled — when true (default), no automatic
+                  // switches happen on 200 OK responses (fully manual rotation).
+                  if (!proactiveDisabled && maxUtilization > 0.6 && accountManager.getAccountCount() > 1) {
                     let highestRisk = 0;
                     for (const win of RATE_LIMIT_WINDOWS) {
                       const utilizationStr = response.headers.get(`anthropic-ratelimit-unified-${win.key}-utilization`);
@@ -5722,6 +5732,24 @@ function buildAnthropicBillingHeader(version, firstUserMessage, provider) {
 // Opencode's customizations after ~5800 chars diverge and trigger extra usage billing.
 const MAX_SAFE_SYSTEM_TEXT_LENGTH = 5000;
+// A5: Subagent CC-prefix cache.
+//
+// Context: opencode/packages/opencode/src/session/llm.ts:110 uses
+//   `input.agent.prompt ? [input.agent.prompt] : SystemPrompt.provider(model)`
+// so any agent with a custom prompt (explore, fast, title, summary, etc.)
+// fires WITHOUT the base CC prompt — the server-side fingerprint match fails
+// and the request is billed as pay-as-you-go credits instead of Max-plan usage.
+//
+// Fix: on the first main-agent call (where the anchor is present), cache the
+// sanitized CC prefix. On subsequent subagent calls (anchor missing), prepend
+// the cached prefix to the sanitized blocks so the fingerprint matches again.
+//
+// The cache lives at module scope because buildSystemPromptBlocks is re-entered
+// per request. It gets populated exactly once per process on the first main call.
+const MAX_SUBAGENT_CC_PREFIX = MAX_SAFE_SYSTEM_TEXT_LENGTH;
+const SUBAGENT_CC_ANCHOR = "You are an interactive";
+let cachedCCPrompt = null;
 function sanitizeSystemText(text) {
   // QA fix M4: use word boundaries to avoid mangling URLs and code identifiers
   let sanitized = text.replace(/\bOpenCode\b/g, "Claude Code").replace(/\bopencode\b/gi, "Claude");
@@ -6035,6 +6063,40 @@ function buildSystemPromptBlocks(system, signature) {
     text: compactSystemText(sanitizeSystemText(item.text), signature.promptCompactionMode),
   }));
+  // A5: Subagent CC-prefix cache/inject (see constant declaration above for context).
+  //
+  // After sanitize, main-agent blocks start with "You are an interactive..." because
+  // sanitizeSystemText() strips everything before that anchor. Subagent blocks
+  // (custom prompts from input.agent.prompt) do NOT start with the anchor —
+  // they start with whatever the agent template says (e.g., "You are a file search
+  // specialist.").
+  //
+  // This logic runs ONLY for Anthropic requests with signature enabled (signature.enabled
+  // is false for non-Anthropic providers), and skips the title-generator fast path
+  // because that one is replaced wholesale with COMPACT_TITLE_GENERATOR_SYSTEM_PROMPT below.
+  if (signature.enabled && !titleGeneratorRequest && sanitized.length > 0) {
+    const firstText = typeof sanitized[0]?.text === "string" ? sanitized[0].text : "";
+    const hasCcAnchor = firstText.startsWith(SUBAGENT_CC_ANCHOR);
+    if (hasCcAnchor) {
+      // Main-agent path: cache the prefix on the first hit so subagents can reuse it.
+      // We slice to MAX_SUBAGENT_CC_PREFIX to avoid unbounded growth if the upstream
+      // sanitize limit is ever raised.
+      if (!cachedCCPrompt) {
+        cachedCCPrompt = firstText.slice(0, MAX_SUBAGENT_CC_PREFIX);
+      }
+    } else if (cachedCCPrompt) {
+      // Subagent path: prepend the cached CC prefix so the fingerprint matches.
+      // We prepend it as its own block rather than concatenating it into the
+      // subagent's text, so the original subagent prompt stays as a separate
+      // block — dedupeSystemBlocks and splitSysPromptPrefix handle the join
+      // on their own downstream.
+      sanitized = [{ type: "text", text: cachedCCPrompt }, ...sanitized];
+    }
+    // If !hasCcAnchor && !cachedCCPrompt: no-op. The cache primes on the very
+    // first main call in a process. In practice opencode always fires a main
+    // call before any subagent, so this branch is only hit in synthetic tests.
+  }
   if (titleGeneratorRequest) {
     sanitized = [{ type: "text", text: COMPACT_TITLE_GENERATOR_SYSTEM_PROMPT }];
   } else if (signature.promptCompactionMode !== "off") {
@@ -6133,9 +6195,10 @@ function buildAnthropicBetaHeader(
   // === ALWAYS-ON BETAS (Claude Code v2.1.90 base set) ===
   // These are ALWAYS included regardless of env vars or feature flags.
-  if (!haiku) {
-    betas.push(CLAUDE_CODE_BETA_FLAG); // "claude-code-20250219"
-  }
+  // NOTE: Real Claude Code skips this beta for Haiku, but we include it
+  // so that Haiku subagents (via model-router delegation) get full mimic
+  // behavior from the Anthropic API.
+  betas.push(CLAUDE_CODE_BETA_FLAG); // "claude-code-20250219"
   // Tool search: use provider-aware header.
   // 1P/Foundry → advanced-tool-use-2025-11-20 (enables broader tool capabilities)
@@ -6147,10 +6210,20 @@ function buildAnthropicBetaHeader(
   }
   betas.push(FAST_MODE_BETA_FLAG); // "fast-mode-2026-02-01"
-  betas.push(EFFORT_BETA_FLAG); // "effort-2025-11-24"
-  // Interleaved thinking — always-on unless explicitly disabled
-  if (!isTruthyEnv(process.env.DISABLE_INTERLEAVED_THINKING)) {
+  // effort-2025-11-24 — real CC's Lyz() only pushes this flag when rE(model)
+  // is true (Opus 4.6 / Sonnet 4.6). Pushing it for non-adaptive models like
+  // Haiku is a fingerprint mismatch vs real CC and can contaminate billing
+  // attribution even when the request body has no effort field.
+  if (isAdaptiveThinkingModel(model)) {
+    betas.push(EFFORT_BETA_FLAG); // "effort-2025-11-24"
+  }
+  // Interleaved thinking — real CC's i01 pushes via hv4(model), which is
+  // (firstParty && non-Claude-3). Claude 3.x models don't support interleaved
+  // thinking and real CC never sends this flag for them, so emitting it
+  // diverges the fingerprint for legacy Haiku/Sonnet 3.x requests.
+  if (!isTruthyEnv(process.env.DISABLE_INTERLEAVED_THINKING) && !/claude-3-/i.test(model)) {
     betas.push("interleaved-thinking-2025-05-14");
   }
@@ -6548,6 +6621,27 @@ function transformRequestBody(body, signature, runtime, betaHeader, config) {
       parsed.thinking = normalizeThinkingBlock(parsed.thinking, parsed.model || "");
     }
+    // Fingerprint fix: real Claude Code v2.1.87+ nests the effort control inside
+    // `output_config.effort` (via Lyz() in cli.js). opencode's provider transform
+    // for variant=max on Opus 4.6 / Sonnet 4.6 sets `effort` at the top level,
+    // which causes Anthropic's server to fingerprint the body as non-CC and bill
+    // it as pay-as-you-go — surfacing as "You're out of extra usage" even on a
+    // valid Max plan. Move it into output_config when we're talking to an
+    // adaptive-thinking model so the wire shape matches real CC.
+    if (typeof parsed.effort === "string" && parsed.model && isAdaptiveThinkingModel(parsed.model)) {
+      if (!parsed.output_config || typeof parsed.output_config !== "object") {
+        parsed.output_config = {};
+      }
+      if (!("effort" in parsed.output_config)) {
+        parsed.output_config.effort = parsed.effort;
+      }
+      delete parsed.effort;
+    } else if (Object.prototype.hasOwnProperty.call(parsed, "effort")) {
+      // Non-adaptive models never carry a top-level effort in real CC — strip it
+      // to avoid polluting the fingerprint for models like Haiku.
+      delete parsed.effort;
+    }
     // Claude Code temperature rule: when extended thinking is active (any type),
     // temperature must be omitted (undefined). Otherwise default to 1.
     const thinkingActive =
@@ -7372,4 +7466,34 @@ function extractFileIds(body) {
   return ids;
 }
+// Internals exposed for tests only. Do not consume from production code paths.
+//
+// IMPORTANT: do NOT add a new `export` declaration here. Opencode's plugin
+// loader (opencode/packages/opencode/src/plugin/index.ts:74-79) iterates
+// `Object.values(mod)` of the loaded module and throws "Plugin export is not
+// a function" if ANY export is not a plugin function. A named `export const
+// __testing__ = {...}` object would break plugin loading entirely.
+//
+// Instead, attach the test hooks as a PROPERTY of the exported function.
+// Functions are objects in JS, so this is valid. The module surface still
+// has only one exported value (the AnthropicAuthPlugin function), which is
+// what the loader expects. Tests reach internals via
+// `import { AnthropicAuthPlugin } from "./index.mjs"` then
+// `AnthropicAuthPlugin.__testing__`.
+AnthropicAuthPlugin.__testing__ = {
+  sanitizeSystemText,
+  compactSystemText,
+  dedupeSystemBlocks,
+  normalizeSystemTextBlocks,
+  buildSystemPromptBlocks,
+  get cachedCCPrompt() {
+    return cachedCCPrompt;
+  },
+  resetCachedCCPrompt() {
+    cachedCCPrompt = null;
+  },
+  SUBAGENT_CC_ANCHOR,
+  CLAUDE_CODE_IDENTITY_STRING,
+};
 export default AnthropicAuthPlugin;

package/lib/config.mjs CHANGED Viewed

@@ -102,6 +102,7 @@ import { randomBytes } from "node:crypto";
  * @property {{ enabled: boolean, default: number, completion_threshold: number }} token_budget
  * @property {{ enabled: boolean, threshold_percent: number }} microcompact
  * @property {{ enabled: boolean, default_cooldown_ms: number, poll_quota_on_overload: boolean }} overload_recovery
+ * @property {{ proactive_disabled: boolean }} account_management
  */
 /** @type {AnthropicAuthConfig} */
@@ -241,6 +242,16 @@ export const DEFAULT_CONFIG = {
     /** Whether to poll /api/oauth/usage on 529 exhaustion for smarter cooldowns. */
     poll_quota_on_overload: true,
   },
+  /** Account management: control automatic account penalties and switching.
+   *  When proactive_disabled is true (default), the plugin will NOT apply
+   *  utilization penalties, surpassed-threshold penalties, or predictive
+   *  switches based on response headers (200 OK responses). Reactive 429
+   *  handling still works. This makes account switching fully manual and
+   *  prevents single-account users from being locally locked out by warning
+   *  thresholds the server still allows. */
+  account_management: {
+    proactive_disabled: true,
+  },
 };
 export const VALID_STRATEGIES = ["sticky", "round-robin", "hybrid"];
@@ -278,6 +289,7 @@ function createDefaultConfig() {
     token_budget: { ...DEFAULT_CONFIG.token_budget },
     microcompact: { ...DEFAULT_CONFIG.microcompact },
     overload_recovery: { ...DEFAULT_CONFIG.overload_recovery },
+    account_management: { ...DEFAULT_CONFIG.account_management },
   };
 }
@@ -675,6 +687,17 @@ function validateConfig(raw) {
     };
   }
+  // Account management sub-config
+  if (raw.account_management && typeof raw.account_management === "object") {
+    const am = /** @type {Record<string, unknown>} */ (raw.account_management);
+    config.account_management = {
+      proactive_disabled:
+        typeof am.proactive_disabled === "boolean"
+          ? am.proactive_disabled
+          : DEFAULT_CONFIG.account_management.proactive_disabled,
+    };
+  }
   return config;
 }
@@ -761,6 +784,16 @@ function applyEnvOverrides(config) {
     config.adaptive_context.enabled = false;
   }
+  // Account management: env override for proactive penalties / predictive switch.
+  // Set to 1/true to disable all proactive account management (matches default).
+  // Set to 0/false to re-enable the legacy proactive behavior.
+  if (env.OPENCODE_ANTHROPIC_PROACTIVE_DISABLED === "1" || env.OPENCODE_ANTHROPIC_PROACTIVE_DISABLED === "true") {
+    config.account_management.proactive_disabled = true;
+  }
+  if (env.OPENCODE_ANTHROPIC_PROACTIVE_DISABLED === "0" || env.OPENCODE_ANTHROPIC_PROACTIVE_DISABLED === "false") {
+    config.account_management.proactive_disabled = false;
+  }
   return config;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "opencode-anthropic-fix",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "license": "GPL-3.0-or-later",
   "main": "./index.mjs",
   "files": [