npm - @oh-my-pi/pi-catalog - Versions diffs - 16.0.4 → 16.0.5 - Mend

@oh-my-pi/pi-catalog 16.0.4 → 16.0.5

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (22)

package/CHANGELOG.md +24 -0
package/dist/types/compat/openai.d.ts +1 -0
package/dist/types/discovery/antigravity.d.ts +9 -0
package/dist/types/identity/dialect.d.ts +1 -1
package/dist/types/identity/family.d.ts +2 -0
package/dist/types/types.d.ts +20 -1
package/dist/types/variant-collapse.d.ts +4 -5
package/dist/types/wire/gemini-headers.d.ts +16 -1
package/dist/types/wire/github-copilot.d.ts +2 -0
package/package.json +3 -3
package/src/compat/openai.ts +7 -0
package/src/discovery/antigravity.ts +15 -6
package/src/identity/dialect.ts +4 -1
package/src/identity/family.ts +8 -1
package/src/model-cache.ts +8 -6
package/src/models.json +40 -16
package/src/provider-models/google.ts +2 -0
package/src/provider-models/openai-compat.ts +7 -4
package/src/types.ts +20 -0
package/src/variant-collapse.ts +198 -72
package/src/wire/gemini-headers.ts +28 -5
package/src/wire/github-copilot.ts +18 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,30 @@
 ## [Unreleased]
+## [16.0.5] - 2026-06-17
+### Added
+- Added `enableGeminiThinkingLoopGuard` to OpenAI compatibility options to allow explicit opt-in or opt-out of the Gemini thinking-loop guard for OpenAI-compatible model aliases
+- Added `LITELLM_BASE_URL` as the LiteLLM provider discovery base URL fallback, with discovery caches scoped by the resolved proxy URL and explicit provider `baseUrl` config kept at higher precedence. ([#2726](https://github.com/can1357/oh-my-pi/issues/2726))
+- Added `ThinkingConfig.effortBudgets` (per-effort thinking-budget contract baked into collapsed variants) and `ANTIGRAVITY_MODEL_WIRE_PROFILES` (`maxOutputTokens` + `model_enum` per Antigravity wire id) to mirror the captured Antigravity Cloud Code Assist client request shape.
+### Changed
+- Defaulted `enableGeminiThinkingLoopGuard` from Gemini family detection for both OpenAI completions and responses compatibility specs so Gemini models now enable the thinking-loop guard automatically
+- Updated the default Gemini CLI user-agent version fallback to 0.46.0.
+- Changed the Antigravity (`google-antigravity`, daily-cloudcode-pa) gemini-3.x collapse families to the `budget` thinking transport with the client's per-tier `thinkingBudget` (3.5 Flash low/medium/high = 1000/4000/10000, 3.1 Pro low/high = 1001/10001) and corrected 3.5 Flash effort→wire routing (medium → `gemini-3.5-flash-low`, high → `gemini-3-flash-agent`). Split the shared CCA collapse table so `google-gemini-cli` (cloudcode-pa) keeps the `google-level` `thinkingLevel` transport for official Gemini CLI parity. Stale collapsed snapshots (bundled catalog, recycled `gemini-3-flash` alias) self-heal from the hand table at collapse time, and the model cache schema is bumped to v7 to invalidate pre-budget Antigravity rows.
+- Changed the Antigravity user-agent to the `antigravity/hub/<version>` format (default `2.1.4`) to match the captured client.
+### Fixed
+- Fixed `off` effort routing for `claude-opus-4-5` and `claude-opus-4-6` to use their base model IDs when thinking is disabled
+- Fixed `gemini-2.5-flash` effort routing so all non-off effort levels resolve to `gemini-2.5-flash-thinking`
+- Fixed shared variant alias provider resolution so `resolveBareVariantAlias` reports all matching providers when model aliases are present in both CCA collapse tables
+- Routed google-antigravity default baseUrl to the stable primary daily endpoint in the catalog generator and all fallback snapshots, resolving connection drops on heavy queries.
+- Fixed MiniMax M3 dialect selection so MiniMax-family OpenAI-compatible models use the MiniMax tool-call dialect instead of generic XML. ([#2759](https://github.com/can1357/oh-my-pi/issues/2759))
+- Fixed GitHub Copilot dynamic discovery to honor plan-specific API endpoints stored in structured OAuth credentials. ([#2876](https://github.com/can1357/oh-my-pi/issues/2876))
 ## [16.0.4] - 2026-06-17
 ### Fixed

package/dist/types/compat/openai.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
  */
 export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
 interface OpenAIResponsesSpecLike {
+    id?: string;
     provider: string;
     name: string;
     baseUrl: string;

package/dist/types/discovery/antigravity.d.ts CHANGED Viewed

@@ -1,4 +1,7 @@
 import type { ModelSpec } from "../types";
+import { type VariantCollapseTable } from "../variant-collapse";
+export declare const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
+export declare const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
 /**
  * Raw model metadata returned by Antigravity's `fetchAvailableModels` endpoint.
  */
@@ -51,6 +54,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
     signal?: AbortSignal;
     /** Optional fetch implementation override for tests. */
     fetcher?: typeof fetch;
+    /**
+     * Hand collapse table to apply to the discovered list. Defaults to the
+     * Antigravity (budget-transport) table; `googleGeminiCli` passes the
+     * level-transport table so cloudcode-pa keeps `thinkingLevel`.
+     */
+    collapseTable?: VariantCollapseTable;
 }
 /**
  * Fetches discoverable Antigravity models and normalizes them into canonical model entries.

package/dist/types/identity/dialect.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma";
+export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma" | "minimax";
 export declare const FALLBACK_DIALECT: Dialect;
 export declare function preferredDialect(modelId: string): Dialect;

package/dist/types/identity/family.d.ts CHANGED Viewed

@@ -32,6 +32,8 @@ export declare function isMimoModelIdOrName(value: string): boolean;
  * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
  */
 export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
+/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
+export declare function isMinimaxM3FamilyModelId(modelId: string): boolean;
 /**
  * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
  * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts

package/dist/types/types.d.ts CHANGED Viewed

@@ -33,6 +33,14 @@ export interface ThinkingConfig {
      * thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
      */
     effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
+    /**
+     * Per-effort thinking budget in tokens, baked at build time for collapsed
+     * variants whose upstream expects an explicit `thinkingBudget` instead of a
+     * value derived from the generic ladder (Antigravity Cloud Code Assist
+     * gemini-3.x). Request mapping prefers caller `thinkingBudgets`, then this
+     * map, then the provider default ladder. Only meaningful for `mode: "budget"`.
+     */
+    effortBudgets?: Readonly<Partial<Record<Effort, number>>>;
     /**
      * When true, a thinking-off request MUST explicitly suppress thinking on
      * the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
@@ -137,6 +145,13 @@ export interface OpenAICompat {
     reasoningEffortMap?: Partial<Record<Effort, string>>;
     /** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
     supportsUsageInStreaming?: boolean;
+    /**
+     * Enable the Gemini thinking-loop guard (pi-ai stream layer) for this model.
+     * Defaults to true when the model id classifies as the gemini family. Set
+     * explicitly to cover an opaque OpenAI-compat proxy alias (e.g. `my-model`)
+     * that routes to Gemini, or to false to opt a gemini-family id out.
+     */
+    enableGeminiThinkingLoopGuard?: boolean;
     /** Which field to use for max tokens. Default: auto-detected from URL. */
     maxTokensField?: "max_completion_tokens" | "max_tokens";
     /** Whether tool results require the `name` field. Default: auto-detected from URL. */
@@ -322,7 +337,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
  * `buildModel`; request handlers read fields and never detect, resolve, or
  * allocate.
  */
-export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
+export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "enableGeminiThinkingLoopGuard" | "whenThinking">> & {
     openRouterRouting?: OpenAICompat["openRouterRouting"];
     vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
     extraBody?: OpenAICompat["extraBody"];
@@ -334,6 +349,8 @@ export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRoutin
     isOpenRouterHost: boolean;
     /** The model sits behind Vercel AI Gateway. */
     isVercelGatewayHost: boolean;
+    /** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
+    enableGeminiThinkingLoopGuard?: boolean;
     /** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
     whenThinking?: ResolvedOpenAICompat;
 };
@@ -346,6 +363,8 @@ export interface ResolvedOpenAIResponsesCompat {
     strictResponsesPairing: boolean;
     requiresJuiceZeroHack: boolean;
     reasoningEffortMap: Partial<Record<Effort, string>>;
+    /** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. */
+    enableGeminiThinkingLoopGuard?: boolean;
 }
 /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
 export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {

package/dist/types/variant-collapse.d.ts CHANGED Viewed

@@ -46,12 +46,11 @@ export interface EffortVariantFamily {
 export interface VariantCollapseTable {
     families: readonly EffortVariantFamily[];
 }
-/**
- * Shared by `google-antigravity` and `google-gemini-cli` — both serve the
- * Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
- */
+/** `google-antigravity` (daily-cloudcode-pa): Gemini 3.x on the budget transport. */
 export declare const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
-/** Provider id → hand collapse table. Both CCA providers share one table. */
+/** `google-gemini-cli` (cloudcode-pa): Gemini 3.x on the level transport (official CLI parity). */
+export declare const GEMINI_CLI_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
+/** Provider id → hand collapse table. The CCA providers diverge on thinking transport. */
 export declare const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>>;
 /**
  * The global automatic rule: derive an `X` + `X-thinking` family for every

package/dist/types/wire/gemini-headers.d.ts CHANGED Viewed

@@ -9,7 +9,6 @@ export declare const getGeminiCliHeaders: (modelId?: string) => {
     "Client-Metadata": string;
 };
 export declare const ANTIGRAVITY_SYSTEM_INSTRUCTION: string;
-export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. \"No emdashes\"), or your thinking/personality preambles in the final response. Output only the final response.";
 /**
  * Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
  * and usage code can read it without pulling the heavy google-gemini-cli provider
@@ -17,3 +16,19 @@ export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER outp
  * parse graph.
  */
 export declare let getAntigravityUserAgent: () => string;
+/**
+ * Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
+ * real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
+ * opaque `labels.model_enum` token the client tags each request with;
+ * `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
+ * regardless of the thinking budget. Keyed by the routed upstream wire id
+ * (post effort-routing), not the collapsed logical id. Checkpoint-only ids
+ * (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
+ * emits agent requests.
+ */
+export interface AntigravityModelWireProfile {
+    modelEnum: string;
+    maxOutputTokens: number;
+}
+export declare const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>>;
+export declare function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined;

package/dist/types/wire/github-copilot.d.ts CHANGED Viewed

@@ -25,9 +25,11 @@ export declare const COPILOT_API_HEADERS: {
 export type ParsedGitHubCopilotApiKey = {
     accessToken: string;
     enterpriseUrl?: string;
+    apiEndpoint?: string;
 };
 export declare function isPublicGitHubHost(host: string): boolean;
 export declare function normalizeGitHubCopilotEnterpriseDomain(input: string | undefined): string | undefined;
+export declare function normalizeGitHubCopilotApiEndpoint(input: string | undefined): string | undefined;
 export declare function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilotApiKey;
 export declare function normalizeDomain(input: string): string | null;
 export declare function getGitHubCopilotBaseUrl(enterpriseDomain?: string): string;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "16.0.4",
+	"version": "16.0.5",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,11 +34,11 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "16.0.4",
+		"@oh-my-pi/pi-utils": "16.0.5",
 		"zod": "^4"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "16.0.4",
+		"@oh-my-pi/pi-ai": "16.0.5",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/compat/openai.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import {
 	isKimiModelId,
 	isMimoModelIdOrName,
 	isQwenModelId,
+	modelFamilyToken,
 } from "../identity/family";
 import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
 import { applyCompatOverrides } from "./apply";
@@ -211,6 +212,10 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 		supportsReasoningParams: provider !== "github-copilot",
 		reasoningEffortMap: {},
 		supportsUsageInStreaming: !isCerebras,
+		// pi-ai's thinking-loop guard is gemini-only; default the flag from the
+		// family classifier so OpenAI-compat proxies serving Gemini are covered.
+		// An opaque alias can opt in via `compat.enableGeminiThinkingLoopGuard`.
+		enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id) === "gemini",
 		// Kimi (including via OpenRouter and Fireworks router-form IDs such as
 		// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
 		// max_tokens, not actual output. The official Kimi K2 model guidance
@@ -295,6 +300,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 }
 interface OpenAIResponsesSpecLike {
+	id?: string;
 	provider: string;
 	name: string;
 	baseUrl: string;
@@ -329,6 +335,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
 		strictResponsesPairing: isAzure || spec.provider === "github-copilot",
 		requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
 		reasoningEffortMap: {},
+		enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id ?? "") === "gemini",
 	};
 	applyCompatOverrides(compat, spec.compat);
 	return compat;

package/src/discovery/antigravity.ts CHANGED Viewed

@@ -1,13 +1,16 @@
 import { z } from "zod/v4";
 import type { ModelSpec } from "../types";
 import { toPositiveNumber } from "../utils";
-import { ANTIGRAVITY_VARIANT_COLLAPSE_TABLE, collapseEffortVariants } from "../variant-collapse";
+import {
+	ANTIGRAVITY_VARIANT_COLLAPSE_TABLE,
+	collapseEffortVariants,
+	type VariantCollapseTable,
+} from "../variant-collapse";
 import { getAntigravityUserAgent } from "../wire/gemini-headers";
-const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [
-	"https://daily-cloudcode-pa.googleapis.com",
-	"https://daily-cloudcode-pa.sandbox.googleapis.com",
-] as const;
+export const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
+export const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
+const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [ANTIGRAVITY_PRIMARY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
 const FETCH_AVAILABLE_MODELS_PATH = "/v1internal:fetchAvailableModels";
 const DEFAULT_CONTEXT_WINDOW = 200_000;
@@ -157,6 +160,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
 	signal?: AbortSignal;
 	/** Optional fetch implementation override for tests. */
 	fetcher?: typeof fetch;
+	/**
+	 * Hand collapse table to apply to the discovered list. Defaults to the
+	 * Antigravity (budget-transport) table; `googleGeminiCli` passes the
+	 * level-transport table so cloudcode-pa keeps `thinkingLevel`.
+	 */
+	collapseTable?: VariantCollapseTable;
 }
 /**
@@ -239,7 +248,7 @@ export async function fetchAntigravityDiscoveryModels(
 		// Collapse effort-tier variants at the source so runtime discovery,
 		// the gemini-cli re-provision, and the catalog generator all see
 		// logical ids only.
-		const collapsed = collapseEffortVariants(models, ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
+		const collapsed = collapseEffortVariants(models, options.collapseTable ?? ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
 		collapsed.sort((a, b) => a.name.localeCompare(b.name) || a.id.localeCompare(b.id));
 		return collapsed;
 	}

package/src/identity/dialect.ts CHANGED Viewed

@@ -11,7 +11,8 @@ export type Dialect =
 	| "pi"
 	| "qwen3"
 	| "gemini"
-	| "gemma";
+	| "gemma"
+	| "minimax";
 export const FALLBACK_DIALECT: Dialect = "xml";
@@ -31,6 +32,8 @@ export function preferredDialect(modelId: string): Dialect {
 			return "qwen3";
 		case "deepseek":
 			return "deepseek";
+		case "minimax":
+			return "minimax";
 		case "openai":
 		case "gpt-oss":
 			return "harmony";

package/src/identity/family.ts CHANGED Viewed

@@ -73,6 +73,13 @@ export function isMinimaxM2FamilyModelId(modelId: string): boolean {
 	return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
 }
+/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
+export function isMinimaxM3FamilyModelId(modelId: string): boolean {
+	const lower = modelId.toLowerCase();
+	if (!lower.includes("minimax")) return false;
+	return /(?:^|[/._-])(?:minimax[/._-])?m3(?:[-.:_]|$)/i.test(lower);
+}
 /**
  * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
  * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
@@ -139,7 +146,7 @@ export function modelFamilyToken(modelId: string): string {
 	if (isOpenAIModelId(modelId)) return "openai";
 	if (isKimiModelId(modelId)) return "kimi";
 	if (isQwenModelId(modelId)) return "qwen";
-	if (isMinimaxM2FamilyModelId(modelId)) return "minimax";
+	if (isMinimaxM2FamilyModelId(modelId) || isMinimaxM3FamilyModelId(modelId)) return "minimax";
 	if (isOpenAIGptOssModelId(modelId)) return "gpt-oss";
 	if (isDeepseekModelIdOrName(modelId)) return "deepseek";
 	if (isMimoModelIdOrName(modelId)) return "mimo";

package/src/model-cache.ts CHANGED Viewed

@@ -7,12 +7,14 @@ import { getModelDbPath } from "@oh-my-pi/pi-utils";
 import type { Api, Model, ModelSpec } from "./types";
 // Rows persist ModelSpec JSON (sparse `compat`, never the resolved record);
-// the model manager rebuilds via `buildModel` on load. v6 invalidates rows
-// that may contain the retired unknown-limit sentinels (222222/8888); v5
-// invalidated rows predating effort-tier variant collapsing (raw
-// `-low`/`-high`/`-thinking` member ids); v4 dropped the pre-efforts
-// ThinkingConfig shape.
-const CACHE_SCHEMA_VERSION = 6;
+// the model manager rebuilds via `buildModel` on load. v7 invalidates rows
+// predating the Antigravity Gemini budget-mode migration (cached specs still
+// carrying `thinking.mode: "google-level"` and the old 3.5-flash effort
+// routing); v6 invalidates rows that may contain the retired unknown-limit
+// sentinels (222222/8888); v5 invalidated rows predating effort-tier variant
+// collapsing (raw `-low`/`-high`/`-thinking` member ids); v4 dropped the
+// pre-efforts ThinkingConfig shape.
+const CACHE_SCHEMA_VERSION = 7;
 interface CacheRow {
 	provider_id: string;

package/src/models.json CHANGED Viewed

@@ -17726,7 +17726,7 @@
 			"name": "Claude Opus 4.5",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17749,6 +17749,7 @@
 					"high"
 				],
 				"effortRouting": {
+					"off": "claude-opus-4-5",
 					"minimal": "claude-opus-4-5-thinking",
 					"low": "claude-opus-4-5-thinking",
 					"medium": "claude-opus-4-5-thinking",
@@ -17762,7 +17763,7 @@
 			"name": "Claude Opus 4.6",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17785,6 +17786,7 @@
 					"high"
 				],
 				"effortRouting": {
+					"off": "claude-opus-4-6",
 					"minimal": "claude-opus-4-6-thinking",
 					"low": "claude-opus-4-6-thinking",
 					"medium": "claude-opus-4-6-thinking",
@@ -17798,7 +17800,7 @@
 			"name": "Claude Sonnet 4.5",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17835,7 +17837,7 @@
 			"name": "Claude Sonnet 4.6",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17872,7 +17874,7 @@
 			"name": "Gemini 2.5 Flash",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17908,7 +17910,7 @@
 			"name": "Gemini 2.5 Pro",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17938,7 +17940,7 @@
 			"name": "Gemini 3 Flash",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -17953,22 +17955,36 @@
 			"contextWindow": 1048576,
 			"maxTokens": 65536,
 			"thinking": {
-				"mode": "google-level",
+				"mode": "budget",
 				"efforts": [
 					"minimal",
 					"low",
 					"medium",
 					"high"
 				],
-				"requiresEffort": true
-			}
+				"effortBudgets": {
+					"minimal": 1000,
+					"low": 1000,
+					"medium": 4000,
+					"high": 10000
+				},
+				"effortRouting": {
+					"off": "gemini-3.5-flash-extra-low",
+					"minimal": "gemini-3.5-flash-extra-low",
+					"low": "gemini-3.5-flash-extra-low",
+					"medium": "gemini-3.5-flash-low",
+					"high": "gemini-3-flash-agent"
+				},
+				"suppressWhenOff": true
+			},
+			"requestModelId": "gemini-3.5-flash-extra-low"
 		},
 		"gemini-3-pro": {
 			"id": "gemini-3-pro",
 			"name": "Gemini 3 Pro",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -18002,7 +18018,7 @@
 			"name": "Gemini 3.1 Pro Preview",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text",
@@ -18017,11 +18033,15 @@
 			"contextWindow": 1048576,
 			"maxTokens": 65535,
 			"thinking": {
-				"mode": "google-level",
+				"mode": "budget",
 				"efforts": [
 					"low",
 					"high"
 				],
+				"effortBudgets": {
+					"low": 1001,
+					"high": 10001
+				},
 				"effortRouting": {
 					"off": "gemini-3.1-pro-low",
 					"low": "gemini-3.1-pro-low",
@@ -18036,7 +18056,7 @@
 			"name": "GPT OSS 120B",
 			"api": "google-gemini-cli",
 			"provider": "google-antigravity",
-			"baseUrl": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			"baseUrl": "https://daily-cloudcode-pa.googleapis.com",
 			"reasoning": true,
 			"input": [
 				"text"
@@ -18110,7 +18130,11 @@
 					"high"
 				],
 				"effortRouting": {
-					"off": "gemini-2.5-flash"
+					"off": "gemini-2.5-flash",
+					"minimal": "gemini-2.5-flash-thinking",
+					"low": "gemini-2.5-flash-thinking",
+					"medium": "gemini-2.5-flash-thinking",
+					"high": "gemini-2.5-flash-thinking"
 				}
 			}
 		},
@@ -69307,7 +69331,7 @@
 				"cacheRead": 0,
 				"cacheWrite": 0
 			},
-			"contextWindow": null,
+			"contextWindow": 1000000,
 			"maxTokens": null,
 			"compat": {
 				"supportsUsageInStreaming": false

package/src/provider-models/google.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import { fetchAntigravityDiscoveryModels } from "../discovery/antigravity";
 import { fetchGeminiModels } from "../discovery/gemini";
 import type { ModelManagerOptions } from "../model-manager";
 import type { FetchImpl } from "../types";
+import { GEMINI_CLI_VARIANT_COLLAPSE_TABLE } from "../variant-collapse";
 export interface GoogleModelManagerConfig {
 	apiKey?: string;
@@ -89,6 +90,7 @@ export function googleGeminiCliModelManagerOptions(
 							token,
 							endpoint,
 							fetcher: toDiscoveryFetch(config?.fetch),
+							collapseTable: GEMINI_CLI_VARIANT_COLLAPSE_TABLE,
 						});
 						if (models === null) {
 							return null;

package/src/provider-models/openai-compat.ts CHANGED Viewed

@@ -2522,9 +2522,10 @@ export function litellmModelManagerOptions(
 	config?: LiteLLMModelManagerConfig,
 ): ModelManagerOptions<"openai-completions"> {
 	const apiKey = config?.apiKey;
-	const baseUrl = config?.baseUrl ?? "http://localhost:4000/v1";
+	const baseUrl = config?.baseUrl ?? Bun.env.LITELLM_BASE_URL ?? "http://localhost:4000/v1";
 	return {
 		providerId: "litellm",
+		cacheProviderId: `litellm:${Bun.hash(baseUrl).toString(36)}`,
 		// litellm is a local-only proxy whose /v1/models returns bare ids with no
 		// metadata, and it is never bundled in models.json (that would leak the
 		// machine's localhost catalog). It proxies known upstream models, so we
@@ -2807,9 +2808,11 @@ export function githubCopilotModelManagerOptions(config?: GithubCopilotModelMana
 	const parsedApiKey = rawApiKey ? parseGitHubCopilotApiKey(rawApiKey) : undefined;
 	const apiKey = parsedApiKey?.accessToken;
 	const baseUrl =
-		parsedApiKey?.enterpriseUrl && configuredBaseUrl.includes("githubcopilot.com")
-			? getGitHubCopilotBaseUrl(parsedApiKey.enterpriseUrl)
-			: configuredBaseUrl;
+		parsedApiKey?.apiEndpoint && configuredBaseUrl.includes("githubcopilot.com")
+			? parsedApiKey.apiEndpoint
+			: parsedApiKey?.enterpriseUrl && configuredBaseUrl.includes("githubcopilot.com")
+				? getGitHubCopilotBaseUrl(parsedApiKey.enterpriseUrl)
+				: configuredBaseUrl;
 	const providerRefs = createBundledReferenceMap<Api>("github-copilot");
 	const resolveReference = createReferenceResolver(providerRefs);
 	return {

package/src/types.ts CHANGED Viewed

@@ -53,6 +53,14 @@ export interface ThinkingConfig {
 	 * thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
 	 */
 	effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
+	/**
+	 * Per-effort thinking budget in tokens, baked at build time for collapsed
+	 * variants whose upstream expects an explicit `thinkingBudget` instead of a
+	 * value derived from the generic ladder (Antigravity Cloud Code Assist
+	 * gemini-3.x). Request mapping prefers caller `thinkingBudgets`, then this
+	 * map, then the provider default ladder. Only meaningful for `mode: "budget"`.
+	 */
+	effortBudgets?: Readonly<Partial<Record<Effort, number>>>;
 	/**
 	 * When true, a thinking-off request MUST explicitly suppress thinking on
 	 * the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
@@ -162,6 +170,13 @@ export interface OpenAICompat {
 	reasoningEffortMap?: Partial<Record<Effort, string>>;
 	/** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
 	supportsUsageInStreaming?: boolean;
+	/**
+	 * Enable the Gemini thinking-loop guard (pi-ai stream layer) for this model.
+	 * Defaults to true when the model id classifies as the gemini family. Set
+	 * explicitly to cover an opaque OpenAI-compat proxy alias (e.g. `my-model`)
+	 * that routes to Gemini, or to false to opt a gemini-family id out.
+	 */
+	enableGeminiThinkingLoopGuard?: boolean;
 	/** Which field to use for max tokens. Default: auto-detected from URL. */
 	maxTokensField?: "max_completion_tokens" | "max_tokens";
 	/** Whether tool results require the `name` field. Default: auto-detected from URL. */
@@ -365,6 +380,7 @@ export type ResolvedOpenAICompat = Required<
 		| "thinkingKeep"
 		| "strictResponsesPairing"
 		| "requiresJuiceZeroHack"
+		| "enableGeminiThinkingLoopGuard"
 		| "whenThinking"
 	>
 > & {
@@ -379,6 +395,8 @@ export type ResolvedOpenAICompat = Required<
 	isOpenRouterHost: boolean;
 	/** The model sits behind Vercel AI Gateway. */
 	isVercelGatewayHost: boolean;
+	/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
+	enableGeminiThinkingLoopGuard?: boolean;
 	/** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
 	whenThinking?: ResolvedOpenAICompat;
 };
@@ -392,6 +410,8 @@ export interface ResolvedOpenAIResponsesCompat {
 	strictResponsesPairing: boolean;
 	requiresJuiceZeroHack: boolean;
 	reasoningEffortMap: Partial<Record<Effort, string>>;
+	/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. */
+	enableGeminiThinkingLoopGuard?: boolean;
 }
 /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */

package/src/variant-collapse.ts CHANGED Viewed

@@ -107,80 +107,132 @@ const GEMINI_3_FLASH_FAMILY_EFFORTS: readonly Effort[] = [Effort.Minimal, Effort
 const GEMINI_3_PRO_FAMILY_EFFORTS: readonly Effort[] = [Effort.Low, Effort.High];
 /**
- * Shared by `google-antigravity` and `google-gemini-cli` — both serve the
- * Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
+ * Antigravity Cloud Code Assist sends an explicit `thinkingBudget` per tier
+ * (verified against captured `daily-cloudcode-pa` requests). Flash uses round
+ * budgets; Pro offsets every budget by +1. Minimal mirrors Low (the Antigravity
+ * UI exposes Low/Medium/High only) so the effort stays selectable.
  */
-export const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable = {
-	families: [
-		{
-			id: "gemini-3.5-flash",
-			name: "Gemini 3.5 Flash",
-			members: ["gemini-3.5-flash-extra-low", "gemini-3.5-flash-low", "gemini-3-flash-agent"],
-			routing: {
-				off: "gemini-3.5-flash-extra-low",
-				[Effort.Minimal]: "gemini-3-flash-agent",
-				[Effort.Low]: "gemini-3.5-flash-extra-low",
-				[Effort.Medium]: "gemini-3.5-flash-extra-low",
-				[Effort.High]: "gemini-3.5-flash-low",
-			},
-			thinking: { mode: "google-level", efforts: GEMINI_3_FLASH_FAMILY_EFFORTS },
-			suppressWhenOff: true,
-			// Retired bare id; the alias only fires when no live model holds it
-			// (exact match wins in every resolver).
-			extraAliases: ["gemini-3-flash"],
-		},
-		{
-			id: "gemini-3.1-pro",
-			name: "Gemini 3.1 Pro",
-			// High routes to `gemini-pro-agent` — the upstream `gemini-3.1-pro-high`
-			// deployment returns INVALID_ARGUMENT on every streamGenerateContent
-			// request (both CCA endpoints) while discovery still lists it;
-			// `gemini-pro-agent` is the same model ("Gemini 3.1 Pro (High)", same
-			// thinking budget/caps) and accepts the identical request body.
-			// `gemini-3.1-pro-high` stays a member so the dead raw id is consumed.
-			members: ["gemini-3.1-pro-low", "gemini-pro-agent", "gemini-3.1-pro-high"],
-			retiredMembers: ["gemini-3.1-pro-high"],
-			routing: {
-				off: "gemini-3.1-pro-low",
-				[Effort.Low]: "gemini-3.1-pro-low",
-				[Effort.High]: "gemini-pro-agent",
-			},
-			thinking: { mode: "google-level", efforts: GEMINI_3_PRO_FAMILY_EFFORTS },
-			suppressWhenOff: true,
-		},
-		{
-			// Legacy static family — covers stale snapshots and caches.
-			id: "gemini-3-pro",
-			name: "Gemini 3 Pro",
-			members: ["gemini-3-pro-low", "gemini-3-pro-high"],
-			routing: {
-				off: "gemini-3-pro-low",
-				[Effort.Low]: "gemini-3-pro-low",
-				[Effort.High]: "gemini-3-pro-high",
-			},
-			thinking: { mode: "google-level", efforts: GEMINI_3_PRO_FAMILY_EFFORTS },
-			suppressWhenOff: true,
+const GEMINI_3_FLASH_FAMILY_BUDGETS: Readonly<Partial<Record<Effort, number>>> = {
+	[Effort.Minimal]: 1000, // same budget as Low
+	[Effort.Low]: 1000,
+	[Effort.Medium]: 4000,
+	[Effort.High]: 10000,
+};
+const GEMINI_3_PRO_FAMILY_BUDGETS: Readonly<Partial<Record<Effort, number>>> = {
+	[Effort.Low]: 1001, // Flash Low budget + 1
+	[Effort.High]: 10001, // Flash High budget + 1
+};
+/**
+ * The two Cloud Code Assist providers share the same Antigravity discovery list
+ * but disagree on the thinking transport: `google-antigravity` (daily-cloudcode-pa)
+ * sends an explicit `thinkingBudget` (verified against captured requests), while
+ * `google-gemini-cli` (cloudcode-pa) follows the official Gemini CLI and uses
+ * `thinkingLevel`. The Gemini 3.x families therefore differ only in thinking
+ * transport (and, for Flash, the per-tier wire-id routing); everything else is
+ * shared verbatim.
+ */
+function geminiFlashFamily(mode: "budget" | "google-level"): EffortVariantFamily { // builds the Gemini 3.5 Flash family for one thinking transport
+	const budget = mode === "budget"; // true selects the budget-transport routing and thinking config below
+	return {
+		id: "gemini-3.5-flash",
+		name: "Gemini 3.5 Flash",
+		members: ["gemini-3.5-flash-extra-low", "gemini-3.5-flash-low", "gemini-3-flash-agent"],
+		routing: budget
+			? { // budget transport: off/Minimal/Low -> extra-low, Medium -> low, High -> agent
+					off: "gemini-3.5-flash-extra-low",
+					[Effort.Minimal]: "gemini-3.5-flash-extra-low",
+					[Effort.Low]: "gemini-3.5-flash-extra-low",
+					[Effort.Medium]: "gemini-3.5-flash-low",
+					[Effort.High]: "gemini-3-flash-agent",
+				}
+			: { // level transport: Minimal -> agent, off/Low/Medium -> extra-low, High -> low
+					off: "gemini-3.5-flash-extra-low",
+					[Effort.Minimal]: "gemini-3-flash-agent",
+					[Effort.Low]: "gemini-3.5-flash-extra-low",
+					[Effort.Medium]: "gemini-3.5-flash-extra-low",
+					[Effort.High]: "gemini-3.5-flash-low",
+				},
+		thinking: budget
+			? { mode: "budget", efforts: GEMINI_3_FLASH_FAMILY_EFFORTS, effortBudgets: GEMINI_3_FLASH_FAMILY_BUDGETS } // per-effort budgets are attached only in budget mode
+			: { mode: "google-level", efforts: GEMINI_3_FLASH_FAMILY_EFFORTS },
+		suppressWhenOff: true,
+		// Retired bare id; the alias only fires when no live model holds it
+		// (exact match wins in every resolver).
+		extraAliases: ["gemini-3-flash"],
+	};
+}
+function geminiProFamily(mode: "budget" | "google-level"): EffortVariantFamily {
+	const budget = mode === "budget";
+	return {
+		id: "gemini-3.1-pro",
+		name: "Gemini 3.1 Pro",
+		// High routes to `gemini-pro-agent` — the upstream `gemini-3.1-pro-high`
+		// deployment returns INVALID_ARGUMENT on every streamGenerateContent
+		// request (both CCA endpoints) while discovery still lists it;
+		// `gemini-pro-agent` is the same model ("Gemini 3.1 Pro (High)", same
+		// thinking budget/caps) and accepts the identical request body.
+		// `gemini-3.1-pro-high` stays a member so the dead raw id is consumed.
+		members: ["gemini-3.1-pro-low", "gemini-pro-agent", "gemini-3.1-pro-high"],
+		retiredMembers: ["gemini-3.1-pro-high"],
+		routing: {
+			off: "gemini-3.1-pro-low",
+			[Effort.Low]: "gemini-3.1-pro-low",
+			[Effort.High]: "gemini-pro-agent",
 		},
-		{
-			// Rename-only collapse: every effort and off fall back to the wire id.
-			id: "gpt-oss-120b",
-			name: "GPT-OSS 120B",
-			members: ["gpt-oss-120b-medium"],
-			routing: {},
-			thinking: { mode: "budget", efforts: [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High] },
+		thinking: budget
+			? { mode: "budget", efforts: GEMINI_3_PRO_FAMILY_EFFORTS, effortBudgets: GEMINI_3_PRO_FAMILY_BUDGETS }
+			: { mode: "google-level", efforts: GEMINI_3_PRO_FAMILY_EFFORTS },
+		suppressWhenOff: true,
+	};
+}
+/** CCA families shared verbatim by both providers (they do not vary with the provider's thinking transport). */
+const SHARED_CCA_FAMILIES: readonly EffortVariantFamily[] = [
+	{
+		// Legacy static family — covers stale snapshots and caches. Stale ids are
+		// unverified against the budget-mode CCA contract; keep them on level.
+		id: "gemini-3-pro",
+		name: "Gemini 3 Pro",
+		members: ["gemini-3-pro-low", "gemini-3-pro-high"],
+		routing: {
+			off: "gemini-3-pro-low",
+			[Effort.Low]: "gemini-3-pro-low",
+			[Effort.High]: "gemini-3-pro-high",
 		},
-		thinkingPair("claude-sonnet-4-6", "Claude Sonnet 4.6"),
-		thinkingPair("claude-opus-4-6", "Claude Opus 4.6"),
-		thinkingPair("claude-sonnet-4-5", "Claude Sonnet 4.5"),
-		thinkingPair("claude-opus-4-5", "Claude Opus 4.5"),
-		thinkingPair("gemini-2.5-flash", "Gemini 2.5 Flash"),
-	],
+		thinking: { mode: "google-level", efforts: GEMINI_3_PRO_FAMILY_EFFORTS },
+		suppressWhenOff: true,
+	},
+	{
+		// Rename-only collapse: every effort and off fall back to the wire id.
+		id: "gpt-oss-120b",
+		name: "GPT-OSS 120B",
+		members: ["gpt-oss-120b-medium"],
+		routing: {},
+		thinking: { mode: "budget", efforts: [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High] },
+	},
+	thinkingPair("claude-sonnet-4-6", "Claude Sonnet 4.6"),
+	thinkingPair("claude-opus-4-6", "Claude Opus 4.6"),
+	thinkingPair("claude-sonnet-4-5", "Claude Sonnet 4.5"),
+	thinkingPair("claude-opus-4-5", "Claude Opus 4.5"),
+	thinkingPair("gemini-2.5-flash", "Gemini 2.5 Flash"),
+];
+/** `google-antigravity` (daily-cloudcode-pa): Gemini 3.x on the budget transport. */
+export const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable = {
+	families: [geminiFlashFamily("budget"), geminiProFamily("budget"), ...SHARED_CCA_FAMILIES],
 };
-/** Provider id → hand collapse table. Both CCA providers share one table. */
+/** `google-gemini-cli` (cloudcode-pa): Gemini 3.x on the level transport (official CLI parity). */
+export const GEMINI_CLI_VARIANT_COLLAPSE_TABLE: VariantCollapseTable = {
+	families: [geminiFlashFamily("google-level"), geminiProFamily("google-level"), ...SHARED_CCA_FAMILIES],
+};
+/** Provider id → hand collapse table. The CCA providers diverge on thinking transport. */
 export const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>> = {
 	"google-antigravity": ANTIGRAVITY_VARIANT_COLLAPSE_TABLE,
-	"google-gemini-cli": ANTIGRAVITY_VARIANT_COLLAPSE_TABLE,
+	"google-gemini-cli": GEMINI_CLI_VARIANT_COLLAPSE_TABLE,
 };
 /**
@@ -360,6 +412,47 @@ function reconcileRetiredRouting<TSpec extends VariantSpecLike>(
 	return next;
 }
+/**
+ * Refresh a collapsed snapshot's thinking surface in place. Bundled catalog and
+ * prev-generation snapshots freeze a family's transport, budgets, and routing;
+ * discovery emits the canonical id but the exact-id merge never overwrites a
+ * stale `family.id` row (e.g. `gemini-3.1-pro`) nor a recycled `extraAliases`
+ * row (e.g. `gemini-3-flash`). This re-applies the hand-table family's thinking,
+ * routing, and default wire id while keeping the spec id (load-bearing for exact
+ * selectors and bundled lookups). Returns `spec` by reference when unchanged.
+ */
+function refreshCollapsedThinking<TSpec extends VariantSpecLike>(
+	spec: TSpec, // collapsed snapshot row to refresh; it is never mutated, a copy is returned on change
+	family: EffortVariantFamily, // hand-table family supplying the thinking config and routing
+	retired: ReadonlySet<string> | undefined, // wire ids that must not be routed to, if any
+): TSpec {
+	// Scope snapshot self-heal to families carrying a curated per-effort budget
+	// contract (Antigravity gemini-3.x). Their routing targets are all verified
+	// live, so rebuilding routing here is safe; families without `effortBudgets`
+	// (derived `X`/`X-thinking` pairs, claude pairs) keep their presence-filtered
+	// snapshot routing untouched.
+	if (!spec.reasoning || family.thinking.effortBudgets === undefined) return spec; // specs with falsy `reasoning` are also left untouched
+	const routing: Partial<Record<Effort | "off", string>> = {};
+	let hasRouting = false;
+	for (const effortKey in family.routing) { // keep only routing targets that are defined and not retired
+		const target = family.routing[effortKey as Effort | "off"];
+		if (target !== undefined && !retired?.has(target)) {
+			routing[effortKey as Effort | "off"] = target;
+			hasRouting = true;
+		}
+	}
+	const thinking: ThinkingConfig = { ...family.thinking }; // shallow copy: the assignments below must not write into the hand-table family
+	if (hasRouting) thinking.effortRouting = routing; // effortRouting is set only when at least one target survived the filter
+	if (family.suppressWhenOff) thinking.suppressWhenOff = true;
+	const offTarget = family.routing.off; // candidate default wire id
+	const requestModelId = // the off target wins unless it is missing, retired, or equal to spec.id; otherwise the snapshot value is kept
+		offTarget !== undefined && !retired?.has(offTarget) && offTarget !== spec.id ? offTarget : spec.requestModelId;
+	if (Bun.deepEquals(thinking, spec.thinking) && requestModelId === spec.requestModelId) { // nothing changed: hand back the same reference so callers can detect a no-op by identity
+		return spec;
+	}
+	return { ...spec, thinking, ...(requestModelId !== undefined ? { requestModelId } : {}) }; // the requestModelId key is written only when a value exists
+}
 /**
  * Collapse every family in `table` found in `specs`. Non-member specs pass
  * through verbatim (by reference), order preserved; the collapsed spec
@@ -394,11 +487,17 @@ export function collapseEffortVariants<TSpec extends VariantSpecLike>(
 				: existing;
 		const rawPresent = family.members.filter(id => byId.has(id) && !(id === family.id && existingCollapsed));
 		if (rawPresent.length === 0) {
-			// Inert (no members) or already collapsed (pass-through) — idempotence.
-			// A stale collapsed entry still gets retired routing re-pointed.
-			if (reconciled !== undefined && reconciled !== existing) {
+			// Inert (no members) or already collapsed (pass-through). A stale
+			// family.id-keyed snapshot is refreshed in place from the current
+			// hand-table family (transport/budgets/routing); retired targets drop.
+			// Recycled extraAliases rows are healed in a later pass.
+			const refreshed =
+				existing !== undefined && existingCollapsed
+					? refreshCollapsedThinking(reconciled ?? existing, family, retired)
+					: reconciled;
+			if (refreshed !== undefined && refreshed !== existing) {
 				familyIdBySpecId.set(family.id, family.id);
-				replacement.set(family.id, reconciled);
+				replacement.set(family.id, refreshed);
 			}
 			continue;
 		}
@@ -464,6 +563,27 @@ export function collapseEffortVariants<TSpec extends VariantSpecLike>(
 		replacement.set(family.id, collapsed);
 	}
+	// Refresh stale alias-keyed snapshots in place (recycled bare ids). Runs even
+	// when the canonical family.id row is also present, since the exact-id merge
+	// keeps the stale alias row alongside the discovered canonical one.
+	for (const family of table.families) {
+		if (family.extraAliases === undefined) continue;
+		const retired =
+			family.retiredMembers !== undefined && family.retiredMembers.length > 0
+				? new Set(family.retiredMembers)
+				: undefined;
+		for (const alias of family.extraAliases) {
+			if (alias === family.id || familyIdBySpecId.has(alias)) continue;
+			const aliasSpec = byId.get(alias);
+			if (aliasSpec === undefined) continue;
+			const refreshed = refreshCollapsedThinking(aliasSpec, family, retired);
+			if (refreshed !== aliasSpec) {
+				familyIdBySpecId.set(alias, alias);
+				replacement.set(alias, refreshed);
+			}
+		}
+	}
 	if (replacement.size === 0) return [...specs];
 	const emitted = new Set<string>();
@@ -602,7 +722,13 @@ export function resolveBareVariantAlias(modelId: string): BareVariantAliasHit |
 		if (hit === undefined) continue;
 		const providers: Provider[] = [];
 		for (const candidate in VARIANT_COLLAPSE_TABLES) {
-			if (VARIANT_COLLAPSE_TABLES[candidate] === table) providers.push(candidate);
+			// Match by resolved alias target, not table identity: the CCA providers
+			// now hold distinct table objects that still share these aliases.
+			if (
+				getAliasIndex(VARIANT_COLLAPSE_TABLES[candidate] as VariantCollapseTable).forward.get(normalized) === hit
+			) {
+				providers.push(candidate);
+			}
 		}
 		return { id: hit, providers };
 	}

package/src/wire/gemini-headers.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  * GeminiCLI/VERSION/MODEL (PLATFORM; ARCH; SURFACE)
  */
 export function getGeminiCliUserAgent(modelId = "gemini-3.1-pro-preview"): string {
-	const version = process.env.PI_AI_GEMINI_CLI_VERSION || "0.35.3";
+	const version = process.env.PI_AI_GEMINI_CLI_VERSION || "0.46.0";
 	const platform = process.platform === "win32" ? "win32" : process.platform;
 	const arch = process.arch === "x64" ? "x64" : process.arch;
 	return `GeminiCLI/${version}/${modelId} (${platform}; ${arch}; terminal)`;
@@ -20,8 +20,6 @@ export const ANTIGRAVITY_SYSTEM_INSTRUCTION =
 	"You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question." +
 	"**Absolute paths only**" +
 	"**Proactiveness**";
-export const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION =
-	'CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. "No emdashes"), or your thinking/personality preambles in the final response. Output only the final response.';
 /**
  * Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
  * and usage code can read it without pulling the heavy google-gemini-cli provider
@@ -29,7 +27,7 @@ export const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION =
  * parse graph.
  */
 export let getAntigravityUserAgent = () => {
-	const DEFAULT_ANTIGRAVITY_VERSION = "1.104.0";
+	const DEFAULT_ANTIGRAVITY_VERSION = "2.1.4";
 	const version = process.env.PI_AI_ANTIGRAVITY_VERSION || DEFAULT_ANTIGRAVITY_VERSION;
 	// Map Node.js platform/arch to Antigravity's expected format.
 	// Verified against Antigravity source: _qn() and wqn() in main.js.
@@ -37,7 +35,32 @@ export let getAntigravityUserAgent = () => {
 	// process.arch:     x64→amd64, ia32→386, others pass through (arm64)
 	const os = process.platform === "win32" ? "windows" : process.platform;
 	const arch = process.arch === "x64" ? "amd64" : process.arch === "ia32" ? "386" : process.arch;
-	const userAgent = `antigravity/${version} ${os}/${arch}`;
+	const userAgent = `antigravity/hub/${version} ${os}/${arch}`;
 	getAntigravityUserAgent = () => userAgent;
 	return userAgent;
 };
+/**
+ * Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
+ * real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
+ * opaque `labels.model_enum` token the client tags each request with;
+ * `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
+ * regardless of the thinking budget. Keyed by the routed upstream wire id
+ * (post effort-routing), not the collapsed logical id. Checkpoint-only ids
+ * (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
+ * emits agent requests.
+ */
+export interface AntigravityModelWireProfile {
+	modelEnum: string; // opaque token sent as `labels.model_enum`
+	maxOutputTokens: number; // fixed `generationConfig.maxOutputTokens` for the wire id
+}
+export const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>> = { // keyed by routed upstream wire id
+	"gemini-3.5-flash-extra-low": { modelEnum: "MODEL_PLACEHOLDER_M187", maxOutputTokens: 65536 },
+	"gemini-3.5-flash-low": { modelEnum: "MODEL_PLACEHOLDER_M20", maxOutputTokens: 65536 },
+	"gemini-3-flash-agent": { modelEnum: "MODEL_PLACEHOLDER_M132", maxOutputTokens: 65536 },
+	"gemini-3.1-pro-low": { modelEnum: "MODEL_PLACEHOLDER_M36", maxOutputTokens: 65535 }, // Pro entries use 65535; Flash entries use 65536
+	"gemini-pro-agent": { modelEnum: "MODEL_PLACEHOLDER_M16", maxOutputTokens: 65535 },
+};
+export function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined { // looks up the profile for a routed wire id
+	return ANTIGRAVITY_MODEL_WIRE_PROFILES[wireModelId]; // undefined for ids with no entry. NOTE(review): a plain-object lookup also resolves inherited keys such as "constructor"
+}

package/src/wire/github-copilot.ts CHANGED Viewed

@@ -30,11 +30,13 @@ export const COPILOT_API_HEADERS = {
 type GitHubCopilotApiKeyPayload = {
 	token?: unknown;
 	enterpriseUrl?: unknown;
+	apiEndpoint?: unknown;
 };
 export type ParsedGitHubCopilotApiKey = {
 	accessToken: string;
 	enterpriseUrl?: string;
+	apiEndpoint?: string;
 };
 const PUBLIC_GITHUB_HOSTS = new Set(["api.github.com", "github.com", "www.github.com"]);
@@ -51,6 +53,18 @@ export function normalizeGitHubCopilotEnterpriseDomain(input: string | undefined
 	return normalized;
 }
+export function normalizeGitHubCopilotApiEndpoint(input: string | undefined): string | undefined { // validates an https endpoint; returns it trimmed and without trailing slashes, else undefined
+	const trimmed = input?.trim();
+	if (!trimmed?.startsWith("https://")) return undefined; // rejects undefined input and any prefix other than lowercase "https://" (the check is case-sensitive)
+	try {
+		const url = new URL(trimmed); // throws on unparseable input; handled by the catch below
+		if (url.protocol !== "https:" || !url.hostname) return undefined; // re-checks the parsed scheme and requires a non-empty host
+		return trimmed.replace(/\/+$/, ""); // returns the caller's text (not url.href) with trailing "/" characters removed
+	} catch {
+		return undefined; // unparseable URL
+	}
+}
 export function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilotApiKey {
 	try {
 		const parsed = JSON.parse(apiKeyRaw) as GitHubCopilotApiKeyPayload;
@@ -61,6 +75,10 @@ export function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilot
 					typeof parsed.enterpriseUrl === "string"
 						? normalizeGitHubCopilotEnterpriseDomain(parsed.enterpriseUrl)
 						: undefined,
+				apiEndpoint:
+					typeof parsed.apiEndpoint === "string"
+						? normalizeGitHubCopilotApiEndpoint(parsed.apiEndpoint)
+						: undefined,
 			};
 		}
 	} catch {}