@oh-my-pi/pi-catalog 16.0.4 → 16.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/dist/types/compat/openai.d.ts +4 -1
- package/dist/types/discovery/antigravity.d.ts +9 -0
- package/dist/types/identity/dialect.d.ts +1 -1
- package/dist/types/identity/family.d.ts +8 -0
- package/dist/types/provider-models/descriptors.d.ts +1 -1
- package/dist/types/provider-models/openai-compat.d.ts +5 -6
- package/dist/types/types.d.ts +109 -13
- package/dist/types/variant-collapse.d.ts +4 -5
- package/dist/types/wire/gemini-headers.d.ts +16 -1
- package/dist/types/wire/github-copilot.d.ts +2 -0
- package/package.json +4 -3
- package/src/build.ts +3 -1
- package/src/compat/openai.ts +213 -19
- package/src/discovery/antigravity.ts +91 -98
- package/src/discovery/codex.ts +33 -40
- package/src/discovery/cursor.ts +31 -24
- package/src/discovery/gemini.ts +39 -30
- package/src/discovery/openai-compatible.ts +22 -32
- package/src/identity/dialect.ts +4 -1
- package/src/identity/family.ts +21 -1
- package/src/model-cache.ts +8 -6
- package/src/model-thinking.ts +24 -6
- package/src/models.json +544 -376
- package/src/provider-models/google.ts +2 -0
- package/src/provider-models/ollama.ts +11 -2
- package/src/provider-models/openai-compat.ts +47 -46
- package/src/types.ts +190 -43
- package/src/variant-collapse.ts +198 -72
- package/src/wire/gemini-headers.ts +28 -5
- package/src/wire/github-copilot.ts +18 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.0.6] - 2026-06-18
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added a dedicated `openrouter` API type and `ResolvedOpenRouterCompat` configuration to support unified chat-completions and Responses-API compatibility for OpenRouter models
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Migrated bundled OpenRouter models in the catalog from `openai-completions` to the new `openrouter` API type
|
|
14
|
+
- Consolidated the resolved OpenAI compat shape. Extracted a shared `ResolvedOpenAISharedCompat` core that both `ResolvedOpenAICompat` and `ResolvedOpenAIResponsesCompat` extend; each builder still computes its own per-surface value, preserving chat↔Responses divergence. Added internal resolved wire-quirk fields (`wireModelIdMode`, `stripDeepseekSpecialTokens`, `reasoningDeltasMayBeCumulative`, `emptyLengthFinishIsContextError`, `usesOpenAIToolCallIdLimit`, `dropThinkingWhenReasoningEffort`, `supportsObfuscationOptOut`). Replaced `buildOpenRouterCompat`'s cast-and-copy with an exhaustive `pickResponsesOnly` composition that fails to compile if a new Responses-only field is added without handling. The public `OpenAICompat` config vocabulary is unchanged.
|
|
15
|
+
- Expanded `OpenAICompat`/`ResolvedOpenAISharedCompat` with shared reasoning/history/stream/request flags (`reasoningDisableMode`, `omitReasoningEffort`, `includeEncryptedReasoning`, `filterReasoningHistory`, `requiresReasoningContentForAllAssistantTurns`, `streamMarkupHealingPattern`, `promptCacheSessionHeader`, etc.) so model/provider/gateway constraints are declared once in catalog compat and then consumed uniformly by Chat Completions and Responses endpoints.
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- Changed the default compatibility builder for `openai-completions` to set `requiresAssistantAfterToolResult` to `isMistral`, enabling the synthetic assistant bridge for built-in Mistral and Devstral models.
|
|
20
|
+
- Fixed local Ollama (`provider: "ollama"`) reasoning turns still failing with HTTP 400 `invalid reasoning value: "minimal"` when the model was selected from a stale `~/.omp/models.db` cache row or a hand-written config: the `minimal → low` / `xhigh → max` remap was only stamped during fresh discovery, so cached and custom specs reached the wire unmapped. The remap now lives in the OpenAI chat-completions and Responses compat builders, so every `buildModel` (including cache loads, custom specs, and the `whenThinking` variant) backfills it — no `omp models refresh` required. Custom OpenAI-compatible providers registered under a non-`ollama` provider id still need their own `compat.reasoningEffortMap`.
|
|
21
|
+
- Advertised Ollama Cloud GLM-5.2 reasoning efforts as high/xhigh-only and mapped `xhigh` to native max effort ([#2911](https://github.com/can1357/oh-my-pi/pull/2911) by [@serverinspector](https://github.com/serverinspector))
|
|
22
|
+
- Fixed OpenRouter pseudo-API model construction so bundled OpenRouter models resolve shared OpenAI compatibility metadata instead of an undefined compat record.
|
|
23
|
+
- Fixed custom/direct `xai-oauth` Responses model specs (e.g. `grok-build`) emitting `reasoning.effort` and hitting xAI's HTTP 400: `buildOpenAIResponsesCompat` now defaults `supportsReasoningEffort` to `false` for `xai-oauth` Grok models that are off the effort-capable allowlist (`grok-3-mini`/`grok-4.20-multi-agent`/`grok-4.3`), matching the curated discovery path; explicit `compat.supportsReasoningEffort` still overrides. The allowlist moved to a shared `isGrokReasoningEffortCapable` identity helper consumed by both the compat builder and provider-model curation so the two cannot drift.
|
|
24
|
+
|
|
25
|
+
## [16.0.5] - 2026-06-17
|
|
26
|
+
|
|
27
|
+
### Added
|
|
28
|
+
|
|
29
|
+
- Added `enableGeminiThinkingLoopGuard` to OpenAI compatibility options to allow explicit opt-in or opt-out of the Gemini thinking-loop guard for OpenAI-compatible model aliases
|
|
30
|
+
- Added `LITELLM_BASE_URL` as the LiteLLM provider discovery base URL fallback, with discovery caches scoped by the resolved proxy URL and explicit provider `baseUrl` config kept at higher precedence. ([#2726](https://github.com/can1357/oh-my-pi/issues/2726))
|
|
31
|
+
- Added `ThinkingConfig.effortBudgets` (per-effort thinking-budget contract baked into collapsed variants) and `ANTIGRAVITY_MODEL_WIRE_PROFILES` (`maxOutputTokens` + `model_enum` per Antigravity wire id) to mirror the captured Antigravity Cloud Code Assist client request shape.
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
|
|
35
|
+
- Defaulted `enableGeminiThinkingLoopGuard` from Gemini family detection for both OpenAI completions and responses compatibility specs so Gemini models now enable the thinking-loop guard automatically
|
|
36
|
+
- Updated the default Gemini CLI user-agent version fallback to 0.46.0.
|
|
37
|
+
- Changed the Antigravity (`google-antigravity`, daily-cloudcode-pa) gemini-3.x collapse families to the `budget` thinking transport with the client's per-tier `thinkingBudget` (3.5 Flash low/medium/high = 1000/4000/10000, 3.1 Pro low/high = 1001/10001) and corrected 3.5 Flash effort→wire routing (medium → `gemini-3.5-flash-low`, high → `gemini-3-flash-agent`). Split the shared CCA collapse table so `google-gemini-cli` (cloudcode-pa) keeps the `google-level` `thinkingLevel` transport for official Gemini CLI parity. Stale collapsed snapshots (bundled catalog, recycled `gemini-3-flash` alias) self-heal from the hand table at collapse time, and the model cache schema is bumped to v7 to invalidate pre-budget Antigravity rows.
|
|
38
|
+
- Changed the Antigravity user-agent to the `antigravity/hub/<version>` format (default `2.1.4`) to match the captured client.
|
|
39
|
+
|
|
40
|
+
### Fixed
|
|
41
|
+
|
|
42
|
+
- Fixed `off` effort routing for `claude-opus-4-5` and `claude-opus-4-6` to use their base model IDs when thinking is disabled
|
|
43
|
+
- Fixed `gemini-2.5-flash` effort routing so all non-off effort levels resolve to `gemini-2.5-flash-thinking`
|
|
44
|
+
- Fixed shared variant alias provider resolution so `resolveBareVariantAlias` reports all matching providers when model aliases are present in both CCA collapse tables
|
|
45
|
+
- Routed the `google-antigravity` default `baseUrl` to the stable primary daily endpoint in the catalog generator and all fallback snapshots, resolving connection drops on heavy queries.
|
|
46
|
+
- Fixed MiniMax M3 dialect selection so MiniMax-family OpenAI-compatible models use the MiniMax tool-call dialect instead of generic XML. ([#2759](https://github.com/can1357/oh-my-pi/issues/2759))
|
|
47
|
+
- Fixed GitHub Copilot dynamic discovery to honor plan-specific API endpoints stored in structured OAuth credentials. ([#2876](https://github.com/can1357/oh-my-pi/issues/2876))
|
|
48
|
+
|
|
5
49
|
## [16.0.4] - 2026-06-17
|
|
6
50
|
|
|
7
51
|
### Fixed
|
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
|
|
1
|
+
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat, ResolvedOpenRouterCompat } from "../types";
|
|
2
2
|
/**
|
|
3
3
|
* Build the resolved chat-completions compat record for a model spec.
|
|
4
4
|
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
|
5
5
|
*/
|
|
6
6
|
export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
|
|
7
7
|
interface OpenAIResponsesSpecLike {
|
|
8
|
+
id?: string;
|
|
8
9
|
provider: string;
|
|
9
10
|
name: string;
|
|
10
11
|
baseUrl: string;
|
|
12
|
+
reasoning?: boolean;
|
|
11
13
|
compat?: OpenAICompat;
|
|
12
14
|
}
|
|
13
15
|
/**
|
|
@@ -22,4 +24,5 @@ interface OpenAIResponsesSpecLike {
|
|
|
22
24
|
* request-time check.
|
|
23
25
|
*/
|
|
24
26
|
export declare function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat;
|
|
27
|
+
export declare function buildOpenRouterCompat(spec: ModelSpec<"openrouter">): ResolvedOpenRouterCompat;
|
|
25
28
|
export {};
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import type { ModelSpec } from "../types";
|
|
2
|
+
import { type VariantCollapseTable } from "../variant-collapse";
|
|
3
|
+
export declare const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
|
|
4
|
+
export declare const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
|
|
2
5
|
/**
|
|
3
6
|
* Raw model metadata returned by Antigravity's `fetchAvailableModels` endpoint.
|
|
4
7
|
*/
|
|
@@ -51,6 +54,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
|
|
|
51
54
|
signal?: AbortSignal;
|
|
52
55
|
/** Optional fetch implementation override for tests. */
|
|
53
56
|
fetcher?: typeof fetch;
|
|
57
|
+
/**
|
|
58
|
+
* Hand collapse table to apply to the discovered list. Defaults to the
|
|
59
|
+
* Antigravity (budget-transport) table; `googleGeminiCli` passes the
|
|
60
|
+
* level-transport table so cloudcode-pa keeps `thinkingLevel`.
|
|
61
|
+
*/
|
|
62
|
+
collapseTable?: VariantCollapseTable;
|
|
54
63
|
}
|
|
55
64
|
/**
|
|
56
65
|
* Fetches discoverable Antigravity models and normalizes them into canonical model entries.
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma";
|
|
1
|
+
export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma" | "minimax";
|
|
2
2
|
export declare const FALLBACK_DIALECT: Dialect;
|
|
3
3
|
export declare function preferredDialect(modelId: string): Dialect;
|
|
@@ -22,6 +22,12 @@ export declare function isGemmaModelId(modelId: string): boolean;
|
|
|
22
22
|
export declare function isDeepseekModelIdOrName(value: string): boolean;
|
|
23
23
|
/** Xiaomi MiMo family by id or display name. */
|
|
24
24
|
export declare function isMimoModelIdOrName(value: string): boolean;
|
|
25
|
+
/**
|
|
26
|
+
* Grok SKUs that expose the wire `reasoning.effort` dial. Other Grok reasoners
|
|
27
|
+
* (e.g. `grok-build`, `grok-4.20-0309-reasoning`) think natively but reject the
|
|
28
|
+
* param, so callers must omit reasoning effort for them.
|
|
29
|
+
*/
|
|
30
|
+
export declare function isGrokReasoningEffortCapable(modelId: string): boolean;
|
|
25
31
|
/**
|
|
26
32
|
* MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
|
|
27
33
|
* `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
|
|
@@ -32,6 +38,8 @@ export declare function isMimoModelIdOrName(value: string): boolean;
|
|
|
32
38
|
* clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
|
|
33
39
|
*/
|
|
34
40
|
export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
|
|
41
|
+
/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
|
|
42
|
+
export declare function isMinimaxM3FamilyModelId(modelId: string): boolean;
|
|
35
43
|
/**
|
|
36
44
|
* OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
|
|
37
45
|
* `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
|
|
@@ -233,7 +233,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
233
233
|
readonly id: "openrouter";
|
|
234
234
|
readonly defaultModel: "openai/gpt-5.5";
|
|
235
235
|
readonly envVars: readonly ["OPENROUTER_API_KEY"];
|
|
236
|
-
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
236
|
+
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openrouter", unknown>;
|
|
237
237
|
readonly catalogDiscovery: {
|
|
238
238
|
readonly label: "OpenRouter";
|
|
239
239
|
readonly allowUnauthenticated: true;
|
|
@@ -95,11 +95,10 @@ interface XAICuratedModel {
|
|
|
95
95
|
reasoning?: boolean;
|
|
96
96
|
/**
|
|
97
97
|
* Whether xAI accepts the `reasoning.effort` wire param for this model.
|
|
98
|
-
* Default true. When false: picker hides the effort dial (via
|
|
99
|
-
* getSupportedEfforts in model-thinking.ts) AND wire
|
|
100
|
-
*
|
|
101
|
-
*
|
|
102
|
-
* author convention until a follow-up Op: compress unifies them.
|
|
98
|
+
* Default true. When false: the picker hides the effort dial (via
|
|
99
|
+
* getSupportedEfforts in model-thinking.ts) AND the wire omits the param —
|
|
100
|
+
* both derive from `isGrokReasoningEffortCapable` (identity/family.ts), the
|
|
101
|
+
* single allowlist shared by this curated layer and the compat builder.
|
|
103
102
|
*/
|
|
104
103
|
supportsReasoningEffort?: boolean;
|
|
105
104
|
/**
|
|
@@ -235,7 +234,7 @@ export interface OpenRouterModelManagerConfig {
|
|
|
235
234
|
baseUrl?: string;
|
|
236
235
|
fetch?: FetchImpl;
|
|
237
236
|
}
|
|
238
|
-
export declare function openrouterModelManagerOptions(config?: OpenRouterModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
237
|
+
export declare function openrouterModelManagerOptions(config?: OpenRouterModelManagerConfig): ModelManagerOptions<"openrouter">;
|
|
239
238
|
export interface ZenMuxModelManagerConfig {
|
|
240
239
|
apiKey?: string;
|
|
241
240
|
baseUrl?: string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Effort } from "./effort";
|
|
2
2
|
export type { KnownProvider } from "./provider-models/descriptors";
|
|
3
|
-
export type KnownApi = "openai-completions" | "openai-responses" | "openai-codex-responses" | "azure-openai-responses" | "anthropic-messages" | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" | "google-vertex" | "ollama-chat" | "cursor-agent";
|
|
3
|
+
export type KnownApi = "openai-completions" | "openai-responses" | "openrouter" | "openai-codex-responses" | "azure-openai-responses" | "anthropic-messages" | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" | "google-vertex" | "ollama-chat" | "cursor-agent";
|
|
4
4
|
export type Api = KnownApi | (string & {});
|
|
5
5
|
/** Canonical thinking transport used by a model. */
|
|
6
6
|
export type ThinkingControlMode = "effort" | "budget" | "google-level" | "anthropic-adaptive" | "anthropic-budget-effort";
|
|
@@ -33,6 +33,14 @@ export interface ThinkingConfig {
|
|
|
33
33
|
* thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
|
|
34
34
|
*/
|
|
35
35
|
effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
|
|
36
|
+
/**
|
|
37
|
+
* Per-effort thinking budget in tokens, baked at build time for collapsed
|
|
38
|
+
* variants whose upstream expects an explicit `thinkingBudget` instead of a
|
|
39
|
+
* value derived from the generic ladder (Antigravity Cloud Code Assist
|
|
40
|
+
* gemini-3.x). Request mapping prefers caller `thinkingBudgets`, then this
|
|
41
|
+
* map, then the provider default ladder. Only meaningful for `mode: "budget"`.
|
|
42
|
+
*/
|
|
43
|
+
effortBudgets?: Readonly<Partial<Record<Effort, number>>>;
|
|
36
44
|
/**
|
|
37
45
|
* When true, a thinking-off request MUST explicitly suppress thinking on
|
|
38
46
|
* the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
|
|
@@ -109,6 +117,9 @@ export interface Usage {
|
|
|
109
117
|
total: number;
|
|
110
118
|
};
|
|
111
119
|
}
|
|
120
|
+
export type OpenAIReasoningFormat = "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
|
|
121
|
+
export type OpenAIReasoningDisableMode = "omit" | "lowest-effort" | "openrouter-enabled-false" | "zai-thinking-disabled" | "qwen-enable-thinking-false" | "qwen-template-false" | "juice-zero-developer-message";
|
|
122
|
+
export type OpenAIStreamMarkupHealingPattern = "kimi" | "dsml" | "thinking";
|
|
112
123
|
/**
|
|
113
124
|
* Compatibility settings for openai-completions API.
|
|
114
125
|
* Use this to override URL-based auto-detection for custom providers.
|
|
@@ -137,6 +148,13 @@ export interface OpenAICompat {
|
|
|
137
148
|
reasoningEffortMap?: Partial<Record<Effort, string>>;
|
|
138
149
|
/** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
|
|
139
150
|
supportsUsageInStreaming?: boolean;
|
|
151
|
+
/**
|
|
152
|
+
* Enable the Gemini thinking-loop guard (pi-ai stream layer) for this model.
|
|
153
|
+
* Defaults to true when the model id classifies as the gemini family. Set
|
|
154
|
+
* explicitly to cover an opaque OpenAI-compat proxy alias (e.g. `my-model`)
|
|
155
|
+
* that routes to Gemini, or to false to opt a gemini-family id out.
|
|
156
|
+
*/
|
|
157
|
+
enableGeminiThinkingLoopGuard?: boolean;
|
|
140
158
|
/** Which field to use for max tokens. Default: auto-detected from URL. */
|
|
141
159
|
maxTokensField?: "max_completion_tokens" | "max_tokens";
|
|
142
160
|
/** Whether tool results require the `name` field. Default: auto-detected from URL. */
|
|
@@ -148,13 +166,23 @@ export interface OpenAICompat {
|
|
|
148
166
|
/** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
|
|
149
167
|
requiresMistralToolIds?: boolean;
|
|
150
168
|
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" | "disabled" } (also used by Moonshot Kimi), "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
|
|
151
|
-
thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
|
|
169
|
+
thinkingFormat?: OpenAIReasoningFormat;
|
|
170
|
+
/** Request-time disable encoding for the selected reasoning/thinking format. Default: derived from `thinkingFormat`. */
|
|
171
|
+
reasoningDisableMode?: OpenAIReasoningDisableMode;
|
|
172
|
+
/** Whether the provider rejects `reasoning.effort`/`reasoning_effort` even when the model reasons natively. Default: false unless reasoning effort is unsupported. */
|
|
173
|
+
omitReasoningEffort?: boolean;
|
|
174
|
+
/** Whether Responses requests should ask for encrypted reasoning replay items. Default: true. */
|
|
175
|
+
includeEncryptedReasoning?: boolean;
|
|
176
|
+
/** Whether replayed Responses history should strip native `type: "reasoning"` items before request encoding. Default: false. */
|
|
177
|
+
filterReasoningHistory?: boolean;
|
|
152
178
|
/** Optional `thinking.keep` value for Z.ai/Moonshot-style thinking params. Set false to suppress auto-detected keep. Default: auto-detected. */
|
|
153
179
|
thinkingKeep?: "all" | false;
|
|
154
180
|
/** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
|
|
155
181
|
reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";
|
|
156
182
|
/** Whether assistant tool-call messages must include reasoning content. Default: false. */
|
|
157
183
|
requiresReasoningContentForToolCalls?: boolean;
|
|
184
|
+
/** Whether all assistant messages must include reasoning content. Default: false. */
|
|
185
|
+
requiresReasoningContentForAllAssistantTurns?: boolean;
|
|
158
186
|
/** Whether the provider accepts a synthetic placeholder (e.g. ".") for missing reasoning_content on tool-call turns. Default: true. Set to false for providers like DeepSeek that validate the exact reasoning_content value. */
|
|
159
187
|
allowsSyntheticReasoningContentForToolCalls?: boolean;
|
|
160
188
|
/** Whether assistant tool-call messages must include non-empty content. Default: false. */
|
|
@@ -188,10 +216,22 @@ export interface OpenAICompat {
|
|
|
188
216
|
vercelGatewayRouting?: VercelGatewayRouting;
|
|
189
217
|
/** Extra fields to include in request body (e.g. gateway routing hints for OpenClaw-style proxies). */
|
|
190
218
|
extraBody?: Record<string, unknown>;
|
|
219
|
+
/** Request-session header that should mirror the normalized prompt-cache key. Default: unset. */
|
|
220
|
+
promptCacheSessionHeader?: "x-grok-conv-id";
|
|
191
221
|
/** Whether chat-completions payloads should include provider-specific prompt-cache markers. */
|
|
192
222
|
cacheControlFormat?: "anthropic" | undefined;
|
|
193
223
|
/** Whether the provider supports the `strict` field in tool definitions. Default: auto-detected per provider/baseUrl (conservative for unknown providers). */
|
|
194
224
|
supportsStrictMode?: boolean;
|
|
225
|
+
/**
|
|
226
|
+
* Tool-schema dialect the endpoint validates `tools.function.parameters`
|
|
227
|
+
* against. `"moonshot-mfjs"` triggers Moonshot Flavored JSON Schema
|
|
228
|
+
* normalization (collapse `const`→`enum`, infer `type` on bare enums, strip
|
|
229
|
+
* unsupported validators/`prefixItems`) because Moonshot/Kimi native hosts
|
|
230
|
+
* reject standard JSON Schema constructs with HTTP 400. Default:
|
|
231
|
+
* auto-detected (`"moonshot-mfjs"` on api.moonshot.ai / api.kimi.com). Set
|
|
232
|
+
* `"none"` to opt a custom Moonshot-compatible host out.
|
|
233
|
+
*/
|
|
234
|
+
toolSchemaFlavor?: "moonshot-mfjs" | "none";
|
|
195
235
|
/**
|
|
196
236
|
* Stream-watchdog idle-timeout floor in ms for slow reasoning hosts.
|
|
197
237
|
* Default: auto-detected (GLM coding-plan hosts, direct DeepSeek reasoning).
|
|
@@ -215,6 +255,16 @@ export interface OpenAICompat {
|
|
|
215
255
|
* Default: auto-detected (GPT-5-family model names).
|
|
216
256
|
*/
|
|
217
257
|
requiresJuiceZeroHack?: boolean;
|
|
258
|
+
/** Whether streamed reasoning deltas for the same field may repeat the full cumulative text snapshot. Default: false. */
|
|
259
|
+
reasoningDeltasMayBeCumulative?: boolean;
|
|
260
|
+
/** Strip leaked DeepSeek chat-template special tokens from visible content deltas. Default: auto-detected. */
|
|
261
|
+
stripDeepseekSpecialTokens?: boolean;
|
|
262
|
+
/** Heal leaked chat-template/tool-call/thinking markup from visible content deltas. Default: auto-detected. */
|
|
263
|
+
streamMarkupHealingPattern?: OpenAIStreamMarkupHealingPattern;
|
|
264
|
+
/** Treat an empty length-finished stream as a context-window error. Default: auto-detected. */
|
|
265
|
+
emptyLengthFinishIsContextError?: boolean;
|
|
266
|
+
/** Normalize tool call ids to OpenAI's 40-character limit. Default: auto-detected. */
|
|
267
|
+
usesOpenAIToolCallIdLimit?: boolean;
|
|
218
268
|
/**
|
|
219
269
|
* Compat deltas applied when a request actually engages thinking mode
|
|
220
270
|
* (reasoning requested and not disabled, model reasoning-capable, and not
|
|
@@ -316,37 +366,83 @@ export interface VercelGatewayRouting {
|
|
|
316
366
|
order?: string[];
|
|
317
367
|
}
|
|
318
368
|
type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mixed";
|
|
369
|
+
/**
|
|
370
|
+
* Fields whose meaning is identical across chat-completions and Responses surfaces.
|
|
371
|
+
* Each builder still computes its own per-surface value when defaults diverge.
|
|
372
|
+
*/
|
|
373
|
+
export interface ResolvedOpenAISharedCompat {
|
|
374
|
+
supportsDeveloperRole: boolean;
|
|
375
|
+
supportsStrictMode: boolean;
|
|
376
|
+
supportsReasoningEffort: boolean;
|
|
377
|
+
reasoningEffortMap: Partial<Record<Effort, string>>;
|
|
378
|
+
supportsReasoningParams: boolean;
|
|
379
|
+
thinkingFormat: OpenAIReasoningFormat;
|
|
380
|
+
reasoningDisableMode: OpenAIReasoningDisableMode;
|
|
381
|
+
omitReasoningEffort: boolean;
|
|
382
|
+
includeEncryptedReasoning: boolean;
|
|
383
|
+
filterReasoningHistory: boolean;
|
|
384
|
+
disableReasoningOnForcedToolChoice: boolean;
|
|
385
|
+
disableReasoningOnToolChoice: boolean;
|
|
386
|
+
supportsToolChoice: boolean;
|
|
387
|
+
supportsForcedToolChoice: boolean;
|
|
388
|
+
reasoningContentField?: OpenAICompat["reasoningContentField"];
|
|
389
|
+
requiresReasoningContentForToolCalls: boolean;
|
|
390
|
+
requiresReasoningContentForAllAssistantTurns: boolean;
|
|
391
|
+
allowsSyntheticReasoningContentForToolCalls: boolean;
|
|
392
|
+
requiresThinkingAsText: boolean;
|
|
393
|
+
requiresMistralToolIds: boolean;
|
|
394
|
+
requiresToolResultName: boolean;
|
|
395
|
+
requiresAssistantAfterToolResult: boolean;
|
|
396
|
+
requiresAssistantContentForToolCalls: boolean;
|
|
397
|
+
stripDeepseekSpecialTokens: boolean;
|
|
398
|
+
streamMarkupHealingPattern?: OpenAIStreamMarkupHealingPattern;
|
|
399
|
+
reasoningDeltasMayBeCumulative: boolean;
|
|
400
|
+
emptyLengthFinishIsContextError: boolean;
|
|
401
|
+
usesOpenAIToolCallIdLimit: boolean;
|
|
402
|
+
promptCacheSessionHeader?: OpenAICompat["promptCacheSessionHeader"];
|
|
403
|
+
/** The model sits behind OpenRouter (routing prefs and max-token omission apply). */
|
|
404
|
+
isOpenRouterHost: boolean;
|
|
405
|
+
/** Whether this endpoint needs a max-token field even when caller did not set one. */
|
|
406
|
+
alwaysSendMaxTokens: boolean;
|
|
407
|
+
/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
|
|
408
|
+
enableGeminiThinkingLoopGuard?: boolean;
|
|
409
|
+
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
410
|
+
/** Provider-specific wire model-id transform applied to the base id. */
|
|
411
|
+
wireModelIdMode: "raw" | "firepass" | "fireworks" | "openrouter";
|
|
412
|
+
}
|
|
319
413
|
/**
|
|
320
414
|
* Fully-resolved chat-completions compat view: every detected default
|
|
321
415
|
* materialized and user overrides applied. Built once per model by
|
|
322
416
|
* `buildModel`; request handlers read fields and never detect, resolve, or
|
|
323
417
|
* allocate.
|
|
324
418
|
*/
|
|
325
|
-
export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "
|
|
326
|
-
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
419
|
+
export type ResolvedOpenAICompat = ResolvedOpenAISharedCompat & Required<Omit<OpenAICompat, "supportsDeveloperRole" | "supportsReasoningEffort" | "reasoningEffortMap" | "supportsReasoningParams" | "thinkingFormat" | "reasoningDisableMode" | "omitReasoningEffort" | "includeEncryptedReasoning" | "filterReasoningHistory" | "disableReasoningOnForcedToolChoice" | "disableReasoningOnToolChoice" | "supportsToolChoice" | "supportsForcedToolChoice" | "reasoningContentField" | "requiresReasoningContentForToolCalls" | "requiresReasoningContentForAllAssistantTurns" | "allowsSyntheticReasoningContentForToolCalls" | "requiresThinkingAsText" | "requiresMistralToolIds" | "requiresToolResultName" | "requiresAssistantAfterToolResult" | "requiresAssistantContentForToolCalls" | "stripDeepseekSpecialTokens" | "streamMarkupHealingPattern" | "reasoningDeltasMayBeCumulative" | "emptyLengthFinishIsContextError" | "usesOpenAIToolCallIdLimit" | "promptCacheSessionHeader" | "openRouterRouting" | "isOpenRouterHost" | "supportsStrictMode" | "supportsLongPromptCacheRetention" | "alwaysSendMaxTokens" | "wireModelIdMode" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "toolSchemaFlavor" | "streamIdleTimeoutMs" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "enableGeminiThinkingLoopGuard" | "whenThinking">> & {
|
|
327
420
|
vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
|
|
328
421
|
extraBody?: OpenAICompat["extraBody"];
|
|
329
422
|
cacheControlFormat?: OpenAICompat["cacheControlFormat"];
|
|
330
423
|
thinkingKeep?: OpenAICompat["thinkingKeep"];
|
|
331
424
|
streamIdleTimeoutMs?: number;
|
|
332
425
|
toolStrictMode: ResolvedToolStrictMode;
|
|
333
|
-
|
|
334
|
-
isOpenRouterHost: boolean;
|
|
426
|
+
toolSchemaFlavor?: OpenAICompat["toolSchemaFlavor"];
|
|
335
427
|
/** The model sits behind Vercel AI Gateway. */
|
|
336
428
|
isVercelGatewayHost: boolean;
|
|
429
|
+
dropThinkingWhenReasoningEffort: boolean;
|
|
337
430
|
/** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
|
|
338
431
|
whenThinking?: ResolvedOpenAICompat;
|
|
339
432
|
};
|
|
340
433
|
/** Fully-resolved Responses-API compat view (same contract as `ResolvedOpenAICompat`). */
|
|
341
|
-
export interface ResolvedOpenAIResponsesCompat {
|
|
342
|
-
supportsDeveloperRole: boolean;
|
|
343
|
-
supportsStrictMode: boolean;
|
|
344
|
-
supportsReasoningEffort: boolean;
|
|
434
|
+
export interface ResolvedOpenAIResponsesCompat extends ResolvedOpenAISharedCompat {
|
|
345
435
|
supportsLongPromptCacheRetention: boolean;
|
|
346
436
|
strictResponsesPairing: boolean;
|
|
347
437
|
requiresJuiceZeroHack: boolean;
|
|
348
|
-
|
|
438
|
+
supportsObfuscationOptOut: boolean;
|
|
349
439
|
}
|
|
440
|
+
/**
|
|
441
|
+
* OpenRouter is a pseudo API: runtime dispatch can use either Responses
|
|
442
|
+
* (default) or Chat Completions (`PI_OPENROUTER_RESPONSES=0`) with the same
|
|
443
|
+
* model object, so its resolved compat must satisfy both handlers.
|
|
444
|
+
*/
|
|
445
|
+
export type ResolvedOpenRouterCompat = ResolvedOpenAICompat & ResolvedOpenAIResponsesCompat;
|
|
350
446
|
/** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
|
|
351
447
|
export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
|
|
352
448
|
/**
|
|
@@ -358,9 +454,9 @@ export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
|
|
|
358
454
|
officialEndpoint: boolean;
|
|
359
455
|
};
|
|
360
456
|
/** Sparse, user-authored compat overrides for a given API (models.json / config vocabulary). */
|
|
361
|
-
export type CompatConfigOf<TApi extends Api> = TApi extends "openai-completions" | "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? OpenAICompat : TApi extends "anthropic-messages" ? AnthropicCompat : undefined;
|
|
457
|
+
export type CompatConfigOf<TApi extends Api> = TApi extends "openai-completions" | "openrouter" | "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? OpenAICompat : TApi extends "anthropic-messages" ? AnthropicCompat : undefined;
|
|
362
458
|
/** Resolved compat for a given API: complete record, materialized once by `buildModel`. */
|
|
363
|
-
export type CompatOf<TApi extends Api> = TApi extends "openai-completions" ? ResolvedOpenAICompat : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? ResolvedOpenAIResponsesCompat : TApi extends "anthropic-messages" ? ResolvedAnthropicCompat : undefined;
|
|
459
|
+
export type CompatOf<TApi extends Api> = TApi extends "openrouter" ? ResolvedOpenRouterCompat : TApi extends "openai-completions" ? ResolvedOpenAICompat : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? ResolvedOpenAIResponsesCompat : TApi extends "anthropic-messages" ? ResolvedAnthropicCompat : undefined;
|
|
364
460
|
export interface Model<TApi extends Api = Api> {
|
|
365
461
|
id: string;
|
|
366
462
|
/**
|
|
@@ -46,12 +46,11 @@ export interface EffortVariantFamily {
|
|
|
46
46
|
export interface VariantCollapseTable {
|
|
47
47
|
families: readonly EffortVariantFamily[];
|
|
48
48
|
}
|
|
49
|
-
/**
|
|
50
|
-
* Shared by `google-antigravity` and `google-gemini-cli` — both serve the
|
|
51
|
-
* Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
|
|
52
|
-
*/
|
|
49
|
+
/** `google-antigravity` (daily-cloudcode-pa): Gemini 3.x on the budget transport. */
|
|
53
50
|
export declare const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
|
|
54
|
-
/**
|
|
51
|
+
/** `google-gemini-cli` (cloudcode-pa): Gemini 3.x on the level transport (official CLI parity). */
|
|
52
|
+
export declare const GEMINI_CLI_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
|
|
53
|
+
/** Provider id → hand collapse table. The CCA providers diverge on thinking transport. */
|
|
55
54
|
export declare const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>>;
|
|
56
55
|
/**
|
|
57
56
|
* The global automatic rule: derive an `X` + `X-thinking` family for every
|
|
@@ -9,7 +9,6 @@ export declare const getGeminiCliHeaders: (modelId?: string) => {
|
|
|
9
9
|
"Client-Metadata": string;
|
|
10
10
|
};
|
|
11
11
|
export declare const ANTIGRAVITY_SYSTEM_INSTRUCTION: string;
|
|
12
|
-
export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. \"No emdashes\"), or your thinking/personality preambles in the final response. Output only the final response.";
|
|
13
12
|
/**
|
|
14
13
|
* Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
|
|
15
14
|
* and usage code can read it without pulling the heavy google-gemini-cli provider
|
|
@@ -17,3 +16,19 @@ export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER outp
|
|
|
17
16
|
* parse graph.
|
|
18
17
|
*/
|
|
19
18
|
export declare let getAntigravityUserAgent: () => string;
|
|
19
|
+
/**
|
|
20
|
+
* Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
|
|
21
|
+
* real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
|
|
22
|
+
* opaque `labels.model_enum` token the client tags each request with;
|
|
23
|
+
* `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
|
|
24
|
+
* regardless of the thinking budget. Keyed by the routed upstream wire id
|
|
25
|
+
* (post effort-routing), not the collapsed logical id. Checkpoint-only ids
|
|
26
|
+
* (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
|
|
27
|
+
* emits agent requests.
|
|
28
|
+
*/
|
|
29
|
+
export interface AntigravityModelWireProfile {
|
|
30
|
+
modelEnum: string;
|
|
31
|
+
maxOutputTokens: number;
|
|
32
|
+
}
|
|
33
|
+
export declare const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>>;
|
|
34
|
+
export declare function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined;
|
|
@@ -25,9 +25,11 @@ export declare const COPILOT_API_HEADERS: {
|
|
|
25
25
|
export type ParsedGitHubCopilotApiKey = {
|
|
26
26
|
accessToken: string;
|
|
27
27
|
enterpriseUrl?: string;
|
|
28
|
+
apiEndpoint?: string;
|
|
28
29
|
};
|
|
29
30
|
export declare function isPublicGitHubHost(host: string): boolean;
|
|
30
31
|
export declare function normalizeGitHubCopilotEnterpriseDomain(input: string | undefined): string | undefined;
|
|
32
|
+
export declare function normalizeGitHubCopilotApiEndpoint(input: string | undefined): string | undefined;
|
|
31
33
|
export declare function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilotApiKey;
|
|
32
34
|
export declare function normalizeDomain(input: string): string | null;
|
|
33
35
|
export declare function getGitHubCopilotBaseUrl(enterpriseDomain?: string): string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "16.0.4",
|
|
4
|
+
"version": "16.0.6",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,11 +34,12 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "16.0.4",
|
|
37
|
+
"@oh-my-pi/pi-utils": "16.0.6",
|
|
38
|
+
"arktype": "^2.2.0",
|
|
38
39
|
"zod": "^4"
|
|
39
40
|
},
|
|
40
41
|
"devDependencies": {
|
|
41
|
-
"@oh-my-pi/pi-ai": "16.0.4",
|
|
42
|
+
"@oh-my-pi/pi-ai": "16.0.6",
|
|
42
43
|
"@types/bun": "^1.3.14"
|
|
43
44
|
},
|
|
44
45
|
"engines": {
|
package/src/build.ts
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* compat per request.
|
|
11
11
|
*/
|
|
12
12
|
import { buildAnthropicCompat } from "./compat/anthropic";
|
|
13
|
-
import { buildOpenAICompat, buildOpenAIResponsesCompat } from "./compat/openai";
|
|
13
|
+
import { buildOpenAICompat, buildOpenAIResponsesCompat, buildOpenRouterCompat } from "./compat/openai";
|
|
14
14
|
import { resolveModelThinking } from "./model-thinking";
|
|
15
15
|
import type { Api, CompatOf, Model, ModelSpec } from "./types";
|
|
16
16
|
import { cleanModelName } from "./utils";
|
|
@@ -28,6 +28,8 @@ export function buildModel<TApi extends Api>(spec: ModelSpec<TApi>): Model<TApi>
|
|
|
28
28
|
|
|
29
29
|
export function buildCompat(spec: ModelSpec<Api>): CompatOf<Api> {
|
|
30
30
|
switch (spec.api) {
|
|
31
|
+
case "openrouter":
|
|
32
|
+
return buildOpenRouterCompat(spec as ModelSpec<"openrouter">);
|
|
31
33
|
case "openai-completions":
|
|
32
34
|
return buildOpenAICompat(spec as ModelSpec<"openai-completions">);
|
|
33
35
|
case "openai-responses":
|