@oh-my-pi/pi-catalog 15.10.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/types/build.d.ts +3 -0
- package/dist/types/compat/anthropic.d.ts +11 -0
- package/dist/types/compat/apply.d.ts +7 -0
- package/dist/types/compat/openai.d.ts +21 -0
- package/dist/types/discovery/antigravity.d.ts +61 -0
- package/dist/types/discovery/codex.d.ts +38 -0
- package/dist/types/discovery/cursor-gen/agent_pb.d.ts +13022 -0
- package/dist/types/discovery/cursor.d.ts +23 -0
- package/dist/types/discovery/gemini.d.ts +25 -0
- package/dist/types/discovery/index.d.ts +4 -0
- package/dist/types/discovery/openai-compatible.d.ts +72 -0
- package/dist/types/effort.d.ts +9 -0
- package/dist/types/fireworks-model-id.d.ts +10 -0
- package/dist/types/hosts.d.ts +128 -0
- package/dist/types/identity/bundled.d.ts +6 -0
- package/dist/types/identity/classify.d.ts +45 -0
- package/dist/types/identity/equivalence.d.ts +46 -0
- package/dist/types/identity/family.d.ts +45 -0
- package/dist/types/identity/id.d.ts +12 -0
- package/dist/types/identity/index.d.ts +9 -0
- package/dist/types/identity/markers.d.ts +4 -0
- package/dist/types/identity/priority.d.ts +1 -0
- package/dist/types/identity/reference.d.ts +22 -0
- package/dist/types/identity/selection.d.ts +20 -0
- package/dist/types/index.d.ts +15 -0
- package/dist/types/model-cache.d.ts +17 -0
- package/dist/types/model-manager.d.ts +64 -0
- package/dist/types/model-thinking.d.ts +67 -0
- package/dist/types/models.d.ts +12 -0
- package/dist/types/provider-models/bundled-references.d.ts +11 -0
- package/dist/types/provider-models/descriptor-types.d.ts +74 -0
- package/dist/types/provider-models/descriptors.d.ts +384 -0
- package/dist/types/provider-models/discovery-constants.d.ts +11 -0
- package/dist/types/provider-models/google.d.ts +27 -0
- package/dist/types/provider-models/index.d.ts +6 -0
- package/dist/types/provider-models/ollama.d.ts +9 -0
- package/dist/types/provider-models/openai-compat.d.ts +385 -0
- package/dist/types/provider-models/special.d.ts +16 -0
- package/dist/types/types.d.ts +405 -0
- package/dist/types/utils.d.ts +5 -0
- package/dist/types/wire/codex.d.ts +26 -0
- package/dist/types/wire/gemini-headers.d.ts +18 -0
- package/dist/types/wire/github-copilot.d.ts +18 -0
- package/package.json +100 -0
- package/src/build.ts +40 -0
- package/src/compat/anthropic.ts +67 -0
- package/src/compat/apply.ts +15 -0
- package/src/compat/openai.ts +365 -0
- package/src/discovery/antigravity.ts +261 -0
- package/src/discovery/codex.ts +371 -0
- package/src/discovery/cursor-gen/agent_pb.ts +15274 -0
- package/src/discovery/cursor.ts +307 -0
- package/src/discovery/gemini.ts +249 -0
- package/src/discovery/index.ts +4 -0
- package/src/discovery/openai-compatible.ts +224 -0
- package/src/effort.ts +16 -0
- package/src/fireworks-model-id.ts +30 -0
- package/src/hosts.ts +114 -0
- package/src/identity/bundled.ts +38 -0
- package/src/identity/classify.ts +141 -0
- package/src/identity/equivalence.ts +870 -0
- package/src/identity/family.ts +88 -0
- package/src/identity/id.ts +81 -0
- package/src/identity/index.ts +9 -0
- package/src/identity/markers.ts +49 -0
- package/src/identity/priority.ts +56 -0
- package/src/identity/reference.ts +134 -0
- package/src/identity/selection.ts +65 -0
- package/src/index.ts +15 -0
- package/src/model-cache.ts +132 -0
- package/src/model-manager.ts +472 -0
- package/src/model-thinking.ts +407 -0
- package/src/models.json +75308 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +64 -0
- package/src/provider-models/bundled-references.ts +54 -0
- package/src/provider-models/descriptor-types.ts +79 -0
- package/src/provider-models/descriptors.ts +456 -0
- package/src/provider-models/discovery-constants.ts +11 -0
- package/src/provider-models/google.ts +105 -0
- package/src/provider-models/index.ts +6 -0
- package/src/provider-models/ollama.ts +154 -0
- package/src/provider-models/openai-compat.ts +3106 -0
- package/src/provider-models/special.ts +67 -0
- package/src/types.ts +470 -0
- package/src/utils.ts +27 -0
- package/src/wire/codex.ts +43 -0
- package/src/wire/gemini-headers.ts +41 -0
- package/src/wire/github-copilot.ts +72 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { once } from "@oh-my-pi/pi-utils";
|
|
2
|
+
import { fetchCodexModels } from "../discovery/codex";
|
|
3
|
+
import type { ModelManagerOptions } from "../model-manager";
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// OpenAI Codex
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
/**
 * Optional settings accepted by `openaiCodexModelManagerOptions`. All three
 * fields are forwarded unchanged to `fetchCodexModels`.
 */
export interface OpenAICodexModelManagerConfig {
  /** Access token; when absent or empty, no dynamic model fetcher is configured. */
  accessToken?: string;
  /** Account identifier passed through to model discovery. */
  accountId?: string;
  /** Client version passed through to model discovery. */
  clientVersion?: string;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Builds the model-manager options for the `openai-codex` provider.
 *
 * Without an access token the result carries only the provider id. With one,
 * it also carries a `fetchDynamicModels` callback that asks `fetchCodexModels`
 * for the live model list.
 *
 * @param config - Optional credentials and client metadata; defaults to `{}`.
 * @returns Options whose `fetchDynamicModels` (when present) resolves to the
 *   discovered models, or `null` when discovery yields no result.
 */
export function openaiCodexModelManagerOptions(
  config: OpenAICodexModelManagerConfig = {},
): ModelManagerOptions<"openai-codex-responses"> {
  const { accessToken, accountId, clientVersion } = config;

  // No token: static catalog only, no discovery callback.
  if (!accessToken) {
    return { providerId: "openai-codex" };
  }

  return {
    providerId: "openai-codex",
    fetchDynamicModels: async () => {
      const discovered = await fetchCodexModels({ accessToken, accountId, clientVersion });
      // A missing result or a missing `models` field both collapse to `null`.
      return discovered?.models ?? null;
    },
  };
}
|
|
31
|
+
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
// Cursor
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
/**
 * Optional settings accepted by `cursorModelManagerOptions`. All three fields
 * are forwarded unchanged to `fetchCursorUsableModels`.
 */
export interface CursorModelManagerConfig {
  /** API key; when absent or empty, no dynamic model fetcher is configured. */
  apiKey?: string;
  /** Base URL passed through to model discovery. */
  baseUrl?: string;
  /** Client version passed through to model discovery. */
  clientVersion?: string;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Builds the model-manager options for the `cursor` provider.
 *
 * Without an API key the result carries only the provider id. With one, it
 * also carries a `fetchDynamicModels` callback that loads the Cursor discovery
 * module on demand and delegates to its `fetchCursorUsableModels`.
 *
 * @param config - Optional credentials and endpoint settings; defaults to `{}`.
 * @returns Options for the `cursor-agent` API.
 */
export function cursorModelManagerOptions(config: CursorModelManagerConfig = {}): ModelManagerOptions<"cursor-agent"> {
  const { apiKey, baseUrl, clientVersion } = config;

  // No key: static catalog only, no discovery callback.
  if (!apiKey) {
    return { providerId: "cursor" };
  }

  return {
    providerId: "cursor",
    fetchDynamicModels: async () => {
      // The discovery module is only imported once a fetch is actually requested.
      const listUsableModels = (await cursorDiscovery()).fetchCursorUsableModels;
      return listUsableModels({ apiKey, baseUrl, clientVersion });
    },
  };
}
|
|
56
|
+
|
|
57
|
+
// Loader for the Cursor discovery module, imported dynamically rather than at
// module load. `once` comes from @oh-my-pi/pi-utils; presumably it memoizes the
// first call so the import runs at most once (confirm against pi-utils).
// Declared below `cursorModelManagerOptions`, which is safe because that
// function only invokes it inside an async callback, after this module has
// finished evaluating.
const cursorDiscovery = once(() => import("../discovery/cursor"));
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Zai
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
/** Settings accepted by `zaiModelManagerOptions`. No options are defined; the interface is empty. */
export interface ZaiModelManagerConfig {}
|
|
64
|
+
|
|
65
|
+
/**
 * Builds the model-manager options for the `zai` provider.
 *
 * @param _config - Accepted for signature parity with the other providers; unused.
 * @returns Options carrying only the provider id (no dynamic model discovery).
 */
export function zaiModelManagerOptions(_config: ZaiModelManagerConfig = {}): ModelManagerOptions<"anthropic-messages"> {
  const options: ModelManagerOptions<"anthropic-messages"> = { providerId: "zai" };
  return options;
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
import type { Effort } from "./effort";
|
|
2
|
+
|
|
3
|
+
export type { KnownProvider } from "./provider-models/descriptors";
|
|
4
|
+
|
|
5
|
+
/**
 * Identifiers of the wire APIs this catalog has built-in knowledge of.
 * `CompatConfigOf` and `CompatOf` select their compat shapes from these literals.
 */
export type KnownApi =
  | "openai-completions"
  | "openai-responses"
  | "openai-codex-responses"
  | "azure-openai-responses"
  | "anthropic-messages"
  | "bedrock-converse-stream"
  | "google-generative-ai"
  | "google-gemini-cli"
  | "google-vertex"
  | "ollama-chat"
  | "cursor-agent";
|
|
17
|
+
/**
 * Any API identifier: a `KnownApi` literal or an arbitrary string. The
 * `string & {}` member stops the union from collapsing to plain `string`, so
 * editors can still suggest the known literals.
 */
export type Api = KnownApi | (string & {});
|
|
18
|
+
|
|
19
|
+
/**
 * Canonical thinking transport used by a model. Stored on
 * `ThinkingConfig.mode` to say how the selected effort is encoded for the
 * provider.
 */
export type ThinkingControlMode =
  | "effort"
  | "budget"
  | "google-level"
  | "anthropic-adaptive"
  | "anthropic-budget-effort";
|
|
26
|
+
|
|
27
|
+
/**
 * Per-model thinking capabilities used to clamp and map user-facing effort
 * levels. Attached to a model as `Model.thinking`.
 */
export interface ThinkingConfig {
  /** Provider-specific transport used to encode the selected effort. */
  mode: ThinkingControlMode;
  /**
   * Supported user-facing efforts, ordered least → most intensive. Never
   * empty: a reasoning model without a controllable effort surface carries
   * `thinking: undefined` instead of an empty list.
   */
  efforts: readonly Effort[];
  /** Optional default effort applied when this model is selected. Falls back to the global default if absent. */
  defaultLevel?: Effort;
  /**
   * Effort → wire-value remap for `anthropic-adaptive` transports, baked at
   * build time (4-tier legacy scale vs the 5-tier Opus 4.7+/Fable/Mythos
   * scale). Efforts the map omits are sent unchanged (identity).
   */
  effortMap?: Partial<Record<Effort, string>>;
  /**
   * Whether adaptive thinking accepts the `display` field (Opus 4.7+,
   * Fable/Mythos 5). Also implies native interleaved thinking — no beta
   * header needed.
   */
  supportsDisplay?: boolean;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Any provider-id string. `KnownProvider` (re-exported from this module)
 * enumerates the built-in model providers from the catalog descriptor table.
 */
export type Provider = string;
|
|
55
|
+
|
|
56
|
+
/**
 * Token budgets for each thinking level (token-based providers only).
 * Keyed by `Effort`; every key is optional.
 */
export type ThinkingBudgets = { [key in Effort]?: number };
|
|
58
|
+
|
|
59
|
+
/**
 * `fetch`-compatible function. Accepts any callable matching the standard
 * fetch signature; `preconnect` is optional because non-Bun runtimes (browsers,
 * test mocks) won't expose it.
 *
 * Typed as call signature & property bag so a plain arrow function is
 * assignable without having to supply `preconnect`.
 */
export type FetchImpl = ((input: string | URL | Request, init?: RequestInit) => Promise<Response>) & {
  preconnect?: typeof globalThis.fetch.preconnect;
};
|
|
67
|
+
|
|
68
|
+
/**
 * Token and cost accounting for a single turn. The four token buckets
 * (`input`, `output`, `cacheRead`, `cacheWrite`) are disjoint and add up to
 * `totalTokens`; the optional fields refine or annotate those buckets.
 */
export interface Usage {
  /** Non-cached input tokens (matches the bucket the provider bills as new input). */
  input: number;
  /** Total output tokens for the turn, including thinking, assistant text, and tool-call argument tokens. */
  output: number;
  /** Tokens read from the prompt cache. */
  cacheRead: number;
  /** Tokens written to the prompt cache (cache creation). */
  cacheWrite: number;
  /** Sum of input + output + cacheRead + cacheWrite. */
  totalTokens: number;
  /** Copilot premium-request counter, when applicable. */
  premiumRequests?: number;
  /**
   * Reasoning/thinking tokens included in `output`, when the provider reports them
   * (OpenAI `output_tokens_details.reasoning_tokens`, Google `thoughtsTokenCount`).
   * Always a subset of `output` — non-reasoning output is `output - reasoningTokens`.
   *
   * Providers that don't expose this leave it undefined rather than guessing;
   * `undefined` means unknown, NOT zero.
   */
  reasoningTokens?: number;
  /**
   * Cache-write TTL breakdown (Anthropic only). When set, the components sum to
   * `cacheWrite`. Other providers do not populate this.
   */
  cttl?: {
    ephemeral5m?: number;
    ephemeral1h?: number;
  };
  /**
   * Server-side tool invocations made during this turn (Anthropic web_search /
   * web_fetch, OpenAI built-in tools when reported). Counts requests, not tokens.
   */
  server?: {
    webSearch?: number;
    webFetch?: number;
  };
  /**
   * Cost per token bucket plus the overall `total`.
   * NOTE(review): units are not stated here; presumably a currency amount
   * derived from `Model.cost` ($/million tokens) — confirm against the producer.
   */
  cost: {
    input: number;
    output: number;
    cacheRead: number;
    cacheWrite: number;
    total: number;
  };
}
|
|
114
|
+
|
|
115
|
+
/**
 * Compatibility settings for the openai-completions API.
 * Use this to override URL-based auto-detection for custom providers.
 *
 * Every field is optional (sparse override). `CompatConfigOf` also uses this
 * shape for the Responses-family APIs (`openai-responses`,
 * `azure-openai-responses`, `openai-codex-responses`), which is why a few
 * fields below refer to the Responses API.
 */
export interface OpenAICompat {
  /** Whether the provider supports the `store` field. Default: auto-detected from URL. */
  supportsStore?: boolean;
  /** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
  supportsDeveloperRole?: boolean;
  /**
   * Whether the provider's chat-completions endpoint accepts multiple
   * leading `system`/`developer` messages. When false, ordered system
   * prompts are coalesced into a single message joined by `\n\n` so
   * strict chat templates (e.g. Qwen-served via vLLM, MiniMax) accept
   * the request. Default: detected per provider/baseUrl. Canonical
   * OpenAI/Azure/OpenRouter/Cerebras/Together/Fireworks/Groq/DeepSeek/
   * Mistral/xAI/Z.ai/GitHub Copilot/Zenmux are treated as `true`;
   * unknown or strict-template hosts default to `false`. Setting this
   * to `true` preserves separate blocks, which is preferred for
   * KV-cache reuse when the trailing prompt changes between calls.
   */
  supportsMultipleSystemMessages?: boolean;
  /** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
  supportsReasoningEffort?: boolean;
  /** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */
  reasoningEffortMap?: Partial<Record<Effort, string>>;
  /**
   * Whether the provider supports `stream_options: { include_usage: true }`
   * for token usage in streaming responses. Default: true.
   */
  supportsUsageInStreaming?: boolean;
  /** Which field to use for max tokens. Default: auto-detected from URL. */
  maxTokensField?: "max_completion_tokens" | "max_tokens";
  /** Whether tool results require the `name` field. Default: auto-detected from URL. */
  requiresToolResultName?: boolean;
  /**
   * Whether a user message after tool results requires an assistant message
   * in between. Default: auto-detected from URL.
   */
  requiresAssistantAfterToolResult?: boolean;
  /**
   * Whether thinking blocks must be converted to text blocks with
   * `<thinking>` delimiters. Default: auto-detected from URL.
   */
  requiresThinkingAsText?: boolean;
  /**
   * Whether tool call IDs must be normalized to Mistral format (exactly 9
   * alphanumeric chars). Default: auto-detected from URL.
   */
  requiresMistralToolIds?: boolean;
  /**
   * Format for the reasoning/thinking parameter. Default: "openai".
   * - "openai": `reasoning_effort`
   * - "openrouter": `reasoning: { effort }`
   * - "zai": `thinking: { type: "enabled" | "disabled" }` (also used by Moonshot Kimi)
   * - "qwen": top-level `enable_thinking`
   * - "qwen-chat-template": `chat_template_kwargs.enable_thinking`
   */
  thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
  /**
   * Optional `thinking.keep` value for Z.ai/Moonshot-style thinking params.
   * Set false to suppress auto-detected keep. Default: auto-detected.
   */
  thinkingKeep?: "all" | false;
  /** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
  reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";
  /** Whether assistant tool-call messages must include reasoning content. Default: false. */
  requiresReasoningContentForToolCalls?: boolean;
  /**
   * Whether the provider accepts a synthetic placeholder (e.g. ".") for
   * missing reasoning_content on tool-call turns. Default: true. Set to false
   * for providers like DeepSeek that validate the exact reasoning_content value.
   */
  allowsSyntheticReasoningContentForToolCalls?: boolean;
  /** Whether assistant tool-call messages must include non-empty content. Default: false. */
  requiresAssistantContentForToolCalls?: boolean;
  /** Whether the provider supports the `tool_choice` parameter. Default: true. */
  supportsToolChoice?: boolean;
  /**
   * Drop reasoning fields (`reasoning_effort`, OpenRouter `reasoning`) for
   * the request when `tool_choice` forces a tool call. Mirrors the Anthropic
   * `disableThinkingIfToolChoiceForced` rule for backends like Kimi that
   * 400 with `tool_choice 'specified' is incompatible with thinking
   * enabled` whenever both are present. Default: auto-detected (Kimi).
   */
  disableReasoningOnForcedToolChoice?: boolean;
  /**
   * Drop reasoning fields (`reasoning_effort`, OpenRouter `reasoning`) for
   * any request that sends `tool_choice`. Use for providers/models that accept
   * tools and `tool_choice`, but reject `tool_choice` while thinking is enabled.
   * Default: auto-detected (DeepSeek reasoning models).
   */
  disableReasoningOnToolChoice?: boolean;
  /** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
  openRouterRouting?: OpenRouterRouting;
  /** Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. */
  vercelGatewayRouting?: VercelGatewayRouting;
  /** Extra fields to include in request body (e.g. gateway routing hints for OpenClaw-style proxies). */
  extraBody?: Record<string, unknown>;
  /** Whether chat-completions payloads should include provider-specific prompt-cache markers. */
  cacheControlFormat?: "anthropic" | undefined;
  /**
   * Whether the provider supports the `strict` field in tool definitions.
   * Default: auto-detected per provider/baseUrl (conservative for unknown providers).
   */
  supportsStrictMode?: boolean;
  /**
   * Stream-watchdog idle-timeout floor in ms for slow reasoning hosts.
   * Default: auto-detected (GLM coding-plan hosts, direct DeepSeek reasoning).
   */
  streamIdleTimeoutMs?: number;
  /**
   * Whether the host honors `prompt_cache_retention: "24h"` on the Responses
   * API. Default: auto-detected (api.openai.com).
   */
  supportsLongPromptCacheRetention?: boolean;
  /**
   * Whether tool schemas must be sent either all strict or all non-strict.
   * Undefined keeps the existing per-tool mixed behavior.
   */
  toolStrictMode?: "all_strict" | "none";
  /**
   * Whether request shaping may send reasoning params at all.
   * Default: auto-detected (disabled for GitHub Copilot chat-completions).
   */
  supportsReasoningParams?: boolean;
  /**
   * Always send a max-token field when the caller did not provide one.
   * Default: auto-detected (Kimi-family models derive TPM limits from max_tokens).
   */
  alwaysSendMaxTokens?: boolean;
  /**
   * Whether Responses-API tool-call/result history must be strictly paired.
   * Default: auto-detected (Azure OpenAI, GitHub Copilot).
   */
  strictResponsesPairing?: boolean;
  /**
   * Compat deltas applied when a request actually engages thinking mode
   * (reasoning requested and not disabled, model reasoning-capable, and not
   * suppressed by a forced tool choice). `buildModel` materializes the full
   * alternate view as `compat.whenThinking`; handlers pointer-swap, never
   * spread. Default: auto-detected (OpenCode gateways, #1071/#1484).
   *
   * The `Omit` keeps the delta one level deep: a delta cannot itself carry a
   * nested `whenThinking`.
   */
  whenThinking?: Partial<Omit<OpenAICompat, "whenThinking">>;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Compatibility settings for anthropic-messages API.
 * Use this to disable features that strict-by-default Anthropic accepts but
 * that proxy gateways (Vertex AI, AWS Bedrock-style fronts, etc.) reject.
 *
 * Every field is optional (sparse override); `ResolvedAnthropicCompat` is the
 * fully populated counterpart in which each of these fields is required.
 */
export interface AnthropicCompat {
  /**
   * Drop the top-level `strict: true` field on tool definitions. Vertex AI's
   * Anthropic-compatible endpoint rejects unknown tool fields with
   * `tools.<n>.custom.strict: Extra inputs are not permitted`.
   */
  disableStrictTools?: boolean;
  /**
   * Map adaptive thinking (`thinking: { type: "adaptive" }`) to
   * `{ type: "enabled", budget_tokens }`. Vertex AI rejects the `adaptive`
   * tag with `Input tag 'adaptive' ... does not match any of the expected
   * tags: 'disabled', 'enabled'`.
   */
  disableAdaptiveThinking?: boolean;
  /** Whether tools may include Anthropic's per-tool eager_input_streaming flag. Default: true. */
  supportsEagerToolInputStreaming?: boolean;
  /** Whether long prompt-cache retention (`ttl: "1h"`) is supported. Default: true for canonical Anthropic API. */
  supportsLongCacheRetention?: boolean;
  /**
   * Whether mid-conversation `role: "system"` messages are accepted in the
   * `messages` array (Claude Opus 4.8+ and Claude Fable/Mythos 5 on the
   * first-party Claude API and Claude Platform on AWS). When unset,
   * auto-detected from the model id and base URL. Not available on Bedrock,
   * Vertex AI, or Microsoft Foundry.
   */
  supportsMidConversationSystem?: boolean;
  /**
   * Whether the model accepts a forced `tool_choice` (`{ type: "any" }` or
   * `{ type: "tool", name }`). Claude Fable/Mythos 5 reject forced tool use
   * outright ("tool_choice forces tool use is not compatible with this model");
   * the request builder downgrades forced choices to `auto` when this is false.
   * When unset, auto-detected from the model id. Default: true.
   */
  supportsForcedToolChoice?: boolean;
  /**
   * Whether the model accepts sampling parameters (`temperature`, `top_p`,
   * `top_k`). Opus 4.7+ and Fable/Mythos reject them with a 400. When unset,
   * auto-detected from the model id. Default: true.
   */
  supportsSamplingParams?: boolean;
  /**
   * Include a non-standard `id` field (aliasing `tool_use_id`) on
   * `tool_result` blocks. Z.AI's Anthropic-compatible proxy deserializes
   * tool results into a class that reads `.id` (issue #814). Default:
   * auto-detected (Z.AI hosts).
   */
  requiresToolResultId?: boolean;
  /**
   * Replay unsigned `thinking` blocks from prior assistant turns as native
   * thinking instead of demoting them to text. Official Anthropic enforces
   * signature-based thinking-chain integrity, so unsigned blocks must stay
   * text there; compatible reasoning endpoints (Z.AI, DeepSeek, …) emit
   * unsigned blocks and expect them back as `type: "thinking"` (#2005).
   * Default: auto-detected from provider/baseUrl and `model.reasoning`.
   */
  replayUnsignedThinking?: boolean;
}
|
|
279
|
+
|
|
280
|
+
/**
 * OpenRouter provider routing preferences.
 * Controls which upstream providers OpenRouter routes requests to.
 * Both fields are optional. Referenced from `OpenAICompat.openRouterRouting`.
 * @see https://openrouter.ai/docs/provider-routing
 */
export interface OpenRouterRouting {
  /** List of provider slugs to exclusively use for this request (e.g., ["amazon-bedrock", "anthropic"]). */
  only?: string[];
  /** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
  order?: string[];
}
|
|
291
|
+
|
|
292
|
+
/**
 * Vercel AI Gateway routing preferences.
 * Controls which upstream providers the gateway routes requests to.
 * Both fields are optional. Referenced from `OpenAICompat.vercelGatewayRouting`.
 * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options
 */
export interface VercelGatewayRouting {
  /** List of provider slugs to exclusively use for this request (e.g., ["bedrock", "anthropic"]). */
  only?: string[];
  /** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
  order?: string[];
}
|
|
303
|
+
|
|
304
|
+
/**
 * `OpenAICompat.toolStrictMode` after resolution: one of the explicit values,
 * or `"mixed"`. Presumably `"mixed"` stands for the unset case, which the
 * `toolStrictMode` docs describe as per-tool mixed behavior — confirm in `buildModel`.
 */
type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mixed";
|
|
305
|
+
|
|
306
|
+
/**
 * Fully-resolved chat-completions compat view: every detected default
 * materialized and user overrides applied. Built once per model by
 * `buildModel`; request handlers read fields and never detect, resolve, or
 * allocate.
 *
 * Shape: every `OpenAICompat` field not named in the `Omit` list becomes
 * required. The omitted keys are either re-declared in the intersection below
 * (still optional, or with a resolved type) or dropped from this view entirely.
 */
export type ResolvedOpenAICompat = Required<
  Omit<
    OpenAICompat,
    // Re-added below as optional.
    | "openRouterRouting"
    | "vercelGatewayRouting"
    | "extraBody"
    // Re-added below with the resolved type.
    | "toolStrictMode"
    // Re-added below as optional.
    | "streamIdleTimeoutMs"
    // Not re-added here; it appears on `ResolvedOpenAIResponsesCompat` instead.
    | "supportsLongPromptCacheRetention"
    // Re-added below as optional.
    | "cacheControlFormat"
    | "thinkingKeep"
    // Not re-added here; it appears on `ResolvedOpenAIResponsesCompat` instead.
    | "strictResponsesPairing"
    // Re-added below as a complete resolved view rather than a sparse delta.
    | "whenThinking"
  >
> & {
  openRouterRouting?: OpenAICompat["openRouterRouting"];
  vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
  extraBody?: OpenAICompat["extraBody"];
  cacheControlFormat?: OpenAICompat["cacheControlFormat"];
  thinkingKeep?: OpenAICompat["thinkingKeep"];
  streamIdleTimeoutMs?: number;
  toolStrictMode: ResolvedToolStrictMode;
  /** The model sits behind OpenRouter (routing prefs and max-token omission apply). */
  isOpenRouterHost: boolean;
  /** The model sits behind Vercel AI Gateway. */
  isVercelGatewayHost: boolean;
  /** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
  whenThinking?: ResolvedOpenAICompat;
};
|
|
341
|
+
|
|
342
|
+
/**
 * Fully-resolved Responses-API compat view (same contract as `ResolvedOpenAICompat`).
 *
 * Each field shares its name with an optional `OpenAICompat` field; see the
 * field docs there for meaning. Here every field is required.
 */
export interface ResolvedOpenAIResponsesCompat {
  supportsDeveloperRole: boolean;
  supportsStrictMode: boolean;
  supportsReasoningEffort: boolean;
  supportsLongPromptCacheRetention: boolean;
  strictResponsesPairing: boolean;
  reasoningEffortMap: Partial<Record<Effort, string>>;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`).
 *
 * Every `AnthropicCompat` field becomes required, plus the derived
 * `officialEndpoint` flag, which has no user-authored counterpart.
 */
export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
  /**
   * The configured endpoint is the official first-party Anthropic API
   * (https + exact `api.anthropic.com` host; a missing baseUrl counts as
   * official because dispatch defaults there). Gates OAuth framing, custom
   * env headers, and cache-TTL shaping without per-request URL parsing.
   */
  officialEndpoint: boolean;
};
|
|
362
|
+
|
|
363
|
+
/**
 * Sparse, user-authored compat overrides for a given API (models.json / config vocabulary).
 *
 * - OpenAI-family APIs (completions and the three Responses variants) → `OpenAICompat`
 * - `"anthropic-messages"` → `AnthropicCompat`
 * - anything else → `undefined` (no compat vocabulary)
 *
 * `TApi` is a bare type parameter, so the conditional distributes over unions:
 * for the default `Api` the result is the union of all three outcomes.
 */
export type CompatConfigOf<TApi extends Api> = TApi extends
  | "openai-completions"
  | "openai-responses"
  | "azure-openai-responses"
  | "openai-codex-responses"
  ? OpenAICompat
  : TApi extends "anthropic-messages"
    ? AnthropicCompat
    : undefined;
|
|
373
|
+
|
|
374
|
+
/**
 * Resolved compat for a given API: complete record, materialized once by `buildModel`.
 *
 * - `"openai-completions"` → `ResolvedOpenAICompat`
 * - the three Responses variants → `ResolvedOpenAIResponsesCompat`
 * - `"anthropic-messages"` → `ResolvedAnthropicCompat`
 * - anything else → `undefined`
 *
 * Unlike `CompatConfigOf`, chat-completions and Responses resolve to different
 * shapes. The conditional distributes over unions of `TApi`.
 */
export type CompatOf<TApi extends Api> = TApi extends "openai-completions"
  ? ResolvedOpenAICompat
  : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses"
    ? ResolvedOpenAIResponsesCompat
    : TApi extends "anthropic-messages"
      ? ResolvedAnthropicCompat
      : undefined;
|
|
382
|
+
|
|
383
|
+
/**
 * Model interface for the unified model system.
 *
 * `TApi` ties the model's `api` identifier to the shape of its `compat` and
 * `compatConfig` records (see `CompatOf` / `CompatConfigOf`).
 */
export interface Model<TApi extends Api = Api> {
  id: string;
  name: string;
  /** Wire API identifier; selects the compat shape via `TApi`. */
  api: TApi;
  provider: Provider;
  baseUrl: string;
  /** Whether the model is reasoning-capable. */
  reasoning: boolean;
  input: ("text" | "image")[];
  /** Pricing per token bucket. */
  cost: {
    input: number; // $/million tokens
    output: number; // $/million tokens
    cacheRead: number; // $/million tokens
    cacheWrite: number; // $/million tokens
  };
  /** Premium Copilot requests charged per user-initiated request (defaults to 1). */
  premiumMultiplier?: number;
  contextWindow: number;
  /** Output-token cap; see `omitMaxOutputTokens` for when it is kept off the wire. */
  maxTokens: number;
  /**
   * When `true`, providers MUST omit `max_output_tokens` (Responses) /
   * `max_tokens` / `max_completion_tokens` (Completions) from the outbound
   * request and let the upstream API decide the per-response cap. `maxTokens`
   * is still used locally for budgeting (compaction, context promotion); only
   * the wire field is suppressed.
   *
   * Use this for proxies (notably Ollama) that forward to a backend whose true
   * output limit OMP cannot discover — sending the wrong value triggers 400s
   * from the upstream provider.
   */
  omitMaxOutputTokens?: boolean;
  headers?: Record<string, string>;
  /**
   * Streaming transport override. When `"pi-native"`, `streamSimple` routes
   * the request to the model's `baseUrl` via the auth-gateway's
   * `POST /v1/pi/stream` endpoint instead of dispatching the per-API
   * provider client. The `baseUrl` must point at an `omp auth-gateway`
   * (or compatible) host; `headers.Authorization` (or `apiKey` resolved by
   * the registry) carries the gateway bearer.
   *
   * Used by containerized omp installs (e.g. robomp slots) to route every
   * LLM call through a sidecar gateway that holds the real provider
   * credentials. The model's other metadata (pricing, context window,
   * thinking config, …) still resolves locally; only the streaming
   * dispatch is redirected.
   */
  transport?: "pi-native";
  /** Hint that websocket transport should be preferred when supported by the provider implementation. */
  preferWebsockets?: boolean;
  /** Preferred model to switch to when context promotion is triggered (model id or provider/id). */
  contextPromotionTarget?: string;
  /** Provider-assigned priority value (lower = higher priority). */
  priority?: number;
  /** Canonical thinking capability metadata for this model. */
  thinking?: ThinkingConfig;
  /**
   * Fully-resolved compatibility record, materialized once by `buildModel`.
   * Protocol handlers read fields; they never detect, resolve, or allocate.
   */
  compat: CompatOf<TApi>;
  /** Verbatim sparse compat from the spec (user/config intent), for introspection only. */
  compatConfig?: CompatConfigOf<TApi>;
  /**
   * Which shape to use when exposing the Codex `apply_patch` tool to this model.
   * Generated catalog policy sets `"freeform"` for first-party GPT-5 Responses
   * models that support OpenAI custom tools with a Lark grammar.
   * - `"freeform"`: sends a raw patch string with no JSON envelope.
   * - `"function"` or undefined: JSON function-tool with `{input: string}` (spec §1.2).
   */
  applyPatchToolType?: "freeform" | "function";
  /**
   * Force OAuth-style request shaping for providers whose API key prefix doesn't
   * match an OAuth token (e.g. routing Anthropic traffic through a proxy that
   * expects Claude Code framing). When true, the streaming layer sets
   * `options.isOAuth = true` for the underlying provider call.
   */
  isOAuth?: boolean;
}
|
|
461
|
+
|
|
462
|
+
/**
 * A model as authored by configs, bundled catalogs, and discovery — the input
 * vocabulary of `buildModel`. Identical to `Model` except `compat` carries the
 * sparse override shape and nothing is resolved yet.
 *
 * `compatConfig` is omitted as well: on a spec, `compat` itself holds the
 * sparse shape that `Model.compatConfig` exposes.
 */
export interface ModelSpec<TApi extends Api = Api> extends Omit<Model<TApi>, "compat" | "compatConfig"> {
  /** Sparse compatibility overrides; resolved into `Model.compat` by `buildModel`. */
  compat?: CompatConfigOf<TApi>;
}
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
export { isRecord } from "@oh-my-pi/pi-utils";
|
|
2
|
+
|
|
3
|
+
/**
 * Coerce an arbitrary value to a finite number.
 *
 * - Finite numbers are returned unchanged.
 * - Non-blank strings are parsed with `Number(...)` and accepted only when
 *   the result is finite.
 * - Anything else (including `NaN`, `±Infinity`, blank strings, and
 *   non-string/non-number values) yields `undefined`.
 *
 * @param value - The value to coerce.
 * @returns The finite number, or `undefined` when no finite number can be derived.
 */
export function toNumber(value: unknown): number | undefined {
	if (typeof value === "number") {
		return Number.isFinite(value) ? value : undefined;
	}
	// Reject non-strings and strings that are empty or whitespace-only.
	if (typeof value !== "string" || value.trim().length === 0) {
		return undefined;
	}
	const candidate = Number(value);
	return Number.isFinite(candidate) ? candidate : undefined;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Coerce a value to a strictly positive finite number.
 *
 * @param value - The value to coerce (via `toNumber`).
 * @param fallback - Returned when `value` is not a finite number greater than zero.
 * @returns The coerced number when it is `> 0`, otherwise `fallback`.
 */
export function toPositiveNumber(value: unknown, fallback: number): number {
	const candidate = toNumber(value);
	if (candidate === undefined || candidate <= 0) {
		return fallback;
	}
	return candidate;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Pass through genuine booleans and reject everything else.
 *
 * No truthiness or string coercion is performed: `"true"`, `1`, and `0`
 * all yield `undefined`.
 *
 * @param value - The value to check.
 * @returns `value` when it is a boolean, otherwise `undefined`.
 */
export function toBoolean(value: unknown): boolean | undefined {
	if (typeof value !== "boolean") {
		return undefined;
	}
	return value;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Report whether a credential string contains the `sk-ant-oat` marker.
 *
 * The marker is matched anywhere in the string, not only at the start.
 *
 * @param key - The credential string to inspect.
 * @returns `true` when `key` contains `sk-ant-oat`.
 */
export function isAnthropicOAuthToken(key: string): boolean {
	const marker = "sk-ant-oat";
	return key.indexOf(marker) !== -1;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
 * Constants for the OpenAI Codex (ChatGPT OAuth) backend.
 */

/** Base URL of the Codex backend API. */
export const CODEX_BASE_URL = "https://chatgpt.com/backend-api";

/** HTTP header names, keyed by purpose. */
export const OPENAI_HEADERS = {
	BETA: "OpenAI-Beta",
	ACCOUNT_ID: "chatgpt-account-id",
	ORIGINATOR: "originator",
	SESSION_ID: "session_id",
	CONVERSATION_ID: "conversation_id",
} as const;

/**
 * Fixed header values. The key prefix (`BETA_`, `ORIGINATOR_`) names the
 * `OPENAI_HEADERS` entry each value presumably belongs to — confirm at the call site.
 */
export const OPENAI_HEADER_VALUES = {
	BETA_RESPONSES: "responses=experimental",
	BETA_RESPONSES_WEBSOCKETS_V2: "responses_websockets=2026-02-06",
	ORIGINATOR_CODEX: "pi",
} as const;

/** URL path suffixes for the responses endpoints. */
export const URL_PATHS = {
	RESPONSES: "/responses",
	CODEX_RESPONSES: "/codex/responses",
} as const;
|
|
25
|
+
|
|
26
|
+
/** Key of the JWT payload claim that `getCodexAccountId` reads the account ID from. */
export const JWT_CLAIM_PATH = "https://api.openai.com/auth" as const;

/**
 * Extract the account ID from a Codex JWT access token.
 *
 * The token must have exactly three dot-separated segments. The middle
 * segment is base64-decoded and parsed as JSON, and the account ID is read
 * from `payload[JWT_CLAIM_PATH].chatgpt_account_id`. The signature is NOT
 * verified; this only reads the claim.
 *
 * @param accessToken - The raw JWT access token.
 * @returns The account ID when present and a string; `undefined` when the
 *   token is malformed, the claim is missing, or the claim is not a string.
 */
export function getCodexAccountId(accessToken: string): string | undefined {
	try {
		const segments = accessToken.split(".");
		if (segments.length !== 3) return undefined;

		// Node's "base64" decoder also accepts the URL-safe alphabet used by JWTs.
		const json = Buffer.from(segments[1] ?? "", "base64").toString("utf-8");
		const payload: unknown = JSON.parse(json);
		if (typeof payload !== "object" || payload === null) return undefined;

		const auth = (payload as Record<string, unknown>)[JWT_CLAIM_PATH];
		if (typeof auth !== "object" || auth === null) return undefined;

		// Validate at runtime: the token content is untrusted JSON, so a
		// non-string claim must not leak out typed as `string`.
		const accountId = (auth as Record<string, unknown>).chatgpt_account_id;
		return typeof accountId === "string" ? accountId : undefined;
	} catch {
		// Any decode/parse failure means this is not a usable Codex JWT.
		return undefined;
	}
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Build a User-Agent string that identifies as Gemini CLI to unlock higher rate limits.
 * Uses the same format as the official Gemini CLI (v0.35+):
 * GeminiCLI/VERSION/MODEL (PLATFORM; ARCH; SURFACE)
 *
 * VERSION comes from the `PI_AI_GEMINI_CLI_VERSION` environment variable and
 * defaults to `0.35.3`. PLATFORM and ARCH are `process.platform` and
 * `process.arch` passed through unchanged; SURFACE is always `terminal`.
 *
 * @param modelId - Model identifier embedded in the string.
 * @returns The formatted User-Agent value.
 */
export function getGeminiCliUserAgent(modelId = "gemini-3.1-pro-preview"): string {
	// `||` rather than `??` so an empty env var also falls back to the default.
	const version = process.env.PI_AI_GEMINI_CLI_VERSION || "0.35.3";
	return `GeminiCLI/${version}/${modelId} (${process.platform}; ${process.arch}; terminal)`;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Build the header set for Gemini CLI-style requests.
 *
 * @param modelId - Optional model identifier forwarded to `getGeminiCliUserAgent`;
 *   when omitted, that function's default applies.
 * @returns An object with a `User-Agent` header and a fixed `Client-Metadata` header.
 */
export const getGeminiCliHeaders = (modelId?: string) => {
	const userAgent = getGeminiCliUserAgent(modelId);
	return {
		"User-Agent": userAgent,
		"Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
	};
};
|
|
17
|
+
|
|
18
|
+
/**
 * System instruction text for Antigravity requests.
 *
 * NOTE(review): the segments are joined with no separator, so sentences run
 * together (e.g. "Coding.You are"). Presumably the value must match an
 * upstream string verbatim — confirm before "fixing" the spacing.
 */
export const ANTIGRAVITY_SYSTEM_INSTRUCTION = [
	"You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.",
	"You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.",
	"**Absolute paths only**",
	"**Proactiveness**",
].join("");
|
|
23
|
+
/** Memoized result of `getAntigravityUserAgent`; computed on the first call. */
let cachedAntigravityUserAgent: string | undefined;

/**
 * Antigravity / Cloud Code Assist user agent, in the form
 * `antigravity/VERSION OS/ARCH`.
 *
 * Kept apart from the heavy google-gemini-cli provider so discovery and usage
 * code can read it without pulling that provider (and its
 * @google/genai → google-auth-library dependency chain) into the startup
 * parse graph.
 *
 * VERSION comes from the `PI_AI_ANTIGRAVITY_VERSION` environment variable and
 * defaults to `1.104.0`. The string is computed once and cached, so later
 * changes to the environment variable have no effect.
 *
 * @returns The user agent string.
 */
export function getAntigravityUserAgent(): string {
	if (cachedAntigravityUserAgent !== undefined) {
		return cachedAntigravityUserAgent;
	}

	const DEFAULT_ANTIGRAVITY_VERSION = "1.104.0";
	// `||` rather than `??` so an empty env var also falls back to the default.
	const version = process.env.PI_AI_ANTIGRAVITY_VERSION || DEFAULT_ANTIGRAVITY_VERSION;

	// Map Node.js platform/arch to Antigravity's expected format.
	// Verified against Antigravity source: _qn() and wqn() in main.js.
	// process.platform: win32→windows, others pass through (darwin, linux)
	// process.arch: x64→amd64, ia32→386, others pass through (arm64)
	const os = process.platform === "win32" ? "windows" : process.platform;
	let arch: string = process.arch;
	if (arch === "x64") {
		arch = "amd64";
	} else if (arch === "ia32") {
		arch = "386";
	}

	cachedAntigravityUserAgent = `antigravity/${version} ${os}/${arch}`;
	return cachedAntigravityUserAgent;
}
|