@oh-my-pi/pi-ai 15.5.7 → 15.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/README.md +2 -0
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-storage.d.ts +81 -0
- package/dist/types/provider-models/google.d.ts +1 -1
- package/dist/types/provider-models/openai-compat.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +3 -0
- package/dist/types/types.d.ts +1 -1
- package/dist/types/utils/discovery/index.d.ts +0 -1
- package/dist/types/utils/oauth/types.d.ts +1 -1
- package/dist/types/utils/oauth/wafer.d.ts +2 -0
- package/dist/types/utils/stream-markup-healing.d.ts +83 -0
- package/package.json +2 -2
- package/src/auth-gateway/server.ts +155 -57
- package/src/auth-storage.ts +185 -32
- package/src/models.json +1235 -254
- package/src/provider-models/descriptors.ts +14 -0
- package/src/provider-models/google.ts +2 -38
- package/src/provider-models/openai-compat.ts +184 -0
- package/src/providers/anthropic.ts +104 -1
- package/src/providers/ollama.ts +136 -43
- package/src/providers/openai-chat-server-schema.ts +9 -0
- package/src/providers/openai-chat-server.ts +21 -2
- package/src/providers/openai-completions.ts +51 -111
- package/src/stream.ts +115 -9
- package/src/types.ts +2 -0
- package/src/utils/discovery/index.ts +0 -1
- package/src/utils/oauth/index.ts +12 -0
- package/src/utils/oauth/types.ts +2 -0
- package/src/utils/oauth/wafer.ts +50 -0
- package/src/utils/stream-markup-healing.ts +759 -0
- package/dist/types/utils/discovery/vertex.d.ts +0 -25
- package/dist/types/utils/tool-call-healing.d.ts +0 -71
- package/src/utils/discovery/vertex.ts +0 -210
- package/src/utils/tool-call-healing.ts +0 -271
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,36 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.5.8] - 2026-05-28
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added `CheckCredentialsOptions.completionProbe` (and `completionTimeoutMs`) so `AuthStorage.checkCredentials` can additionally exercise each credential against the provider's chat-completion endpoint after refresh-on-expiry. Result lands on `CredentialHealthResult.completion` ({ok, reason?, modelId?, latencyMs?}) without disturbing the usage `ok` field. Public types: `CompletionProbe`, `CompletionProbeInput`, `CompletionProbeCredential`, `CredentialCompletionResult`. The probe is invoked even when no `UsageProvider` is registered for the row, and is skipped when OAuth refresh fails (the stale bytes would only mask the upstream failure).
|
|
10
|
+
- Added Wafer Pass and Wafer Serverless providers (`wafer-pass`, `wafer-serverless`). OpenAI-compatible (`https://pass.wafer.ai/v1`), bearer auth, `wfr_…` keys. `/login wafer-pass` and `/login wafer-serverless` paste-and-validate the key against `/v1/models`. `WAFER_PASS_API_KEY` and `WAFER_SERVERLESS_API_KEY` environment variables wired into `getEnvApiKey`. Bundled catalog seeds `wafer-pass/{GLM-5.1, Qwen3.5-397B-A17B}` and `wafer-serverless/{GLM-5.1, Kimi-K2.6, Qwen3.5-397B-A17B, Qwen3.6-35B-A3B, qwen3.7-max, deepseek-v4-flash, deepseek-v4-pro}`; dynamic discovery via `/v1/models` overlays additional models at runtime. Pass-tier discovery filters `wafer.tier === "pass_included"`. Pass-SKU costs are seeded at `0` (flat-rate subscription, no per-token charge — matches `kimi-code`/`firepass`/`alibaba-coding-plan`). Serverless costs are the wafer.ai retail rate, derived from the `*_cents_per_million` envelope via `value × 125 / 10000` (e.g. GLM-5.1 `120` → $1.50/M, Kimi-K2.6 `88` → $1.10/M). Reasoning entries get a thinking compat picked from the `wafer.provider` envelope: `zai`/`moonshotai` → zai-style `thinking: { type }`, `qwen` → top-level `enable_thinking`, `deepseek` and unknown upstreams stay unset so `detectOpenAICompat` can pick `reasoning_effort` from the id pattern at request time.
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- Changed auth-gateway credential resolution to use per-conversation `promptCacheKey`/`sessionId` when calling `AuthStorage.getApiKey`, so repeated turns can keep the same credential until it becomes unavailable
|
|
15
|
+
- Changed auth-gateway and pi-native request handling to align `sessionId` with prompt/context identity before credential lookup
|
|
16
|
+
- Changed Anthropic prompt preparation to downscale image blocks over 2000px when a request includes 20+ images, reducing oversized payloads automatically
|
|
17
|
+
- Changed OpenAI chat request parsing to accept `name` on `tool` messages and fall back to the matching assistant `tool_calls` name, so parsed tool results now carry a proper tool name when the wire omits it
|
|
18
|
+
- Changed `checkCredentials` to skip running `completionProbe` when OAuth refresh fails, so stale bearer tokens are never probed and the refresh failure remains the returned `reason`
|
|
19
|
+
- Changed completion reporting to return `completion: { ok: null, reason: ... }` when a credential has no usable bearer bytes instead of attempting the probe
|
|
20
|
+
- Refactored `AuthStorage.checkCredentials` so OAuth refresh-on-expiry runs up-front and the refreshed credential is shared between the usage probe and the new completion probe; rows without a registered `UsageProvider` no longer short-circuit before the completion probe runs.
|
|
21
|
+
|
|
22
|
+
### Fixed
|
|
23
|
+
|
|
24
|
+
- Fixed DeepSeek DSML tool-call envelope leaks on Ollama Cloud and OpenAI-compatible streams by healing leaked envelopes into structured tool calls without displaying raw DSML markers. ([#1462](https://github.com/can1357/oh-my-pi/issues/1462))
|
|
25
|
+
- Fixed auth-gateway to classify usage-limit messages such as `usage_limit_reached`, `resource_exhausted`, and Codex-style `Try again in ~X min` text as 429 `rate_limit_error` responses
|
|
26
|
+
- Fixed auth-gateway usage-limit handling to honor parsed retry hints and switch to a sibling credential via `markUsageLimitReached` instead of invalidating the rate-limited credential
|
|
27
|
+
- Fixed `streamSimple` to retry on usage-limit errors (including message-only error events) before any content is emitted, so `onAuthError` can rotate credentials automatically
|
|
28
|
+
- Fixed auth-gateway error classification to extract embedded status codes and use word-boundary matching, so `GenerateContentRequest` and similar messages are no longer misreported as rate-limit errors
|
|
29
|
+
- Fixed `checkCredentials` to handle `completionProbe` exceptions by recording the failure in `CredentialHealthResult.completion.reason` while still returning the usage probe result
|
|
30
|
+
|
|
31
|
+
### Fixed
|
|
32
|
+
|
|
33
|
+
- Fixed Google Vertex's bundled model list to use the authoritative models.dev catalog, including MaaS entries such as `deepseek-ai/deepseek-v3.2-maas` and removing retired Gemini 1.5 fallbacks. ([#1456](https://github.com/can1357/oh-my-pi/issues/1456))
|
|
34
|
+
|
|
5
35
|
## [15.5.7] - 2026-05-27
|
|
6
36
|
### Added
|
|
7
37
|
- `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
|
package/README.md
CHANGED
|
@@ -62,6 +62,8 @@ Unified LLM API with automatic model discovery, provider configuration, token an
|
|
|
62
62
|
- **Hugging Face Inference**
|
|
63
63
|
- **xAI**
|
|
64
64
|
- **Venice** (requires `VENICE_API_KEY`)
|
|
65
|
+
- **Wafer Pass** (requires `WAFER_PASS_API_KEY`; flat-rate subscription, includes GLM-5.1 and Qwen3.5-397B-A17B)
|
|
66
|
+
- **Wafer Serverless** (requires `WAFER_SERVERLESS_API_KEY`; pay-as-you-go)
|
|
65
67
|
- **OpenRouter**
|
|
66
68
|
- **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
|
|
67
69
|
- **LiteLLM** (requires `LITELLM_API_KEY`)
|
|
@@ -14,4 +14,23 @@ export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
|
|
|
14
14
|
/** Optional supplier for `/v1/models` listing. Returns the full model array. */
|
|
15
15
|
listModels?: () => Iterable<Model<Api>>;
|
|
16
16
|
}
|
|
17
|
+
/**
|
|
18
|
+
* Classify an upstream / gateway-internal error into a status code and a
|
|
19
|
+
* format-neutral type. The order is intentional:
|
|
20
|
+
*
|
|
21
|
+
* 1. Honour an explicit numeric `status` property on the thrown error.
|
|
22
|
+
* 2. Parse a status code embedded in the message string. Provider errors
|
|
23
|
+
* virtually always carry one (`Google API error (400): …`, `HTTP 429`,
|
|
24
|
+
* `status=503`) and the embedded value is authoritative.
|
|
25
|
+
* 3. Fall through to **word-boundaried** substring heuristics. The old
|
|
26
|
+
* `lower.includes("rate")` test famously matched
|
|
27
|
+
* `GenerateContentRequest`, surfacing every Google 400 as a 429
|
|
28
|
+
* `rate_limit_error`. The patterns here all require boundaries so they
|
|
29
|
+
* don't collide with provider field names.
|
|
30
|
+
*/
|
|
31
|
+
export declare function classifyGatewayError(err: unknown): {
|
|
32
|
+
status: number;
|
|
33
|
+
type: string;
|
|
34
|
+
message: string;
|
|
35
|
+
};
|
|
17
36
|
export declare function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle;
|
|
@@ -75,13 +75,88 @@ export interface CredentialHealthResult {
|
|
|
75
75
|
reason?: string;
|
|
76
76
|
/** Probe usage report (raw payload stripped) when `ok === true`. */
|
|
77
77
|
report?: Omit<UsageReport, "raw">;
|
|
78
|
+
/**
|
|
79
|
+
* Result of the optional end-to-end completion probe (see
|
|
80
|
+
* {@link CheckCredentialsOptions.completionProbe}). Absent when no probe was
|
|
81
|
+
* supplied. The completion probe exercises the provider's chat-completion
|
|
82
|
+
* endpoint with the credential's bearer bytes, which is a stricter signal
|
|
83
|
+
* than the usage endpoint (some providers happily 200 a `/usage` call while
|
|
84
|
+
* the chat endpoint 401s the same bearer).
|
|
85
|
+
*/
|
|
86
|
+
completion?: CredentialCompletionResult;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Outcome of the end-to-end completion probe. `null` means the probe was
|
|
90
|
+
* skipped (no bearer bytes were available — e.g. OAuth refresh failed
|
|
91
|
+
* upstream of the probe).
|
|
92
|
+
*/
|
|
93
|
+
export interface CredentialCompletionResult {
|
|
94
|
+
ok: boolean | null;
|
|
95
|
+
/** Failure / unverifiable reason; absent when `ok === true`. */
|
|
96
|
+
reason?: string;
|
|
97
|
+
/** Probe model id used (carried back from the caller for display). */
|
|
98
|
+
modelId?: string;
|
|
99
|
+
/** Round-trip latency in milliseconds. */
|
|
100
|
+
latencyMs?: number;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Credential payload handed to {@link CompletionProbe}. For API-key
|
|
104
|
+
* credentials only the bytes are exposed; for OAuth, every identity field
|
|
105
|
+
* carried by the refreshed credential is included so the probe can compose
|
|
106
|
+
* provider-specific apiKey shapes (e.g. GitHub Copilot / Google Gemini CLI
|
|
107
|
+
* expect a JSON blob with `token` + `projectId`, not the raw access token).
|
|
108
|
+
*
|
|
109
|
+
* `refreshToken` may be {@link REMOTE_REFRESH_SENTINEL} when the credential
|
|
110
|
+
* lives behind a broker; the chat endpoint never reads it, so the probe can
|
|
111
|
+
* forward it verbatim into the structured shape without harm.
|
|
112
|
+
*/
|
|
113
|
+
export type CompletionProbeCredential = {
|
|
114
|
+
type: "api_key";
|
|
115
|
+
apiKey: string;
|
|
116
|
+
} | {
|
|
117
|
+
type: "oauth";
|
|
118
|
+
accessToken: string;
|
|
119
|
+
refreshToken?: string;
|
|
120
|
+
expiresAt?: number;
|
|
121
|
+
accountId?: string;
|
|
122
|
+
projectId?: string;
|
|
123
|
+
email?: string;
|
|
124
|
+
enterpriseUrl?: string;
|
|
125
|
+
};
|
|
126
|
+
/**
|
|
127
|
+
* Caller-supplied bearer probe. Receives the post-refresh credential for a
|
|
128
|
+
* single row and reports whether a real chat-completion round-trip succeeds.
|
|
129
|
+
* The check-credentials pipeline calls this AFTER any OAuth refresh so the
|
|
130
|
+
* bytes match what a live request would send.
|
|
131
|
+
*/
|
|
132
|
+
export interface CompletionProbeInput {
|
|
133
|
+
provider: Provider;
|
|
134
|
+
credentialId: number;
|
|
135
|
+
credential: CompletionProbeCredential;
|
|
136
|
+
signal: AbortSignal;
|
|
78
137
|
}
|
|
138
|
+
export type CompletionProbe = (input: CompletionProbeInput) => Promise<CredentialCompletionResult>;
|
|
79
139
|
export interface CheckCredentialsOptions {
|
|
80
140
|
signal?: AbortSignal;
|
|
81
141
|
/** Per-credential probe timeout (ms). Defaults to the configured usage request timeout. */
|
|
82
142
|
timeoutMs?: number;
|
|
83
143
|
/** Provider → base URL override, same shape as {@link AuthStorage.fetchUsageReports}. */
|
|
84
144
|
baseUrlResolver?: (provider: Provider) => string | undefined;
|
|
145
|
+
/**
|
|
146
|
+
* Optional end-to-end probe. When provided, `checkCredentials` invokes it
|
|
147
|
+
* for every credential where a usable bearer is available (API key, or
|
|
148
|
+
* OAuth access token after refresh-on-expiry succeeded). The result lands
|
|
149
|
+
* on {@link CredentialHealthResult.completion}.
|
|
150
|
+
*
|
|
151
|
+
* The probe runs INDEPENDENTLY of whether a {@link UsageProvider} is
|
|
152
|
+
* configured: providers without a usage endpoint still benefit from the
|
|
153
|
+
* extra signal. The probe is NOT invoked when OAuth refresh fails — the
|
|
154
|
+
* bytes would be stale anyway and the upstream failure is already captured
|
|
155
|
+
* on `reason`.
|
|
156
|
+
*/
|
|
157
|
+
completionProbe?: CompletionProbe;
|
|
158
|
+
/** Per-credential completion probe timeout (ms). Defaults to `timeoutMs`. */
|
|
159
|
+
completionTimeoutMs?: number;
|
|
85
160
|
}
|
|
86
161
|
/**
|
|
87
162
|
* Sentinel value placed in OAuth `refresh` fields when a credential is shared
|
|
@@ -491,6 +566,12 @@ export declare class AuthStorage {
|
|
|
491
566
|
* soft-disabled rows are already known-bad and don't need a network probe.
|
|
492
567
|
* Environment-variable API keys are not enumerated — the caller's intent
|
|
493
568
|
* here is "which of my stored credentials is broken".
|
|
569
|
+
*
|
|
570
|
+
* Pass {@link CheckCredentialsOptions.completionProbe} to additionally
|
|
571
|
+
* exercise each credential against the provider's chat-completion endpoint
|
|
572
|
+
* (strict mode). The result lands on
|
|
573
|
+
* {@link CredentialHealthResult.completion}; the usage `ok` field is
|
|
574
|
+
* unchanged so callers can tell the two signals apart.
|
|
494
575
|
*/
|
|
495
576
|
checkCredentials(options?: CheckCredentialsOptions): Promise<CredentialHealthResult[]>;
|
|
496
577
|
/**
|
|
@@ -19,6 +19,6 @@ export interface GoogleGeminiCliModelManagerConfig {
|
|
|
19
19
|
endpoint?: string;
|
|
20
20
|
}
|
|
21
21
|
export declare function googleModelManagerOptions(config?: GoogleModelManagerConfig): ModelManagerOptions<"google-generative-ai">;
|
|
22
|
-
export declare function googleVertexModelManagerOptions(
|
|
22
|
+
export declare function googleVertexModelManagerOptions(_config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
|
|
23
23
|
export declare function googleAntigravityModelManagerOptions(config?: GoogleAntigravityModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
|
|
24
24
|
export declare function googleGeminiCliModelManagerOptions(config?: GoogleGeminiCliModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
|
|
@@ -127,6 +127,12 @@ export interface FirepassModelManagerConfig {
|
|
|
127
127
|
* See https://docs.fireworks.ai/firepass.
|
|
128
128
|
*/
|
|
129
129
|
export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
130
|
+
export interface WaferModelManagerConfig {
|
|
131
|
+
apiKey?: string;
|
|
132
|
+
baseUrl?: string;
|
|
133
|
+
}
|
|
134
|
+
export declare function waferPassModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
135
|
+
export declare function waferServerlessModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
130
136
|
export interface MistralModelManagerConfig {
|
|
131
137
|
apiKey?: string;
|
|
132
138
|
baseUrl?: string;
|
|
@@ -322,6 +322,7 @@ export declare const toolMessageSchema: z.ZodObject<{
|
|
|
322
322
|
type: z.ZodString;
|
|
323
323
|
}, z.core.$loose>]>>]>>;
|
|
324
324
|
tool_call_id: z.ZodOptional<z.ZodString>;
|
|
325
|
+
name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
|
|
325
326
|
}, z.core.$strip>;
|
|
326
327
|
/**
|
|
327
328
|
* Legacy `function` role (pre-tools API). Translated to a `tool` role
|
|
@@ -526,6 +527,7 @@ export declare const messageSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
526
527
|
type: z.ZodString;
|
|
527
528
|
}, z.core.$loose>]>>]>>;
|
|
528
529
|
tool_call_id: z.ZodOptional<z.ZodString>;
|
|
530
|
+
name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
|
|
529
531
|
}, z.core.$strip>, z.ZodObject<{
|
|
530
532
|
role: z.ZodLiteral<"function">;
|
|
531
533
|
name: z.ZodString;
|
|
@@ -736,6 +738,7 @@ export declare const openaiChatRequestSchema: z.ZodObject<{
|
|
|
736
738
|
type: z.ZodString;
|
|
737
739
|
}, z.core.$loose>]>>]>>;
|
|
738
740
|
tool_call_id: z.ZodOptional<z.ZodString>;
|
|
741
|
+
name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
|
|
739
742
|
}, z.core.$strip>, z.ZodObject<{
|
|
740
743
|
role: z.ZodLiteral<"function">;
|
|
741
744
|
name: z.ZodString;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
|
|
|
48
48
|
/** Provider-specific transport used to encode the selected effort. */
|
|
49
49
|
mode: ThinkingControlMode;
|
|
50
50
|
}
|
|
51
|
-
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
51
|
+
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "wafer-pass" | "wafer-serverless" | "zenmux" | "lm-studio";
|
|
52
52
|
export type Provider = KnownProvider | string;
|
|
53
53
|
import type { Effort } from "./model-thinking";
|
|
54
54
|
/** Token budgets for each thinking level (token-based providers only) */
|
|
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
|
|
|
7
7
|
email?: string;
|
|
8
8
|
accountId?: string;
|
|
9
9
|
};
|
|
10
|
-
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
|
|
10
|
+
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "wafer-pass" | "wafer-serverless" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
|
|
11
11
|
export type OAuthProviderId = OAuthProvider | (string & {});
|
|
12
12
|
export type OAuthPrompt = {
|
|
13
13
|
message: string;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming-safe filters for leaked chat-template tool-call and thinking markup.
|
|
3
|
+
*
|
|
4
|
+
* Hosted models sometimes leak raw template markup into visible `content` instead
|
|
5
|
+
* of returning structured events. One `StreamMarkupHealing` instance owns one stream
|
|
6
|
+
* and one grammar selected by options:
|
|
7
|
+
*
|
|
8
|
+
* - `kimi`: Kimi K2 `<|tool_calls_section_begin|>` sections.
|
|
9
|
+
* - `dsml`: DeepSeek `<|DSML|tool_calls>` envelopes.
|
|
10
|
+
* - `antml`: Anthropic `<antml:function_calls>` envelopes (`function_calls`, `invoke`, `parameter`).
|
|
11
|
+
* - `thinking`: plain `<think>` / `<thinking>` blocks used by MiniMax-style streams.
|
|
12
|
+
*
|
|
13
|
+
* The parser strips marker bytes, reconstructs embedded calls, emits thinking
|
|
14
|
+
* deltas for thinking blocks, and holds partial tags across chunk boundaries.
|
|
15
|
+
*/
|
|
16
|
+
export interface HealedToolCall {
|
|
17
|
+
readonly id: string;
|
|
18
|
+
readonly name: string;
|
|
19
|
+
readonly arguments: string;
|
|
20
|
+
}
|
|
21
|
+
export type StreamMarkupHealingPattern = "kimi" | "dsml" | "antml" | "thinking";
|
|
22
|
+
export interface StreamMarkupHealingOptions {
|
|
23
|
+
readonly pattern: StreamMarkupHealingPattern;
|
|
24
|
+
}
|
|
25
|
+
export type StreamMarkupHealingEvent = {
|
|
26
|
+
readonly type: "text";
|
|
27
|
+
readonly text: string;
|
|
28
|
+
} | {
|
|
29
|
+
readonly type: "thinking";
|
|
30
|
+
readonly thinking: string;
|
|
31
|
+
} | {
|
|
32
|
+
readonly type: "toolCall";
|
|
33
|
+
readonly call: HealedToolCall;
|
|
34
|
+
};
|
|
35
|
+
/**
|
|
36
|
+
* State machine that consumes streamed visible text and emits cleaned text,
|
|
37
|
+
* thinking deltas, and reconstructed tool calls.
|
|
38
|
+
*
|
|
39
|
+
* Feed only one stream channel (usually `delta.content` / `message.content`).
|
|
40
|
+
* Mixing reasoning and visible text into the same instance can corrupt the
|
|
41
|
+
* held-back partial tag buffer.
|
|
42
|
+
*/
|
|
43
|
+
export declare class StreamMarkupHealing {
|
|
44
|
+
#private;
|
|
45
|
+
constructor(options: StreamMarkupHealingOptions);
|
|
46
|
+
get pattern(): StreamMarkupHealingPattern;
|
|
47
|
+
/**
|
|
48
|
+
* Feed a chunk and return visible text only. Reconstructed tool calls are
|
|
49
|
+
* stored for {@link drainCompleted}; thinking blocks are intentionally not
|
|
50
|
+
* returned by this compatibility helper. Use {@link feedEvents} when the
|
|
51
|
+
* caller needs ordered text/thinking/tool-call events.
|
|
52
|
+
*/
|
|
53
|
+
feed(text: string): string;
|
|
54
|
+
/** Feed a chunk and return cleaned text/thinking/tool-call events in stream order. */
|
|
55
|
+
feedEvents(text: string): StreamMarkupHealingEvent[];
|
|
56
|
+
/**
|
|
57
|
+
* Like {@link feed}, but discards completed calls. Used when the upstream
|
|
58
|
+
* chunk also carries structured `tool_calls`, keeping that structured payload
|
|
59
|
+
* as the single source of truth.
|
|
60
|
+
*/
|
|
61
|
+
consumeWithoutCalls(text: string): string;
|
|
62
|
+
/** Drain accumulated tool calls from calls to {@link feed}. */
|
|
63
|
+
drainCompleted(): HealedToolCall[];
|
|
64
|
+
/**
|
|
65
|
+
* Flush held-back stream-end fragments as ordered events. Partial tool-call
|
|
66
|
+
* sections/envelopes are dropped; unterminated thinking blocks are emitted as
|
|
67
|
+
* thinking, matching the previous MiniMax parser behavior.
|
|
68
|
+
*/
|
|
69
|
+
flushEvents(): StreamMarkupHealingEvent[];
|
|
70
|
+
/** Flush held-back text only. Reconstructed calls are retained for {@link drainCompleted}. */
|
|
71
|
+
flushPending(): string;
|
|
72
|
+
/** True once any configured tool-call section/envelope has fully closed. */
|
|
73
|
+
get sectionClosed(): boolean;
|
|
74
|
+
}
|
|
75
|
+
/** Cheap model/provider gate for Kimi-K2 chat-template token leaks. */
|
|
76
|
+
export declare function modelMayLeakKimiToolCalls(provider: string, modelId: string): boolean;
|
|
77
|
+
/** Cheap model/provider gate for DeepSeek DSML envelope leaks. */
|
|
78
|
+
export declare function modelMayLeakDsmlToolCalls(provider: string, modelId: string): boolean;
|
|
79
|
+
/** Cheap model/provider gate for Anthropic ANTML function-call envelope leaks. */
|
|
80
|
+
export declare function modelMayLeakAntmlToolCalls(provider: string, modelId: string): boolean;
|
|
81
|
+
export declare function getStreamMarkupHealingPattern(provider: string, modelId: string, options?: {
|
|
82
|
+
readonly parseThinkingTags?: boolean;
|
|
83
|
+
}): StreamMarkupHealingPattern | undefined;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.5.
|
|
4
|
+
"version": "15.5.9",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
42
42
|
"@bufbuild/protobuf": "^2.12.0",
|
|
43
|
-
"@oh-my-pi/pi-utils": "15.5.
|
|
43
|
+
"@oh-my-pi/pi-utils": "15.5.9",
|
|
44
44
|
"openai": "^6.36.0",
|
|
45
45
|
"partial-json": "^0.1.7",
|
|
46
46
|
"zod": "4.4.3"
|