@oh-my-pi/pi-ai 15.5.7 → 15.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +2 -0
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-storage.d.ts +81 -0
  5. package/dist/types/provider-models/google.d.ts +1 -1
  6. package/dist/types/provider-models/openai-compat.d.ts +6 -0
  7. package/dist/types/providers/openai-chat-server-schema.d.ts +3 -0
  8. package/dist/types/types.d.ts +1 -1
  9. package/dist/types/utils/discovery/index.d.ts +0 -1
  10. package/dist/types/utils/oauth/types.d.ts +1 -1
  11. package/dist/types/utils/oauth/wafer.d.ts +2 -0
  12. package/dist/types/utils/stream-markup-healing.d.ts +83 -0
  13. package/package.json +2 -2
  14. package/src/auth-gateway/server.ts +155 -57
  15. package/src/auth-storage.ts +185 -32
  16. package/src/models.json +1235 -254
  17. package/src/provider-models/descriptors.ts +14 -0
  18. package/src/provider-models/google.ts +2 -38
  19. package/src/provider-models/openai-compat.ts +184 -0
  20. package/src/providers/anthropic.ts +104 -1
  21. package/src/providers/ollama.ts +136 -43
  22. package/src/providers/openai-chat-server-schema.ts +9 -0
  23. package/src/providers/openai-chat-server.ts +21 -2
  24. package/src/providers/openai-completions.ts +51 -111
  25. package/src/stream.ts +115 -9
  26. package/src/types.ts +2 -0
  27. package/src/utils/discovery/index.ts +0 -1
  28. package/src/utils/oauth/index.ts +12 -0
  29. package/src/utils/oauth/types.ts +2 -0
  30. package/src/utils/oauth/wafer.ts +50 -0
  31. package/src/utils/stream-markup-healing.ts +759 -0
  32. package/dist/types/utils/discovery/vertex.d.ts +0 -25
  33. package/dist/types/utils/tool-call-healing.d.ts +0 -71
  34. package/src/utils/discovery/vertex.ts +0 -210
  35. package/src/utils/tool-call-healing.ts +0 -271
package/CHANGELOG.md CHANGED
@@ -2,6 +2,36 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.8] - 2026-05-28
6
+
7
+ ### Added
8
+
9
+ - Added `CheckCredentialsOptions.completionProbe` (and `completionTimeoutMs`) so `AuthStorage.checkCredentials` can additionally exercise each credential against the provider's chat-completion endpoint after refresh-on-expiry. Result lands on `CredentialHealthResult.completion` ({ok, reason?, modelId?, latencyMs?}) without disturbing the usage `ok` field. Public types: `CompletionProbe`, `CompletionProbeInput`, `CompletionProbeCredential`, `CredentialCompletionResult`. The probe is invoked even when no `UsageProvider` is registered for the row, and is skipped when OAuth refresh fails (the stale bytes would only mask the upstream failure).
10
+ - Added Wafer Pass and Wafer Serverless providers (`wafer-pass`, `wafer-serverless`). OpenAI-compatible (`https://pass.wafer.ai/v1`), bearer auth, `wfr_…` keys. `/login wafer-pass` and `/login wafer-serverless` paste-and-validate the key against `/v1/models`. `WAFER_PASS_API_KEY` and `WAFER_SERVERLESS_API_KEY` environment variables wired into `getEnvApiKey`. Bundled catalog seeds `wafer-pass/{GLM-5.1, Qwen3.5-397B-A17B}` and `wafer-serverless/{GLM-5.1, Kimi-K2.6, Qwen3.5-397B-A17B, Qwen3.6-35B-A3B, qwen3.7-max, deepseek-v4-flash, deepseek-v4-pro}`; dynamic discovery via `/v1/models` overlays additional models at runtime. Pass-tier discovery filters `wafer.tier === "pass_included"`. Pass-SKU costs are seeded at `0` (flat-rate subscription, no per-token charge — matches `kimi-code`/`firepass`/`alibaba-coding-plan`). Serverless costs are the wafer.ai retail rate, derived from the `*_cents_per_million` envelope via `value × 125 / 10000` (e.g. GLM-5.1 `120` → $1.50/M, Kimi-K2.6 `88` → $1.10/M). Reasoning entries get a thinking compat picked from the `wafer.provider` envelope: `zai`/`moonshotai` → zai-style `thinking: { type }`, `qwen` → top-level `enable_thinking`, `deepseek` and unknown upstreams stay unset so `detectOpenAICompat` can pick `reasoning_effort` from the id pattern at request time.
11
+
12
+ ### Changed
13
+
14
+ - Changed auth-gateway credential resolution to use per-conversation `promptCacheKey`/`sessionId` when calling `AuthStorage.getApiKey`, so repeated turns can keep the same credential until it becomes unavailable
15
+ - Changed auth-gateway and pi-native request handling to align `sessionId` with prompt/context identity before credential lookup
16
+ - Changed Anthropic prompt preparation to downscale image blocks over 2000px when a request includes 20+ images, reducing oversized payloads automatically
17
+ - Changed OpenAI chat request parsing to accept `name` on `tool` messages and fall back to the matching assistant `tool_calls` name, so parsed tool results now carry a proper tool name when the wire omits it
18
+ - Changed `checkCredentials` to skip running `completionProbe` when OAuth refresh fails, so stale bearer tokens are never probed and the refresh failure remains the returned `reason`
19
+ - Changed completion reporting to return `completion: { ok: null, reason: ... }` when a credential has no usable bearer bytes instead of attempting the probe
20
+ - Refactored `AuthStorage.checkCredentials` so OAuth refresh-on-expiry runs up-front and the refreshed credential is shared between the usage probe and the new completion probe; rows without a registered `UsageProvider` no longer short-circuit before the completion probe runs.
21
+
22
+ ### Fixed
23
+
24
+ - Fixed DeepSeek DSML tool-call envelope leaks on Ollama Cloud and OpenAI-compatible streams by healing leaked envelopes into structured tool calls without displaying raw DSML markers. ([#1462](https://github.com/can1357/oh-my-pi/issues/1462))
25
+ - Fixed auth-gateway to classify usage-limit messages such as `usage_limit_reached`, `resource_exhausted`, and Codex-style `Try again in ~X min` text as 429 `rate_limit_error` responses
26
+ - Fixed auth-gateway usage-limit handling to honor parsed retry hints and switch to a sibling credential via `markUsageLimitReached` instead of invalidating the rate-limited credential
27
+ - Fixed `streamSimple` to retry on usage-limit errors (including message-only error events) before any content is emitted, so `onAuthError` can rotate credentials automatically
28
+ - Fixed auth-gateway error classification to extract embedded status codes and use word-boundary matching, so `GenerateContentRequest` and similar messages are no longer misreported as rate-limit errors
29
+ - Fixed `checkCredentials` to handle `completionProbe` exceptions by recording the failure in `CredentialHealthResult.completion.reason` while still returning the usage probe result
30
+
31
+ ### Fixed
32
+
33
+ - Fixed Google Vertex's bundled model list to use the authoritative models.dev catalog, including MaaS entries such as `deepseek-ai/deepseek-v3.2-maas` and removing retired Gemini 1.5 fallbacks. ([#1456](https://github.com/can1357/oh-my-pi/issues/1456))
34
+
5
35
  ## [15.5.7] - 2026-05-27
6
36
  ### Added
7
37
  - `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
package/README.md CHANGED
@@ -62,6 +62,8 @@ Unified LLM API with automatic model discovery, provider configuration, token an
62
62
  - **Hugging Face Inference**
63
63
  - **xAI**
64
64
  - **Venice** (requires `VENICE_API_KEY`)
65
+ - **Wafer Pass** (requires `WAFER_PASS_API_KEY`; flat-rate subscription, includes GLM-5.1 and Qwen3.5-397B-A17B)
66
+ - **Wafer Serverless** (requires `WAFER_SERVERLESS_API_KEY`; pay-as-you-go)
65
67
  - **OpenRouter**
66
68
  - **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
67
69
  - **LiteLLM** (requires `LITELLM_API_KEY`)
@@ -14,4 +14,23 @@ export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
14
14
  /** Optional supplier for `/v1/models` listing. Returns the full model array. */
15
15
  listModels?: () => Iterable<Model<Api>>;
16
16
  }
17
+ /**
18
+ * Classify an upstream / gateway-internal error into a status code and a
19
+ * format-neutral type. The order is intentional:
20
+ *
21
+ * 1. Honour an explicit numeric `status` property on the thrown error.
22
+ * 2. Parse a status code embedded in the message string. Provider errors
23
+ * virtually always carry one (`Google API error (400): …`, `HTTP 429`,
24
+ * `status=503`) and the embedded value is authoritative.
25
+ * 3. Fall through to **word-boundaried** substring heuristics. The old
26
+ * `lower.includes("rate")` test famously matched
27
+ * `GenerateContentRequest`, surfacing every Google 400 as a 429
28
+ * `rate_limit_error`. The patterns here all require boundaries so they
29
+ * don't collide with provider field names.
30
+ */
31
+ export declare function classifyGatewayError(err: unknown): {
32
+ status: number;
33
+ type: string;
34
+ message: string;
35
+ };
17
36
  export declare function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle;
@@ -75,13 +75,88 @@ export interface CredentialHealthResult {
75
75
  reason?: string;
76
76
  /** Probe usage report (raw payload stripped) when `ok === true`. */
77
77
  report?: Omit<UsageReport, "raw">;
78
+ /**
79
+ * Result of the optional end-to-end completion probe (see
80
+ * {@link CheckCredentialsOptions.completionProbe}). Absent when no probe was
81
+ * supplied. The completion probe exercises the provider's chat-completion
82
+ * endpoint with the credential's bearer bytes, which is a stricter signal
83
+ * than the usage endpoint (some providers happily 200 a `/usage` call while
84
+ * the chat endpoint 401s the same bearer).
85
+ */
86
+ completion?: CredentialCompletionResult;
87
+ }
88
+ /**
89
+ * Outcome of the end-to-end completion probe. `null` means the probe was
90
+ * skipped (no bearer bytes were available — e.g. OAuth refresh failed
91
+ * upstream of the probe).
92
+ */
93
+ export interface CredentialCompletionResult {
94
+ ok: boolean | null;
95
+ /** Failure / unverifiable reason; absent when `ok === true`. */
96
+ reason?: string;
97
+ /** Probe model id used (carried back from the caller for display). */
98
+ modelId?: string;
99
+ /** Round-trip latency in milliseconds. */
100
+ latencyMs?: number;
101
+ }
102
+ /**
103
+ * Credential payload handed to {@link CompletionProbe}. For API-key
104
+ * credentials only the bytes are exposed; for OAuth, every identity field
105
+ * carried by the refreshed credential is included so the probe can compose
106
+ * provider-specific apiKey shapes (e.g. GitHub Copilot / Google Gemini CLI
107
+ * expect a JSON blob with `token` + `projectId`, not the raw access token).
108
+ *
109
+ * `refreshToken` may be {@link REMOTE_REFRESH_SENTINEL} when the credential
110
+ * lives behind a broker; the chat endpoint never reads it, so the probe can
111
+ * forward it verbatim into the structured shape without harm.
112
+ */
113
+ export type CompletionProbeCredential = {
114
+ type: "api_key";
115
+ apiKey: string;
116
+ } | {
117
+ type: "oauth";
118
+ accessToken: string;
119
+ refreshToken?: string;
120
+ expiresAt?: number;
121
+ accountId?: string;
122
+ projectId?: string;
123
+ email?: string;
124
+ enterpriseUrl?: string;
125
+ };
126
+ /**
127
+ * Caller-supplied bearer probe. Receives the post-refresh credential for a
128
+ * single row and reports whether a real chat-completion round-trip succeeds.
129
+ * The check-credentials pipeline calls this AFTER any OAuth refresh so the
130
+ * bytes match what a live request would send.
131
+ */
132
+ export interface CompletionProbeInput {
133
+ provider: Provider;
134
+ credentialId: number;
135
+ credential: CompletionProbeCredential;
136
+ signal: AbortSignal;
78
137
  }
138
+ export type CompletionProbe = (input: CompletionProbeInput) => Promise<CredentialCompletionResult>;
79
139
  export interface CheckCredentialsOptions {
80
140
  signal?: AbortSignal;
81
141
  /** Per-credential probe timeout (ms). Defaults to the configured usage request timeout. */
82
142
  timeoutMs?: number;
83
143
  /** Provider → base URL override, same shape as {@link AuthStorage.fetchUsageReports}. */
84
144
  baseUrlResolver?: (provider: Provider) => string | undefined;
145
+ /**
146
+ * Optional end-to-end probe. When provided, `checkCredentials` invokes it
147
+ * for every credential where a usable bearer is available (API key, or
148
+ * OAuth access token after refresh-on-expiry succeeded). The result lands
149
+ * on {@link CredentialHealthResult.completion}.
150
+ *
151
+ * The probe runs INDEPENDENTLY of whether a {@link UsageProvider} is
152
+ * configured: providers without a usage endpoint still benefit from the
153
+ * extra signal. The probe is NOT invoked when OAuth refresh fails — the
154
+ * bytes would be stale anyway and the upstream failure is already captured
155
+ * on `reason`.
156
+ */
157
+ completionProbe?: CompletionProbe;
158
+ /** Per-credential completion probe timeout (ms). Defaults to `timeoutMs`. */
159
+ completionTimeoutMs?: number;
85
160
  }
86
161
  /**
87
162
  * Sentinel value placed in OAuth `refresh` fields when a credential is shared
@@ -491,6 +566,12 @@ export declare class AuthStorage {
491
566
  * soft-disabled rows are already known-bad and don't need a network probe.
492
567
  * Environment-variable API keys are not enumerated — the caller's intent
493
568
  * here is "which of my stored credentials is broken".
569
+ *
570
+ * Pass {@link CheckCredentialsOptions.completionProbe} to additionally
571
+ * exercise each credential against the provider's chat-completion endpoint
572
+ * (strict mode). The result lands on
573
+ * {@link CredentialHealthResult.completion}; the usage `ok` field is
574
+ * unchanged so callers can tell the two signals apart.
494
575
  */
495
576
  checkCredentials(options?: CheckCredentialsOptions): Promise<CredentialHealthResult[]>;
496
577
  /**
@@ -19,6 +19,6 @@ export interface GoogleGeminiCliModelManagerConfig {
19
19
  endpoint?: string;
20
20
  }
21
21
  export declare function googleModelManagerOptions(config?: GoogleModelManagerConfig): ModelManagerOptions<"google-generative-ai">;
22
- export declare function googleVertexModelManagerOptions(config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
22
+ export declare function googleVertexModelManagerOptions(_config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
23
23
  export declare function googleAntigravityModelManagerOptions(config?: GoogleAntigravityModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
24
24
  export declare function googleGeminiCliModelManagerOptions(config?: GoogleGeminiCliModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
@@ -127,6 +127,12 @@ export interface FirepassModelManagerConfig {
127
127
  * See https://docs.fireworks.ai/firepass.
128
128
  */
129
129
  export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
130
+ export interface WaferModelManagerConfig {
131
+ apiKey?: string;
132
+ baseUrl?: string;
133
+ }
134
+ export declare function waferPassModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
135
+ export declare function waferServerlessModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
130
136
  export interface MistralModelManagerConfig {
131
137
  apiKey?: string;
132
138
  baseUrl?: string;
@@ -322,6 +322,7 @@ export declare const toolMessageSchema: z.ZodObject<{
322
322
  type: z.ZodString;
323
323
  }, z.core.$loose>]>>]>>;
324
324
  tool_call_id: z.ZodOptional<z.ZodString>;
325
+ name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
325
326
  }, z.core.$strip>;
326
327
  /**
327
328
  * Legacy `function` role (pre-tools API). Translated to a `tool` role
@@ -526,6 +527,7 @@ export declare const messageSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
526
527
  type: z.ZodString;
527
528
  }, z.core.$loose>]>>]>>;
528
529
  tool_call_id: z.ZodOptional<z.ZodString>;
530
+ name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
529
531
  }, z.core.$strip>, z.ZodObject<{
530
532
  role: z.ZodLiteral<"function">;
531
533
  name: z.ZodString;
@@ -736,6 +738,7 @@ export declare const openaiChatRequestSchema: z.ZodObject<{
736
738
  type: z.ZodString;
737
739
  }, z.core.$loose>]>>]>>;
738
740
  tool_call_id: z.ZodOptional<z.ZodString>;
741
+ name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
739
742
  }, z.core.$strip>, z.ZodObject<{
740
743
  role: z.ZodLiteral<"function">;
741
744
  name: z.ZodString;
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
48
48
  /** Provider-specific transport used to encode the selected effort. */
49
49
  mode: ThinkingControlMode;
50
50
  }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
51
+ export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "wafer-pass" | "wafer-serverless" | "zenmux" | "lm-studio";
52
52
  export type Provider = KnownProvider | string;
53
53
  import type { Effort } from "./model-thinking";
54
54
  /** Token budgets for each thinking level (token-based providers only) */
@@ -2,4 +2,3 @@ export * from "./antigravity";
2
2
  export * from "./codex";
3
3
  export * from "./gemini";
4
4
  export * from "./openai-compatible";
5
- export * from "./vertex";
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "wafer-pass" | "wafer-serverless" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
@@ -0,0 +1,2 @@
1
+ export declare const loginWaferPass: (options: import("./types").OAuthController) => Promise<string>;
2
+ export declare const loginWaferServerless: (options: import("./types").OAuthController) => Promise<string>;
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Streaming-safe filters for leaked chat-template tool-call and thinking markup.
3
+ *
4
+ * Hosted models sometimes leak raw template markup into visible `content` instead
5
+ * of returning structured events. One `StreamMarkupHealing` instance owns one stream
6
+ * and one grammar selected by options:
7
+ *
8
+ * - `kimi`: Kimi K2 `<|tool_calls_section_begin|>` sections.
9
+ * - `dsml`: DeepSeek `<|DSML|tool_calls>` envelopes.
10
+ * - `antml`: Anthropic `<antml:function_calls>` envelopes (`function_calls`, `invoke`, `parameter`).
11
+ * - `thinking`: plain `<think>` / `<thinking>` blocks used by MiniMax-style streams.
12
+ *
13
+ * The parser strips marker bytes, reconstructs embedded calls, emits thinking
14
+ * deltas for thinking blocks, and holds partial tags across chunk boundaries.
15
+ */
16
+ export interface HealedToolCall {
17
+ readonly id: string;
18
+ readonly name: string;
19
+ readonly arguments: string;
20
+ }
21
+ export type StreamMarkupHealingPattern = "kimi" | "dsml" | "antml" | "thinking";
22
+ export interface StreamMarkupHealingOptions {
23
+ readonly pattern: StreamMarkupHealingPattern;
24
+ }
25
+ export type StreamMarkupHealingEvent = {
26
+ readonly type: "text";
27
+ readonly text: string;
28
+ } | {
29
+ readonly type: "thinking";
30
+ readonly thinking: string;
31
+ } | {
32
+ readonly type: "toolCall";
33
+ readonly call: HealedToolCall;
34
+ };
35
+ /**
36
+ * State machine that consumes streamed visible text and emits cleaned text,
37
+ * thinking deltas, and reconstructed tool calls.
38
+ *
39
+ * Feed only one stream channel (usually `delta.content` / `message.content`).
40
+ * Mixing reasoning and visible text into the same instance can corrupt the
41
+ * held-back partial tag buffer.
42
+ */
43
+ export declare class StreamMarkupHealing {
44
+ #private;
45
+ constructor(options: StreamMarkupHealingOptions);
46
+ get pattern(): StreamMarkupHealingPattern;
47
+ /**
48
+ * Feed a chunk and return visible text only. Reconstructed tool calls are
49
+ * stored for {@link drainCompleted}; thinking blocks are intentionally not
50
+ * returned by this compatibility helper. Use {@link feedEvents} when the
51
+ * caller needs ordered text/thinking/tool-call events.
52
+ */
53
+ feed(text: string): string;
54
+ /** Feed a chunk and return cleaned text/thinking/tool-call events in stream order. */
55
+ feedEvents(text: string): StreamMarkupHealingEvent[];
56
+ /**
57
+ * Like {@link feed}, but discards completed calls. Used when the upstream
58
+ * chunk also carries structured `tool_calls`, keeping that structured payload
59
+ * as the single source of truth.
60
+ */
61
+ consumeWithoutCalls(text: string): string;
62
+ /** Drain accumulated tool calls from calls to {@link feed}. */
63
+ drainCompleted(): HealedToolCall[];
64
+ /**
65
+ * Flush held-back stream-end fragments as ordered events. Partial tool-call
66
+ * sections/envelopes are dropped; unterminated thinking blocks are emitted as
67
+ * thinking, matching the previous MiniMax parser behavior.
68
+ */
69
+ flushEvents(): StreamMarkupHealingEvent[];
70
+ /** Flush held-back text only. Reconstructed calls are retained for {@link drainCompleted}. */
71
+ flushPending(): string;
72
+ /** True once any configured tool-call section/envelope has fully closed. */
73
+ get sectionClosed(): boolean;
74
+ }
75
+ /** Cheap model/provider gate for Kimi-K2 chat-template token leaks. */
76
+ export declare function modelMayLeakKimiToolCalls(provider: string, modelId: string): boolean;
77
+ /** Cheap model/provider gate for DeepSeek DSML envelope leaks. */
78
+ export declare function modelMayLeakDsmlToolCalls(provider: string, modelId: string): boolean;
79
+ /** Cheap model/provider gate for Anthropic ANTML function-call envelope leaks. */
80
+ export declare function modelMayLeakAntmlToolCalls(provider: string, modelId: string): boolean;
81
+ export declare function getStreamMarkupHealingPattern(provider: string, modelId: string, options?: {
82
+ readonly parseThinkingTags?: boolean;
83
+ }): StreamMarkupHealingPattern | undefined;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.5.7",
4
+ "version": "15.5.9",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -40,7 +40,7 @@
40
40
  "dependencies": {
41
41
  "@anthropic-ai/sdk": "^0.94.0",
42
42
  "@bufbuild/protobuf": "^2.12.0",
43
- "@oh-my-pi/pi-utils": "15.5.7",
43
+ "@oh-my-pi/pi-utils": "15.5.9",
44
44
  "openai": "^6.36.0",
45
45
  "partial-json": "^0.1.7",
46
46
  "zod": "4.4.3"