npm - @oh-my-pi/pi-ai - Versions diffs - 15.12.3 → 15.13.0 - Mend

@oh-my-pi/pi-ai 15.12.3 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

package/CHANGELOG.md +49 -1
package/dist/types/auth-broker/remote-store.d.ts +1 -0
package/dist/types/auth-broker/wire-schemas.d.ts +1 -1
package/dist/types/auth-gateway/types.d.ts +7 -1
package/dist/types/auth-storage.d.ts +19 -0
package/dist/types/providers/anthropic-client.d.ts +2 -0
package/dist/types/providers/anthropic-messages-server-schema.d.ts +1 -1
package/dist/types/providers/anthropic-messages-server.d.ts +2 -2
package/dist/types/providers/google-gemini-cli.d.ts +1 -1
package/dist/types/providers/google-shared.d.ts +17 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +2 -2
package/dist/types/providers/openai-chat-server.d.ts +2 -2
package/dist/types/providers/openai-chat-wire.d.ts +644 -0
package/dist/types/providers/openai-codex-responses.d.ts +1 -1
package/dist/types/providers/openai-completions.d.ts +1 -1
package/dist/types/providers/openai-responses-server-schema.d.ts +2 -2
package/dist/types/providers/openai-responses-server.d.ts +2 -2
package/dist/types/providers/openai-responses-shared.d.ts +5 -6
package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
package/dist/types/providers/openai-responses.d.ts +3 -3
package/dist/types/providers/pi-native-server.d.ts +2 -1
package/dist/types/usage.d.ts +1 -1
package/dist/types/utils/openai-http.d.ts +58 -0
package/dist/types/utils.d.ts +1 -1
package/package.json +4 -5
package/src/auth-broker/remote-store.ts +9 -0
package/src/auth-broker/wire-schemas.ts +1 -1
package/src/auth-gateway/server.ts +16 -2
package/src/auth-gateway/types.ts +8 -0
package/src/auth-storage.ts +100 -8
package/src/providers/amazon-bedrock.ts +19 -1
package/src/providers/anthropic-client.ts +2 -0
package/src/providers/anthropic-messages-server-schema.ts +1 -1
package/src/providers/anthropic-messages-server.ts +31 -10
package/src/providers/anthropic.ts +17 -9
package/src/providers/azure-openai-responses.ts +72 -50
package/src/providers/gitlab-duo.ts +3 -3
package/src/providers/google-gemini-cli.ts +39 -10
package/src/providers/google-shared.ts +112 -35
package/src/providers/ollama.ts +19 -1
package/src/providers/openai-anthropic-shim.ts +2 -2
package/src/providers/openai-chat-server-schema.ts +3 -2
package/src/providers/openai-chat-server.ts +30 -8
package/src/providers/openai-chat-wire.ts +847 -0
package/src/providers/openai-codex-responses.ts +37 -14
package/src/providers/openai-completions.ts +174 -141
package/src/providers/openai-responses-server-schema.ts +3 -2
package/src/providers/openai-responses-server.ts +50 -43
package/src/providers/openai-responses-shared.ts +29 -20
package/src/providers/openai-responses-wire.ts +6391 -0
package/src/providers/openai-responses.ts +69 -72
package/src/providers/pi-native-server.ts +42 -15
package/src/registry/oauth/gitlab-duo.ts +87 -12
package/src/registry/zai.ts +1 -1
package/src/stream.ts +20 -7
package/src/usage.ts +1 -1
package/src/utils/openai-http.ts +157 -0
package/src/utils.ts +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,54 @@
 ## [Unreleased]
+## [15.13.0] - 2026-06-14
+### Fixed
+- Fixed the OpenAI Responses/Realtime SSE stream handler crashing with "Error Code undefined: undefined" when parsing error events with nested error details; it now falls back to the nested error object's fields.
+- Fixed OpenAI-compatible providers that reject forced `tool_choice` on thinking-required models by downgrading unsupported forced choices to `auto` while keeping tools available ([#2546](https://github.com/can1357/oh-my-pi/issues/2546)).
+- Fixed GitHub Copilot Anthropic transport (`api.githubcopilot.com/v1/messages`) returning `400 tools.0.custom.eager_input_streaming: Extra inputs are not permitted` on every tool-bearing turn by stopping the emission of the per-tool `eager_input_streaming` flag and the `fine-grained-tool-streaming-2025-05-14` beta header on the Copilot transport — the proxy whitelists neither ([#2558](https://github.com/can1357/oh-my-pi/issues/2558)).
+- Disabled Bun's native ~300s pre-response `fetch` timeout in every streaming provider (OpenAI completions/responses, Azure responses, Anthropic, Codex SSE, Bedrock, Gemini CLI, Ollama). The configurable first-event/idle/SDK watchdogs (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS`, `compat.streamIdleTimeoutMs`) were silently capped by Bun's hidden ceiling, so cold large-context streams (e.g. self-hosted vLLM at multi-hundred-K prompts) died at exactly 300s with `TimeoutError: The operation timed out.` Direct callers of `./providers/{amazon-bedrock,google-gemini-cli,ollama,openai-codex-responses}` (which bypass `register-builtins`' iterator-level watchdog) now install a pre-response `AbortSignal.timeout(firstEventTimeoutMs)` alongside the disable, so a stalled upstream still fails within the configured budget instead of hanging forever ([#2422](https://github.com/can1357/oh-my-pi/issues/2422)).
+- Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) creating a trailing empty text block and emitting redundant `text_start`/`text_delta`/`text_end` events at the end of the turn when the final SSE chunk contains an empty text part (`text: ""`). The parser now ignores empty text parts, preserving the active transcript block state and ensuring proper nesting and rendering of subsequent background jobs or new turns.
+- Preserved terminal Google `thoughtSignature`s by still extracting and applying the signature on the active block even when the text part is empty or undefined.
+- Stopped Gemini Antigravity sessions (`gemini-3*` / Claude under Cloud Code Assist) from leaking system rule reminders and personality preambles into the final response, by appending an explicit 'do not output rule checks' instruction to the injected system parts.
+- Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) letting a `functionCall` part's own `thoughtSignature` clobber the preceding text or thinking block's signature on `think → tool` and `text → tool` turns. A signed function-call part has `text: undefined`, so it fell into the terminal-signature branch while the prior block was still active; that branch now skips function-call parts, leaving the tool call's signature on the tool call where it belongs and preventing corrupted signatures on same-model replay.
+- Fixed MiniMax-M3 OpenAI-compatible streams rendering reasoning twice when the same chunk carried both `<think>…</think>` content and structured `reasoning_content`; structured reasoning now wins and cumulative MiniMax reasoning snapshots are collapsed to deltas using a per-signature snapshot tracker that survives the `</think>`-to-text block transition (so post-answer cumulative snapshots don't reinstate a duplicate thinking block). ([#2433](https://github.com/can1357/oh-my-pi/issues/2433))
+## [15.12.6] - 2026-06-14
+### Changed
+- Bumped Z.AI (GLM Coding Plan) API key validation probe to glm-5.2.
+### Fixed
+- Fixed tool schema conversion for non-Cloud Code Assist Google Gemini models by normalizing parameters with `normalizeSchemaForGoogle` to prevent un-normalized schema properties (such as `additionalProperties: false` or type arrays) from causing Gemini API errors.
+- Fixed OpenAI-family request builders dropping forced named `tool_choice` directives when the named tool is absent from the serialized `tools` array, preventing spec-strict providers from rejecting self-inconsistent requests. ([#1701](https://github.com/can1357/oh-my-pi/issues/1701))
+## [15.12.4] - 2026-06-13
+### Added
+- Added `GITLAB_CLIENT_ID` and `GITLAB_REDIRECT_URI` env-var overrides for the GitLab Duo OAuth login flow so users running with their own GitLab OAuth application can replace the bundled credentials when GitLab rejects the bundled `client_id`'s redirect URI. Setting `GITLAB_REDIRECT_URI` also disables the random-port fallback (strict OAuth providers reject mismatched URIs anyway). ([#2424](https://github.com/can1357/oh-my-pi/issues/2424))
+- Added `AuthStorage.listStoredCredentials()` and `AuthStorage.removeCredential()` for per-account credential management.
+### Changed
+- Replaced the OpenAI SDK client usage in `openai-completions`, `openai-responses`, `azure-openai-responses`, and `openai-codex-responses` with the new internal `postOpenAIStream` OpenAI-wire JSON/SSE transport
+### Fixed
+- Fixed streaming providers to cancel upstream model requests when the client closes the response body, so interrupted SSE sessions stop instead of continuing in the background
+- Fixed provider request builders coercing an unknown `model.maxTokens` (`null`) to `0` via `Math.min`; they now treat it as "no model cap", and Anthropic falls back to the 64k Claude-Code cap for its required `max_tokens`.
+- Fixed transient stream failures on OpenAI-compatible providers by retrying HTTP 408/429/5xx responses and transient network errors with Retry-After/quota-hint aware backoff
+- Fixed SSE stream handling for OpenAI-compatible responses by parsing wire-level JSON frames directly and honoring `[DONE]` termination
+- Fixed stream error handling for OpenAI-compatible providers by preserving structured HTTP status/headers and response body details from failed requests for retry and strict-tool fallback logic
+- Fixed OpenAI-compat streams ending with a bare `finish_reason: "error"` (gateways like OpenRouter reporting upstream failures, e.g. Gemini `MALFORMED_FUNCTION_CALL`) surfacing as a non-retryable `Provider finish_reason: error`. The reason is now mapped to `Provider returned error finish_reason`, which the session retry classifier recognizes as transient, so the turn auto-retries instead of stopping with a pinned error banner.
+- Fixed `SqliteAuthCredentialStore.open()` crashing with `SQLITE_BUSY_RECOVERY` (errno 261) when several `omp --session` panes restore concurrently after an unclean shutdown: `PRAGMA busy_timeout = 5000` now runs as a standalone statement BEFORE `PRAGMA journal_mode=WAL` (the first lock-taking statement during WAL recovery), and `open()` retries the BUSY family — `SQLITE_BUSY`, `SQLITE_BUSY_RECOVERY`, `SQLITE_BUSY_SNAPSHOT`, `SQLITE_BUSY_TIMEOUT` — with bounded exponential backoff. The exhausted-retry error message includes the DB path. Exported `isSqliteBusyError(err)` for callers that need the same classifier ([#2421](https://github.com/can1357/oh-my-pi/issues/2421)).
+- Fixed MiniMax-M3 OpenAI-compatible streams rendering reasoning twice when the same chunk carried both `<think>…</think>` content and structured `reasoning_content`; structured reasoning now wins and cumulative MiniMax reasoning snapshots are collapsed to deltas. ([#2433](https://github.com/can1357/oh-my-pi/issues/2433))
+- Fixed Gemini turns silently halting the agent when the model returned `finishReason: STOP` with only an empty (or whitespace-only) text part and no tool call — the well-known "empty response" failure. All Google surfaces (public Generative Language `streamGoogle`, Vertex `streamGoogleVertex`, and Cloud Code Assist `google-gemini-cli`/`google-antigravity`) now classify such a turn as empty via the shared `hasMeaningfulGoogleContent` check and retry it up to `MAX_EMPTY_STREAM_RETRIES` times before surfacing an error. The Cloud Code Assist path previously had an empty-stream retry that never fired for this case (its `hasContent` flag counted an empty-string text part as content), and the public/Vertex path had no retry at all; the retry now emits a single `start` event so no duplicate partial message leaks downstream.
 ## [15.12.1] - 2026-06-12
 ### Added
@@ -3369,4 +3417,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
 ## [0.9.4] - 2025-11-26
-Initial release with multi-provider LLM support.
+Initial release with multi-provider LLM support.

package/dist/types/auth-broker/remote-store.d.ts CHANGED Viewed

@@ -37,6 +37,7 @@ export declare class RemoteAuthCredentialStore implements AuthCredentialStore {
      */
     updateAuthCredential(id: number, credential: AuthCredential): void;
     deleteAuthCredential(id: number, disabledCause: string): void;
+    deleteAuthCredentialRemote(id: number, disabledCause: string): Promise<boolean>;
     tryDisableAuthCredentialIfMatches(id: number, _expectedData: string, disabledCause: string): boolean;
     waitForFreshSnapshot(maxWaitMs: number, opts?: {
         signal?: AbortSignal;

package/dist/types/auth-broker/wire-schemas.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@
  * keys are rejected — the previous implementation used a hand-rolled
  * `hasOnlyFields` allowlist for the same effect.
  */
-import * as z from "zod/v4";
+import { z } from "zod/v4";
 /** Real OAuth credential (broker-side) — refresh token is the actual upstream value. */
 export declare const oauthCredentialSchema: z.ZodObject<{
     type: z.ZodLiteral<"oauth">;

package/dist/types/auth-gateway/types.d.ts CHANGED Viewed

@@ -90,10 +90,16 @@ export interface AuthGatewayParsedRequest {
     stream: boolean;
     options: AuthGatewayParsedRequestOptions;
 }
+export interface AuthGatewayStreamControl {
+    /** Gateway request signal. Encoders stop producing frames when it aborts. */
+    signal?: AbortSignal;
+    /** Called when the HTTP response body is cancelled by the client. */
+    onCancel?: (reason?: unknown) => void;
+}
 export interface AuthGatewayFormatModule {
     parseRequest(body: unknown, headers?: Headers): AuthGatewayParsedRequest;
     encodeResponse(message: AssistantMessage, requestedModelId: string): Record<string, unknown>;
-    encodeStream(events: AssistantMessageEventStream, requestedModelId: string, options?: AuthGatewayParsedRequestOptions): ReadableStream<Uint8Array>;
+    encodeStream(events: AssistantMessageEventStream, requestedModelId: string, options?: AuthGatewayParsedRequestOptions, control?: AuthGatewayStreamControl): ReadableStream<Uint8Array>;
     /**
      * Emit a protocol-specific error envelope. OpenAI returns
      * `{ error: { message, type } }`; Anthropic returns

package/dist/types/auth-storage.d.ts CHANGED Viewed

@@ -308,6 +308,11 @@ export interface AuthCredentialStore {
      * `replaceAuthCredentialsForProvider`.
      */
     replaceAuthCredentialsRemote?(provider: string, credentials: AuthCredential[]): Promise<StoredAuthCredential[]>;
+    /**
+     * Optional async write hook for disabling one stored credential. Remote stores
+     * use it to await broker persistence before AuthStorage updates its snapshot.
+     */
+    deleteAuthCredentialRemote?(id: number, disabledCause: string): Promise<boolean>;
     /**
      * Optional async write hook for clearing every credential for a provider
      * (logout). When present, `AuthStorage.remove` routes through this instead
@@ -585,10 +590,18 @@ export declare class AuthStorage {
      * Set credential for a provider.
      */
     set(provider: string, credential: AuthCredentialEntry): Promise<void>;
+    /**
+     * List stored credential rows, optionally filtered by provider.
+     */
+    listStoredCredentials(provider?: string): StoredAuthCredential[];
     /**
      * Remove credential for a provider.
      */
     remove(provider: string): Promise<void>;
+    /**
+     * Remove one stored credential for a provider.
+     */
+    removeCredential(provider: string, credentialId: number): Promise<boolean>;
     /**
      * List all providers with credentials.
      */
@@ -890,6 +903,12 @@ export declare class AuthStorage {
      */
     describeCredentialSource(provider: string, sessionId?: string): string | undefined;
 }
+/**
+ * SQLite's busy result code family — base `SQLITE_BUSY` plus the extended
+ * variants `SQLITE_BUSY_RECOVERY` (concurrent WAL recovery), `SQLITE_BUSY_SNAPSHOT`,
+ * and `SQLITE_BUSY_TIMEOUT`. All warrant the same backoff-and-retry treatment.
+ */
+export declare function isSqliteBusyError(err: unknown): boolean;
 /**
  * Default SQLite-backed implementation of {@link AuthCredentialStore}.
  *

package/dist/types/providers/anthropic-client.d.ts CHANGED Viewed

@@ -25,6 +25,8 @@ export type AnthropicFetchOptions = RequestInit & {
         cert?: string;
         key?: string;
     };
+    /** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
+    timeout?: number | false;
 };
 export interface AnthropicClientOptions {
     /** Sent as `X-Api-Key` unless the header is already present in `defaultHeaders`. */

package/dist/types/providers/anthropic-messages-server-schema.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  * Used by `anthropic-messages.ts:parseRequest` to validate the inbound JSON
  * before walking it into pi-ai's canonical `Context`.
  */
-import * as z from "zod/v4";
+import { z } from "zod/v4";
 import type { ContentBlockParam, ImageBlockParam, MessageCreateParams, MessageParam, TextBlockParam, Tool, ToolChoice } from "./anthropic-wire";
 export declare const cacheControlSchema: z.ZodObject<{
     type: z.ZodLiteral<"ephemeral">;

package/dist/types/providers/anthropic-messages-server.d.ts CHANGED Viewed

@@ -4,11 +4,11 @@ import type { AssistantMessage, AssistantMessageEventStream } from "../types";
  * gateway translation. Inbound: foreign HTTP body → omp Context. Outbound:
  * omp AssistantMessage[Stream] → Anthropic-shaped JSON / SSE.
  */
-import type { AuthGatewayParsedRequest as ParsedRequest } from "../auth-gateway/types";
+import type { AuthGatewayStreamControl, AuthGatewayParsedRequest as ParsedRequest } from "../auth-gateway/types";
 export type { ParsedRequest };
 export declare function parseRequest(body: unknown, headers?: Headers): ParsedRequest;
 export declare function encodeResponse(message: AssistantMessage, requestedModelId: string): Record<string, unknown>;
-export declare function encodeStream(events: AssistantMessageEventStream, requestedModelId: string): ReadableStream<Uint8Array>;
+export declare function encodeStream(events: AssistantMessageEventStream, requestedModelId: string, _options?: ParsedRequest["options"], control?: AuthGatewayStreamControl): ReadableStream<Uint8Array>;
 /**
  * Anthropic error envelope: `{ type: "error", error: { type, message } }`.
  * See https://docs.anthropic.com/en/api/errors. Returned as a `Response` so

package/dist/types/providers/google-gemini-cli.d.ts CHANGED Viewed

@@ -53,7 +53,7 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
     requestModelId?: string;
     projectId?: string;
 }
-export { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
+export { ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION, ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
 interface ParsedGeminiCliCredentials {
     accessToken: string;
     projectId: string;

package/dist/types/providers/google-shared.d.ts CHANGED Viewed

@@ -97,6 +97,23 @@ export declare function mapStopReason(reason: FinishReason): StopReason;
  * Map string finish reason to our StopReason (for raw API responses).
  */
 export declare function mapStopReasonString(reason: string): StopReason;
+/**
+ * Bounded retries for the well-known Gemini "empty response" failure: a benign
+ * `finishReason: STOP` carrying only an empty/whitespace text part and no tool call.
+ * Shared by the public/Vertex `streamGoogleGenAI` path and the Cloud Code Assist
+ * (`google-gemini-cli`/`google-antigravity`) provider so both apply the same policy.
+ */
+export declare const MAX_EMPTY_STREAM_RETRIES = 2;
+export declare const EMPTY_STREAM_BASE_DELAY_MS = 500;
+/**
+ * Whether a completed Google assistant message carries content worth delivering.
+ *
+ * A tool call or any non-whitespace text counts as meaningful. An empty/whitespace-only
+ * text part — or thinking that never produced an answer — is the "empty response" failure:
+ * delivered as-is the agent loop has nothing to act on and silently halts, so the request
+ * must be retried instead of surfaced.
+ */
+export declare function hasMeaningfulGoogleContent(output: AssistantMessage): boolean;
 export declare function nextToolCallId(name: string): string;
 /**
  * Push the appropriate `text_end` / `thinking_end` event for the given block.

package/dist/types/providers/openai-chat-server-schema.d.ts CHANGED Viewed

@@ -7,8 +7,8 @@
  * non-strict defaults (e.g. `stream_options.include_obfuscation`) — does not
  * trip 400s on shapes we simply ignore.
  */
-import type { ChatCompletionContentPart, ChatCompletionCreateParams, ChatCompletionMessageParam, ChatCompletionMessageToolCall, ChatCompletionTool, ChatCompletionToolChoiceOption } from "openai/resources/chat/completions";
-import * as z from "zod/v4";
+import { z } from "zod/v4";
+import type { ChatCompletionContentPart, ChatCompletionCreateParams, ChatCompletionMessageParam, ChatCompletionMessageToolCall, ChatCompletionTool, ChatCompletionToolChoiceOption } from "./openai-chat-wire";
 export declare const textPartSchema: z.ZodObject<{
     type: z.ZodLiteral<"text">;
     text: z.ZodString;

package/dist/types/providers/openai-chat-server.d.ts CHANGED Viewed

@@ -2,12 +2,12 @@
  * Parsed inbound OpenAI chat-completions request, ready to feed into pi-ai
  * `stream(model, context, options)`.
  */
-import type { AuthGatewayParsedRequest as ParsedRequest } from "../auth-gateway/types";
+import type { AuthGatewayStreamControl, AuthGatewayParsedRequest as ParsedRequest } from "../auth-gateway/types";
 import type { AssistantMessage, AssistantMessageEventStream } from "../types";
 export type { ParsedRequest };
 export declare function parseRequest(body: unknown, headers?: Headers): ParsedRequest;
 export declare function encodeResponse(message: AssistantMessage, requestedModelId: string): Record<string, unknown>;
-export declare function encodeStream(events: AssistantMessageEventStream, requestedModelId: string, options?: ParsedRequest["options"]): ReadableStream<Uint8Array>;
+export declare function encodeStream(events: AssistantMessageEventStream, requestedModelId: string, options?: ParsedRequest["options"], control?: AuthGatewayStreamControl): ReadableStream<Uint8Array>;
 /**
  * OpenAI chat-completions error envelope:
  *   `{ error: { message, type } }`