@oh-my-pi/pi-ai 15.3.2 → 15.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -0
- package/dist/types/auth-gateway/types.d.ts +1 -1
- package/dist/types/auth-storage.d.ts +31 -0
- package/dist/types/index.d.ts +0 -1
- package/dist/types/providers/amazon-bedrock.d.ts +16 -0
- package/dist/types/providers/cursor.d.ts +7 -1
- package/dist/types/providers/mock.d.ts +0 -2
- package/dist/types/providers/openai-responses-shared.d.ts +2 -0
- package/dist/types/types.d.ts +31 -8
- package/dist/types/utils/abortable-iterator.d.ts +4 -0
- package/dist/types/utils/anthropic-auth.d.ts +11 -24
- package/dist/types/utils/idle-iterator.d.ts +11 -9
- package/dist/types/utils/oauth/index.d.ts +8 -5
- package/dist/types/utils/sdk-stream-timeout.d.ts +33 -0
- package/package.json +2 -2
- package/src/auth-gateway/server.ts +3 -1
- package/src/auth-gateway/types.ts +1 -1
- package/src/auth-storage.ts +114 -24
- package/src/index.ts +0 -1
- package/src/providers/amazon-bedrock.ts +80 -24
- package/src/providers/anthropic.ts +55 -18
- package/src/providers/azure-openai-responses.ts +33 -10
- package/src/providers/cursor.ts +149 -28
- package/src/providers/google-gemini-cli.ts +17 -36
- package/src/providers/mock.ts +0 -4
- package/src/providers/openai-codex-responses.ts +173 -79
- package/src/providers/openai-completions-compat.ts +7 -1
- package/src/providers/openai-completions.ts +33 -37
- package/src/providers/openai-responses-shared.ts +25 -0
- package/src/providers/openai-responses.ts +49 -46
- package/src/providers/pi-native-server.ts +1 -0
- package/src/providers/register-builtins.ts +61 -8
- package/src/providers/transform-messages.ts +25 -0
- package/src/stream.ts +6 -2
- package/src/types.ts +31 -8
- package/src/usage/gemini.ts +15 -13
- package/src/usage/google-antigravity.ts +13 -12
- package/src/usage/kimi.ts +9 -14
- package/src/utils/abortable-iterator.ts +69 -0
- package/src/utils/anthropic-auth.ts +22 -143
- package/src/utils/idle-iterator.ts +26 -31
- package/src/utils/oauth/index.ts +23 -17
- package/src/utils/oauth/moonshot.ts +11 -4
- package/src/utils/sdk-stream-timeout.ts +43 -0
- package/dist/types/utils/h2-fetch.d.ts +0 -22
- package/src/utils/h2-fetch.ts +0 -60
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,75 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.4.1] - 2026-05-26
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Added `isOpenAICompletionsProgressChunk` export to identify real progress chunks vs. keepalives in OpenAI completions streams
|
|
9
|
+
- Added per-provider stream watchdog overrides via `getStreamIdleTimeoutMs(fallbackMs)` and `getStreamFirstEventTimeoutMs(idleTimeoutMs, fallbackMs)` to allow providers like Google Gemini CLI to extend first-event timeouts without affecting global defaults
|
|
10
|
+
- Added `promptCacheKey` to `StreamOptions` and passed it through stream option mapping so callers can specify an explicit prompt-cache key separate from `sessionId`
|
|
11
|
+
- Added `promptCacheKey` support to the native server option whitelist so `promptCacheKey` is accepted by `pi-native-server` streams
|
|
12
|
+
- Restored the per-provider stream watchdog (`iterateWithIdleTimeout`) on top of the abortable iterator. The lazy stream forwarder in `register-builtins` now wraps every provider's event stream with the first-event + steady-state idle watchdog (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_STREAM_IDLE_TIMEOUT_MS`; aliases honored), and Anthropic / OpenAI Completions / OpenAI Responses / Azure OpenAI Responses / Codex SSE re-emit their per-provider progress predicates so empty keepalive frames cannot keep a stalled stream alive. Reverts the partial regression from #1392 that left Codex WebSocket subagent runs hanging silently for hours when the broker dropped frames between deltas. The Codex WebSocket transport additionally now resets `lastProgressAt` only on progress events (not keepalives), giving the 300s WS-internal idle ceiling the same liveness semantics as the SSE path.
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
|
|
16
|
+
- Enabled OpenAI Codex WebSocket streams to apply `streamIdleTimeoutMs` and `streamFirstEventTimeoutMs` from `StreamOptions` per request instead of fixed internal defaults
|
|
17
|
+
- Changed stream idle watchdog implementation from `iterateUntilAbort` to `iterateWithIdleTimeout`, which now enforces maximum idle gaps between streamed events and distinguishes between first-event and steady-state timeouts
|
|
18
|
+
- Changed Anthropic, OpenAI Responses, OpenAI Completions, Azure OpenAI Responses, and OpenAI Codex Responses providers to use the new idle-timeout iterator with per-provider progress predicates so empty keepalive frames cannot keep a stalled stream alive
|
|
19
|
+
- Changed Codex WebSocket transport to reset `lastProgressAt` only on progress events (not keepalives), giving the 300s WS-internal idle ceiling the same liveness semantics as the SSE path
|
|
20
|
+
- Changed Google Gemini CLI stream forwarding defaults to use a 5-minute first-event floor via per-provider lazy-stream limits to avoid premature first-event timeouts on slow startup
|
|
21
|
+
- Changed OpenAI Responses and OpenAI Codex request handling to keep `sessionId` for provider routing and conversation headers while `promptCacheKey` controls the `prompt_cache_key` payload independently
|
|
22
|
+
- Changed `StreamOptions.streamIdleTimeoutMs` documentation to clarify it is now wired into every built-in provider and the lazy stream forwarder, and that `streamFirstEventTimeoutMs` is honored at both the SDK-request layer and the iterator-watchdog layer
|
|
23
|
+
- Changed OpenAI Responses and OpenAI Codex request handling so `sessionId` continues to drive provider routing and state while `promptCacheKey` controls the `prompt_cache_key` payload
|
|
24
|
+
- Changed Google Gemini CLI stream forwarding defaults to use a 5-minute first-event floor to avoid premature first-event timeouts on slow startup
|
|
25
|
+
- Changed auth-gateway request mapping to preserve incoming `prompt_cache_key` as both `promptCacheKey` and `sessionId` when routing OpenAI-compatible sessions
|
|
26
|
+
- Un-deprecated `StreamOptions.streamIdleTimeoutMs`; the option is wired into every built-in provider and the lazy stream forwarder again. `streamFirstEventTimeoutMs` is now honored at both the SDK-request layer (via `createSdkStreamRequestOptions`) and the iterator-watchdog layer, with the two layers working together.
|
|
27
|
+
|
|
28
|
+
### Removed
|
|
29
|
+
|
|
30
|
+
- Removed `installH2Fetch` and the `fetch` patch that forced HTTP/2 on HTTPS requests; callers now use the default Bun `fetch` transport
|
|
31
|
+
|
|
32
|
+
### Fixed
|
|
33
|
+
|
|
34
|
+
- Fixed first-item timeout handling so `iterateWithIdleTimeout` no longer keeps first-event timers active after the source throws or the consumer stops before semantic progress
|
|
35
|
+
- Fixed silent multi-hour hangs on Codex WebSocket subagent runs when the broker dropped frames between deltas by restoring per-provider stream watchdogs with progress-event filtering
|
|
36
|
+
- Fixed z.ai/GLM-via-OpenRouter subagent stalls where no-op keepalive chunks reset the idle watchdog indefinitely by filtering non-progress items before resetting the deadline
|
|
37
|
+
|
|
38
|
+
## [15.4.0] - 2026-05-26
|
|
39
|
+
### Breaking Changes
|
|
40
|
+
|
|
41
|
+
- Removed `findAnthropicAuth` from `anthropic-auth` and replaced store-driven auth discovery with `buildAnthropicAuthConfig`, requiring callers to provide an already-resolved API key before building Anthropic auth config
|
|
42
|
+
|
|
43
|
+
### Added
|
|
44
|
+
|
|
45
|
+
- Added `PI_CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS` and `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` options to tune Codex WebSocket timeout behavior before fallback
|
|
46
|
+
- Added `AuthStorage.getOAuthAccess` to return a refreshed OAuth access token together with identity metadata (`accountId`, `email`, `projectId`, `enterpriseUrl`) for callers that need to send identity headers alongside the bearer token
|
|
47
|
+
- Added Codex WebSocket forwarding to the `onSseEvent` observer so the raw provider-stream debug viewer captures the inbound JSON frames and the outbound request frame from the WS transport using the same synthesized SSE-wire shape (`event:` + `data:` lines, prefixed with a `: ws ← <type>` (inbound) or `: ws → <type>` (outbound) comment).
|
|
48
|
+
|
|
49
|
+
### Changed
|
|
50
|
+
|
|
51
|
+
- Changed OAuth selection in `AuthStorage` to treat credentials as stale when they are within 60 seconds of expiry and rotate them preemptively
|
|
52
|
+
- Changed Google Gemini CLI, Google Gemini usage, Antigravity usage, and Kimi usage flows to stop refreshing OAuth tokens directly and rely on `AuthStorage` for token rotation
|
|
53
|
+
|
|
54
|
+
### Deprecated
|
|
55
|
+
|
|
56
|
+
- Deprecated `streamIdleTimeoutMs` in `StreamOptions` as a compatibility-only field that is no longer used by providers
|
|
57
|
+
|
|
58
|
+
### Removed
|
|
59
|
+
|
|
60
|
+
- Removed provider-local OAuth refresh helpers from Google Gemini CLI and Google/Kimi/Antigravity usage probes, preventing direct refresh calls from those usage paths
|
|
61
|
+
|
|
62
|
+
### Fixed
|
|
63
|
+
|
|
64
|
+
- Dropped truncated assistant turns that contain only `thinking`/`redacted_thinking` blocks (no `text` or `tool` content) during message transformation, preventing Anthropic requests from sending consecutive assistant messages after a `max_tokens`/`error`/`aborted` interruption
|
|
65
|
+
- Fixed Amazon Bedrock bearer-token authentication to honor `AWS_BEARER_TOKEN_BEDROCK` before resolving AWS profiles or running `credential_process`, matching Bedrock API-key precedence. ([#1399](https://github.com/can1357/oh-my-pi/issues/1399))
|
|
66
|
+
- Updated `isRetryableError` to treat Bun HTTP/2 transport errors (`HTTP2StreamReset`, `HTTP2RefusedStream`) as retryable so transient stream-reset failures can be retried
|
|
67
|
+
- Fixed Codex WebSocket streaming to recover from stalled sessions by falling back to SSE when the first event or subsequent progress is delayed beyond the configured websocket timeout
|
|
68
|
+
- Fixed expired OAuth handling so provider-level paths no longer attempt direct token refresh calls for expired credentials and instead rely on `AuthStorage` for rotation
|
|
69
|
+
- Fixed provider streams being aborted with OMP-owned first-event/idle watchdog errors when the first token was slow but valid or when there were silent gaps between events. Built-in lazy streams, OpenAI/Anthropic/Azure/Codex SSE, and Codex WebSocket streams now wait for provider output, provider/socket errors, caller aborts, or explicit request-layer timeouts instead of treating provider silence as failure ([#1392](https://github.com/can1357/oh-my-pi/issues/1392)).
|
|
70
|
+
- Fixed Claude Opus 4.7 on Amazon Bedrock streaming no reasoning output (and appearing to hang on long reasoning runs) because Anthropic silently switched the adaptive-thinking display default to `"omitted"`. The Bedrock provider now sends `thinking.display = "summarized"` by default on Opus 4.7+ adaptive models and on budget-based Claude models, mirroring the existing direct-Anthropic behavior. `BedrockOptions.thinkingDisplay` (`"summarized" | "omitted"`) is exposed for callers that want to opt out, and `hideThinkingSummary` now wires through to the Bedrock case ([#1373](https://github.com/can1357/oh-my-pi/issues/1373)).
|
|
71
|
+
- Fixed Cursor Composer resume/tool-continuation turns failing with `Cannot send empty user message to Cursor API`. Empty current user turns now use Cursor's `resumeAction` instead of constructing an invalid `userMessageAction` ([#1376](https://github.com/can1357/oh-my-pi/issues/1376)).
|
|
72
|
+
- Fixed `pi-ai login moonshot` failing with `invalid temperature: only 1 is allowed for this model` (HTTP 400) because the API-key validator probed `kimi-k2.5` with `temperature: 0`. Moonshot login now validates against `GET /v1/models`, matching the DeepSeek/Fireworks/NanoGPT/ZenMux pattern and authenticating the key without invoking model-specific parameter restrictions.
|
|
73
|
+
|
|
5
74
|
## [15.3.2] - 2026-05-25
|
|
6
75
|
### Added
|
|
7
76
|
|
|
@@ -58,7 +58,7 @@ export interface AuthGatewayParsedRequestOptions {
|
|
|
58
58
|
serviceTier?: ServiceTier;
|
|
59
59
|
/** Cache retention hint derived from inbound `cache_control` markers. */
|
|
60
60
|
cacheRetention?: CacheRetention;
|
|
61
|
-
/** OpenAI Responses `prompt_cache_key`;
|
|
61
|
+
/** OpenAI Responses `prompt_cache_key`; also seeds provider routing when no separate session id exists. */
|
|
62
62
|
promptCacheKey?: string;
|
|
63
63
|
/** OpenAI Responses `previous_response_id` for response chaining. */
|
|
64
64
|
previousResponseId?: string;
|
|
@@ -291,6 +291,21 @@ type AuthApiKeyOptions = {
|
|
|
291
291
|
*/
|
|
292
292
|
signal?: AbortSignal;
|
|
293
293
|
};
|
|
294
|
+
/**
|
|
295
|
+
* Refreshed OAuth access plus identity metadata returned by
|
|
296
|
+
* {@link AuthStorage.getOAuthAccess}. Callers that authenticate via a bearer
|
|
297
|
+
* AND need the credential's identity (Codex `chatgpt-account-id`, Google
|
|
298
|
+
* `projectId`, GitHub `enterpriseUrl`) consume this shape directly; the
|
|
299
|
+
* refresh slot is deliberately omitted because rotating refresh tokens never
|
|
300
|
+
* leave {@link AuthStorage}.
|
|
301
|
+
*/
|
|
302
|
+
export interface OAuthAccess {
|
|
303
|
+
accessToken: string;
|
|
304
|
+
accountId?: string;
|
|
305
|
+
email?: string;
|
|
306
|
+
projectId?: string;
|
|
307
|
+
enterpriseUrl?: string;
|
|
308
|
+
}
|
|
294
309
|
export interface InvalidateCredentialMatchingOptions {
|
|
295
310
|
signal?: AbortSignal;
|
|
296
311
|
sessionId?: string;
|
|
@@ -493,6 +508,22 @@ export declare class AuthStorage {
|
|
|
493
508
|
* 6. Fallback resolver (models.yml custom providers, last-resort)
|
|
494
509
|
*/
|
|
495
510
|
getApiKey(provider: string, sessionId?: string, options?: AuthApiKeyOptions): Promise<string | undefined>;
|
|
511
|
+
/**
|
|
512
|
+
* Resolve the OAuth credential for `provider`, refreshing through the same
|
|
513
|
+
* pipeline as {@link AuthStorage.getApiKey} but returning the refreshed
|
|
514
|
+
* {@link OAuthAccess} (raw access token + identity metadata) instead of
|
|
515
|
+
* the API-key bytes.
|
|
516
|
+
*
|
|
517
|
+
* Use this when the caller needs to inject identity headers alongside the
|
|
518
|
+
* bearer (Codex `chatgpt-account-id`, Google `project`, GitHub
|
|
519
|
+
* `enterpriseUrl`). For pure "give me the bytes for `Authorization`"
|
|
520
|
+
* scenarios, prefer {@link AuthStorage.getApiKey}.
|
|
521
|
+
*
|
|
522
|
+
* Returns `undefined` when no OAuth credential is available, the
|
|
523
|
+
* credential fails to refresh, or runtime/config overrides have replaced
|
|
524
|
+
* OAuth with an explicit API key.
|
|
525
|
+
*/
|
|
526
|
+
getOAuthAccess(provider: string, sessionId?: string, options?: AuthApiKeyOptions): Promise<OAuthAccess | undefined>;
|
|
496
527
|
invalidateCredentialMatching(provider: string, apiKey: string, options?: InvalidateCredentialMatchingOptions): Promise<boolean>;
|
|
497
528
|
invalidateCredentialMatching(provider: string, apiKey: string, signal?: AbortSignal): Promise<boolean>;
|
|
498
529
|
/**
|
package/dist/types/index.d.ts
CHANGED
|
@@ -40,7 +40,6 @@ export * from "./usage/zai";
|
|
|
40
40
|
export * from "./utils/anthropic-auth";
|
|
41
41
|
export * from "./utils/discovery";
|
|
42
42
|
export * from "./utils/event-stream";
|
|
43
|
-
export * from "./utils/h2-fetch";
|
|
44
43
|
export * from "./utils/oauth";
|
|
45
44
|
export type { OAuthCredentials, OAuthProvider, OAuthProviderId, OAuthProviderInfo, } from "./utils/oauth/types";
|
|
46
45
|
export * from "./utils/overflow";
|
|
@@ -8,9 +8,12 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { Effort } from "../model-thinking";
|
|
10
10
|
import type { StreamFunction, StreamOptions, ThinkingBudgets } from "../types";
|
|
11
|
+
export type BedrockThinkingDisplay = "summarized" | "omitted";
|
|
11
12
|
export interface BedrockOptions extends StreamOptions {
|
|
12
13
|
region?: string;
|
|
13
14
|
profile?: string;
|
|
15
|
+
/** Amazon Bedrock API key sent as `Authorization: Bearer`, ahead of SigV4 credential resolution. */
|
|
16
|
+
bearerToken?: string;
|
|
14
17
|
toolChoice?: "auto" | "any" | "none" | {
|
|
15
18
|
type: "tool";
|
|
16
19
|
name: string;
|
|
@@ -18,5 +21,18 @@ export interface BedrockOptions extends StreamOptions {
|
|
|
18
21
|
reasoning?: Effort;
|
|
19
22
|
thinkingBudgets?: ThinkingBudgets;
|
|
20
23
|
interleavedThinking?: boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Controls how Claude returns thinking content in Bedrock responses.
|
|
26
|
+
* - `"summarized"`: thinking blocks include human-readable summaries (default here).
|
|
27
|
+
* - `"omitted"`: thinking content is suppressed; the encrypted signature still
|
|
28
|
+
* travels back for multi-turn continuity.
|
|
29
|
+
*
|
|
30
|
+
* Starting with Claude Opus 4.7 the Anthropic API default is `"omitted"`, which
|
|
31
|
+
* leaves callers waiting on a silent stream during long reasoning runs (issue
|
|
32
|
+
* #1373). We default to `"summarized"` so adaptive-thinking models that accept
|
|
33
|
+
* the field keep producing visible thinking deltas. Older adaptive-thinking
|
|
34
|
+
* models (Opus 4.6, Sonnet 4.6+) reject the field, so we omit it for them.
|
|
35
|
+
*/
|
|
36
|
+
thinkingDisplay?: BedrockThinkingDisplay;
|
|
21
37
|
}
|
|
22
38
|
export declare const streamBedrock: StreamFunction<"bedrock-converse-stream">;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { type JsonValue } from "@bufbuild/protobuf";
|
|
2
|
+
import type { CursorExecHandlerResult, CursorExecHandlers, CursorToolResultHandler, Message, StreamFunction, StreamOptions, ToolResultMessage } from "../types";
|
|
2
3
|
export declare const CURSOR_API_URL = "https://api2.cursor.sh";
|
|
3
4
|
export declare const CURSOR_CLIENT_VERSION = "cli-2026.01.09-231024f";
|
|
4
5
|
export interface CursorOptions extends StreamOptions {
|
|
@@ -34,3 +35,8 @@ export declare function resolveExecHandler<TArgs, TResult>(args: TArgs, handler:
|
|
|
34
35
|
* an empty `rootPromptMessagesJson` head.
|
|
35
36
|
*/
|
|
36
37
|
export declare function buildCursorSystemPromptJsons(systemPrompt: readonly string[] | undefined): string[];
|
|
38
|
+
/** Exported for tests: decodes Cursor history blobs built from conversation messages. */
|
|
39
|
+
export declare function buildCursorHistoryForTest(messages: Message[]): {
|
|
40
|
+
rootPromptMessagesJson: unknown[];
|
|
41
|
+
turnUserMessagesJson: JsonValue[];
|
|
42
|
+
};
|
|
@@ -163,8 +163,6 @@ export declare class MockModel implements Model<MockApi> {
|
|
|
163
163
|
/** Reset recorded calls AND the extras queue. The constructor `responses` are NOT reset. */
|
|
164
164
|
reset(): void;
|
|
165
165
|
}
|
|
166
|
-
/** @deprecated Use {@link MockModel}; the class IS the handle. */
|
|
167
|
-
export type MockModelHandle = MockModel;
|
|
168
166
|
/** Check whether `model` was produced by `createMockModel`. */
|
|
169
167
|
export declare function isMockModel(model: Model<Api>): model is MockModel;
|
|
170
168
|
/** Construct a mock model. */
|
|
@@ -2,6 +2,8 @@ import type OpenAI from "openai";
|
|
|
2
2
|
import type { ResponseInput, ResponseInputContent, ResponseOutputItem } from "openai/resources/responses/responses";
|
|
3
3
|
import { type Api, type AssistantMessage, type ImageContent, type Model, type ServiceTier, type StopReason, type StreamOptions, type TextContent, type TextSignatureV1, type ToolResultMessage } from "../types";
|
|
4
4
|
import type { AssistantMessageEventStream } from "../utils/event-stream";
|
|
5
|
+
export declare const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string>;
|
|
6
|
+
export declare function isOpenAIResponsesProgressEvent(event: unknown): boolean;
|
|
5
7
|
export declare function encodeTextSignatureV1(id: string, phase?: TextSignatureV1["phase"]): string;
|
|
6
8
|
export declare function parseTextSignature(signature: string | undefined): {
|
|
7
9
|
id: string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -185,11 +185,17 @@ export interface StreamOptions {
|
|
|
185
185
|
*/
|
|
186
186
|
metadata?: Record<string, unknown>;
|
|
187
187
|
/**
|
|
188
|
-
* Optional session identifier for providers that support session-based
|
|
189
|
-
*
|
|
190
|
-
*
|
|
188
|
+
* Optional session identifier for providers that support session-based
|
|
189
|
+
* routing, request affinity, or transport reuse. Providers may also use this
|
|
190
|
+
* as the prompt-cache key when `promptCacheKey` is not set.
|
|
191
191
|
*/
|
|
192
192
|
sessionId?: string;
|
|
193
|
+
/**
|
|
194
|
+
* Optional prompt-cache identity. When set, OpenAI Responses-compatible
|
|
195
|
+
* providers use this for `prompt_cache_key` while keeping `sessionId` for
|
|
196
|
+
* provider routing / conversation headers.
|
|
197
|
+
*/
|
|
198
|
+
promptCacheKey?: string;
|
|
193
199
|
/**
|
|
194
200
|
* Provider-scoped mutable state store for this agent session.
|
|
195
201
|
* Providers can use this to persist transport/session state between turns.
|
|
@@ -205,20 +211,37 @@ export interface StreamOptions {
|
|
|
205
211
|
*/
|
|
206
212
|
onResponse?: (response: ProviderResponseMetadata, model?: Model<Api>) => void | Promise<void>;
|
|
207
213
|
/**
|
|
208
|
-
* Optional callback for raw Server-Sent Events as they arrive from HTTP streaming providers
|
|
214
|
+
* Optional callback for raw Server-Sent Events as they arrive from HTTP streaming providers,
|
|
215
|
+
* plus synthesized SSE-shaped frames for the Codex WebSocket transport (one synthetic frame
|
|
216
|
+
* per JSON request/response message). WebSocket frames are tagged with a leading
|
|
217
|
+
* `: ws → <type>` (outbound) or `: ws ← <type>` (inbound) comment line in `RawSseEvent.raw`.
|
|
209
218
|
*
|
|
210
219
|
* Diagnostic only: provider implementations must ignore callback failures and must not
|
|
211
220
|
* let observers alter stream contents.
|
|
212
221
|
*/
|
|
213
222
|
onSseEvent?: (event: RawSseEvent, model?: Model<Api>) => void;
|
|
214
223
|
/**
|
|
215
|
-
* Optional override for the first
|
|
216
|
-
*
|
|
224
|
+
* Optional override for the first-event watchdog in milliseconds. Built-in
|
|
225
|
+
* providers apply this budget twice when they can: once to the underlying
|
|
226
|
+
* SDK/request while waiting for the HTTP stream object to exist, then again
|
|
227
|
+
* in the iterator while waiting for the first semantic stream event. Set to
|
|
228
|
+
* `0` to disable both layers for this request. After the first semantic
|
|
229
|
+
* event arrives, `streamIdleTimeoutMs` governs inter-event stalls. Falls
|
|
230
|
+
* back to `PI_STREAM_FIRST_EVENT_TIMEOUT_MS` and then to a 100s default.
|
|
231
|
+
*
|
|
232
|
+
* Iterator-level honored by: every built-in provider (via the lazy-stream
|
|
233
|
+
* forwarder in `register-builtins`). SDK-request honored by:
|
|
234
|
+
* `openai-completions`, `openai-responses`, `azure-openai-responses`,
|
|
235
|
+
* `anthropic-messages`.
|
|
217
236
|
*/
|
|
218
237
|
streamFirstEventTimeoutMs?: number;
|
|
219
238
|
/**
|
|
220
|
-
* Optional override for the maximum idle gap between streamed events in
|
|
221
|
-
*
|
|
239
|
+
* Optional override for the maximum idle gap between streamed events in
|
|
240
|
+
* milliseconds. Once the first event arrives, this guards against silent
|
|
241
|
+
* mid-stream stalls (broker dies, half-open socket, model produces no real
|
|
242
|
+
* progress for too long). Set to `0` to disable. Falls back to
|
|
243
|
+
* `PI_STREAM_IDLE_TIMEOUT_MS` (alias: `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS`)
|
|
244
|
+
* and then to a 120s default.
|
|
222
245
|
*/
|
|
223
246
|
streamIdleTimeoutMs?: number;
|
|
224
247
|
/**
|
|
@@ -1,44 +1,31 @@
|
|
|
1
|
-
import { type AuthCredentialStore } from "../auth-storage";
|
|
2
1
|
/** Auth configuration for Anthropic */
|
|
3
2
|
export interface AnthropicAuthConfig {
|
|
4
3
|
apiKey: string;
|
|
5
4
|
baseUrl: string;
|
|
6
5
|
isOAuth: boolean;
|
|
7
6
|
}
|
|
8
|
-
|
|
9
|
-
export interface AnthropicOAuthCredential {
|
|
10
|
-
type: "oauth";
|
|
11
|
-
access: string;
|
|
12
|
-
refresh?: string;
|
|
13
|
-
/** Expiry timestamp in milliseconds */
|
|
14
|
-
expires: number;
|
|
15
|
-
}
|
|
7
|
+
export declare function resolveAnthropicBaseUrlFromEnv(): string | undefined;
|
|
16
8
|
/**
|
|
17
9
|
* Checks if a token is an OAuth token by looking for sk-ant-oat prefix.
|
|
18
|
-
* @param apiKey - The API key to check
|
|
19
|
-
* @returns True if the token is an OAuth token
|
|
20
10
|
*/
|
|
21
11
|
export declare function isOAuthToken(apiKey: string): boolean;
|
|
22
12
|
/**
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
13
|
+
* Build an {@link AnthropicAuthConfig} from an already-resolved API key.
|
|
14
|
+
*
|
|
15
|
+
* `apiKey` is whatever the caller chose for `Authorization`/`x-api-key` —
|
|
16
|
+
* usually `authStorage.getApiKey("anthropic")`. `baseUrl` overrides the
|
|
17
|
+
* env-derived base; pass `undefined` to fall back to FOUNDRY/ANTHROPIC env
|
|
18
|
+
* resolution and finally `DEFAULT_BASE_URL`.
|
|
19
|
+
*
|
|
20
|
+
* `isOAuth` is derived from the token prefix so the helper stays pure: callers
|
|
21
|
+
* never have to thread the OAuth flag through their own resolution logic.
|
|
31
22
|
*/
|
|
32
|
-
export declare function
|
|
23
|
+
export declare function buildAnthropicAuthConfig(apiKey: string, baseUrl?: string): AnthropicAuthConfig;
|
|
33
24
|
/**
|
|
34
25
|
* Builds HTTP headers for Anthropic API requests (search variant).
|
|
35
|
-
* @param auth - The authentication configuration
|
|
36
|
-
* @returns Headers object ready for use in fetch requests
|
|
37
26
|
*/
|
|
38
27
|
export declare function buildAnthropicSearchHeaders(auth: AnthropicAuthConfig): Record<string, string>;
|
|
39
28
|
/**
|
|
40
29
|
* Builds the full API URL for Anthropic messages endpoint.
|
|
41
|
-
* @param auth - The authentication configuration
|
|
42
|
-
* @returns The complete API URL with beta query parameter
|
|
43
30
|
*/
|
|
44
31
|
export declare function buildAnthropicUrl(auth: AnthropicAuthConfig): string;
|
|
@@ -3,8 +3,12 @@
|
|
|
3
3
|
*
|
|
4
4
|
* `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` is accepted as a backward-compatible alias.
|
|
5
5
|
* Set `PI_STREAM_IDLE_TIMEOUT_MS=0` to disable the watchdog.
|
|
6
|
+
*
|
|
7
|
+
* Providers that legitimately stream much slower than the global default can pass
|
|
8
|
+
* `fallbackMs` to widen the floor used when neither env var nor caller option is set.
|
|
9
|
+
* Caller options still take precedence; env overrides still trump the fallback.
|
|
6
10
|
*/
|
|
7
|
-
export declare function getStreamIdleTimeoutMs(): number | undefined;
|
|
11
|
+
export declare function getStreamIdleTimeoutMs(fallbackMs?: number): number | undefined;
|
|
8
12
|
/**
|
|
9
13
|
* Returns the idle timeout used for OpenAI-family streaming transports.
|
|
10
14
|
*
|
|
@@ -17,16 +21,14 @@ export declare function getOpenAIStreamIdleTimeoutMs(): number | undefined;
|
|
|
17
21
|
* so the default never undershoots the steady-state idle timeout.
|
|
18
22
|
*
|
|
19
23
|
* Set `PI_STREAM_FIRST_EVENT_TIMEOUT_MS=0` to disable the watchdog.
|
|
24
|
+
*
|
|
25
|
+
* Providers whose first response can legitimately take longer (heavy reasoning,
|
|
26
|
+
* slow cold-start proxies) can pass `fallbackMs` to widen the floor used when
|
|
27
|
+
* neither env var nor caller option is set. Caller options still take precedence;
|
|
28
|
+
* env overrides still trump the fallback.
|
|
20
29
|
*/
|
|
21
|
-
export declare function getStreamFirstEventTimeoutMs(idleTimeoutMs?: number): number | undefined;
|
|
22
|
-
export type Watchdog = NodeJS.Timeout | undefined;
|
|
23
|
-
/**
|
|
24
|
-
* Starts a watchdog that aborts a request if no first stream event arrives in time.
|
|
25
|
-
* Call `markFirstEventReceived()` as soon as the first event is observed.
|
|
26
|
-
*/
|
|
27
|
-
export declare function createWatchdog(timeoutMs: number | undefined, onTimeout: () => void): Watchdog;
|
|
30
|
+
export declare function getStreamFirstEventTimeoutMs(idleTimeoutMs?: number, fallbackMs?: number): number | undefined;
|
|
28
31
|
export interface IdleTimeoutIteratorOptions {
|
|
29
|
-
watchdog?: Watchdog;
|
|
30
32
|
idleTimeoutMs?: number;
|
|
31
33
|
firstItemTimeoutMs?: number;
|
|
32
34
|
errorMessage: string;
|
|
@@ -17,13 +17,16 @@ export declare function unregisterOAuthProviders(sourceId: string): void;
|
|
|
17
17
|
*/
|
|
18
18
|
export declare function refreshOAuthToken(provider: OAuthProvider, credentials: OAuthCredentials): Promise<OAuthCredentials>;
|
|
19
19
|
/**
|
|
20
|
-
*
|
|
21
|
-
* Automatically refreshes expired tokens.
|
|
20
|
+
* Build API-key bytes for a provider from an already-fresh OAuth credential.
|
|
22
21
|
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
22
|
+
* Refresh is owned by AuthStorage. This helper deliberately refuses expired
|
|
23
|
+
* credentials so it cannot POST broker redaction sentinels to upstream token
|
|
24
|
+
* endpoints as a side channel.
|
|
25
|
+
*
|
|
26
|
+
* For providers that need credential metadata at request time, returns
|
|
27
|
+
* JSON-encoded credentials plus expiry metadata for diagnostics/edge guards.
|
|
25
28
|
* @returns API key string, or null if no credentials
|
|
26
|
-
* @throws Error if
|
|
29
|
+
* @throws Error if the credential is expired and must be refreshed upstream
|
|
27
30
|
*/
|
|
28
31
|
export declare function getOAuthApiKey(provider: OAuthProvider, credentials: Record<string, OAuthCredentials>): Promise<{
|
|
29
32
|
newCredentials: OAuthCredentials;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for mapping `StreamOptions.streamFirstEventTimeoutMs` onto
|
|
3
|
+
* underlying SDK request-timeout options.
|
|
4
|
+
*
|
|
5
|
+
* The hint is intentionally not a watchdog — it just narrows the SDK's
|
|
6
|
+
* "transport timeout" window so a stuck pre-stream request fails fast
|
|
7
|
+
* instead of hanging on the default (often multi-minute) SDK timeout. Once
|
|
8
|
+
* the stream actually starts, silence is not failure; callers must abort
|
|
9
|
+
* to interrupt a quiet stream.
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* Coerce a caller-supplied `streamFirstEventTimeoutMs` into a positive integer suitable
|
|
13
|
+
* for the SDK's `timeout` option. Returns `undefined` when the caller passed nothing,
|
|
14
|
+
* a non-finite value, or a non-positive value (preserving the SDK's default).
|
|
15
|
+
*/
|
|
16
|
+
export declare function resolveSdkTimeoutMs(streamFirstEventTimeoutMs: number | undefined): number | undefined;
|
|
17
|
+
/**
|
|
18
|
+
* Build per-request SDK options that combine an abort signal with the optional
|
|
19
|
+
* `streamFirstEventTimeoutMs` request-timeout hint.
|
|
20
|
+
*
|
|
21
|
+
* The returned `{ signal, timeout?, maxRetries? }` shape is compatible with both
|
|
22
|
+
* OpenAI's and Anthropic's `RequestOptions` (and any other SDK that follows the
|
|
23
|
+
* Stainless conventions), so callers from any of those providers can spread the
|
|
24
|
+
* result directly into `client.X.create(params, requestOptions)`.
|
|
25
|
+
*
|
|
26
|
+
* When the hint is set, retries are forced to zero so the SDK does not silently
|
|
27
|
+
* extend the caller's explicit deadline by re-attempting after a timeout.
|
|
28
|
+
*/
|
|
29
|
+
export declare function createSdkStreamRequestOptions(signal: AbortSignal, streamFirstEventTimeoutMs: number | undefined): {
|
|
30
|
+
signal: AbortSignal;
|
|
31
|
+
timeout?: number;
|
|
32
|
+
maxRetries?: number;
|
|
33
|
+
};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.3.2",
|
|
4
|
+
"version": "15.4.1",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
45
45
|
"@bufbuild/protobuf": "^2.12.0",
|
|
46
|
-
"@oh-my-pi/pi-utils": "15.
|
|
46
|
+
"@oh-my-pi/pi-utils": "15.4.1",
|
|
47
47
|
"openai": "^6.36.0",
|
|
48
48
|
"partial-json": "^0.1.7",
|
|
49
49
|
"zod": "4.4.3"
|
|
@@ -145,7 +145,9 @@ function buildStreamOptions(parsed: ParsedFormatRequest, api: Api, signal: Abort
|
|
|
145
145
|
// Client-supplied `prompt_cache_key` wins; otherwise derive a stable
|
|
146
146
|
// key from the model + system + tools so prefix caching engages on
|
|
147
147
|
// Codex-class backends across turns of the same logical conversation.
|
|
148
|
-
|
|
148
|
+
const promptCacheKey = options.promptCacheKey ?? deriveSessionId(parsed.modelId, parsed.context);
|
|
149
|
+
opts.promptCacheKey = promptCacheKey;
|
|
150
|
+
opts.sessionId = promptCacheKey;
|
|
149
151
|
if (options.thinkingBudgets) {
|
|
150
152
|
opts.thinkingBudgets = { ...(opts.thinkingBudgets ?? {}), ...options.thinkingBudgets };
|
|
151
153
|
}
|
|
@@ -67,7 +67,7 @@ export interface AuthGatewayParsedRequestOptions {
|
|
|
67
67
|
serviceTier?: ServiceTier;
|
|
68
68
|
/** Cache retention hint derived from inbound `cache_control` markers. */
|
|
69
69
|
cacheRetention?: CacheRetention;
|
|
70
|
-
/** OpenAI Responses `prompt_cache_key`;
|
|
70
|
+
/** OpenAI Responses `prompt_cache_key`; also seeds provider routing when no separate session id exists. */
|
|
71
71
|
promptCacheKey?: string;
|
|
72
72
|
/** OpenAI Responses `previous_response_id` for response chaining. */
|
|
73
73
|
previousResponseId?: string;
|