@oh-my-pi/pi-ai 15.3.2 → 15.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +69 -0
  2. package/dist/types/auth-gateway/types.d.ts +1 -1
  3. package/dist/types/auth-storage.d.ts +31 -0
  4. package/dist/types/index.d.ts +0 -1
  5. package/dist/types/providers/amazon-bedrock.d.ts +16 -0
  6. package/dist/types/providers/cursor.d.ts +7 -1
  7. package/dist/types/providers/mock.d.ts +0 -2
  8. package/dist/types/providers/openai-responses-shared.d.ts +2 -0
  9. package/dist/types/types.d.ts +31 -8
  10. package/dist/types/utils/abortable-iterator.d.ts +4 -0
  11. package/dist/types/utils/anthropic-auth.d.ts +11 -24
  12. package/dist/types/utils/idle-iterator.d.ts +11 -9
  13. package/dist/types/utils/oauth/index.d.ts +8 -5
  14. package/dist/types/utils/sdk-stream-timeout.d.ts +33 -0
  15. package/package.json +2 -2
  16. package/src/auth-gateway/server.ts +3 -1
  17. package/src/auth-gateway/types.ts +1 -1
  18. package/src/auth-storage.ts +114 -24
  19. package/src/index.ts +0 -1
  20. package/src/providers/amazon-bedrock.ts +80 -24
  21. package/src/providers/anthropic.ts +55 -18
  22. package/src/providers/azure-openai-responses.ts +33 -10
  23. package/src/providers/cursor.ts +149 -28
  24. package/src/providers/google-gemini-cli.ts +17 -36
  25. package/src/providers/mock.ts +0 -4
  26. package/src/providers/openai-codex-responses.ts +173 -79
  27. package/src/providers/openai-completions-compat.ts +7 -1
  28. package/src/providers/openai-completions.ts +33 -37
  29. package/src/providers/openai-responses-shared.ts +25 -0
  30. package/src/providers/openai-responses.ts +49 -46
  31. package/src/providers/pi-native-server.ts +1 -0
  32. package/src/providers/register-builtins.ts +61 -8
  33. package/src/providers/transform-messages.ts +25 -0
  34. package/src/stream.ts +6 -2
  35. package/src/types.ts +31 -8
  36. package/src/usage/gemini.ts +15 -13
  37. package/src/usage/google-antigravity.ts +13 -12
  38. package/src/usage/kimi.ts +9 -14
  39. package/src/utils/abortable-iterator.ts +69 -0
  40. package/src/utils/anthropic-auth.ts +22 -143
  41. package/src/utils/idle-iterator.ts +26 -31
  42. package/src/utils/oauth/index.ts +23 -17
  43. package/src/utils/oauth/moonshot.ts +11 -4
  44. package/src/utils/sdk-stream-timeout.ts +43 -0
  45. package/dist/types/utils/h2-fetch.d.ts +0 -22
  46. package/src/utils/h2-fetch.ts +0 -60
package/CHANGELOG.md CHANGED
@@ -2,6 +2,75 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.4.1] - 2026-05-26
6
+ ### Added
7
+
8
+ - Added `isOpenAICompletionsProgressChunk` export to identify real progress chunks vs. keepalives in OpenAI completions streams
9
+ - Added per-provider stream watchdog overrides via `getStreamIdleTimeoutMs(fallbackMs)` and `getStreamFirstEventTimeoutMs(idleTimeoutMs, fallbackMs)` to allow providers like Google Gemini CLI to extend first-event timeouts without affecting global defaults
10
+ - Added `promptCacheKey` to `StreamOptions` and passed it through stream option mapping so callers can specify an explicit prompt-cache key separate from `sessionId`
11
+ - Added `promptCacheKey` support to the native server option whitelist so `promptCacheKey` is accepted by `pi-native-server` streams
12
+ - Restored the per-provider stream watchdog (`iterateWithIdleTimeout`) on top of the abortable iterator. The lazy stream forwarder in `register-builtins` now wraps every provider's event stream with the first-event + steady-state idle watchdog (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_STREAM_IDLE_TIMEOUT_MS`; aliases honored), and Anthropic / OpenAI Completions / OpenAI Responses / Azure OpenAI Responses / Codex SSE re-emit their per-provider progress predicates so empty keepalive frames cannot keep a stalled stream alive. Reverts the partial regression from #1392 that left Codex WebSocket subagent runs hanging silently for hours when the broker dropped frames between deltas. The Codex WebSocket transport additionally now resets `lastProgressAt` only on progress events (not keepalives), giving the 300s WS-internal idle ceiling the same liveness semantics as the SSE path.
13
+
14
+ ### Changed
15
+
16
+ - Enabled OpenAI Codex WebSocket streams to apply `streamIdleTimeoutMs` and `streamFirstEventTimeoutMs` from `StreamOptions` per request instead of fixed internal defaults
17
+ - Changed stream idle watchdog implementation from `iterateUntilAbort` to `iterateWithIdleTimeout`, which now enforces maximum idle gaps between streamed events and distinguishes between first-event and steady-state timeouts
18
+ - Changed Anthropic, OpenAI Responses, OpenAI Completions, Azure OpenAI Responses, and OpenAI Codex Responses providers to use the new idle-timeout iterator with per-provider progress predicates so empty keepalive frames cannot keep a stalled stream alive
19
+ - Changed Codex WebSocket transport to reset `lastProgressAt` only on progress events (not keepalives), giving the 300s WS-internal idle ceiling the same liveness semantics as the SSE path
20
+ - Changed Google Gemini CLI stream forwarding defaults to use a 5-minute first-event floor via per-provider lazy-stream limits to avoid premature first-event timeouts on slow startup
21
+ - Changed OpenAI Responses and OpenAI Codex request handling to keep `sessionId` for provider routing and conversation headers while `promptCacheKey` controls the `prompt_cache_key` payload independently
22
+ - Changed `StreamOptions.streamIdleTimeoutMs` documentation to clarify it is now wired into every built-in provider and the lazy stream forwarder, and that `streamFirstEventTimeoutMs` is honored at both the SDK-request layer and the iterator-watchdog layer
23
+ - Changed OpenAI Responses and OpenAI Codex request handling so `sessionId` continues to drive provider routing and state while `promptCacheKey` controls the `prompt_cache_key` payload
24
+ - Changed Google Gemini CLI stream forwarding defaults to use a 5-minute first-event floor to avoid premature first-event timeouts on slow startup
25
+ - Changed auth-gateway request mapping to preserve incoming `prompt_cache_key` as both `promptCacheKey` and `sessionId` when routing OpenAI-compatible sessions
26
+ - Un-deprecated `StreamOptions.streamIdleTimeoutMs`; the option is wired into every built-in provider and the lazy stream forwarder again. `streamFirstEventTimeoutMs` is now honored at both the SDK-request layer (via `createSdkStreamRequestOptions`) and the iterator-watchdog layer, in cooperation.
27
+
28
+ ### Removed
29
+
30
+ - Removed `installH2Fetch` and the `fetch` patch that forced HTTP/2 on HTTPS requests; callers now use the default Bun `fetch` transport
31
+
32
+ ### Fixed
33
+
34
+ - Fixed first-item timeout handling so `iterateWithIdleTimeout` no longer keeps first-event timers active after the source throws or the consumer stops before semantic progress
35
+ - Fixed silent multi-hour hangs on Codex WebSocket subagent runs when the broker dropped frames between deltas by restoring per-provider stream watchdogs with progress-event filtering
36
+ - Fixed z.ai/GLM-via-OpenRouter subagent stalls where no-op keepalive chunks reset the idle watchdog indefinitely by filtering non-progress items before resetting the deadline
37
+
38
+ ## [15.4.0] - 2026-05-26
39
+ ### Breaking Changes
40
+
41
+ - Removed `findAnthropicAuth` from `anthropic-auth` and replaced store-driven auth discovery with `buildAnthropicAuthConfig`, requiring callers to provide an already-resolved API key before building Anthropic auth config
42
+
43
+ ### Added
44
+
45
+ - Added `PI_CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS` and `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` options to tune Codex WebSocket timeout behavior before fallback
46
+ - Added `AuthStorage.getOAuthAccess` to return a refreshed OAuth access token with identity metadata (`accountId`, `email`, `projectId`, `enterpriseUrl`) for callers that need to send the bearer token together with identity headers
47
+ - Added Codex WebSocket forwarding to the `onSseEvent` observer so the raw provider-stream debug viewer captures the inbound JSON frames and the outbound request frame from the WS transport using the same synthesized SSE-wire shape (`event:` + `data:` lines, prefixed with a `: ws ← <type>` (inbound) or `: ws → <type>` (outbound) comment).
48
+
49
+ ### Changed
50
+
51
+ - Changed OAuth selection in `AuthStorage` to treat credentials as stale when they are within 60 seconds of expiry and rotate them preemptively
52
+ - Changed Google Gemini CLI, Google Gemini usage, Antigravity usage, and Kimi usage flows to stop refreshing OAuth tokens directly and rely on `AuthStorage` for token rotation
53
+
54
+ ### Deprecated
55
+
56
+ - Deprecated `streamIdleTimeoutMs` in `StreamOptions` as a compatibility-only field that is no longer used by providers
57
+
58
+ ### Removed
59
+
60
+ - Removed provider-local OAuth refresh helpers from Google Gemini CLI and Google/Kimi/Antigravity usage probes, preventing direct refresh calls from those usage paths
61
+
62
+ ### Fixed
63
+
64
+ - Dropped truncated, thinking-only assistant turns with only `thinking`/`redacted_thinking` blocks and no `text` or `tool` content during message transformation, preventing Anthropic requests from sending consecutive assistant messages after a `max_tokens`/`error`/`aborted` interruption
65
+ - Fixed Amazon Bedrock bearer-token authentication to honor `AWS_BEARER_TOKEN_BEDROCK` before resolving AWS profiles or running `credential_process`, matching Bedrock API-key precedence. ([#1399](https://github.com/can1357/oh-my-pi/issues/1399))
66
+ - Updated `isRetryableError` to treat Bun HTTP/2 transport errors (`HTTP2StreamReset`, `HTTP2RefusedStream`) as retryable so transient stream-reset failures can be retried
67
+ - Fixed Codex WebSocket streaming to recover from stalled sessions by falling back to SSE when the first event or subsequent progress is delayed beyond the configured websocket timeout
68
+ - Fixed expired OAuth handling so provider-level paths no longer attempt direct token refresh calls for expired credentials and instead rely on `AuthStorage` for rotation
69
+ - Fixed provider streams aborting slow-but-valid first tokens or silent inter-event gaps with OMP-owned first-event/idle watchdog errors. Built-in lazy streams, OpenAI/Anthropic/Azure/Codex SSE, and Codex WebSocket streams now wait for provider output, provider/socket errors, caller aborts, or explicit request-layer timeouts instead of treating provider silence as failure ([#1392](https://github.com/can1357/oh-my-pi/issues/1392)).
70
+ - Fixed Claude Opus 4.7 on Amazon Bedrock streaming no reasoning output (and appearing to hang on long reasoning runs) because Anthropic silently switched the adaptive-thinking display default to `"omitted"`. The Bedrock provider now sends `thinking.display = "summarized"` by default on Opus 4.7+ adaptive models and on budget-based Claude models, mirroring the existing direct-Anthropic behavior. `BedrockOptions.thinkingDisplay` (`"summarized" | "omitted"`) is exposed for callers that want to opt out, and `hideThinkingSummary` now wires through to the Bedrock case ([#1373](https://github.com/can1357/oh-my-pi/issues/1373)).
71
+ - Fixed Cursor Composer resume/tool-continuation turns failing with `Cannot send empty user message to Cursor API`. Empty current user turns now use Cursor's `resumeAction` instead of constructing an invalid `userMessageAction` ([#1376](https://github.com/can1357/oh-my-pi/issues/1376)).
72
+ - Fixed `pi-ai login moonshot` failing with `invalid temperature: only 1 is allowed for this model` (HTTP 400) because the API-key validator probed `kimi-k2.5` with `temperature: 0`. Moonshot login now validates against `GET /v1/models`, matching the DeepSeek/Fireworks/NanoGPT/ZenMux pattern and authenticating the key without invoking model-specific parameter restrictions.
73
+
5
74
  ## [15.3.2] - 2026-05-25
6
75
  ### Added
7
76
 
@@ -58,7 +58,7 @@ export interface AuthGatewayParsedRequestOptions {
58
58
  serviceTier?: ServiceTier;
59
59
  /** Cache retention hint derived from inbound `cache_control` markers. */
60
60
  cacheRetention?: CacheRetention;
61
- /** OpenAI Responses `prompt_cache_key`; bridges to pi-ai `sessionId`. */
61
+ /** OpenAI Responses `prompt_cache_key`; also seeds provider routing when no separate session id exists. */
62
62
  promptCacheKey?: string;
63
63
  /** OpenAI Responses `previous_response_id` for response chaining. */
64
64
  previousResponseId?: string;
@@ -291,6 +291,21 @@ type AuthApiKeyOptions = {
291
291
  */
292
292
  signal?: AbortSignal;
293
293
  };
294
+ /**
295
+ * Refreshed OAuth access plus identity metadata returned by
296
+ * {@link AuthStorage.getOAuthAccess}. Callers that authenticate via a bearer
297
+ * AND need the credential's identity (Codex `chatgpt-account-id`, Google
298
+ * `projectId`, GitHub `enterpriseUrl`) consume this shape directly; the
299
+ * refresh slot is deliberately omitted because rotating refresh tokens never
300
+ * leave {@link AuthStorage}.
301
+ */
302
+ export interface OAuthAccess {
303
+ accessToken: string;
304
+ accountId?: string;
305
+ email?: string;
306
+ projectId?: string;
307
+ enterpriseUrl?: string;
308
+ }
294
309
  export interface InvalidateCredentialMatchingOptions {
295
310
  signal?: AbortSignal;
296
311
  sessionId?: string;
@@ -493,6 +508,22 @@ export declare class AuthStorage {
493
508
  * 6. Fallback resolver (models.yml custom providers, last-resort)
494
509
  */
495
510
  getApiKey(provider: string, sessionId?: string, options?: AuthApiKeyOptions): Promise<string | undefined>;
511
+ /**
512
+ * Resolve the OAuth credential for `provider`, refreshing through the same
513
+ * pipeline as {@link AuthStorage.getApiKey} but returning the refreshed
514
+ * {@link OAuthAccess} (raw access token + identity metadata) instead of
515
+ * the API-key bytes.
516
+ *
517
+ * Use this when the caller needs to inject identity headers alongside the
518
+ * bearer (Codex `chatgpt-account-id`, Google `project`, GitHub
519
+ * `enterpriseUrl`). For pure "give me the bytes for `Authorization`"
520
+ * scenarios, prefer {@link AuthStorage.getApiKey}.
521
+ *
522
+ * Returns `undefined` when no OAuth credential is available, the
523
+ * credential fails to refresh, or runtime/config overrides have replaced
524
+ * OAuth with an explicit API key.
525
+ */
526
+ getOAuthAccess(provider: string, sessionId?: string, options?: AuthApiKeyOptions): Promise<OAuthAccess | undefined>;
496
527
  invalidateCredentialMatching(provider: string, apiKey: string, options?: InvalidateCredentialMatchingOptions): Promise<boolean>;
497
528
  invalidateCredentialMatching(provider: string, apiKey: string, signal?: AbortSignal): Promise<boolean>;
498
529
  /**
@@ -40,7 +40,6 @@ export * from "./usage/zai";
40
40
  export * from "./utils/anthropic-auth";
41
41
  export * from "./utils/discovery";
42
42
  export * from "./utils/event-stream";
43
- export * from "./utils/h2-fetch";
44
43
  export * from "./utils/oauth";
45
44
  export type { OAuthCredentials, OAuthProvider, OAuthProviderId, OAuthProviderInfo, } from "./utils/oauth/types";
46
45
  export * from "./utils/overflow";
@@ -8,9 +8,12 @@
8
8
  */
9
9
  import type { Effort } from "../model-thinking";
10
10
  import type { StreamFunction, StreamOptions, ThinkingBudgets } from "../types";
11
+ export type BedrockThinkingDisplay = "summarized" | "omitted";
11
12
  export interface BedrockOptions extends StreamOptions {
12
13
  region?: string;
13
14
  profile?: string;
15
+ /** Amazon Bedrock API key sent as `Authorization: Bearer`, ahead of SigV4 credential resolution. */
16
+ bearerToken?: string;
14
17
  toolChoice?: "auto" | "any" | "none" | {
15
18
  type: "tool";
16
19
  name: string;
@@ -18,5 +21,18 @@ export interface BedrockOptions extends StreamOptions {
18
21
  reasoning?: Effort;
19
22
  thinkingBudgets?: ThinkingBudgets;
20
23
  interleavedThinking?: boolean;
24
+ /**
25
+ * Controls how Claude returns thinking content in Bedrock responses.
26
+ * - `"summarized"`: thinking blocks include human-readable summaries (default here).
27
+ * - `"omitted"`: thinking content is suppressed; the encrypted signature still
28
+ * travels back for multi-turn continuity.
29
+ *
30
+ * Starting with Claude Opus 4.7 the Anthropic API default is `"omitted"`, which
31
+ * leaves callers waiting on a silent stream during long reasoning runs (issue
32
+ * #1373). We default to `"summarized"` so adaptive-thinking models that accept
33
+ * the field keep producing visible thinking deltas. Older adaptive-thinking
34
+ * models (Opus 4.6, Sonnet 4.6+) reject the field, so we omit it for them.
35
+ */
36
+ thinkingDisplay?: BedrockThinkingDisplay;
21
37
  }
22
38
  export declare const streamBedrock: StreamFunction<"bedrock-converse-stream">;
@@ -1,4 +1,5 @@
1
- import type { CursorExecHandlerResult, CursorExecHandlers, CursorToolResultHandler, StreamFunction, StreamOptions, ToolResultMessage } from "../types";
1
+ import { type JsonValue } from "@bufbuild/protobuf";
2
+ import type { CursorExecHandlerResult, CursorExecHandlers, CursorToolResultHandler, Message, StreamFunction, StreamOptions, ToolResultMessage } from "../types";
2
3
  export declare const CURSOR_API_URL = "https://api2.cursor.sh";
3
4
  export declare const CURSOR_CLIENT_VERSION = "cli-2026.01.09-231024f";
4
5
  export interface CursorOptions extends StreamOptions {
@@ -34,3 +35,8 @@ export declare function resolveExecHandler<TArgs, TResult>(args: TArgs, handler:
34
35
  * an empty `rootPromptMessagesJson` head.
35
36
  */
36
37
  export declare function buildCursorSystemPromptJsons(systemPrompt: readonly string[] | undefined): string[];
38
+ /** Exported for tests: decodes Cursor history blobs built from conversation messages. */
39
+ export declare function buildCursorHistoryForTest(messages: Message[]): {
40
+ rootPromptMessagesJson: unknown[];
41
+ turnUserMessagesJson: JsonValue[];
42
+ };
@@ -163,8 +163,6 @@ export declare class MockModel implements Model<MockApi> {
163
163
  /** Reset recorded calls AND the extras queue. The constructor `responses` are NOT reset. */
164
164
  reset(): void;
165
165
  }
166
- /** @deprecated Use {@link MockModel}; the class IS the handle. */
167
- export type MockModelHandle = MockModel;
168
166
  /** Check whether `model` was produced by `createMockModel`. */
169
167
  export declare function isMockModel(model: Model<Api>): model is MockModel;
170
168
  /** Construct a mock model. */
@@ -2,6 +2,8 @@ import type OpenAI from "openai";
2
2
  import type { ResponseInput, ResponseInputContent, ResponseOutputItem } from "openai/resources/responses/responses";
3
3
  import { type Api, type AssistantMessage, type ImageContent, type Model, type ServiceTier, type StopReason, type StreamOptions, type TextContent, type TextSignatureV1, type ToolResultMessage } from "../types";
4
4
  import type { AssistantMessageEventStream } from "../utils/event-stream";
5
+ export declare const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string>;
6
+ export declare function isOpenAIResponsesProgressEvent(event: unknown): boolean;
5
7
  export declare function encodeTextSignatureV1(id: string, phase?: TextSignatureV1["phase"]): string;
6
8
  export declare function parseTextSignature(signature: string | undefined): {
7
9
  id: string;
@@ -185,11 +185,17 @@ export interface StreamOptions {
185
185
  */
186
186
  metadata?: Record<string, unknown>;
187
187
  /**
188
- * Optional session identifier for providers that support session-based caching.
189
- * Providers can use this to enable prompt caching, request routing, or other
190
- * session-aware features. Ignored by providers that don't support it.
188
+ * Optional session identifier for providers that support session-based
189
+ * routing, request affinity, or transport reuse. Providers may also use this
190
+ * as the prompt-cache key when `promptCacheKey` is not set.
191
191
  */
192
192
  sessionId?: string;
193
+ /**
194
+ * Optional prompt-cache identity. When set, OpenAI Responses-compatible
195
+ * providers use this for `prompt_cache_key` while keeping `sessionId` for
196
+ * provider routing / conversation headers.
197
+ */
198
+ promptCacheKey?: string;
193
199
  /**
194
200
  * Provider-scoped mutable state store for this agent session.
195
201
  * Providers can use this to persist transport/session state between turns.
@@ -205,20 +211,37 @@ export interface StreamOptions {
205
211
  */
206
212
  onResponse?: (response: ProviderResponseMetadata, model?: Model<Api>) => void | Promise<void>;
207
213
  /**
208
- * Optional callback for raw Server-Sent Events as they arrive from HTTP streaming providers.
214
+ * Optional callback for raw Server-Sent Events as they arrive from HTTP streaming providers,
215
+ * plus synthesized SSE-shaped frames for the Codex WebSocket transport (one synthetic frame
216
+ * per JSON request/response message). WebSocket frames are tagged with a leading
217
+ * `: ws → <type>` (outbound) or `: ws ← <type>` (inbound) comment line in `RawSseEvent.raw`.
209
218
  *
210
219
  * Diagnostic only: provider implementations must ignore callback failures and must not
211
220
  * let observers alter stream contents.
212
221
  */
213
222
  onSseEvent?: (event: RawSseEvent, model?: Model<Api>) => void;
214
223
  /**
215
- * Optional override for the first streamed event watchdog in milliseconds.
216
- * Set to 0 to disable the first-event watchdog for this request.
224
+ * Optional override for the first-event watchdog in milliseconds. Built-in
225
+ * providers apply this budget twice when they can: once to the underlying
226
+ * SDK/request while waiting for the HTTP stream object to exist, then again
227
+ * in the iterator while waiting for the first semantic stream event. Set to
228
+ * `0` to disable both layers for this request. After the first semantic
229
+ * event arrives, `streamIdleTimeoutMs` governs inter-event stalls. Falls
230
+ * back to `PI_STREAM_FIRST_EVENT_TIMEOUT_MS` and then to a 100s default.
231
+ *
232
+ * Iterator-level honored by: every built-in provider (via the lazy-stream
233
+ * forwarder in `register-builtins`). SDK-request honored by:
234
+ * `openai-completions`, `openai-responses`, `azure-openai-responses`,
235
+ * `anthropic-messages`.
217
236
  */
218
237
  streamFirstEventTimeoutMs?: number;
219
238
  /**
220
- * Optional override for the maximum idle gap between streamed events in milliseconds.
221
- * Set to 0 to disable the inter-event idle watchdog for this request.
239
+ * Optional override for the maximum idle gap between streamed events in
240
+ * milliseconds. Once the first event arrives, this guards against silent
241
+ * mid-stream stalls (broker dies, half-open socket, model produces no real
242
+ * progress for too long). Set to `0` to disable. Falls back to
243
+ * `PI_STREAM_IDLE_TIMEOUT_MS` (alias: `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS`)
244
+ * and then to a 120s default.
222
245
  */
223
246
  streamIdleTimeoutMs?: number;
224
247
  /**
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Iterates a provider stream until it yields, ends, errors, or the caller aborts.
3
+ */
4
+ export declare function iterateUntilAbort<T>(iterable: AsyncIterable<T>, signal?: AbortSignal): AsyncGenerator<T>;
@@ -1,44 +1,31 @@
1
- import { type AuthCredentialStore } from "../auth-storage";
2
1
  /** Auth configuration for Anthropic */
3
2
  export interface AnthropicAuthConfig {
4
3
  apiKey: string;
5
4
  baseUrl: string;
6
5
  isOAuth: boolean;
7
6
  }
8
- /** OAuth credential for Anthropic API access */
9
- export interface AnthropicOAuthCredential {
10
- type: "oauth";
11
- access: string;
12
- refresh?: string;
13
- /** Expiry timestamp in milliseconds */
14
- expires: number;
15
- }
7
+ export declare function resolveAnthropicBaseUrlFromEnv(): string | undefined;
16
8
  /**
17
9
  * Checks if a token is an OAuth token by looking for sk-ant-oat prefix.
18
- * @param apiKey - The API key to check
19
- * @returns True if the token is an OAuth token
20
10
  */
21
11
  export declare function isOAuthToken(apiKey: string): boolean;
22
12
  /**
23
- * Finds Anthropic auth config using priority:
24
- * 1. ANTHROPIC_SEARCH_API_KEY / ANTHROPIC_SEARCH_BASE_URL
25
- * 2. ANTHROPIC_FOUNDRY_API_KEY override when Foundry mode is enabled
26
- * 3. OAuth in agent.db (with 5-minute expiry buffer)
27
- * 4. API key in agent.db
28
- * 5. ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL fallback
29
- * @param store - Optional credential store (creates one from default db path if not provided)
30
- * @returns The first valid auth configuration found, or null if none available
13
+ * Build an {@link AnthropicAuthConfig} from an already-resolved API key.
14
+ *
15
+ * `apiKey` is whatever the caller chose for `Authorization`/`x-api-key` —
16
+ * usually `authStorage.getApiKey("anthropic")`. `baseUrl` overrides the
17
+ * env-derived base; pass `undefined` to fall back to FOUNDRY/ANTHROPIC env
18
+ * resolution and finally `DEFAULT_BASE_URL`.
19
+ *
20
+ * `isOAuth` is derived from the token prefix so the helper stays pure: callers
21
+ * never have to thread the OAuth flag through their own resolution logic.
31
22
  */
32
- export declare function findAnthropicAuth(store?: AuthCredentialStore): Promise<AnthropicAuthConfig | null>;
23
+ export declare function buildAnthropicAuthConfig(apiKey: string, baseUrl?: string): AnthropicAuthConfig;
33
24
  /**
34
25
  * Builds HTTP headers for Anthropic API requests (search variant).
35
- * @param auth - The authentication configuration
36
- * @returns Headers object ready for use in fetch requests
37
26
  */
38
27
  export declare function buildAnthropicSearchHeaders(auth: AnthropicAuthConfig): Record<string, string>;
39
28
  /**
40
29
  * Builds the full API URL for Anthropic messages endpoint.
41
- * @param auth - The authentication configuration
42
- * @returns The complete API URL with beta query parameter
43
30
  */
44
31
  export declare function buildAnthropicUrl(auth: AnthropicAuthConfig): string;
@@ -3,8 +3,12 @@
3
3
  *
4
4
  * `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` is accepted as a backward-compatible alias.
5
5
  * Set `PI_STREAM_IDLE_TIMEOUT_MS=0` to disable the watchdog.
6
+ *
7
+ * Providers that legitimately stream much slower than the global default can pass
8
+ * `fallbackMs` to widen the floor used when neither env var nor caller option is set.
9
+ * Caller options still take precedence; env overrides still trump the fallback.
6
10
  */
7
- export declare function getStreamIdleTimeoutMs(): number | undefined;
11
+ export declare function getStreamIdleTimeoutMs(fallbackMs?: number): number | undefined;
8
12
  /**
9
13
  * Returns the idle timeout used for OpenAI-family streaming transports.
10
14
  *
@@ -17,16 +21,14 @@ export declare function getOpenAIStreamIdleTimeoutMs(): number | undefined;
17
21
  * so the default never undershoots the steady-state idle timeout.
18
22
  *
19
23
  * Set `PI_STREAM_FIRST_EVENT_TIMEOUT_MS=0` to disable the watchdog.
24
+ *
25
+ * Providers whose first response can legitimately take longer (heavy reasoning,
26
+ * slow cold-start proxies) can pass `fallbackMs` to widen the floor used when
27
+ * neither env var nor caller option is set. Caller options still take precedence;
28
+ * env overrides still trump the fallback.
20
29
  */
21
- export declare function getStreamFirstEventTimeoutMs(idleTimeoutMs?: number): number | undefined;
22
- export type Watchdog = NodeJS.Timeout | undefined;
23
- /**
24
- * Starts a watchdog that aborts a request if no first stream event arrives in time.
25
- * Call `markFirstEventReceived()` as soon as the first event is observed.
26
- */
27
- export declare function createWatchdog(timeoutMs: number | undefined, onTimeout: () => void): Watchdog;
30
+ export declare function getStreamFirstEventTimeoutMs(idleTimeoutMs?: number, fallbackMs?: number): number | undefined;
28
31
  export interface IdleTimeoutIteratorOptions {
29
- watchdog?: Watchdog;
30
32
  idleTimeoutMs?: number;
31
33
  firstItemTimeoutMs?: number;
32
34
  errorMessage: string;
@@ -17,13 +17,16 @@ export declare function unregisterOAuthProviders(sourceId: string): void;
17
17
  */
18
18
  export declare function refreshOAuthToken(provider: OAuthProvider, credentials: OAuthCredentials): Promise<OAuthCredentials>;
19
19
  /**
20
- * Get API key for a provider from OAuth credentials.
21
- * Automatically refreshes expired tokens.
20
+ * Build API-key bytes for a provider from an already-fresh OAuth credential.
22
21
  *
23
- * For providers that need credential metadata at request time, returns JSON-encoded credentials
24
- * plus refresh/expiry metadata for proactive refresh support.
22
+ * Refresh is owned by AuthStorage. This helper deliberately refuses expired
23
+ * credentials so it cannot POST broker redaction sentinels to upstream token
24
+ * endpoints as a side channel.
25
+ *
26
+ * For providers that need credential metadata at request time, returns
27
+ * JSON-encoded credentials plus expiry metadata for diagnostics/edge guards.
25
28
  * @returns API key string, or null if no credentials
26
- * @throws Error if refresh fails
29
+ * @throws Error if the credential is expired and must be refreshed upstream
27
30
  */
28
31
  export declare function getOAuthApiKey(provider: OAuthProvider, credentials: Record<string, OAuthCredentials>): Promise<{
29
32
  newCredentials: OAuthCredentials;
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Shared helpers for mapping `StreamOptions.streamFirstEventTimeoutMs` onto
3
+ * underlying SDK request-timeout options.
4
+ *
5
+ * The hint is intentionally not a watchdog — it just narrows the SDK's
6
+ * "transport timeout" window so a stuck pre-stream request fails fast
7
+ * instead of hanging on the default (often multi-minute) SDK timeout. Once
8
+ * the stream actually starts, silence is not failure; callers must abort
9
+ * to interrupt a quiet stream.
10
+ */
11
+ /**
12
+ * Coerce a caller-supplied `streamFirstEventTimeoutMs` into a positive integer suitable
13
+ * for the SDK's `timeout` option. Returns `undefined` when the caller passed nothing,
14
+ * a non-finite value, or a non-positive value (preserving the SDK's default).
15
+ */
16
+ export declare function resolveSdkTimeoutMs(streamFirstEventTimeoutMs: number | undefined): number | undefined;
17
+ /**
18
+ * Build per-request SDK options that combine an abort signal with the optional
19
+ * `streamFirstEventTimeoutMs` request-timeout hint.
20
+ *
21
+ * The returned `{ signal, timeout?, maxRetries? }` shape is compatible with both
22
+ * OpenAI's and Anthropic's `RequestOptions` (and any other SDK that follows the
23
+ * Stainless conventions), so callers from any of those providers can spread the
24
+ * result directly into `client.X.create(params, requestOptions)`.
25
+ *
26
+ * When the hint is set, retries are forced to zero so the SDK does not silently
27
+ * extend the caller's explicit deadline by re-attempting after a timeout.
28
+ */
29
+ export declare function createSdkStreamRequestOptions(signal: AbortSignal, streamFirstEventTimeoutMs: number | undefined): {
30
+ signal: AbortSignal;
31
+ timeout?: number;
32
+ maxRetries?: number;
33
+ };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.3.2",
4
+ "version": "15.4.1",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -43,7 +43,7 @@
43
43
  "dependencies": {
44
44
  "@anthropic-ai/sdk": "^0.94.0",
45
45
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.3.2",
46
+ "@oh-my-pi/pi-utils": "15.4.1",
47
47
  "openai": "^6.36.0",
48
48
  "partial-json": "^0.1.7",
49
49
  "zod": "4.4.3"
@@ -145,7 +145,9 @@ function buildStreamOptions(parsed: ParsedFormatRequest, api: Api, signal: Abort
145
145
  // Client-supplied `prompt_cache_key` wins; otherwise derive a stable
146
146
  // key from the model + system + tools so prefix caching engages on
147
147
  // Codex-class backends across turns of the same logical conversation.
148
- opts.sessionId = options.promptCacheKey ?? deriveSessionId(parsed.modelId, parsed.context);
148
+ const promptCacheKey = options.promptCacheKey ?? deriveSessionId(parsed.modelId, parsed.context);
149
+ opts.promptCacheKey = promptCacheKey;
150
+ opts.sessionId = promptCacheKey;
149
151
  if (options.thinkingBudgets) {
150
152
  opts.thinkingBudgets = { ...(opts.thinkingBudgets ?? {}), ...options.thinkingBudgets };
151
153
  }
@@ -67,7 +67,7 @@ export interface AuthGatewayParsedRequestOptions {
67
67
  serviceTier?: ServiceTier;
68
68
  /** Cache retention hint derived from inbound `cache_control` markers. */
69
69
  cacheRetention?: CacheRetention;
70
- /** OpenAI Responses `prompt_cache_key`; bridges to pi-ai `sessionId`. */
70
+ /** OpenAI Responses `prompt_cache_key`; also seeds provider routing when no separate session id exists. */
71
71
  promptCacheKey?: string;
72
72
  /** OpenAI Responses `previous_response_id` for response chaining. */
73
73
  previousResponseId?: string;