@oh-my-pi/pi-ai 14.9.9 → 15.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/package.json +5 -5
- package/src/auth-storage.ts +160 -17
- package/src/index.ts +1 -0
- package/src/provider-models/ollama.ts +5 -1
- package/src/providers/anthropic.ts +12 -9
- package/src/providers/azure-openai-responses.ts +19 -83
- package/src/providers/google-gemini-cli.ts +37 -204
- package/src/providers/google-gemini-headers.ts +0 -100
- package/src/providers/google-shared.ts +446 -4
- package/src/providers/google-vertex.ts +19 -371
- package/src/providers/google.ts +16 -359
- package/src/providers/kimi.ts +15 -96
- package/src/providers/ollama.ts +17 -2
- package/src/providers/openai-anthropic-shim.ts +136 -0
- package/src/providers/openai-codex-responses.ts +38 -199
- package/src/providers/openai-completions.ts +17 -24
- package/src/providers/openai-responses-shared.ts +143 -24
- package/src/providers/openai-responses.ts +20 -76
- package/src/providers/synthetic.ts +15 -102
- package/src/types.ts +13 -1
- package/src/utils/h2-fetch.ts +47 -0
- package/src/utils/http-inspector.ts +2 -2
- package/src/utils/oauth/github-copilot.ts +6 -10
- package/src/utils/oauth/kimi.ts +4 -3
- package/src/utils/oauth/lm-studio.ts +0 -2
- package/src/utils/retry.ts +8 -130
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.0.1] - 2026-05-14
|
|
6
|
+
### Breaking Changes
|
|
7
|
+
|
|
8
|
+
- Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
|
|
13
|
+
- Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
|
|
14
|
+
|
|
15
|
+
## [15.0.0] - 2026-05-13
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
|
|
19
|
+
- Added `AuthStorage.onCredentialDisabled(listener)` — a multi-subscriber `on/off` API for `credential_disabled` events. Returns an unsubscribe function; calling it more than once is a no-op. Multiple subscribers all receive every disable event, with synchronous and async exceptions isolated per-listener so a misbehaving subscriber cannot starve the rest of the chain. Buffer-and-replay semantics are preserved: events emitted while no listener is subscribed are buffered (FIFO, capped at 32) and replayed once to the listener that triggers the empty→non-empty transition. After every subscriber unsubscribes, subsequent disable events buffer again until the next subscribe.
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
|
|
23
|
+
- Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
|
|
24
|
+
- Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
|
|
25
|
+
|
|
5
26
|
## [14.9.3] - 2026-05-10
|
|
6
27
|
|
|
7
28
|
### Fixed
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.9.9",
|
|
4
|
+
"version": "15.0.1",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "
|
|
50
|
-
"@oh-my-pi/pi-utils": "
|
|
49
|
+
"@oh-my-pi/pi-natives": "15.0.1",
|
|
50
|
+
"@oh-my-pi/pi-utils": "15.0.1",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -58,10 +58,10 @@
|
|
|
58
58
|
"zod": "4.4.3"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3.
|
|
61
|
+
"@types/bun": "^1.3.14"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
|
-
"bun": ">=1.3.
|
|
64
|
+
"bun": ">=1.3.14"
|
|
65
65
|
},
|
|
66
66
|
"files": [
|
|
67
67
|
"src",
|
package/src/auth-storage.ts
CHANGED
|
@@ -154,6 +154,12 @@ const USAGE_CACHE_PREFIX = "usage_cache:";
|
|
|
154
154
|
const USAGE_REPORT_TTL_MS = 30_000;
|
|
155
155
|
const DEFAULT_USAGE_REQUEST_TIMEOUT_MS = 3_000;
|
|
156
156
|
const DEFAULT_OAUTH_REFRESH_TIMEOUT_MS = 10_000;
|
|
157
|
+
/**
|
|
158
|
+
* Cap on the buffered credential_disabled backlog held while no handler is attached.
|
|
159
|
+
* In practice the backlog is 0–N where N ≈ active providers (≤ ~20). The cap exists so
|
|
160
|
+
* pathological detach-without-reattach loops can't grow memory unboundedly.
|
|
161
|
+
*/
|
|
162
|
+
const MAX_PENDING_DISABLED_EVENTS = 32;
|
|
157
163
|
|
|
158
164
|
type UsageCacheEntry<T> = {
|
|
159
165
|
value: T;
|
|
@@ -283,7 +289,16 @@ export class AuthStorage {
|
|
|
283
289
|
#fallbackResolver?: (provider: string) => string | undefined;
|
|
284
290
|
#store: AuthCredentialStore;
|
|
285
291
|
#configValueResolver: (config: string) => Promise<string | undefined>;
|
|
286
|
-
#
|
|
292
|
+
#credentialDisabledListeners: Set<(event: CredentialDisabledEvent) => void | Promise<void>> = new Set();
|
|
293
|
+
/**
|
|
294
|
+
* Buffer for credential_disabled events fired while no listener is subscribed.
|
|
295
|
+
* Drained (in insertion order) to the first listener that triggers the empty→non-empty
|
|
296
|
+
* transition via {@link AuthStorage.onCredentialDisabled}. Bounded at
|
|
297
|
+
* {@link MAX_PENDING_DISABLED_EVENTS}; oldest entries are dropped to keep memory predictable
|
|
298
|
+
* if a long-lived AuthStorage somehow accumulates a backlog (provider count is naturally small,
|
|
299
|
+
* but a process that runs without subscribers for a long time shouldn't grow this unboundedly).
|
|
300
|
+
*/
|
|
301
|
+
#pendingDisabledEvents: CredentialDisabledEvent[] = [];
|
|
287
302
|
#closed = false;
|
|
288
303
|
|
|
289
304
|
constructor(store: AuthCredentialStore, options: AuthStorageOptions = {}) {
|
|
@@ -294,7 +309,11 @@ export class AuthStorage {
|
|
|
294
309
|
this.#usageCache = new AuthStorageUsageCache(this.#store);
|
|
295
310
|
this.#usageFetch = options.usageFetch ?? fetch;
|
|
296
311
|
this.#usageRequestTimeoutMs = options.usageRequestTimeoutMs ?? DEFAULT_USAGE_REQUEST_TIMEOUT_MS;
|
|
297
|
-
|
|
312
|
+
if (options.onCredentialDisabled) {
|
|
313
|
+
// Constructor-registered subscribers are permanent for this AuthStorage's lifetime;
|
|
314
|
+
// the unsubscribe handle is intentionally discarded.
|
|
315
|
+
this.onCredentialDisabled(options.onCredentialDisabled);
|
|
316
|
+
}
|
|
298
317
|
this.#usageLogger =
|
|
299
318
|
options.usageLogger ??
|
|
300
319
|
({
|
|
@@ -324,6 +343,39 @@ export class AuthStorage {
|
|
|
324
343
|
this.#store.close();
|
|
325
344
|
}
|
|
326
345
|
|
|
346
|
+
/**
|
|
347
|
+
* Subscribe to {@link CredentialDisabledEvent}s. Multiple subscribers are supported and
|
|
348
|
+
* each fires for every disable event; subscribers are invoked in registration order with
|
|
349
|
+
* exceptions and async rejections isolated per-listener so a misbehaving subscriber
|
|
350
|
+
* cannot break the disable path or starve the rest of the chain.
|
|
351
|
+
*
|
|
352
|
+
* If `credential_disabled` events were emitted while no listener was subscribed, they are
|
|
353
|
+
* replayed (in insertion order) to the listener that triggers the empty→non-empty
|
|
354
|
+
* transition. The drain is one-shot — listeners that subscribe after that no longer see
|
|
355
|
+
* past events.
|
|
356
|
+
*
|
|
357
|
+
* Returns an unsubscribe function. The function is idempotent: calling it more than once
|
|
358
|
+
* is a no-op. After every subscriber has unsubscribed, subsequent disable events buffer
|
|
359
|
+
* again until the next subscribe.
|
|
360
|
+
*
|
|
361
|
+
* @param listener Callback invoked with each disable event. May be sync or async.
|
|
362
|
+
* @returns A function that removes this listener from the subscriber set.
|
|
363
|
+
*/
|
|
364
|
+
onCredentialDisabled(listener: (event: CredentialDisabledEvent) => void | Promise<void>): () => void {
|
|
365
|
+
const wasEmpty = this.#credentialDisabledListeners.size === 0;
|
|
366
|
+
this.#credentialDisabledListeners.add(listener);
|
|
367
|
+
if (wasEmpty && this.#pendingDisabledEvents.length > 0) {
|
|
368
|
+
const drained = this.#pendingDisabledEvents;
|
|
369
|
+
this.#pendingDisabledEvents = [];
|
|
370
|
+
for (const event of drained) {
|
|
371
|
+
this.#invokeListener(listener, event);
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
return () => {
|
|
375
|
+
this.#credentialDisabledListeners.delete(listener);
|
|
376
|
+
};
|
|
377
|
+
}
|
|
378
|
+
|
|
327
379
|
/**
|
|
328
380
|
* Set a runtime API key override (not persisted to disk).
|
|
329
381
|
* Used for CLI --api-key flag.
|
|
@@ -615,33 +667,65 @@ export class AuthStorage {
|
|
|
615
667
|
}
|
|
616
668
|
|
|
617
669
|
/**
|
|
618
|
-
*
|
|
619
|
-
*
|
|
620
|
-
*
|
|
670
|
+
* CAS-style disable used when OAuth refresh definitively fails: only disables the row when its
|
|
671
|
+
* persisted `data` still matches the credential we attempted to refresh.
|
|
672
|
+
* Returns `false` when a peer rotated the row between our pre-check and the
|
|
673
|
+
* disable, so the caller can reload and retry instead of clobbering the
|
|
674
|
+
* freshly-rotated credential.
|
|
621
675
|
*/
|
|
622
|
-
#
|
|
676
|
+
#tryDisableCredentialAtIfMatches(
|
|
677
|
+
provider: string,
|
|
678
|
+
index: number,
|
|
679
|
+
expectedCredential: AuthCredential,
|
|
680
|
+
disabledCause: string,
|
|
681
|
+
): boolean {
|
|
623
682
|
const entries = this.#getStoredCredentials(provider);
|
|
624
|
-
if (index < 0 || index >= entries.length) return;
|
|
625
|
-
|
|
683
|
+
if (index < 0 || index >= entries.length) return false;
|
|
684
|
+
const target = entries[index];
|
|
685
|
+
const serialized = serializeCredential(provider, expectedCredential);
|
|
686
|
+
if (!serialized) return false;
|
|
687
|
+
const disabled = this.#store.tryDisableAuthCredentialIfMatches(target.id, serialized.data, disabledCause);
|
|
688
|
+
if (!disabled) return false;
|
|
626
689
|
const updated = entries.filter((_value, idx) => idx !== index);
|
|
627
690
|
this.#setStoredCredentials(provider, updated);
|
|
628
691
|
this.#resetProviderAssignments(provider);
|
|
629
692
|
this.#emitCredentialDisabled({ provider, disabledCause });
|
|
693
|
+
return true;
|
|
630
694
|
}
|
|
631
695
|
|
|
632
696
|
#emitCredentialDisabled(event: CredentialDisabledEvent): void {
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
697
|
+
if (this.#credentialDisabledListeners.size === 0) {
|
|
698
|
+
// No subscribers — buffer for later replay. Cap the backlog so a process that runs
|
|
699
|
+
// without subscribers for a long time can't grow memory unboundedly; drop oldest
|
|
700
|
+
// under pressure.
|
|
701
|
+
if (this.#pendingDisabledEvents.length >= MAX_PENDING_DISABLED_EVENTS) {
|
|
702
|
+
this.#pendingDisabledEvents.shift();
|
|
703
|
+
}
|
|
704
|
+
this.#pendingDisabledEvents.push(event);
|
|
705
|
+
return;
|
|
706
|
+
}
|
|
707
|
+
// Snapshot before iteration so a listener that subscribes/unsubscribes during fan-out
|
|
708
|
+
// can't observe a partially-mutated set or receive an event it just registered for.
|
|
709
|
+
const listeners = [...this.#credentialDisabledListeners];
|
|
710
|
+
for (const listener of listeners) {
|
|
711
|
+
this.#invokeListener(listener, event);
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
#invokeListener(
|
|
716
|
+
listener: (event: CredentialDisabledEvent) => void | Promise<void>,
|
|
717
|
+
event: CredentialDisabledEvent,
|
|
718
|
+
): void {
|
|
719
|
+
const logListenerError = (error: unknown): void => {
|
|
720
|
+
logger.warn("onCredentialDisabled listener threw", { provider: event.provider, error: String(error) });
|
|
637
721
|
};
|
|
638
722
|
try {
|
|
639
|
-
const result =
|
|
723
|
+
const result = listener(event);
|
|
640
724
|
if (result && typeof (result as PromiseLike<void>).then === "function") {
|
|
641
|
-
(result as Promise<void>).catch(
|
|
725
|
+
(result as Promise<void>).catch(logListenerError);
|
|
642
726
|
}
|
|
643
727
|
} catch (error) {
|
|
644
|
-
|
|
728
|
+
logListenerError(error);
|
|
645
729
|
}
|
|
646
730
|
}
|
|
647
731
|
|
|
@@ -1993,8 +2077,45 @@ export class AuthStorage {
|
|
|
1993
2077
|
});
|
|
1994
2078
|
|
|
1995
2079
|
if (isDefinitiveFailure) {
|
|
1996
|
-
//
|
|
1997
|
-
|
|
2080
|
+
// The credential at this index may have been rotated by another process between
|
|
2081
|
+
// our in-memory snapshot and the refresh attempt: Anthropic rotates refresh
|
|
2082
|
+
// tokens on every use, so the peer's success leaves our stored token invalid.
|
|
2083
|
+
// Re-read the row from disk before marking it disabled — if the persisted
|
|
2084
|
+
// refresh token has changed, the peer rotation succeeded and we should pick
|
|
2085
|
+
// up the new credential instead of soft-deleting the row that the peer just
|
|
2086
|
+
// updated.
|
|
2087
|
+
const credentialId = this.#getStoredCredentials(provider)[selection.index]?.id;
|
|
2088
|
+
if (credentialId !== undefined) {
|
|
2089
|
+
const latestRow = this.#store.listAuthCredentials(provider).find(row => row.id === credentialId);
|
|
2090
|
+
const latestCredential = latestRow?.credential;
|
|
2091
|
+
if (latestCredential?.type === "oauth" && latestCredential.refresh !== selection.credential.refresh) {
|
|
2092
|
+
logger.debug("OAuth refresh race detected; another process rotated token first", {
|
|
2093
|
+
provider,
|
|
2094
|
+
index: selection.index,
|
|
2095
|
+
credentialId,
|
|
2096
|
+
});
|
|
2097
|
+
await this.reload();
|
|
2098
|
+
return this.getApiKey(provider, sessionId, options);
|
|
2099
|
+
}
|
|
2100
|
+
}
|
|
2101
|
+
// Permanently disable invalid credentials with an explicit cause for inspection/debugging.
|
|
2102
|
+
// Use a CAS-style disable conditioned on the row still containing the stale credential
|
|
2103
|
+
// we tried to refresh, so a peer rotation that lands between the pre-check above and
|
|
2104
|
+
// this disable doesn't soft-delete the freshly-rotated row.
|
|
2105
|
+
const disabled = this.#tryDisableCredentialAtIfMatches(
|
|
2106
|
+
provider,
|
|
2107
|
+
selection.index,
|
|
2108
|
+
selection.credential,
|
|
2109
|
+
`oauth refresh failed: ${errorMsg}`,
|
|
2110
|
+
);
|
|
2111
|
+
if (!disabled) {
|
|
2112
|
+
logger.debug("OAuth refresh disable lost CAS; reloading after peer rotation", {
|
|
2113
|
+
provider,
|
|
2114
|
+
index: selection.index,
|
|
2115
|
+
});
|
|
2116
|
+
await this.reload();
|
|
2117
|
+
return this.getApiKey(provider, sessionId, options);
|
|
2118
|
+
}
|
|
1998
2119
|
if (this.#getCredentialsForProvider(provider).some(credential => credential.type === "oauth")) {
|
|
1999
2120
|
return this.getApiKey(provider, sessionId, options);
|
|
2000
2121
|
}
|
|
@@ -2280,6 +2401,7 @@ export class AuthCredentialStore {
|
|
|
2280
2401
|
#insertStmt: Statement;
|
|
2281
2402
|
#updateStmt: Statement;
|
|
2282
2403
|
#deleteStmt: Statement;
|
|
2404
|
+
#deleteIfMatchesStmt: Statement;
|
|
2283
2405
|
#deleteByProviderStmt: Statement;
|
|
2284
2406
|
#hardDeleteStmt: Statement;
|
|
2285
2407
|
#getCacheStmt: Statement;
|
|
@@ -2309,6 +2431,9 @@ export class AuthCredentialStore {
|
|
|
2309
2431
|
this.#deleteStmt = this.#db.prepare(
|
|
2310
2432
|
`UPDATE auth_credentials SET disabled_cause = ?, updated_at = ${SQLITE_NOW_EPOCH} WHERE id = ?`,
|
|
2311
2433
|
);
|
|
2434
|
+
this.#deleteIfMatchesStmt = this.#db.prepare(
|
|
2435
|
+
`UPDATE auth_credentials SET disabled_cause = ?, updated_at = ${SQLITE_NOW_EPOCH} WHERE id = ? AND data = ? AND disabled_cause IS NULL`,
|
|
2436
|
+
);
|
|
2312
2437
|
this.#deleteByProviderStmt = this.#db.prepare(
|
|
2313
2438
|
`UPDATE auth_credentials SET disabled_cause = ?, updated_at = ${SQLITE_NOW_EPOCH} WHERE provider = ? AND disabled_cause IS NULL`,
|
|
2314
2439
|
);
|
|
@@ -2707,6 +2832,23 @@ export class AuthCredentialStore {
|
|
|
2707
2832
|
}
|
|
2708
2833
|
}
|
|
2709
2834
|
|
|
2835
|
+
/**
|
|
2836
|
+
* CAS-style disable: only soft-deletes the row when its `data` column still
|
|
2837
|
+
* matches `expectedData` and the row has not already been disabled. Used by
|
|
2838
|
+
* the OAuth refresh-failure path to avoid clobbering a peer that rotated the
|
|
2839
|
+
* row between our pre-check and the disable.
|
|
2840
|
+
*/
|
|
2841
|
+
tryDisableAuthCredentialIfMatches(id: number, expectedData: string, disabledCause: string): boolean {
|
|
2842
|
+
try {
|
|
2843
|
+
const result = this.#deleteIfMatchesStmt.run(normalizeDisabledCause(disabledCause), id, expectedData) as {
|
|
2844
|
+
changes: number;
|
|
2845
|
+
};
|
|
2846
|
+
return result.changes === 1;
|
|
2847
|
+
} catch {
|
|
2848
|
+
return false;
|
|
2849
|
+
}
|
|
2850
|
+
}
|
|
2851
|
+
|
|
2710
2852
|
deleteAuthCredentialsForProvider(provider: string, disabledCause: string): void {
|
|
2711
2853
|
try {
|
|
2712
2854
|
this.#deleteByProviderStmt.run(normalizeDisabledCause(disabledCause), provider);
|
|
@@ -2816,6 +2958,7 @@ export class AuthCredentialStore {
|
|
|
2816
2958
|
this.#insertStmt.finalize();
|
|
2817
2959
|
this.#updateStmt.finalize();
|
|
2818
2960
|
this.#deleteStmt.finalize();
|
|
2961
|
+
this.#deleteIfMatchesStmt.finalize();
|
|
2819
2962
|
this.#deleteByProviderStmt.finalize();
|
|
2820
2963
|
this.#hardDeleteStmt.finalize();
|
|
2821
2964
|
this.#getCacheStmt.finalize();
|
package/src/index.ts
CHANGED
|
@@ -37,6 +37,7 @@ export * from "./usage/zai";
|
|
|
37
37
|
export * from "./utils/anthropic-auth";
|
|
38
38
|
export * from "./utils/discovery";
|
|
39
39
|
export * from "./utils/event-stream";
|
|
40
|
+
export * from "./utils/h2-fetch";
|
|
40
41
|
export * from "./utils/overflow";
|
|
41
42
|
export * from "./utils/retry";
|
|
42
43
|
export * from "./utils/schema";
|
package/src/provider-models/ollama.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { fetchWithRetry } from "@oh-my-pi/pi-utils";
|
|
1
2
|
import type { ModelManagerOptions } from "../model-manager";
|
|
2
3
|
import { Effort } from "../model-thinking";
|
|
3
4
|
import type { ThinkingConfig } from "../types";
|
|
@@ -18,6 +19,8 @@ type OllamaShowResponse = {
|
|
|
18
19
|
model_info?: Record<string, unknown>;
|
|
19
20
|
};
|
|
20
21
|
|
|
22
|
+
const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
|
|
23
|
+
|
|
21
24
|
function trimTrailingSlash(value: string): string {
|
|
22
25
|
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
23
26
|
}
|
|
@@ -94,9 +97,10 @@ export function ollamaCloudModelManagerOptions(
|
|
|
94
97
|
if (!apiKey) {
|
|
95
98
|
return [];
|
|
96
99
|
}
|
|
97
|
-
const response = await
|
|
100
|
+
const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
|
|
98
101
|
method: "GET",
|
|
99
102
|
headers: createCloudHeaders(apiKey),
|
|
103
|
+
defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
|
|
100
104
|
});
|
|
101
105
|
if (!response.ok) {
|
|
102
106
|
throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as nodeCrypto from "node:crypto";
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
|
+
import { scheduler } from "node:timers/promises";
|
|
3
4
|
import * as tls from "node:tls";
|
|
4
5
|
import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
|
|
5
6
|
import type {
|
|
@@ -8,7 +9,14 @@ import type {
|
|
|
8
9
|
MessageParam,
|
|
9
10
|
RawMessageStreamEvent,
|
|
10
11
|
} from "@anthropic-ai/sdk/resources/messages";
|
|
11
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
$env,
|
|
14
|
+
extractHttpStatusFromError,
|
|
15
|
+
isEnoent,
|
|
16
|
+
isRetryableError,
|
|
17
|
+
isUnexpectedSocketCloseMessage,
|
|
18
|
+
readSseEvents,
|
|
19
|
+
} from "@oh-my-pi/pi-utils";
|
|
12
20
|
import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
|
|
13
21
|
import { calculateCost } from "../models";
|
|
14
22
|
import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
|
|
@@ -48,12 +56,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
|
|
|
48
56
|
import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
|
|
49
57
|
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
50
58
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
51
|
-
import {
|
|
52
|
-
extractHttpStatusFromError,
|
|
53
|
-
isCopilotRetryableError,
|
|
54
|
-
isRetryableError,
|
|
55
|
-
isUnexpectedSocketCloseMessage,
|
|
56
|
-
} from "../utils/retry";
|
|
59
|
+
import { isCopilotTransientModelError } from "../utils/retry";
|
|
57
60
|
import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
|
|
58
61
|
import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
59
62
|
import {
|
|
@@ -844,7 +847,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
|
|
|
844
847
|
|
|
845
848
|
export function isProviderRetryableError(error: unknown, provider?: string): boolean {
|
|
846
849
|
if (!(error instanceof Error)) return false;
|
|
847
|
-
if (provider === "github-copilot" &&
|
|
850
|
+
if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
|
|
848
851
|
const msg = error.message.toLowerCase();
|
|
849
852
|
if (
|
|
850
853
|
isUnexpectedSocketCloseMessage(msg) ||
|
|
@@ -1287,7 +1290,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1287
1290
|
}
|
|
1288
1291
|
providerRetryAttempt++;
|
|
1289
1292
|
const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
|
|
1290
|
-
await
|
|
1293
|
+
await scheduler.wait(delayMs, { signal: options?.signal });
|
|
1291
1294
|
output.content.length = 0;
|
|
1292
1295
|
output.responseId = undefined;
|
|
1293
1296
|
output.errorMessage = strictFallbackErrorMessage;
|
package/src/providers/azure-openai-responses.ts
CHANGED
|
@@ -6,17 +6,15 @@ import type {
|
|
|
6
6
|
ResponseInput,
|
|
7
7
|
} from "openai/resources/responses/responses";
|
|
8
8
|
import { getEnvApiKey } from "../stream";
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
type Tool,
|
|
19
|
-
type ToolChoice,
|
|
9
|
+
import type {
|
|
10
|
+
AssistantMessage,
|
|
11
|
+
Context,
|
|
12
|
+
Model,
|
|
13
|
+
ServiceTier,
|
|
14
|
+
StreamFunction,
|
|
15
|
+
StreamOptions,
|
|
16
|
+
Tool,
|
|
17
|
+
ToolChoice,
|
|
20
18
|
} from "../types";
|
|
21
19
|
import { normalizeSystemPrompts } from "../utils";
|
|
22
20
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
|
|
|
33
31
|
import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
|
|
34
32
|
import {
|
|
35
33
|
appendResponsesToolResultMessages,
|
|
34
|
+
applyCommonResponsesSamplingParams,
|
|
35
|
+
applyResponsesReasoningParams,
|
|
36
36
|
convertResponsesAssistantMessage,
|
|
37
37
|
convertResponsesInputContent,
|
|
38
|
+
createInitialResponsesAssistantMessage,
|
|
38
39
|
normalizeResponsesToolCallIdForTransform,
|
|
39
40
|
processResponsesStream,
|
|
40
41
|
} from "./openai-responses-shared";
|
|
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
|
|
|
101
102
|
let firstTokenTime: number | undefined;
|
|
102
103
|
const deploymentName = resolveDeploymentName(model, options);
|
|
103
104
|
|
|
104
|
-
const output: AssistantMessage =
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
model: model.id,
|
|
110
|
-
usage: {
|
|
111
|
-
input: 0,
|
|
112
|
-
output: 0,
|
|
113
|
-
cacheRead: 0,
|
|
114
|
-
cacheWrite: 0,
|
|
115
|
-
totalTokens: 0,
|
|
116
|
-
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
117
|
-
},
|
|
118
|
-
stopReason: "stop",
|
|
119
|
-
timestamp: Date.now(),
|
|
120
|
-
};
|
|
105
|
+
const output: AssistantMessage = createInitialResponsesAssistantMessage(
|
|
106
|
+
"azure-openai-responses",
|
|
107
|
+
model.provider,
|
|
108
|
+
model.id,
|
|
109
|
+
);
|
|
121
110
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
122
111
|
const abortTracker = createAbortSourceTracker(options?.signal);
|
|
123
112
|
const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
|
|
@@ -279,31 +268,7 @@ function buildParams(
|
|
|
279
268
|
prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
|
|
280
269
|
};
|
|
281
270
|
|
|
282
|
-
|
|
283
|
-
params.max_output_tokens = options?.maxTokens;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
if (options?.temperature !== undefined) {
|
|
287
|
-
params.temperature = options?.temperature;
|
|
288
|
-
}
|
|
289
|
-
if (options?.topP !== undefined) {
|
|
290
|
-
params.top_p = options.topP;
|
|
291
|
-
}
|
|
292
|
-
if (options?.topK !== undefined) {
|
|
293
|
-
params.top_k = options.topK;
|
|
294
|
-
}
|
|
295
|
-
if (options?.minP !== undefined) {
|
|
296
|
-
params.min_p = options.minP;
|
|
297
|
-
}
|
|
298
|
-
if (options?.presencePenalty !== undefined) {
|
|
299
|
-
params.presence_penalty = options.presencePenalty;
|
|
300
|
-
}
|
|
301
|
-
if (options?.repetitionPenalty !== undefined) {
|
|
302
|
-
params.repetition_penalty = options.repetitionPenalty;
|
|
303
|
-
}
|
|
304
|
-
if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
|
|
305
|
-
params.service_tier = options.serviceTier;
|
|
306
|
-
}
|
|
271
|
+
applyCommonResponsesSamplingParams(params, options, model.provider);
|
|
307
272
|
|
|
308
273
|
if (context.tools) {
|
|
309
274
|
params.tools = convertTools(context.tools);
|
|
@@ -312,36 +277,7 @@ function buildParams(
|
|
|
312
277
|
}
|
|
313
278
|
}
|
|
314
279
|
|
|
315
|
-
|
|
316
|
-
// Always request encrypted reasoning content so reasoning items can be
|
|
317
|
-
// replayed in multi-turn conversations when store is false (items aren't
|
|
318
|
-
// persisted server-side, so we must include the full content).
|
|
319
|
-
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
320
|
-
params.include = ["reasoning.encrypted_content"];
|
|
321
|
-
|
|
322
|
-
if (options?.reasoning || options?.reasoningSummary !== undefined) {
|
|
323
|
-
const reasoningParams: NonNullable<typeof params.reasoning> = {
|
|
324
|
-
effort: options?.reasoning || "medium",
|
|
325
|
-
};
|
|
326
|
-
if (options?.reasoningSummary !== null) {
|
|
327
|
-
reasoningParams.summary = options?.reasoningSummary || "auto";
|
|
328
|
-
}
|
|
329
|
-
params.reasoning = reasoningParams;
|
|
330
|
-
} else {
|
|
331
|
-
if (model.name.toLowerCase().startsWith("gpt-5")) {
|
|
332
|
-
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
|
333
|
-
messages.push({
|
|
334
|
-
role: "developer",
|
|
335
|
-
content: [
|
|
336
|
-
{
|
|
337
|
-
type: "input_text",
|
|
338
|
-
text: "# Juice: 0 !important",
|
|
339
|
-
},
|
|
340
|
-
],
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
}
|
|
280
|
+
applyResponsesReasoningParams(params, model, options, messages);
|
|
345
281
|
|
|
346
282
|
return params;
|
|
347
283
|
}
|