@oh-my-pi/pi-ai 14.9.9 → 15.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,27 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.0.1] - 2026-05-14
6
+ ### Breaking Changes
7
+
8
+ - Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
9
+
10
+ ### Added
11
+
12
+ - Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
13
+ - Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
14
+
15
+ ## [15.0.0] - 2026-05-13
16
+
17
+ ### Added
18
+
19
+ - Added `AuthStorage.onCredentialDisabled(listener)` — a multi-subscriber `on/off` API for `credential_disabled` events. Returns an unsubscribe function; calling it more than once is a no-op. Multiple subscribers all receive every disable event, with synchronous and async exceptions isolated per-listener so a misbehaving subscriber cannot starve the rest of the chain. Buffer-and-replay semantics are preserved: events emitted while no listener is subscribed are buffered (FIFO, capped at 32) and replayed once to the listener that triggers the empty→non-empty transition. After every subscriber unsubscribes, subsequent disable events buffer again until the next subscribe.
20
+
21
+ ### Fixed
22
+
23
+ - Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
24
+ - Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
25
+
5
26
  ## [14.9.3] - 2026-05-10
6
27
 
7
28
  ### Fixed
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.9.9",
4
+ "version": "15.0.1",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.39",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.52.0",
49
- "@oh-my-pi/pi-natives": "14.9.9",
50
- "@oh-my-pi/pi-utils": "14.9.9",
49
+ "@oh-my-pi/pi-natives": "15.0.1",
50
+ "@oh-my-pi/pi-utils": "15.0.1",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -58,10 +58,10 @@
58
58
  "zod": "4.4.3"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3.13"
61
+ "@types/bun": "^1.3.14"
62
62
  },
63
63
  "engines": {
64
- "bun": ">=1.3.7"
64
+ "bun": ">=1.3.14"
65
65
  },
66
66
  "files": [
67
67
  "src",
@@ -154,6 +154,12 @@ const USAGE_CACHE_PREFIX = "usage_cache:";
154
154
  const USAGE_REPORT_TTL_MS = 30_000;
155
155
  const DEFAULT_USAGE_REQUEST_TIMEOUT_MS = 3_000;
156
156
  const DEFAULT_OAUTH_REFRESH_TIMEOUT_MS = 10_000;
157
+ /**
158
+ * Cap on the buffered credential_disabled backlog held while no handler is attached.
159
+ * In practice the backlog is 0–N where N ≈ active providers (≤ ~20). The cap exists so
160
+ * pathological detach-without-reattach loops can't grow memory unboundedly.
161
+ */
162
+ const MAX_PENDING_DISABLED_EVENTS = 32;
157
163
 
158
164
  type UsageCacheEntry<T> = {
159
165
  value: T;
@@ -283,7 +289,16 @@ export class AuthStorage {
283
289
  #fallbackResolver?: (provider: string) => string | undefined;
284
290
  #store: AuthCredentialStore;
285
291
  #configValueResolver: (config: string) => Promise<string | undefined>;
286
- #onCredentialDisabled?: (event: CredentialDisabledEvent) => void | Promise<void>;
292
+ #credentialDisabledListeners: Set<(event: CredentialDisabledEvent) => void | Promise<void>> = new Set();
293
+ /**
294
+ * Buffer for credential_disabled events fired while no listener is subscribed.
295
+ * Drained (in insertion order) to the first listener that triggers the empty→non-empty
296
+ * transition via {@link AuthStorage.onCredentialDisabled}. Bounded at
297
+ * {@link MAX_PENDING_DISABLED_EVENTS}; oldest entries are dropped to keep memory predictable
298
+ * if a long-lived AuthStorage somehow accumulates a backlog (provider count is naturally small,
299
+ * but a process that runs without subscribers for a long time shouldn't grow this unboundedly).
300
+ */
301
+ #pendingDisabledEvents: CredentialDisabledEvent[] = [];
287
302
  #closed = false;
288
303
 
289
304
  constructor(store: AuthCredentialStore, options: AuthStorageOptions = {}) {
@@ -294,7 +309,11 @@ export class AuthStorage {
294
309
  this.#usageCache = new AuthStorageUsageCache(this.#store);
295
310
  this.#usageFetch = options.usageFetch ?? fetch;
296
311
  this.#usageRequestTimeoutMs = options.usageRequestTimeoutMs ?? DEFAULT_USAGE_REQUEST_TIMEOUT_MS;
297
- this.#onCredentialDisabled = options.onCredentialDisabled;
312
+ if (options.onCredentialDisabled) {
313
+ // Constructor-registered subscribers are permanent for this AuthStorage's lifetime;
314
+ // the unsubscribe handle is intentionally discarded.
315
+ this.onCredentialDisabled(options.onCredentialDisabled);
316
+ }
298
317
  this.#usageLogger =
299
318
  options.usageLogger ??
300
319
  ({
@@ -324,6 +343,39 @@ export class AuthStorage {
324
343
  this.#store.close();
325
344
  }
326
345
 
346
+ /**
347
+ * Subscribe to {@link CredentialDisabledEvent}s. Multiple subscribers are supported and
348
+ * each fires for every disable event; subscribers are invoked in registration order with
349
+ * exceptions and async rejections isolated per-listener so a misbehaving subscriber
350
+ * cannot break the disable path or starve the rest of the chain.
351
+ *
352
+ * If `credential_disabled` events were emitted while no listener was subscribed, they are
353
+ * replayed (in insertion order) to the listener that triggers the empty→non-empty
354
+ * transition. The drain is one-shot — listeners that subscribe after that no longer see
355
+ * past events.
356
+ *
357
+ * Returns an unsubscribe function. The function is idempotent: calling it more than once
358
+ * is a no-op. After every subscriber has unsubscribed, subsequent disable events buffer
359
+ * again until the next subscribe.
360
+ *
361
+ * @param listener Callback invoked with each disable event. May be sync or async.
362
+ * @returns A function that removes this listener from the subscriber set.
363
+ */
364
+ onCredentialDisabled(listener: (event: CredentialDisabledEvent) => void | Promise<void>): () => void {
365
+ const wasEmpty = this.#credentialDisabledListeners.size === 0;
366
+ this.#credentialDisabledListeners.add(listener);
367
+ if (wasEmpty && this.#pendingDisabledEvents.length > 0) {
368
+ const drained = this.#pendingDisabledEvents;
369
+ this.#pendingDisabledEvents = [];
370
+ for (const event of drained) {
371
+ this.#invokeListener(listener, event);
372
+ }
373
+ }
374
+ return () => {
375
+ this.#credentialDisabledListeners.delete(listener);
376
+ };
377
+ }
378
+
327
379
  /**
328
380
  * Set a runtime API key override (not persisted to disk).
329
381
  * Used for CLI --api-key flag.
@@ -615,33 +667,65 @@ export class AuthStorage {
615
667
  }
616
668
 
617
669
  /**
618
- * Disables credential at index (used when OAuth refresh fails).
619
- * The credential remains in the database but is excluded from active queries.
620
- * Cleans up provider entry if last credential disabled.
670
+ * CAS-style disable used when OAuth refresh definitively fails: only disables the row when its
671
+ * persisted `data` still matches the credential we attempted to refresh.
672
+ * Returns `false` when a peer rotated the row between our pre-check and the
673
+ * disable, so the caller can reload and retry instead of clobbering the
674
+ * freshly-rotated credential.
621
675
  */
622
- #disableCredentialAt(provider: string, index: number, disabledCause: string): void {
676
+ #tryDisableCredentialAtIfMatches(
677
+ provider: string,
678
+ index: number,
679
+ expectedCredential: AuthCredential,
680
+ disabledCause: string,
681
+ ): boolean {
623
682
  const entries = this.#getStoredCredentials(provider);
624
- if (index < 0 || index >= entries.length) return;
625
- this.#store.deleteAuthCredential(entries[index].id, disabledCause);
683
+ if (index < 0 || index >= entries.length) return false;
684
+ const target = entries[index];
685
+ const serialized = serializeCredential(provider, expectedCredential);
686
+ if (!serialized) return false;
687
+ const disabled = this.#store.tryDisableAuthCredentialIfMatches(target.id, serialized.data, disabledCause);
688
+ if (!disabled) return false;
626
689
  const updated = entries.filter((_value, idx) => idx !== index);
627
690
  this.#setStoredCredentials(provider, updated);
628
691
  this.#resetProviderAssignments(provider);
629
692
  this.#emitCredentialDisabled({ provider, disabledCause });
693
+ return true;
630
694
  }
631
695
 
632
696
  #emitCredentialDisabled(event: CredentialDisabledEvent): void {
633
- const handler = this.#onCredentialDisabled;
634
- if (!handler) return;
635
- const logHandlerError = (error: unknown): void => {
636
- logger.warn("onCredentialDisabled handler threw", { provider: event.provider, error: String(error) });
697
+ if (this.#credentialDisabledListeners.size === 0) {
698
+ // No subscribers — buffer for later replay. Cap the backlog so a process that runs
699
+ // without subscribers for a long time can't grow memory unboundedly; drop oldest
700
+ // under pressure.
701
+ if (this.#pendingDisabledEvents.length >= MAX_PENDING_DISABLED_EVENTS) {
702
+ this.#pendingDisabledEvents.shift();
703
+ }
704
+ this.#pendingDisabledEvents.push(event);
705
+ return;
706
+ }
707
+ // Snapshot before iteration so a listener that subscribes/unsubscribes during fan-out
708
+ // can't observe a partially-mutated set or receive an event it just registered for.
709
+ const listeners = [...this.#credentialDisabledListeners];
710
+ for (const listener of listeners) {
711
+ this.#invokeListener(listener, event);
712
+ }
713
+ }
714
+
715
+ #invokeListener(
716
+ listener: (event: CredentialDisabledEvent) => void | Promise<void>,
717
+ event: CredentialDisabledEvent,
718
+ ): void {
719
+ const logListenerError = (error: unknown): void => {
720
+ logger.warn("onCredentialDisabled listener threw", { provider: event.provider, error: String(error) });
637
721
  };
638
722
  try {
639
- const result = handler(event);
723
+ const result = listener(event);
640
724
  if (result && typeof (result as PromiseLike<void>).then === "function") {
641
- (result as Promise<void>).catch(logHandlerError);
725
+ (result as Promise<void>).catch(logListenerError);
642
726
  }
643
727
  } catch (error) {
644
- logHandlerError(error);
728
+ logListenerError(error);
645
729
  }
646
730
  }
647
731
 
@@ -1993,8 +2077,45 @@ export class AuthStorage {
1993
2077
  });
1994
2078
 
1995
2079
  if (isDefinitiveFailure) {
1996
- // Permanently disable invalid credentials with an explicit cause for inspection/debugging
1997
- this.#disableCredentialAt(provider, selection.index, `oauth refresh failed: ${errorMsg}`);
2080
+ // The credential at this index may have been rotated by another process between
2081
+ // our in-memory snapshot and the refresh attempt: Anthropic rotates refresh
2082
+ // tokens on every use, so the peer's success leaves our stored token invalid.
2083
+ // Re-read the row from disk before marking it disabled — if the persisted
2084
+ // refresh token has changed, the peer rotation succeeded and we should pick
2085
+ // up the new credential instead of soft-deleting the row that the peer just
2086
+ // updated.
2087
+ const credentialId = this.#getStoredCredentials(provider)[selection.index]?.id;
2088
+ if (credentialId !== undefined) {
2089
+ const latestRow = this.#store.listAuthCredentials(provider).find(row => row.id === credentialId);
2090
+ const latestCredential = latestRow?.credential;
2091
+ if (latestCredential?.type === "oauth" && latestCredential.refresh !== selection.credential.refresh) {
2092
+ logger.debug("OAuth refresh race detected; another process rotated token first", {
2093
+ provider,
2094
+ index: selection.index,
2095
+ credentialId,
2096
+ });
2097
+ await this.reload();
2098
+ return this.getApiKey(provider, sessionId, options);
2099
+ }
2100
+ }
2101
+ // Permanently disable invalid credentials with an explicit cause for inspection/debugging.
2102
+ // Use a CAS-style disable conditioned on the row still containing the stale credential
2103
+ // we tried to refresh, so a peer rotation that lands between the pre-check above and
2104
+ // this disable doesn't soft-delete the freshly-rotated row.
2105
+ const disabled = this.#tryDisableCredentialAtIfMatches(
2106
+ provider,
2107
+ selection.index,
2108
+ selection.credential,
2109
+ `oauth refresh failed: ${errorMsg}`,
2110
+ );
2111
+ if (!disabled) {
2112
+ logger.debug("OAuth refresh disable lost CAS; reloading after peer rotation", {
2113
+ provider,
2114
+ index: selection.index,
2115
+ });
2116
+ await this.reload();
2117
+ return this.getApiKey(provider, sessionId, options);
2118
+ }
1998
2119
  if (this.#getCredentialsForProvider(provider).some(credential => credential.type === "oauth")) {
1999
2120
  return this.getApiKey(provider, sessionId, options);
2000
2121
  }
@@ -2280,6 +2401,7 @@ export class AuthCredentialStore {
2280
2401
  #insertStmt: Statement;
2281
2402
  #updateStmt: Statement;
2282
2403
  #deleteStmt: Statement;
2404
+ #deleteIfMatchesStmt: Statement;
2283
2405
  #deleteByProviderStmt: Statement;
2284
2406
  #hardDeleteStmt: Statement;
2285
2407
  #getCacheStmt: Statement;
@@ -2309,6 +2431,9 @@ export class AuthCredentialStore {
2309
2431
  this.#deleteStmt = this.#db.prepare(
2310
2432
  `UPDATE auth_credentials SET disabled_cause = ?, updated_at = ${SQLITE_NOW_EPOCH} WHERE id = ?`,
2311
2433
  );
2434
+ this.#deleteIfMatchesStmt = this.#db.prepare(
2435
+ `UPDATE auth_credentials SET disabled_cause = ?, updated_at = ${SQLITE_NOW_EPOCH} WHERE id = ? AND data = ? AND disabled_cause IS NULL`,
2436
+ );
2312
2437
  this.#deleteByProviderStmt = this.#db.prepare(
2313
2438
  `UPDATE auth_credentials SET disabled_cause = ?, updated_at = ${SQLITE_NOW_EPOCH} WHERE provider = ? AND disabled_cause IS NULL`,
2314
2439
  );
@@ -2707,6 +2832,23 @@ export class AuthCredentialStore {
2707
2832
  }
2708
2833
  }
2709
2834
 
2835
+ /**
2836
+ * CAS-style disable: only soft-deletes the row when its `data` column still
2837
+ * matches `expectedData` and the row has not already been disabled. Used by
2838
+ * the OAuth refresh-failure path to avoid clobbering a peer that rotated the
2839
+ * row between our pre-check and the disable.
2840
+ */
2841
+ tryDisableAuthCredentialIfMatches(id: number, expectedData: string, disabledCause: string): boolean {
2842
+ try {
2843
+ const result = this.#deleteIfMatchesStmt.run(normalizeDisabledCause(disabledCause), id, expectedData) as {
2844
+ changes: number;
2845
+ };
2846
+ return result.changes === 1;
2847
+ } catch {
2848
+ return false;
2849
+ }
2850
+ }
2851
+
2710
2852
  deleteAuthCredentialsForProvider(provider: string, disabledCause: string): void {
2711
2853
  try {
2712
2854
  this.#deleteByProviderStmt.run(normalizeDisabledCause(disabledCause), provider);
@@ -2816,6 +2958,7 @@ export class AuthCredentialStore {
2816
2958
  this.#insertStmt.finalize();
2817
2959
  this.#updateStmt.finalize();
2818
2960
  this.#deleteStmt.finalize();
2961
+ this.#deleteIfMatchesStmt.finalize();
2819
2962
  this.#deleteByProviderStmt.finalize();
2820
2963
  this.#hardDeleteStmt.finalize();
2821
2964
  this.#getCacheStmt.finalize();
package/src/index.ts CHANGED
@@ -37,6 +37,7 @@ export * from "./usage/zai";
37
37
  export * from "./utils/anthropic-auth";
38
38
  export * from "./utils/discovery";
39
39
  export * from "./utils/event-stream";
40
+ export * from "./utils/h2-fetch";
40
41
  export * from "./utils/overflow";
41
42
  export * from "./utils/retry";
42
43
  export * from "./utils/schema";
@@ -1,3 +1,4 @@
1
+ import { fetchWithRetry } from "@oh-my-pi/pi-utils";
1
2
  import type { ModelManagerOptions } from "../model-manager";
2
3
  import { Effort } from "../model-thinking";
3
4
  import type { ThinkingConfig } from "../types";
@@ -18,6 +19,8 @@ type OllamaShowResponse = {
18
19
  model_info?: Record<string, unknown>;
19
20
  };
20
21
 
22
+ const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
23
+
21
24
  function trimTrailingSlash(value: string): string {
22
25
  return value.endsWith("/") ? value.slice(0, -1) : value;
23
26
  }
@@ -94,9 +97,10 @@ export function ollamaCloudModelManagerOptions(
94
97
  if (!apiKey) {
95
98
  return [];
96
99
  }
97
- const response = await fetch(`${baseUrl}/api/tags`, {
100
+ const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
98
101
  method: "GET",
99
102
  headers: createCloudHeaders(apiKey),
103
+ defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
100
104
  });
101
105
  if (!response.ok) {
102
106
  throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
@@ -1,5 +1,6 @@
1
1
  import * as nodeCrypto from "node:crypto";
2
2
  import * as fs from "node:fs";
3
+ import { scheduler } from "node:timers/promises";
3
4
  import * as tls from "node:tls";
4
5
  import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
5
6
  import type {
@@ -8,7 +9,14 @@ import type {
8
9
  MessageParam,
9
10
  RawMessageStreamEvent,
10
11
  } from "@anthropic-ai/sdk/resources/messages";
11
- import { $env, abortableSleep, isEnoent, readSseEvents } from "@oh-my-pi/pi-utils";
12
+ import {
13
+ $env,
14
+ extractHttpStatusFromError,
15
+ isEnoent,
16
+ isRetryableError,
17
+ isUnexpectedSocketCloseMessage,
18
+ readSseEvents,
19
+ } from "@oh-my-pi/pi-utils";
12
20
  import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
13
21
  import { calculateCost } from "../models";
14
22
  import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
@@ -48,12 +56,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
48
56
  import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
49
57
  import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
50
58
  import { notifyProviderResponse } from "../utils/provider-response";
51
- import {
52
- extractHttpStatusFromError,
53
- isCopilotRetryableError,
54
- isRetryableError,
55
- isUnexpectedSocketCloseMessage,
56
- } from "../utils/retry";
59
+ import { isCopilotTransientModelError } from "../utils/retry";
57
60
  import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
58
61
  import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
59
62
  import {
@@ -844,7 +847,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
844
847
 
845
848
  export function isProviderRetryableError(error: unknown, provider?: string): boolean {
846
849
  if (!(error instanceof Error)) return false;
847
- if (provider === "github-copilot" && isCopilotRetryableError(error)) return true;
850
+ if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
848
851
  const msg = error.message.toLowerCase();
849
852
  if (
850
853
  isUnexpectedSocketCloseMessage(msg) ||
@@ -1287,7 +1290,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1287
1290
  }
1288
1291
  providerRetryAttempt++;
1289
1292
  const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
1290
- await abortableSleep(delayMs, options?.signal);
1293
+ await scheduler.wait(delayMs, { signal: options?.signal });
1291
1294
  output.content.length = 0;
1292
1295
  output.responseId = undefined;
1293
1296
  output.errorMessage = strictFallbackErrorMessage;
@@ -6,17 +6,15 @@ import type {
6
6
  ResponseInput,
7
7
  } from "openai/resources/responses/responses";
8
8
  import { getEnvApiKey } from "../stream";
9
- import {
10
- type Api,
11
- type AssistantMessage,
12
- type Context,
13
- type Model,
14
- type ServiceTier,
15
- type StreamFunction,
16
- type StreamOptions,
17
- shouldSendServiceTier,
18
- type Tool,
19
- type ToolChoice,
9
+ import type {
10
+ AssistantMessage,
11
+ Context,
12
+ Model,
13
+ ServiceTier,
14
+ StreamFunction,
15
+ StreamOptions,
16
+ Tool,
17
+ ToolChoice,
20
18
  } from "../types";
21
19
  import { normalizeSystemPrompts } from "../utils";
22
20
  import { createAbortSourceTracker } from "../utils/abort";
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
33
31
  import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
34
32
  import {
35
33
  appendResponsesToolResultMessages,
34
+ applyCommonResponsesSamplingParams,
35
+ applyResponsesReasoningParams,
36
36
  convertResponsesAssistantMessage,
37
37
  convertResponsesInputContent,
38
+ createInitialResponsesAssistantMessage,
38
39
  normalizeResponsesToolCallIdForTransform,
39
40
  processResponsesStream,
40
41
  } from "./openai-responses-shared";
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
101
102
  let firstTokenTime: number | undefined;
102
103
  const deploymentName = resolveDeploymentName(model, options);
103
104
 
104
- const output: AssistantMessage = {
105
- role: "assistant",
106
- content: [],
107
- api: "azure-openai-responses" as Api,
108
- provider: model.provider,
109
- model: model.id,
110
- usage: {
111
- input: 0,
112
- output: 0,
113
- cacheRead: 0,
114
- cacheWrite: 0,
115
- totalTokens: 0,
116
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
117
- },
118
- stopReason: "stop",
119
- timestamp: Date.now(),
120
- };
105
+ const output: AssistantMessage = createInitialResponsesAssistantMessage(
106
+ "azure-openai-responses",
107
+ model.provider,
108
+ model.id,
109
+ );
121
110
  let rawRequestDump: RawHttpRequestDump | undefined;
122
111
  const abortTracker = createAbortSourceTracker(options?.signal);
123
112
  const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
@@ -279,31 +268,7 @@ function buildParams(
279
268
  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
280
269
  };
281
270
 
282
- if (options?.maxTokens) {
283
- params.max_output_tokens = options?.maxTokens;
284
- }
285
-
286
- if (options?.temperature !== undefined) {
287
- params.temperature = options?.temperature;
288
- }
289
- if (options?.topP !== undefined) {
290
- params.top_p = options.topP;
291
- }
292
- if (options?.topK !== undefined) {
293
- params.top_k = options.topK;
294
- }
295
- if (options?.minP !== undefined) {
296
- params.min_p = options.minP;
297
- }
298
- if (options?.presencePenalty !== undefined) {
299
- params.presence_penalty = options.presencePenalty;
300
- }
301
- if (options?.repetitionPenalty !== undefined) {
302
- params.repetition_penalty = options.repetitionPenalty;
303
- }
304
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
305
- params.service_tier = options.serviceTier;
306
- }
271
+ applyCommonResponsesSamplingParams(params, options, model.provider);
307
272
 
308
273
  if (context.tools) {
309
274
  params.tools = convertTools(context.tools);
@@ -312,36 +277,7 @@ function buildParams(
312
277
  }
313
278
  }
314
279
 
315
- if (model.reasoning) {
316
- // Always request encrypted reasoning content so reasoning items can be
317
- // replayed in multi-turn conversations when store is false (items aren't
318
- // persisted server-side, so we must include the full content).
319
- // See: https://github.com/can1357/oh-my-pi/issues/41
320
- params.include = ["reasoning.encrypted_content"];
321
-
322
- if (options?.reasoning || options?.reasoningSummary !== undefined) {
323
- const reasoningParams: NonNullable<typeof params.reasoning> = {
324
- effort: options?.reasoning || "medium",
325
- };
326
- if (options?.reasoningSummary !== null) {
327
- reasoningParams.summary = options?.reasoningSummary || "auto";
328
- }
329
- params.reasoning = reasoningParams;
330
- } else {
331
- if (model.name.toLowerCase().startsWith("gpt-5")) {
332
- // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
333
- messages.push({
334
- role: "developer",
335
- content: [
336
- {
337
- type: "input_text",
338
- text: "# Juice: 0 !important",
339
- },
340
- ],
341
- });
342
- }
343
- }
344
- }
280
+ applyResponsesReasoningParams(params, model, options, messages);
345
281
 
346
282
  return params;
347
283
  }