@oh-my-pi/pi-ai 15.0.0 → 15.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,22 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.0.2] - 2026-05-15
6
+ ### Fixed
7
+
8
+ - Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes
9
+ - Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/can1357/oh-my-pi/issues/1077)).
10
+
11
+ ## [15.0.1] - 2026-05-14
12
+ ### Breaking Changes
13
+
14
+ - Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
15
+
16
+ ### Added
17
+
18
+ - Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
19
+ - Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
20
+
5
21
  ## [15.0.0] - 2026-05-13
6
22
 
7
23
  ### Added
@@ -12,6 +28,11 @@
12
28
 
13
29
  - Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
14
30
  - Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
31
+ ### Changed
32
+ - Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
33
+
34
+ ### Fixed
35
+ - Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
15
36
 
16
37
  ## [14.9.3] - 2026-05-10
17
38
 
@@ -2344,4 +2365,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
2344
2365
 
2345
2366
  ## [0.9.4] - 2025-11-26
2346
2367
 
2347
- Initial release with multi-provider LLM support.
2368
+ Initial release with multi-provider LLM support.
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.0.0",
4
+ "version": "15.0.2",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
- "homepage": "https://github.com/can1357/oh-my-pi",
6
+ "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
8
8
  "contributors": [
9
9
  "Mario Zechner"
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.39",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.52.0",
49
- "@oh-my-pi/pi-natives": "15.0.0",
50
- "@oh-my-pi/pi-utils": "15.0.0",
49
+ "@oh-my-pi/pi-natives": "15.0.2",
50
+ "@oh-my-pi/pi-utils": "15.0.2",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -58,10 +58,10 @@
58
58
  "zod": "4.4.3"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3.13"
61
+ "@types/bun": "^1.3.14"
62
62
  },
63
63
  "engines": {
64
- "bun": ">=1.3.7"
64
+ "bun": ">=1.3.14"
65
65
  },
66
66
  "files": [
67
67
  "src",
package/src/index.ts CHANGED
@@ -37,6 +37,7 @@ export * from "./usage/zai";
37
37
  export * from "./utils/anthropic-auth";
38
38
  export * from "./utils/discovery";
39
39
  export * from "./utils/event-stream";
40
+ export * from "./utils/h2-fetch";
40
41
  export * from "./utils/overflow";
41
42
  export * from "./utils/retry";
42
43
  export * from "./utils/schema";
@@ -1,4 +1,4 @@
1
- import { abortableSleep } from "@oh-my-pi/pi-utils";
1
+ import { fetchWithRetry } from "@oh-my-pi/pi-utils";
2
2
  import type { ModelManagerOptions } from "../model-manager";
3
3
  import { Effort } from "../model-thinking";
4
4
  import type { ThinkingConfig } from "../types";
@@ -19,16 +19,7 @@ type OllamaShowResponse = {
19
19
  model_info?: Record<string, unknown>;
20
20
  };
21
21
 
22
- const MODEL_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
23
-
24
- async function fetchWithRetry(url: string, init: RequestInit): Promise<Response> {
25
- for (let attempt = 0; attempt < MODEL_RETRY_DELAYS_MS.length; attempt++) {
26
- const response = await fetch(url, init);
27
- if (response.ok || response.status < 500) return response;
28
- await abortableSleep(MODEL_RETRY_DELAYS_MS[attempt]!);
29
- }
30
- return fetch(url, init);
31
- }
22
+ const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
32
23
 
33
24
  function trimTrailingSlash(value: string): string {
34
25
  return value.endsWith("/") ? value.slice(0, -1) : value;
@@ -109,6 +100,7 @@ export function ollamaCloudModelManagerOptions(
109
100
  const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
110
101
  method: "GET",
111
102
  headers: createCloudHeaders(apiKey),
103
+ defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
112
104
  });
113
105
  if (!response.ok) {
114
106
  throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
@@ -1,5 +1,6 @@
1
1
  import * as nodeCrypto from "node:crypto";
2
2
  import * as fs from "node:fs";
3
+ import { scheduler } from "node:timers/promises";
3
4
  import * as tls from "node:tls";
4
5
  import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
5
6
  import type {
@@ -8,7 +9,14 @@ import type {
8
9
  MessageParam,
9
10
  RawMessageStreamEvent,
10
11
  } from "@anthropic-ai/sdk/resources/messages";
11
- import { $env, abortableSleep, isEnoent, readSseEvents } from "@oh-my-pi/pi-utils";
12
+ import {
13
+ $env,
14
+ extractHttpStatusFromError,
15
+ isEnoent,
16
+ isRetryableError,
17
+ isUnexpectedSocketCloseMessage,
18
+ readSseEvents,
19
+ } from "@oh-my-pi/pi-utils";
12
20
  import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
13
21
  import { calculateCost } from "../models";
14
22
  import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
@@ -17,6 +25,7 @@ import type {
17
25
  AssistantMessage,
18
26
  CacheRetention,
19
27
  Context,
28
+ FetchImpl,
20
29
  ImageContent,
21
30
  Message,
22
31
  Model,
@@ -48,12 +57,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
48
57
  import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
49
58
  import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
50
59
  import { notifyProviderResponse } from "../utils/provider-response";
51
- import {
52
- extractHttpStatusFromError,
53
- isCopilotRetryableError,
54
- isRetryableError,
55
- isUnexpectedSocketCloseMessage,
56
- } from "../utils/retry";
60
+ import { isCopilotTransientModelError } from "../utils/retry";
57
61
  import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
58
62
  import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
59
63
  import {
@@ -538,6 +542,7 @@ export type AnthropicClientOptionsArgs = {
538
542
  isOAuth?: boolean;
539
543
  hasTools?: boolean;
540
544
  onSseEvent?: AnthropicOptions["onSseEvent"];
545
+ fetch?: FetchImpl;
541
546
  };
542
547
 
543
548
  export type AnthropicClientOptionsResult = {
@@ -844,7 +849,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
844
849
 
845
850
  export function isProviderRetryableError(error: unknown, provider?: string): boolean {
846
851
  if (!(error instanceof Error)) return false;
847
- if (provider === "github-copilot" && isCopilotRetryableError(error)) return true;
852
+ if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
848
853
  const msg = error.message.toLowerCase();
849
854
  if (
850
855
  isUnexpectedSocketCloseMessage(msg) ||
@@ -962,6 +967,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
962
967
  isOAuth: options?.isOAuth,
963
968
  hasTools: !!context.tools?.length,
964
969
  onSseEvent: options?.onSseEvent,
970
+ fetch: options?.fetch,
965
971
  });
966
972
  client = created.client;
967
973
  isOAuthToken = created.isOAuthToken;
@@ -1287,7 +1293,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1287
1293
  }
1288
1294
  providerRetryAttempt++;
1289
1295
  const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
1290
- await abortableSleep(delayMs, options?.signal);
1296
+ await scheduler.wait(delayMs, { signal: options?.signal });
1291
1297
  output.content.length = 0;
1292
1298
  output.responseId = undefined;
1293
1299
  output.errorMessage = strictFallbackErrorMessage;
@@ -1402,7 +1408,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1402
1408
  const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
1403
1409
  const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
1404
1410
  const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
1405
- const debugFetch = onSseEvent ? wrapFetchForSseDebug(fetch, event => onSseEvent(event, model)) : undefined;
1411
+ const baseFetch = args.fetch ?? fetch;
1412
+ const debugFetch = onSseEvent
1413
+ ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model))
1414
+ : args.fetch
1415
+ ? baseFetch
1416
+ : undefined;
1406
1417
  if (model.provider === "github-copilot") {
1407
1418
  const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
1408
1419
  const betaFeatures = [...extraBetas];
@@ -6,17 +6,15 @@ import type {
6
6
  ResponseInput,
7
7
  } from "openai/resources/responses/responses";
8
8
  import { getEnvApiKey } from "../stream";
9
- import {
10
- type Api,
11
- type AssistantMessage,
12
- type Context,
13
- type Model,
14
- type ServiceTier,
15
- type StreamFunction,
16
- type StreamOptions,
17
- shouldSendServiceTier,
18
- type Tool,
19
- type ToolChoice,
9
+ import type {
10
+ AssistantMessage,
11
+ Context,
12
+ Model,
13
+ ServiceTier,
14
+ StreamFunction,
15
+ StreamOptions,
16
+ Tool,
17
+ ToolChoice,
20
18
  } from "../types";
21
19
  import { normalizeSystemPrompts } from "../utils";
22
20
  import { createAbortSourceTracker } from "../utils/abort";
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
33
31
  import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
34
32
  import {
35
33
  appendResponsesToolResultMessages,
34
+ applyCommonResponsesSamplingParams,
35
+ applyResponsesReasoningParams,
36
36
  convertResponsesAssistantMessage,
37
37
  convertResponsesInputContent,
38
+ createInitialResponsesAssistantMessage,
38
39
  normalizeResponsesToolCallIdForTransform,
39
40
  processResponsesStream,
40
41
  } from "./openai-responses-shared";
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
101
102
  let firstTokenTime: number | undefined;
102
103
  const deploymentName = resolveDeploymentName(model, options);
103
104
 
104
- const output: AssistantMessage = {
105
- role: "assistant",
106
- content: [],
107
- api: "azure-openai-responses" as Api,
108
- provider: model.provider,
109
- model: model.id,
110
- usage: {
111
- input: 0,
112
- output: 0,
113
- cacheRead: 0,
114
- cacheWrite: 0,
115
- totalTokens: 0,
116
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
117
- },
118
- stopReason: "stop",
119
- timestamp: Date.now(),
120
- };
105
+ const output: AssistantMessage = createInitialResponsesAssistantMessage(
106
+ "azure-openai-responses",
107
+ model.provider,
108
+ model.id,
109
+ );
121
110
  let rawRequestDump: RawHttpRequestDump | undefined;
122
111
  const abortTracker = createAbortSourceTracker(options?.signal);
123
112
  const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
@@ -252,6 +241,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
252
241
 
253
242
  const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
254
243
 
244
+ const baseFetch = options?.fetch ?? fetch;
255
245
  return new AzureOpenAI({
256
246
  apiKey,
257
247
  apiVersion,
@@ -259,7 +249,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
259
249
  maxRetries: 5,
260
250
  defaultHeaders: headers,
261
251
  baseURL: baseUrl,
262
- fetch: options?.onSseEvent ? wrapFetchForSseDebug(fetch, event => options.onSseEvent?.(event, model)) : fetch,
252
+ fetch: options?.onSseEvent
253
+ ? wrapFetchForSseDebug(baseFetch, event => options.onSseEvent?.(event, model))
254
+ : baseFetch,
263
255
  });
264
256
  }
265
257
 
@@ -279,31 +271,7 @@ function buildParams(
279
271
  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
280
272
  };
281
273
 
282
- if (options?.maxTokens) {
283
- params.max_output_tokens = options?.maxTokens;
284
- }
285
-
286
- if (options?.temperature !== undefined) {
287
- params.temperature = options?.temperature;
288
- }
289
- if (options?.topP !== undefined) {
290
- params.top_p = options.topP;
291
- }
292
- if (options?.topK !== undefined) {
293
- params.top_k = options.topK;
294
- }
295
- if (options?.minP !== undefined) {
296
- params.min_p = options.minP;
297
- }
298
- if (options?.presencePenalty !== undefined) {
299
- params.presence_penalty = options.presencePenalty;
300
- }
301
- if (options?.repetitionPenalty !== undefined) {
302
- params.repetition_penalty = options.repetitionPenalty;
303
- }
304
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
305
- params.service_tier = options.serviceTier;
306
- }
274
+ applyCommonResponsesSamplingParams(params, options, model.provider);
307
275
 
308
276
  if (context.tools) {
309
277
  params.tools = convertTools(context.tools);
@@ -312,36 +280,7 @@ function buildParams(
312
280
  }
313
281
  }
314
282
 
315
- if (model.reasoning) {
316
- // Always request encrypted reasoning content so reasoning items can be
317
- // replayed in multi-turn conversations when store is false (items aren't
318
- // persisted server-side, so we must include the full content).
319
- // See: https://github.com/can1357/oh-my-pi/issues/41
320
- params.include = ["reasoning.encrypted_content"];
321
-
322
- if (options?.reasoning || options?.reasoningSummary !== undefined) {
323
- const reasoningParams: NonNullable<typeof params.reasoning> = {
324
- effort: options?.reasoning || "medium",
325
- };
326
- if (options?.reasoningSummary !== null) {
327
- reasoningParams.summary = options?.reasoningSummary || "auto";
328
- }
329
- params.reasoning = reasoningParams;
330
- } else {
331
- if (model.name.toLowerCase().startsWith("gpt-5")) {
332
- // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
333
- messages.push({
334
- role: "developer",
335
- content: [
336
- {
337
- type: "input_text",
338
- text: "# Juice: 0 !important",
339
- },
340
- ],
341
- });
342
- }
343
- }
344
- }
283
+ applyResponsesReasoningParams(params, model, options, messages);
345
284
 
346
285
  return params;
347
286
  }
@@ -1,5 +1,5 @@
1
1
  import { ANTHROPIC_THINKING, mapAnthropicToolChoice } from "../stream";
2
- import type { Api, Context, Model, SimpleStreamOptions } from "../types";
2
+ import type { Api, Context, FetchImpl, Model, SimpleStreamOptions } from "../types";
3
3
  import { AssistantMessageEventStream } from "../utils/event-stream";
4
4
  import type { OpenAICompletionsOptions } from "./openai-completions";
5
5
  import type { OpenAIResponsesOptions } from "./openai-responses";
@@ -172,13 +172,16 @@ interface DirectAccessToken {
172
172
 
173
173
  const directAccessCache = new Map<string, DirectAccessToken>();
174
174
 
175
- async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAccessToken> {
175
+ async function getDirectAccessToken(
176
+ gitlabAccessToken: string,
177
+ fetchImpl: FetchImpl = fetch,
178
+ ): Promise<DirectAccessToken> {
176
179
  const cached = directAccessCache.get(gitlabAccessToken);
177
180
  if (cached && cached.expiresAt > Date.now()) {
178
181
  return cached;
179
182
  }
180
183
 
181
- const response = await fetch(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
184
+ const response = await fetchImpl(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
182
185
  method: "POST",
183
186
  headers: {
184
187
  Authorization: `Bearer ${gitlabAccessToken}`,
@@ -240,7 +243,7 @@ export function streamGitLabDuo(
240
243
  throw new Error(`Unsupported GitLab Duo model: ${model.id}`);
241
244
  }
242
245
 
243
- const directAccess = await getDirectAccessToken(options.apiKey);
246
+ const directAccess = await getDirectAccessToken(options.apiKey, options.fetch);
244
247
  const headers = {
245
248
  ...directAccess.headers,
246
249
  ...options.headers,
@@ -278,6 +281,7 @@ export function streamGitLabDuo(
278
281
  onPayload: options.onPayload,
279
282
  onResponse: options.onResponse,
280
283
  onSseEvent: options.onSseEvent,
284
+ fetch: options.fetch,
281
285
  thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
282
286
  thinkingBudgetTokens: reasoningEffort
283
287
  ? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
@@ -314,6 +318,7 @@ export function streamGitLabDuo(
314
318
  onPayload: options.onPayload,
315
319
  onResponse: options.onResponse,
316
320
  onSseEvent: options.onSseEvent,
321
+ fetch: options.fetch,
317
322
  reasoning: reasoningEffort,
318
323
  toolChoice: options.toolChoice,
319
324
  } satisfies OpenAIResponsesOptions,
@@ -345,6 +350,7 @@ export function streamGitLabDuo(
345
350
  onPayload: options.onPayload,
346
351
  onResponse: options.onResponse,
347
352
  onSseEvent: options.onSseEvent,
353
+ fetch: options.fetch,
348
354
  reasoning: reasoningEffort,
349
355
  toolChoice: options.toolChoice,
350
356
  } satisfies OpenAICompletionsOptions,