pi-omlx-picker 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +19 -6
- package/package.json +1 -1
- package/src/catalog.ts +52 -21
- package/src/config.ts +12 -0
- package/src/overflow.ts +1 -0
- package/src/provider.ts +12 -7
- package/src/repeat-stop.ts +2 -5
- package/src/stream-events.ts +1 -14
- package/src/stream-writer.ts +7 -0
- package/src/stream.ts +89 -15
- package/src/thinking-format.ts +1 -1
package/index.ts
CHANGED
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
} from "./src/catalog.ts";
|
|
21
21
|
import {
|
|
22
22
|
DEFAULT_OMLX_BASE_URL,
|
|
23
|
+
hasOmlxTarget,
|
|
23
24
|
loadConfig,
|
|
24
25
|
type OmlxConfig,
|
|
25
26
|
resolveConfiguredApiKey,
|
|
@@ -115,9 +116,18 @@ function registerModels(
|
|
|
115
116
|
models: OmlxModel[],
|
|
116
117
|
modelSettingsPath?: string,
|
|
117
118
|
): void {
|
|
119
|
+
const keyless = !resolveConfiguredApiKey();
|
|
118
120
|
pi.registerProvider(PROVIDER, {
|
|
119
121
|
name: "OMLX",
|
|
120
|
-
...toProviderConfig(
|
|
122
|
+
...toProviderConfig(
|
|
123
|
+
config.apiRoot,
|
|
124
|
+
config.apiKeyEnvVar,
|
|
125
|
+
models,
|
|
126
|
+
undefined,
|
|
127
|
+
{
|
|
128
|
+
keyless,
|
|
129
|
+
},
|
|
130
|
+
),
|
|
121
131
|
});
|
|
122
132
|
state.config = config;
|
|
123
133
|
state.catalog = models;
|
|
@@ -152,8 +162,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
|
|
|
152
162
|
apiRoot: DEFAULT_OMLX_BASE_URL,
|
|
153
163
|
apiKeyEnvVar: "OMLX_API_KEY",
|
|
154
164
|
};
|
|
165
|
+
const configured = resolveConfiguredApiKey() || hasOmlxTarget();
|
|
155
166
|
const cached = registrableCachedModels(readCatalogCache(config.apiRoot));
|
|
156
|
-
const fallbackCached =
|
|
167
|
+
const fallbackCached = configured
|
|
157
168
|
? undefined
|
|
158
169
|
: registrableCachedModels(readLastCatalogCache());
|
|
159
170
|
const models = cached ?? fallbackCached;
|
|
@@ -161,7 +172,7 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
|
|
|
161
172
|
state.config = config;
|
|
162
173
|
state.catalog = [];
|
|
163
174
|
state.registered = false;
|
|
164
|
-
state.lastError =
|
|
175
|
+
state.lastError = configured
|
|
165
176
|
? "No cached OMLX models with real max_context_window/max_tokens; waiting for live catalog refresh."
|
|
166
177
|
: "OMLX credentials are not set. Run /login and choose OMLX.";
|
|
167
178
|
state.lastRefreshAt = new Date().toISOString();
|
|
@@ -169,7 +180,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
|
|
|
169
180
|
return;
|
|
170
181
|
}
|
|
171
182
|
|
|
172
|
-
|
|
183
|
+
// A key OR a configured base URL (keyless server) is enough to register the
|
|
184
|
+
// real provider. Pi omits the auth header when the resolved key is empty.
|
|
185
|
+
if (configured) {
|
|
173
186
|
registerModels(pi, state, config, models);
|
|
174
187
|
return;
|
|
175
188
|
}
|
|
@@ -226,7 +239,7 @@ async function refreshProvider(
|
|
|
226
239
|
): Promise<RefreshResult> {
|
|
227
240
|
const config = loadConfig();
|
|
228
241
|
const apiKey = resolveConfiguredApiKey();
|
|
229
|
-
if (!apiKey) {
|
|
242
|
+
if (!apiKey && !hasOmlxTarget()) {
|
|
230
243
|
state.lastError = "OMLX credentials are not set";
|
|
231
244
|
return "not_configured";
|
|
232
245
|
}
|
|
@@ -237,7 +250,7 @@ async function refreshProvider(
|
|
|
237
250
|
|
|
238
251
|
let models: OmlxModel[];
|
|
239
252
|
try {
|
|
240
|
-
models = await fetchModels(config.apiRoot, apiKey, {
|
|
253
|
+
models = await fetchModels(config.apiRoot, apiKey ?? "", {
|
|
241
254
|
modelSettingsPath,
|
|
242
255
|
timeoutMs: opts.timeoutMs,
|
|
243
256
|
});
|
package/package.json
CHANGED
package/src/catalog.ts
CHANGED
|
@@ -10,6 +10,8 @@ export interface OmlxModel {
|
|
|
10
10
|
modelAlias?: string;
|
|
11
11
|
contextWindow?: number;
|
|
12
12
|
maxTokens?: number;
|
|
13
|
+
/** Model architectural ceiling (`max_model_len`). Prio-3 fallback and clamp limit. */
|
|
14
|
+
archContextWindow?: number;
|
|
13
15
|
thinkingDefault?: boolean | null;
|
|
14
16
|
taskBudgetTokens?: number;
|
|
15
17
|
maxToolResultTokens?: number;
|
|
@@ -36,7 +38,7 @@ export interface CatalogDebugEvent {
|
|
|
36
38
|
|
|
37
39
|
interface OpenAIModelsResponse {
|
|
38
40
|
object: string;
|
|
39
|
-
data: Array<{ id: string; object?: string }>;
|
|
41
|
+
data: Array<{ id: string; object?: string; max_model_len?: number | null }>;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
interface OmlxModelsStatusResponse {
|
|
@@ -92,7 +94,10 @@ export function parseModelsResponse(json: unknown): OmlxModel[] {
|
|
|
92
94
|
if (!entry || typeof entry.id !== "string" || !entry.id) continue;
|
|
93
95
|
if (seen.has(entry.id)) continue;
|
|
94
96
|
seen.add(entry.id);
|
|
95
|
-
|
|
97
|
+
const m: OmlxModel = { id: entry.id };
|
|
98
|
+
if (typeof entry.max_model_len === "number" && entry.max_model_len > 0)
|
|
99
|
+
m.archContextWindow = entry.max_model_len;
|
|
100
|
+
out.push(m);
|
|
96
101
|
}
|
|
97
102
|
return out;
|
|
98
103
|
}
|
|
@@ -208,13 +213,15 @@ export async function fetchModels(
|
|
|
208
213
|
opts.modelSettingsPath,
|
|
209
214
|
opts.onDebug,
|
|
210
215
|
);
|
|
211
|
-
return
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
216
|
+
return resolveArchContextLimits(
|
|
217
|
+
await applyApiGlobalDefaultsIfNeeded(
|
|
218
|
+
models,
|
|
219
|
+
apiRoot,
|
|
220
|
+
apiKey,
|
|
221
|
+
opts.signal,
|
|
222
|
+
timeoutMs,
|
|
223
|
+
opts.onDebug,
|
|
224
|
+
),
|
|
218
225
|
);
|
|
219
226
|
} catch (err) {
|
|
220
227
|
if (err instanceof Error && err.name === "AbortError") throw err;
|
|
@@ -245,16 +252,35 @@ export async function fetchModels(
|
|
|
245
252
|
opts.modelSettingsPath,
|
|
246
253
|
opts.onDebug,
|
|
247
254
|
);
|
|
248
|
-
return
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
+
return resolveArchContextLimits(
|
|
256
|
+
await applyApiGlobalDefaultsIfNeeded(
|
|
257
|
+
models,
|
|
258
|
+
apiRoot,
|
|
259
|
+
apiKey,
|
|
260
|
+
opts.signal,
|
|
261
|
+
timeoutMs,
|
|
262
|
+
opts.onDebug,
|
|
263
|
+
),
|
|
255
264
|
);
|
|
256
265
|
}
|
|
257
266
|
|
|
267
|
+
/**
|
|
268
|
+
* Final context-window resolution, applied after model-specific (prio 1) and
|
|
269
|
+
* global (prio 2) settings. The model's architectural ceiling
|
|
270
|
+
* (`archContextWindow`, from `max_model_len`) is the prio-3 fallback when no
|
|
271
|
+
* user setting exists, and the hard clamp when a user setting exceeds it.
|
|
272
|
+
*/
|
|
273
|
+
export function resolveArchContextLimits(models: OmlxModel[]): OmlxModel[] {
|
|
274
|
+
return models.map((model) => {
|
|
275
|
+
const arch = model.archContextWindow;
|
|
276
|
+
if (arch == null) return model;
|
|
277
|
+
const next: OmlxModel = { ...model };
|
|
278
|
+
if (next.contextWindow == null) next.contextWindow = arch;
|
|
279
|
+
else if (next.contextWindow > arch) next.contextWindow = arch;
|
|
280
|
+
return next;
|
|
281
|
+
});
|
|
282
|
+
}
|
|
283
|
+
|
|
258
284
|
async function applyApiGlobalDefaultsIfNeeded(
|
|
259
285
|
models: OmlxModel[],
|
|
260
286
|
apiRoot: string,
|
|
@@ -263,7 +289,8 @@ async function applyApiGlobalDefaultsIfNeeded(
|
|
|
263
289
|
timeoutMs: number,
|
|
264
290
|
onDebug?: (event: CatalogDebugEvent) => void,
|
|
265
291
|
): Promise<OmlxModel[]> {
|
|
266
|
-
if (!models.some((m) =>
|
|
292
|
+
if (!models.some((m) => m.contextWindow == null || m.maxTokens == null))
|
|
293
|
+
return models;
|
|
267
294
|
let defaults: OmlxGlobalDefaults | undefined;
|
|
268
295
|
try {
|
|
269
296
|
defaults = await fetchGlobalDefaults(apiRoot, apiKey, signal, timeoutMs);
|
|
@@ -272,6 +299,7 @@ async function applyApiGlobalDefaultsIfNeeded(
|
|
|
272
299
|
details: { apiRoot, defaults },
|
|
273
300
|
});
|
|
274
301
|
} catch (err) {
|
|
302
|
+
if (signal?.aborted) throw err;
|
|
275
303
|
onDebug?.({
|
|
276
304
|
kind: "catalog_global_settings_failed",
|
|
277
305
|
details: {
|
|
@@ -281,12 +309,13 @@ async function applyApiGlobalDefaultsIfNeeded(
|
|
|
281
309
|
});
|
|
282
310
|
return models;
|
|
283
311
|
}
|
|
284
|
-
if (
|
|
312
|
+
if (defaults.contextWindow == null && defaults.maxTokens == null)
|
|
313
|
+
return models;
|
|
285
314
|
return models.map((model) => {
|
|
286
315
|
const next: OmlxModel = { ...model };
|
|
287
|
-
if (
|
|
316
|
+
if (next.contextWindow == null && defaults.contextWindow != null)
|
|
288
317
|
next.contextWindow = defaults.contextWindow;
|
|
289
|
-
if (
|
|
318
|
+
if (next.maxTokens == null && defaults.maxTokens != null)
|
|
290
319
|
next.maxTokens = defaults.maxTokens;
|
|
291
320
|
return next;
|
|
292
321
|
});
|
|
@@ -325,8 +354,10 @@ async function getJson(
|
|
|
325
354
|
timeoutMs: number,
|
|
326
355
|
): Promise<unknown> {
|
|
327
356
|
const signal = withTimeout(parent, timeoutMs);
|
|
357
|
+
// Empty key => keyless server (skip_api_key_verification): omit the header.
|
|
358
|
+
const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;
|
|
328
359
|
const res = await fetch(url, {
|
|
329
|
-
headers
|
|
360
|
+
headers,
|
|
330
361
|
signal,
|
|
331
362
|
}).catch((err) => {
|
|
332
363
|
if (err instanceof Error && err.name === "AbortError") {
|
package/src/config.ts
CHANGED
|
@@ -34,6 +34,18 @@ export function resolveConfiguredApiKey(
|
|
|
34
34
|
return loadOmlxCredential()?.apiKey;
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
/**
|
|
38
|
+
* True when the user has pointed us at a server even without an API key.
|
|
39
|
+
* OMLX servers run with `skip_api_key_verification: true` need no key; an
|
|
40
|
+
* explicit base URL (env or stored) is the signal that a keyless server is
|
|
41
|
+
* intended. With neither key nor base URL there is nothing to talk to.
|
|
42
|
+
*/
|
|
43
|
+
export function hasOmlxTarget(env: NodeJS.ProcessEnv = process.env): boolean {
|
|
44
|
+
if (env.OMLX_API_KEY || env.OMLX_BASE_URL) return true;
|
|
45
|
+
const stored = loadOmlxCredential();
|
|
46
|
+
return Boolean(stored?.apiKey || stored?.baseUrl);
|
|
47
|
+
}
|
|
48
|
+
|
|
37
49
|
// Legacy helper for older stored api_key credentials. Never fills only one side
|
|
38
50
|
// of the env pair; partial shell overrides remain explicit shell state.
|
|
39
51
|
export function applyStoredCredentialToEnv(
|
package/src/overflow.ts
CHANGED
|
@@ -4,6 +4,7 @@ const OMLX_OVERFLOW_RE =
|
|
|
4
4
|
/prompt too long[:.]?\s*(\d[\d,]*)\s*tokens?\s*exceeds\s*max(?:imum)?\s*context window of\s*(\d[\d,]*)\s*tokens?/i;
|
|
5
5
|
|
|
6
6
|
export function normalizeOverflowMessage(errorMessage: string): string {
|
|
7
|
+
if (errorMessage.startsWith("prompt is too long:")) return errorMessage;
|
|
7
8
|
const match = OMLX_OVERFLOW_RE.exec(errorMessage);
|
|
8
9
|
if (!match) return errorMessage;
|
|
9
10
|
const used = match[1];
|
package/src/provider.ts
CHANGED
|
@@ -23,22 +23,27 @@ export function toProviderConfig(
|
|
|
23
23
|
apiKeyEnvVar: string,
|
|
24
24
|
models: OmlxModel[],
|
|
25
25
|
onStreamTimeout?: (event: StreamTimeoutEvent) => void,
|
|
26
|
+
options: { keyless?: boolean } = {},
|
|
26
27
|
): ProviderConfig {
|
|
27
|
-
|
|
28
|
+
const config: ProviderConfig = {
|
|
28
29
|
baseUrl: apiRoot,
|
|
29
|
-
apiKey: `$${apiKeyEnvVar}`,
|
|
30
30
|
api: "openai-completions",
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
// Keyless server (skip_api_key_verification): no auth header. Pi rejects
|
|
32
|
+
// authHeader:true with no key, and resolveConfigValueOrThrow would throw
|
|
33
|
+
// on an unset $OMLX_API_KEY — so both apiKey and authHeader stay off.
|
|
34
|
+
authHeader: !options.keyless,
|
|
35
|
+
streamSimple: (model, context, streamOptions) =>
|
|
33
36
|
streamOmlxOpenAICompletions(
|
|
34
37
|
model,
|
|
35
38
|
context,
|
|
36
|
-
|
|
39
|
+
streamOptions,
|
|
37
40
|
resolveFirstDeltaTimeoutMs(),
|
|
38
41
|
onStreamTimeout,
|
|
39
42
|
),
|
|
40
43
|
models: models.map(toProviderModel),
|
|
41
44
|
};
|
|
45
|
+
if (!options.keyless) config.apiKey = `$${apiKeyEnvVar}`;
|
|
46
|
+
return config;
|
|
42
47
|
}
|
|
43
48
|
|
|
44
49
|
function requirePositive(
|
|
@@ -59,11 +64,11 @@ function toProviderModel(m: OmlxModel): ProviderModelConfig {
|
|
|
59
64
|
name: m.displayName ?? m.id,
|
|
60
65
|
reasoning,
|
|
61
66
|
input: m.modelType === "vlm" ? ["text", "image"] : ["text"],
|
|
62
|
-
cost: FREE_COST,
|
|
67
|
+
cost: { ...FREE_COST },
|
|
63
68
|
contextWindow: requirePositive(m.contextWindow, m.id, "max_context_window"),
|
|
64
69
|
maxTokens: requirePositive(m.maxTokens, m.id, "max_tokens"),
|
|
65
70
|
compat: reasoning
|
|
66
71
|
? { ...BASE_COMPAT, thinkingFormat: thinkingFormatFor(m.reasoningParser) }
|
|
67
|
-
: BASE_COMPAT,
|
|
72
|
+
: { ...BASE_COMPAT },
|
|
68
73
|
};
|
|
69
74
|
}
|
package/src/repeat-stop.ts
CHANGED
|
@@ -29,11 +29,8 @@ function extractAssistantParts(message: AssistantMessage): AssistantParts {
|
|
|
29
29
|
function lastAssistantMessage(
|
|
30
30
|
messages: Message[],
|
|
31
31
|
): AssistantMessage | undefined {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if (m.role === "assistant") return m;
|
|
35
|
-
}
|
|
36
|
-
return undefined;
|
|
32
|
+
const last = messages.at(-1);
|
|
33
|
+
return last?.role === "assistant" ? last : undefined;
|
|
37
34
|
}
|
|
38
35
|
|
|
39
36
|
function bigramCounts(s: string): Map<string, number> {
|
package/src/stream-events.ts
CHANGED
|
@@ -27,19 +27,6 @@ export function isThinkingEvent(event: AssistantMessageEvent): boolean {
|
|
|
27
27
|
);
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
export function mergeAbortSignals(
|
|
31
|
-
parent: AbortSignal | undefined,
|
|
32
|
-
child: AbortSignal,
|
|
33
|
-
): AbortSignal {
|
|
34
|
-
if (!parent) return child;
|
|
35
|
-
if (parent.aborted) return parent;
|
|
36
|
-
const controller = new AbortController();
|
|
37
|
-
const abort = () => controller.abort();
|
|
38
|
-
parent.addEventListener("abort", abort, { once: true });
|
|
39
|
-
child.addEventListener("abort", abort, { once: true });
|
|
40
|
-
return controller.signal;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
30
|
export function eventPartial(
|
|
44
31
|
event: AssistantMessageEvent,
|
|
45
32
|
model: Model<Api>,
|
|
@@ -64,7 +51,7 @@ export function errorAssistantMessage(
|
|
|
64
51
|
api: model.api,
|
|
65
52
|
provider: model.provider,
|
|
66
53
|
model: model.id,
|
|
67
|
-
usage: ZERO_USAGE,
|
|
54
|
+
usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } },
|
|
68
55
|
stopReason,
|
|
69
56
|
errorMessage,
|
|
70
57
|
timestamp: Date.now(),
|
package/src/stream-writer.ts
CHANGED
|
@@ -21,6 +21,13 @@ export class StreamWriter {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
push(event: AssistantMessageEvent): void {
|
|
24
|
+
if (event.type === "start") {
|
|
25
|
+
if (!this.startPushed) {
|
|
26
|
+
this.stream.push(this.startEvent ?? event);
|
|
27
|
+
this.startPushed = true;
|
|
28
|
+
}
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
24
31
|
if (!this.startPushed) {
|
|
25
32
|
this.stream.push(
|
|
26
33
|
this.startEvent ?? {
|
package/src/stream.ts
CHANGED
|
@@ -7,18 +7,31 @@ import {
|
|
|
7
7
|
type Model,
|
|
8
8
|
type SimpleStreamOptions,
|
|
9
9
|
} from "@earendil-works/pi-ai";
|
|
10
|
-
|
|
10
|
+
// Resolve the concrete OpenAI Completions stream once, via the lazy API factory
|
|
11
|
+
// re-exported from compat. Calling the compat `streamSimple` dispatcher from
|
|
12
|
+
// inside this wrapper would re-resolve through the api-provider registry; and
|
|
13
|
+
// because this extension registers itself as the openai-completions handler,
|
|
14
|
+
// that routes dispatch -> wrapper -> dispatch -> ... and overflows the stack.
|
|
15
|
+
// The lazy factory returns a closure over the concrete implementation that
|
|
16
|
+
// loads the module on first call and calls it directly — no registry, no
|
|
17
|
+
// re-dispatch. It is captured here at module load, BEFORE the wrapper is
|
|
18
|
+
// registered, so it can never be the wrapper itself.
|
|
19
|
+
//
|
|
20
|
+
// Note: pi's extension loader (jiti) only aliases a fixed set of pi-ai
|
|
21
|
+
// subpaths (root, /compat, /oauth). Importing `@earendil-works/pi-ai/api/...`
|
|
22
|
+
// is not resolvable there, so the concrete module is reached through compat.
|
|
23
|
+
import { openAICompletionsApi } from "@earendil-works/pi-ai/compat";
|
|
24
|
+
import { PROVIDER_KEY } from "./auth-storage.ts";
|
|
11
25
|
import { normalizeErrorEvent } from "./overflow.ts";
|
|
12
26
|
import { isRepeatStop } from "./repeat-stop.ts";
|
|
13
|
-
import {
|
|
14
|
-
isMeaningfulBodyEvent,
|
|
15
|
-
isThinkingEvent,
|
|
16
|
-
mergeAbortSignals,
|
|
17
|
-
} from "./stream-events.ts";
|
|
27
|
+
import { isMeaningfulBodyEvent, isThinkingEvent } from "./stream-events.ts";
|
|
18
28
|
import { StreamWriter } from "./stream-writer.ts";
|
|
19
29
|
|
|
30
|
+
const streamOpenAICompletionsImpl = openAICompletionsApi().streamSimple;
|
|
31
|
+
|
|
20
32
|
const DEFAULT_FIRST_DELTA_TIMEOUT_MS = 120_000;
|
|
21
33
|
const FIRST_DELTA_MAX_ATTEMPTS = 2;
|
|
34
|
+
const MAX_REISSUES = 1;
|
|
22
35
|
|
|
23
36
|
export type StreamTimeoutEvent = {
|
|
24
37
|
model: string;
|
|
@@ -45,6 +58,32 @@ export function resolveFirstDeltaTimeoutMs(): number {
|
|
|
45
58
|
: DEFAULT_FIRST_DELTA_TIMEOUT_MS;
|
|
46
59
|
}
|
|
47
60
|
|
|
61
|
+
// Merge the parent (caller) signal with our own timeout signal. We compose the
|
|
62
|
+
// raw source signals directly via AbortSignal.any rather than chaining through
|
|
63
|
+
// a freshly-created controller per call: chaining previously-merged signals
|
|
64
|
+
// accumulates abort listeners across a long session and, when abort fires,
|
|
65
|
+
// propagates through N recursive .abort() calls that overflow the stack.
|
|
66
|
+
// AbortSignal.any keeps the merged signal detached from either source's
|
|
67
|
+
// listener set, and returns an already-aborted signal if either input is
|
|
68
|
+
// aborted (so a pre-aborted parent propagates immediately). Node >=22
|
|
69
|
+
// guarantees AbortSignal.any is available (engines).
|
|
70
|
+
function mergeTimeoutSignal(
|
|
71
|
+
parent: AbortSignal | undefined,
|
|
72
|
+
own: AbortSignal,
|
|
73
|
+
): AbortSignal {
|
|
74
|
+
if (!parent) return own;
|
|
75
|
+
return AbortSignal.any([parent, own]) as AbortSignal;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Always flush buffered thinking events before leaving runAttempt, including
|
|
79
|
+
// the timed-out and thrown paths. Previously these were dropped silently.
|
|
80
|
+
function flushThinking(
|
|
81
|
+
writer: StreamWriter,
|
|
82
|
+
events: AssistantMessageEvent[],
|
|
83
|
+
): void {
|
|
84
|
+
for (const held of events) writer.push(held);
|
|
85
|
+
}
|
|
86
|
+
|
|
48
87
|
async function runAttempt(
|
|
49
88
|
writer: StreamWriter,
|
|
50
89
|
model: Model<Api>,
|
|
@@ -56,11 +95,14 @@ async function runAttempt(
|
|
|
56
95
|
allowReissue: boolean,
|
|
57
96
|
): Promise<AttemptResult> {
|
|
58
97
|
const controller = new AbortController();
|
|
59
|
-
const signal =
|
|
98
|
+
const signal = mergeTimeoutSignal(options?.signal, controller.signal);
|
|
60
99
|
let timedOut = false;
|
|
61
100
|
let firstMeaningfulEvent = false;
|
|
62
101
|
|
|
63
102
|
const timer = setTimeout(() => {
|
|
103
|
+
// clearTimeout below does not stop a callback already queued on the
|
|
104
|
+
// event loop; firstMeaningfulEvent is a load-bearing guard against the
|
|
105
|
+
// timer firing just after the first body event cleared it.
|
|
64
106
|
if (writer.closed || firstMeaningfulEvent) return;
|
|
65
107
|
timedOut = true;
|
|
66
108
|
onTimeout?.({
|
|
@@ -76,7 +118,9 @@ async function runAttempt(
|
|
|
76
118
|
let bufferedThinking: AssistantMessageEvent[] = [];
|
|
77
119
|
|
|
78
120
|
try {
|
|
79
|
-
|
|
121
|
+
// Direct call into the concrete implementation: no dispatch, so a model
|
|
122
|
+
// whose provider registered this wrapper cannot recurse back into it.
|
|
123
|
+
const inner = streamOpenAICompletionsImpl(
|
|
80
124
|
model as Model<"openai-completions">,
|
|
81
125
|
context,
|
|
82
126
|
{ ...options, signal },
|
|
@@ -96,18 +140,27 @@ async function runAttempt(
|
|
|
96
140
|
continue;
|
|
97
141
|
}
|
|
98
142
|
if (allowReissue && isRepeatStop(event, context)) {
|
|
143
|
+
// Tear down the inner stream's network connection immediately so it
|
|
144
|
+
// does not drain unpredictably while the caller reissues.
|
|
145
|
+
controller.abort();
|
|
99
146
|
bufferedThinking = [];
|
|
100
147
|
return "reissue";
|
|
101
148
|
}
|
|
102
|
-
|
|
149
|
+
flushThinking(writer, bufferedThinking);
|
|
103
150
|
bufferedThinking = [];
|
|
104
151
|
writer.push(normalizeErrorEvent(event));
|
|
105
152
|
if (event.type === "done" || event.type === "error") break;
|
|
106
153
|
}
|
|
154
|
+
} catch (err) {
|
|
155
|
+
flushThinking(writer, bufferedThinking);
|
|
156
|
+
if (timedOut) return "timed-out";
|
|
157
|
+
throw err;
|
|
107
158
|
} finally {
|
|
108
159
|
clearTimeout(timer);
|
|
109
160
|
}
|
|
110
161
|
|
|
162
|
+
flushThinking(writer, bufferedThinking);
|
|
163
|
+
|
|
111
164
|
return timedOut ? "timed-out" : "completed";
|
|
112
165
|
}
|
|
113
166
|
|
|
@@ -118,13 +171,32 @@ export function streamOmlxOpenAICompletions(
|
|
|
118
171
|
firstDeltaTimeoutMs: number,
|
|
119
172
|
onTimeout: OnStreamTimeout | undefined,
|
|
120
173
|
): AssistantMessageEventStream {
|
|
174
|
+
// Pi dispatches stream handlers by api id, not provider. Registering this
|
|
175
|
+
// wrapper as the openai-completions streamSimple handler (the mechanism pi
|
|
176
|
+
// exposes) replaces the shared entry, so non-oMLX OpenAI-compatible models
|
|
177
|
+
// (groq, zai, glm, ...) also arrive here. Pass them straight through to the
|
|
178
|
+
// concrete implementation with no oMLX-specific timeout/reissue logic, so the
|
|
179
|
+
// extension cannot perturb unrelated providers.
|
|
180
|
+
if (model.provider !== PROVIDER_KEY) {
|
|
181
|
+
return streamOpenAICompletionsImpl(
|
|
182
|
+
model as Model<"openai-completions">,
|
|
183
|
+
context,
|
|
184
|
+
options,
|
|
185
|
+
);
|
|
186
|
+
}
|
|
187
|
+
|
|
121
188
|
const stream = createAssistantMessageEventStream();
|
|
122
189
|
const writer = new StreamWriter(stream, model);
|
|
123
190
|
|
|
124
191
|
(async () => {
|
|
125
192
|
try {
|
|
126
|
-
|
|
127
|
-
|
|
193
|
+
// Separate budgets so a reissue never consumes a timeout attempt and
|
|
194
|
+
// the loop index is never mutated: timeoutAttemptsLeft governs how
|
|
195
|
+
// many timed-out retries remain, reissueLeft governs reissues.
|
|
196
|
+
let timeoutAttemptsLeft = FIRST_DELTA_MAX_ATTEMPTS;
|
|
197
|
+
let reissueLeft = MAX_REISSUES;
|
|
198
|
+
while (true) {
|
|
199
|
+
const attempt = FIRST_DELTA_MAX_ATTEMPTS - timeoutAttemptsLeft + 1;
|
|
128
200
|
const result = await runAttempt(
|
|
129
201
|
writer,
|
|
130
202
|
model,
|
|
@@ -133,20 +205,22 @@ export function streamOmlxOpenAICompletions(
|
|
|
133
205
|
firstDeltaTimeoutMs,
|
|
134
206
|
attempt,
|
|
135
207
|
onTimeout,
|
|
136
|
-
|
|
208
|
+
reissueLeft > 0,
|
|
137
209
|
);
|
|
138
210
|
if (writer.closed) return;
|
|
139
211
|
|
|
140
212
|
if (result === "reissue") {
|
|
141
|
-
|
|
142
|
-
attempt--; // a re-issue doesn't consume a timeout attempt
|
|
213
|
+
reissueLeft--;
|
|
143
214
|
continue;
|
|
144
215
|
}
|
|
145
216
|
if (result === "completed") {
|
|
146
217
|
writer.end();
|
|
147
218
|
return;
|
|
148
219
|
}
|
|
149
|
-
|
|
220
|
+
|
|
221
|
+
// timed-out
|
|
222
|
+
timeoutAttemptsLeft--;
|
|
223
|
+
if (timeoutAttemptsLeft <= 0) {
|
|
150
224
|
writer.pushError(
|
|
151
225
|
firstDeltaTimeoutMessage(
|
|
152
226
|
firstDeltaTimeoutMs,
|