pi-omlx-picker 0.2.9 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -20,6 +20,7 @@ import {
20
20
  } from "./src/catalog.ts";
21
21
  import {
22
22
  DEFAULT_OMLX_BASE_URL,
23
+ hasOmlxTarget,
23
24
  loadConfig,
24
25
  type OmlxConfig,
25
26
  resolveConfiguredApiKey,
@@ -115,9 +116,18 @@ function registerModels(
115
116
  models: OmlxModel[],
116
117
  modelSettingsPath?: string,
117
118
  ): void {
119
+ const keyless = !resolveConfiguredApiKey();
118
120
  pi.registerProvider(PROVIDER, {
119
121
  name: "OMLX",
120
- ...toProviderConfig(config.apiRoot, config.apiKeyEnvVar, models),
122
+ ...toProviderConfig(
123
+ config.apiRoot,
124
+ config.apiKeyEnvVar,
125
+ models,
126
+ undefined,
127
+ {
128
+ keyless,
129
+ },
130
+ ),
121
131
  });
122
132
  state.config = config;
123
133
  state.catalog = models;
@@ -152,8 +162,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
152
162
  apiRoot: DEFAULT_OMLX_BASE_URL,
153
163
  apiKeyEnvVar: "OMLX_API_KEY",
154
164
  };
165
+ const configured = resolveConfiguredApiKey() || hasOmlxTarget();
155
166
  const cached = registrableCachedModels(readCatalogCache(config.apiRoot));
156
- const fallbackCached = resolveConfiguredApiKey()
167
+ const fallbackCached = configured
157
168
  ? undefined
158
169
  : registrableCachedModels(readLastCatalogCache());
159
170
  const models = cached ?? fallbackCached;
@@ -161,7 +172,7 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
161
172
  state.config = config;
162
173
  state.catalog = [];
163
174
  state.registered = false;
164
- state.lastError = resolveConfiguredApiKey()
175
+ state.lastError = configured
165
176
  ? "No cached OMLX models with real max_context_window/max_tokens; waiting for live catalog refresh."
166
177
  : "OMLX credentials are not set. Run /login and choose OMLX.";
167
178
  state.lastRefreshAt = new Date().toISOString();
@@ -169,7 +180,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
169
180
  return;
170
181
  }
171
182
 
172
- if (resolveConfiguredApiKey()) {
183
+ // A key OR a configured base URL (keyless server) is enough to register the
184
+ // real provider. Pi omits the auth header when the resolved key is empty.
185
+ if (configured) {
173
186
  registerModels(pi, state, config, models);
174
187
  return;
175
188
  }
@@ -226,7 +239,7 @@ async function refreshProvider(
226
239
  ): Promise<RefreshResult> {
227
240
  const config = loadConfig();
228
241
  const apiKey = resolveConfiguredApiKey();
229
- if (!apiKey) {
242
+ if (!apiKey && !hasOmlxTarget()) {
230
243
  state.lastError = "OMLX credentials are not set";
231
244
  return "not_configured";
232
245
  }
@@ -237,7 +250,7 @@ async function refreshProvider(
237
250
 
238
251
  let models: OmlxModel[];
239
252
  try {
240
- models = await fetchModels(config.apiRoot, apiKey, {
253
+ models = await fetchModels(config.apiRoot, apiKey ?? "", {
241
254
  modelSettingsPath,
242
255
  timeoutMs: opts.timeoutMs,
243
256
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-omlx-picker",
3
- "version": "0.2.9",
3
+ "version": "0.3.1",
4
4
  "type": "module",
5
5
  "description": "Pi extension that discovers models from a local OMLX server and registers them as a native Pi provider.",
6
6
  "license": "MIT",
package/src/catalog.ts CHANGED
@@ -10,6 +10,8 @@ export interface OmlxModel {
10
10
  modelAlias?: string;
11
11
  contextWindow?: number;
12
12
  maxTokens?: number;
13
+ /** Model architectural ceiling (`max_model_len`). Prio-3 fallback and clamp limit. */
14
+ archContextWindow?: number;
13
15
  thinkingDefault?: boolean | null;
14
16
  taskBudgetTokens?: number;
15
17
  maxToolResultTokens?: number;
@@ -36,7 +38,7 @@ export interface CatalogDebugEvent {
36
38
 
37
39
  interface OpenAIModelsResponse {
38
40
  object: string;
39
- data: Array<{ id: string; object?: string }>;
41
+ data: Array<{ id: string; object?: string; max_model_len?: number | null }>;
40
42
  }
41
43
 
42
44
  interface OmlxModelsStatusResponse {
@@ -92,7 +94,10 @@ export function parseModelsResponse(json: unknown): OmlxModel[] {
92
94
  if (!entry || typeof entry.id !== "string" || !entry.id) continue;
93
95
  if (seen.has(entry.id)) continue;
94
96
  seen.add(entry.id);
95
- out.push({ id: entry.id });
97
+ const m: OmlxModel = { id: entry.id };
98
+ if (typeof entry.max_model_len === "number" && entry.max_model_len > 0)
99
+ m.archContextWindow = entry.max_model_len;
100
+ out.push(m);
96
101
  }
97
102
  return out;
98
103
  }
@@ -208,13 +213,15 @@ export async function fetchModels(
208
213
  opts.modelSettingsPath,
209
214
  opts.onDebug,
210
215
  );
211
- return applyApiGlobalDefaultsIfNeeded(
212
- models,
213
- apiRoot,
214
- apiKey,
215
- opts.signal,
216
- timeoutMs,
217
- opts.onDebug,
216
+ return resolveArchContextLimits(
217
+ await applyApiGlobalDefaultsIfNeeded(
218
+ models,
219
+ apiRoot,
220
+ apiKey,
221
+ opts.signal,
222
+ timeoutMs,
223
+ opts.onDebug,
224
+ ),
218
225
  );
219
226
  } catch (err) {
220
227
  if (err instanceof Error && err.name === "AbortError") throw err;
@@ -245,16 +252,35 @@ export async function fetchModels(
245
252
  opts.modelSettingsPath,
246
253
  opts.onDebug,
247
254
  );
248
- return applyApiGlobalDefaultsIfNeeded(
249
- models,
250
- apiRoot,
251
- apiKey,
252
- opts.signal,
253
- timeoutMs,
254
- opts.onDebug,
255
+ return resolveArchContextLimits(
256
+ await applyApiGlobalDefaultsIfNeeded(
257
+ models,
258
+ apiRoot,
259
+ apiKey,
260
+ opts.signal,
261
+ timeoutMs,
262
+ opts.onDebug,
263
+ ),
255
264
  );
256
265
  }
257
266
 
267
+ /**
268
+ * Final context-window resolution, applied after model-specific (prio 1) and
269
+ * global (prio 2) settings. The model's architectural ceiling
270
+ * (`archContextWindow`, from `max_model_len`) is the prio-3 fallback when no
271
+ * user setting exists, and the hard clamp when a user setting exceeds it.
272
+ */
273
+ export function resolveArchContextLimits(models: OmlxModel[]): OmlxModel[] {
274
+ return models.map((model) => {
275
+ const arch = model.archContextWindow;
276
+ if (arch == null) return model;
277
+ const next: OmlxModel = { ...model };
278
+ if (next.contextWindow == null) next.contextWindow = arch;
279
+ else if (next.contextWindow > arch) next.contextWindow = arch;
280
+ return next;
281
+ });
282
+ }
283
+
258
284
  async function applyApiGlobalDefaultsIfNeeded(
259
285
  models: OmlxModel[],
260
286
  apiRoot: string,
@@ -263,7 +289,8 @@ async function applyApiGlobalDefaultsIfNeeded(
263
289
  timeoutMs: number,
264
290
  onDebug?: (event: CatalogDebugEvent) => void,
265
291
  ): Promise<OmlxModel[]> {
266
- if (!models.some((m) => !m.contextWindow || !m.maxTokens)) return models;
292
+ if (!models.some((m) => m.contextWindow == null || m.maxTokens == null))
293
+ return models;
267
294
  let defaults: OmlxGlobalDefaults | undefined;
268
295
  try {
269
296
  defaults = await fetchGlobalDefaults(apiRoot, apiKey, signal, timeoutMs);
@@ -272,6 +299,7 @@ async function applyApiGlobalDefaultsIfNeeded(
272
299
  details: { apiRoot, defaults },
273
300
  });
274
301
  } catch (err) {
302
+ if (signal?.aborted) throw err;
275
303
  onDebug?.({
276
304
  kind: "catalog_global_settings_failed",
277
305
  details: {
@@ -281,12 +309,13 @@ async function applyApiGlobalDefaultsIfNeeded(
281
309
  });
282
310
  return models;
283
311
  }
284
- if (!defaults.contextWindow && !defaults.maxTokens) return models;
312
+ if (defaults.contextWindow == null && defaults.maxTokens == null)
313
+ return models;
285
314
  return models.map((model) => {
286
315
  const next: OmlxModel = { ...model };
287
- if (!next.contextWindow && defaults.contextWindow)
316
+ if (next.contextWindow == null && defaults.contextWindow != null)
288
317
  next.contextWindow = defaults.contextWindow;
289
- if (!next.maxTokens && defaults.maxTokens)
318
+ if (next.maxTokens == null && defaults.maxTokens != null)
290
319
  next.maxTokens = defaults.maxTokens;
291
320
  return next;
292
321
  });
@@ -325,8 +354,10 @@ async function getJson(
325
354
  timeoutMs: number,
326
355
  ): Promise<unknown> {
327
356
  const signal = withTimeout(parent, timeoutMs);
357
+ // Empty key => keyless server (skip_api_key_verification): omit the header.
358
+ const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;
328
359
  const res = await fetch(url, {
329
- headers: { Authorization: `Bearer ${apiKey}` },
360
+ headers,
330
361
  signal,
331
362
  }).catch((err) => {
332
363
  if (err instanceof Error && err.name === "AbortError") {
package/src/config.ts CHANGED
@@ -34,6 +34,18 @@ export function resolveConfiguredApiKey(
34
34
  return loadOmlxCredential()?.apiKey;
35
35
  }
36
36
 
37
+ /**
38
+ * True when the user has pointed us at a server even without an API key.
39
+ * OMLX servers run with `skip_api_key_verification: true` need no key; an
40
+ * explicit base URL (env or stored) is the signal that a keyless server is
41
+ * intended. With neither key nor base URL there is nothing to talk to.
42
+ */
43
+ export function hasOmlxTarget(env: NodeJS.ProcessEnv = process.env): boolean {
44
+ if (env.OMLX_API_KEY || env.OMLX_BASE_URL) return true;
45
+ const stored = loadOmlxCredential();
46
+ return Boolean(stored?.apiKey || stored?.baseUrl);
47
+ }
48
+
37
49
  // Legacy helper for older stored api_key credentials. Never fills only one side
38
50
  // of the env pair; partial shell overrides remain explicit shell state.
39
51
  export function applyStoredCredentialToEnv(
package/src/overflow.ts CHANGED
@@ -4,6 +4,7 @@ const OMLX_OVERFLOW_RE =
4
4
  /prompt too long[:.]?\s*(\d[\d,]*)\s*tokens?\s*exceeds\s*max(?:imum)?\s*context window of\s*(\d[\d,]*)\s*tokens?/i;
5
5
 
6
6
  export function normalizeOverflowMessage(errorMessage: string): string {
7
+ if (errorMessage.startsWith("prompt is too long:")) return errorMessage;
7
8
  const match = OMLX_OVERFLOW_RE.exec(errorMessage);
8
9
  if (!match) return errorMessage;
9
10
  const used = match[1];
package/src/provider.ts CHANGED
@@ -23,22 +23,27 @@ export function toProviderConfig(
23
23
  apiKeyEnvVar: string,
24
24
  models: OmlxModel[],
25
25
  onStreamTimeout?: (event: StreamTimeoutEvent) => void,
26
+ options: { keyless?: boolean } = {},
26
27
  ): ProviderConfig {
27
- return {
28
+ const config: ProviderConfig = {
28
29
  baseUrl: apiRoot,
29
- apiKey: `$${apiKeyEnvVar}`,
30
30
  api: "openai-completions",
31
- authHeader: true,
32
- streamSimple: (model, context, options) =>
31
+ // Keyless server (skip_api_key_verification): no auth header. Pi rejects
32
+ // authHeader:true with no key, and resolveConfigValueOrThrow would throw
33
+ // on an unset $OMLX_API_KEY — so both apiKey and authHeader stay off.
34
+ authHeader: !options.keyless,
35
+ streamSimple: (model, context, streamOptions) =>
33
36
  streamOmlxOpenAICompletions(
34
37
  model,
35
38
  context,
36
- options,
39
+ streamOptions,
37
40
  resolveFirstDeltaTimeoutMs(),
38
41
  onStreamTimeout,
39
42
  ),
40
43
  models: models.map(toProviderModel),
41
44
  };
45
+ if (!options.keyless) config.apiKey = `$${apiKeyEnvVar}`;
46
+ return config;
42
47
  }
43
48
 
44
49
  function requirePositive(
@@ -59,11 +64,11 @@ function toProviderModel(m: OmlxModel): ProviderModelConfig {
59
64
  name: m.displayName ?? m.id,
60
65
  reasoning,
61
66
  input: m.modelType === "vlm" ? ["text", "image"] : ["text"],
62
- cost: FREE_COST,
67
+ cost: { ...FREE_COST },
63
68
  contextWindow: requirePositive(m.contextWindow, m.id, "max_context_window"),
64
69
  maxTokens: requirePositive(m.maxTokens, m.id, "max_tokens"),
65
70
  compat: reasoning
66
71
  ? { ...BASE_COMPAT, thinkingFormat: thinkingFormatFor(m.reasoningParser) }
67
- : BASE_COMPAT,
72
+ : { ...BASE_COMPAT },
68
73
  };
69
74
  }
@@ -29,11 +29,8 @@ function extractAssistantParts(message: AssistantMessage): AssistantParts {
29
29
  function lastAssistantMessage(
30
30
  messages: Message[],
31
31
  ): AssistantMessage | undefined {
32
- for (let i = messages.length - 1; i >= 0; i--) {
33
- const m = messages[i];
34
- if (m.role === "assistant") return m;
35
- }
36
- return undefined;
32
+ const last = messages.at(-1);
33
+ return last?.role === "assistant" ? last : undefined;
37
34
  }
38
35
 
39
36
  function bigramCounts(s: string): Map<string, number> {
@@ -27,19 +27,6 @@ export function isThinkingEvent(event: AssistantMessageEvent): boolean {
27
27
  );
28
28
  }
29
29
 
30
- export function mergeAbortSignals(
31
- parent: AbortSignal | undefined,
32
- child: AbortSignal,
33
- ): AbortSignal {
34
- if (!parent) return child;
35
- if (parent.aborted) return parent;
36
- const controller = new AbortController();
37
- const abort = () => controller.abort();
38
- parent.addEventListener("abort", abort, { once: true });
39
- child.addEventListener("abort", abort, { once: true });
40
- return controller.signal;
41
- }
42
-
43
30
  export function eventPartial(
44
31
  event: AssistantMessageEvent,
45
32
  model: Model<Api>,
@@ -64,7 +51,7 @@ export function errorAssistantMessage(
64
51
  api: model.api,
65
52
  provider: model.provider,
66
53
  model: model.id,
67
- usage: ZERO_USAGE,
54
+ usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } },
68
55
  stopReason,
69
56
  errorMessage,
70
57
  timestamp: Date.now(),
@@ -21,6 +21,13 @@ export class StreamWriter {
21
21
  }
22
22
 
23
23
  push(event: AssistantMessageEvent): void {
24
+ if (event.type === "start") {
25
+ if (!this.startPushed) {
26
+ this.stream.push(this.startEvent ?? event);
27
+ this.startPushed = true;
28
+ }
29
+ return;
30
+ }
24
31
  if (!this.startPushed) {
25
32
  this.stream.push(
26
33
  this.startEvent ?? {
package/src/stream.ts CHANGED
@@ -7,18 +7,31 @@ import {
7
7
  type Model,
8
8
  type SimpleStreamOptions,
9
9
  } from "@earendil-works/pi-ai";
10
- import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/api/openai-completions";
10
+ // Resolve the concrete OpenAI Completions stream once, via the lazy API factory
11
+ // re-exported from compat. Calling the compat `streamSimple` dispatcher from
12
+ // inside this wrapper would re-resolve through the api-provider registry; and
13
+ // because this extension registers itself as the openai-completions handler,
14
+ // that routes dispatch -> wrapper -> dispatch -> ... and overflows the stack.
15
+ // The lazy factory returns a closure over the concrete implementation that
16
+ // loads the module on first call and calls it directly — no registry, no
17
+ // re-dispatch. It is captured here at module load, BEFORE the wrapper is
18
+ // registered, so it can never be the wrapper itself.
19
+ //
20
+ // Note: pi's extension loader (jiti) only aliases a fixed set of pi-ai
21
+ // subpaths (root, /compat, /oauth). Importing `@earendil-works/pi-ai/api/...`
22
+ // is not resolvable there, so the concrete module is reached through compat.
23
+ import { openAICompletionsApi } from "@earendil-works/pi-ai/compat";
24
+ import { PROVIDER_KEY } from "./auth-storage.ts";
11
25
  import { normalizeErrorEvent } from "./overflow.ts";
12
26
  import { isRepeatStop } from "./repeat-stop.ts";
13
- import {
14
- isMeaningfulBodyEvent,
15
- isThinkingEvent,
16
- mergeAbortSignals,
17
- } from "./stream-events.ts";
27
+ import { isMeaningfulBodyEvent, isThinkingEvent } from "./stream-events.ts";
18
28
  import { StreamWriter } from "./stream-writer.ts";
19
29
 
30
+ const streamOpenAICompletionsImpl = openAICompletionsApi().streamSimple;
31
+
20
32
  const DEFAULT_FIRST_DELTA_TIMEOUT_MS = 120_000;
21
33
  const FIRST_DELTA_MAX_ATTEMPTS = 2;
34
+ const MAX_REISSUES = 1;
22
35
 
23
36
  export type StreamTimeoutEvent = {
24
37
  model: string;
@@ -45,6 +58,32 @@ export function resolveFirstDeltaTimeoutMs(): number {
45
58
  : DEFAULT_FIRST_DELTA_TIMEOUT_MS;
46
59
  }
47
60
 
61
+ // Merge the parent (caller) signal with our own timeout signal. We compose the
62
+ // raw source signals directly via AbortSignal.any rather than chaining through
63
+ // a freshly-created controller per call: chaining previously-merged signals
64
+ // accumulates abort listeners across a long session and, when abort fires,
65
+ // propagates through N recursive .abort() calls that overflow the stack.
66
+ // AbortSignal.any keeps the merged signal detached from either source's
67
+ // listener set, and returns an already-aborted signal if either input is
68
+ // aborted (so a pre-aborted parent propagates immediately). Node >=22
69
+ // guarantees AbortSignal.any is available (engines).
70
+ function mergeTimeoutSignal(
71
+ parent: AbortSignal | undefined,
72
+ own: AbortSignal,
73
+ ): AbortSignal {
74
+ if (!parent) return own;
75
+ return AbortSignal.any([parent, own]) as AbortSignal;
76
+ }
77
+
78
+ // Always flush buffered thinking events before leaving runAttempt, including
79
+ // the timed-out and thrown paths. Previously these were dropped silently.
80
+ function flushThinking(
81
+ writer: StreamWriter,
82
+ events: AssistantMessageEvent[],
83
+ ): void {
84
+ for (const held of events) writer.push(held);
85
+ }
86
+
48
87
  async function runAttempt(
49
88
  writer: StreamWriter,
50
89
  model: Model<Api>,
@@ -56,11 +95,14 @@ async function runAttempt(
56
95
  allowReissue: boolean,
57
96
  ): Promise<AttemptResult> {
58
97
  const controller = new AbortController();
59
- const signal = mergeAbortSignals(options?.signal, controller.signal);
98
+ const signal = mergeTimeoutSignal(options?.signal, controller.signal);
60
99
  let timedOut = false;
61
100
  let firstMeaningfulEvent = false;
62
101
 
63
102
  const timer = setTimeout(() => {
103
+ // clearTimeout below does not stop a callback already queued on the
104
+ // event loop; firstMeaningfulEvent is a load-bearing guard against the
105
+ // timer firing just after the first body event cleared it.
64
106
  if (writer.closed || firstMeaningfulEvent) return;
65
107
  timedOut = true;
66
108
  onTimeout?.({
@@ -76,7 +118,9 @@ async function runAttempt(
76
118
  let bufferedThinking: AssistantMessageEvent[] = [];
77
119
 
78
120
  try {
79
- const inner = streamSimpleOpenAICompletions(
121
+ // Direct call into the concrete implementation: no dispatch, so a model
122
+ // whose provider registered this wrapper cannot recurse back into it.
123
+ const inner = streamOpenAICompletionsImpl(
80
124
  model as Model<"openai-completions">,
81
125
  context,
82
126
  { ...options, signal },
@@ -96,18 +140,27 @@ async function runAttempt(
96
140
  continue;
97
141
  }
98
142
  if (allowReissue && isRepeatStop(event, context)) {
143
+ // Tear down the inner stream's network connection immediately so it
144
+ // does not drain unpredictably while the caller reissues.
145
+ controller.abort();
99
146
  bufferedThinking = [];
100
147
  return "reissue";
101
148
  }
102
- for (const held of bufferedThinking) writer.push(held);
149
+ flushThinking(writer, bufferedThinking);
103
150
  bufferedThinking = [];
104
151
  writer.push(normalizeErrorEvent(event));
105
152
  if (event.type === "done" || event.type === "error") break;
106
153
  }
154
+ } catch (err) {
155
+ flushThinking(writer, bufferedThinking);
156
+ if (timedOut) return "timed-out";
157
+ throw err;
107
158
  } finally {
108
159
  clearTimeout(timer);
109
160
  }
110
161
 
162
+ flushThinking(writer, bufferedThinking);
163
+
111
164
  return timedOut ? "timed-out" : "completed";
112
165
  }
113
166
 
@@ -118,13 +171,32 @@ export function streamOmlxOpenAICompletions(
118
171
  firstDeltaTimeoutMs: number,
119
172
  onTimeout: OnStreamTimeout | undefined,
120
173
  ): AssistantMessageEventStream {
174
+ // Pi dispatches stream handlers by api id, not provider. Registering this
175
+ // wrapper as the openai-completions streamSimple handler (the mechanism pi
176
+ // exposes) replaces the shared entry, so non-oMLX OpenAI-compatible models
177
+ // (groq, zai, glm, ...) also arrive here. Pass them straight through to the
178
+ // concrete implementation with no oMLX-specific timeout/reissue logic, so the
179
+ // extension cannot perturb unrelated providers.
180
+ if (model.provider !== PROVIDER_KEY) {
181
+ return streamOpenAICompletionsImpl(
182
+ model as Model<"openai-completions">,
183
+ context,
184
+ options,
185
+ );
186
+ }
187
+
121
188
  const stream = createAssistantMessageEventStream();
122
189
  const writer = new StreamWriter(stream, model);
123
190
 
124
191
  (async () => {
125
192
  try {
126
- let reissued = false;
127
- for (let attempt = 1; attempt <= FIRST_DELTA_MAX_ATTEMPTS; attempt++) {
193
+ // Separate budgets so a reissue never consumes a timeout attempt and
194
+ // the loop index is never mutated: timeoutAttemptsLeft governs how
195
+ // many timed-out retries remain, reissueLeft governs reissues.
196
+ let timeoutAttemptsLeft = FIRST_DELTA_MAX_ATTEMPTS;
197
+ let reissueLeft = MAX_REISSUES;
198
+ while (true) {
199
+ const attempt = FIRST_DELTA_MAX_ATTEMPTS - timeoutAttemptsLeft + 1;
128
200
  const result = await runAttempt(
129
201
  writer,
130
202
  model,
@@ -133,20 +205,22 @@ export function streamOmlxOpenAICompletions(
133
205
  firstDeltaTimeoutMs,
134
206
  attempt,
135
207
  onTimeout,
136
- !reissued,
208
+ reissueLeft > 0,
137
209
  );
138
210
  if (writer.closed) return;
139
211
 
140
212
  if (result === "reissue") {
141
- reissued = true;
142
- attempt--; // a re-issue doesn't consume a timeout attempt
213
+ reissueLeft--;
143
214
  continue;
144
215
  }
145
216
  if (result === "completed") {
146
217
  writer.end();
147
218
  return;
148
219
  }
149
- if (attempt >= FIRST_DELTA_MAX_ATTEMPTS) {
220
+
221
+ // timed-out
222
+ timeoutAttemptsLeft--;
223
+ if (timeoutAttemptsLeft <= 0) {
150
224
  writer.pushError(
151
225
  firstDeltaTimeoutMessage(
152
226
  firstDeltaTimeoutMs,
@@ -19,6 +19,6 @@ export function thinkingFormatFor(
19
19
  if (!reasoningParser) return NO_THINKING_FORMAT;
20
20
  return (
21
21
  REASONING_PARSER_FORMATS[reasoningParser.toLowerCase()] ??
22
- OMLX_CHAT_TEMPLATE_FORMAT
22
+ NO_THINKING_FORMAT
23
23
  );
24
24
  }