pi-omlx-picker 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-omlx-picker",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "type": "module",
5
5
  "description": "Pi extension that discovers models from a local OMLX server and registers them as a native Pi provider.",
6
6
  "license": "MIT",
package/src/stream-events.ts CHANGED
@@ -27,19 +27,6 @@ export function isThinkingEvent(event: AssistantMessageEvent): boolean {
27
27
  );
28
28
  }
29
29
 
30
- export function mergeAbortSignals(
31
- parent: AbortSignal | undefined,
32
- child: AbortSignal,
33
- ): AbortSignal {
34
- if (!parent) return child;
35
- if (parent.aborted) return parent;
36
- const controller = new AbortController();
37
- const abort = () => controller.abort();
38
- parent.addEventListener("abort", abort, { once: true });
39
- child.addEventListener("abort", abort, { once: true });
40
- return controller.signal;
41
- }
42
-
43
30
  export function eventPartial(
44
31
  event: AssistantMessageEvent,
45
32
  model: Model<Api>,
package/src/stream.ts CHANGED
@@ -7,18 +7,31 @@ import {
7
7
  type Model,
8
8
  type SimpleStreamOptions,
9
9
  } from "@earendil-works/pi-ai";
10
- import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/compat";
10
+ // Resolve the concrete OpenAI Completions stream once, via the lazy API factory
11
+ // re-exported from compat. Calling the compat `streamSimple` dispatcher from
12
+ // inside this wrapper would re-resolve through the api-provider registry; and
13
+ // because this extension registers itself as the openai-completions handler,
14
+ // that routes dispatch -> wrapper -> dispatch -> ... and overflows the stack.
15
+ // The lazy factory returns a closure over the concrete implementation that
16
+ // loads the module on first call and calls it directly — no registry, no
17
+ // re-dispatch. It is captured here at module load, BEFORE the wrapper is
18
+ // registered, so it can never be the wrapper itself.
19
+ //
20
+ // Note: pi's extension loader (jiti) only aliases a fixed set of pi-ai
21
+ // subpaths (root, /compat, /oauth). Importing `@earendil-works/pi-ai/api/...`
22
+ // is not resolvable there, so the concrete module is reached through compat.
23
+ import { openAICompletionsApi } from "@earendil-works/pi-ai/compat";
24
+ import { PROVIDER_KEY } from "./auth-storage.ts";
11
25
  import { normalizeErrorEvent } from "./overflow.ts";
12
26
  import { isRepeatStop } from "./repeat-stop.ts";
13
- import {
14
- isMeaningfulBodyEvent,
15
- isThinkingEvent,
16
- mergeAbortSignals,
17
- } from "./stream-events.ts";
27
+ import { isMeaningfulBodyEvent, isThinkingEvent } from "./stream-events.ts";
18
28
  import { StreamWriter } from "./stream-writer.ts";
19
29
 
30
+ const streamOpenAICompletionsImpl = openAICompletionsApi().streamSimple;
31
+
20
32
  const DEFAULT_FIRST_DELTA_TIMEOUT_MS = 120_000;
21
33
  const FIRST_DELTA_MAX_ATTEMPTS = 2;
34
+ const MAX_REISSUES = 1;
22
35
 
23
36
  export type StreamTimeoutEvent = {
24
37
  model: string;
@@ -45,6 +58,32 @@ export function resolveFirstDeltaTimeoutMs(): number {
45
58
  : DEFAULT_FIRST_DELTA_TIMEOUT_MS;
46
59
  }
47
60
 
61
+ // Merge the parent (caller) signal with our own timeout signal. We compose the
62
+ // raw source signals directly via AbortSignal.any rather than chaining through
63
+ // a freshly-created controller per call: chaining previously-merged signals
64
+ // accumulates abort listeners across a long session and, when abort fires,
65
+ // propagates through N recursive .abort() calls that overflow the stack.
66
+ // AbortSignal.any keeps the merged signal detached from either source's
67
+ // listener set, and returns an already-aborted signal if either input is
68
+ // aborted (so a pre-aborted parent propagates immediately). Node >=22
69
+ // guarantees AbortSignal.any is available (engines).
70
+ function mergeTimeoutSignal(
71
+ parent: AbortSignal | undefined,
72
+ own: AbortSignal,
73
+ ): AbortSignal {
74
+ if (!parent) return own;
75
+ return AbortSignal.any([parent, own]) as AbortSignal;
76
+ }
77
+
78
+ // Always flush buffered thinking events before leaving runAttempt, including
79
+ // the timed-out and thrown paths. Previously these were dropped silently.
80
+ function flushThinking(
81
+ writer: StreamWriter,
82
+ events: AssistantMessageEvent[],
83
+ ): void {
84
+ for (const held of events) writer.push(held);
85
+ }
86
+
48
87
  async function runAttempt(
49
88
  writer: StreamWriter,
50
89
  model: Model<Api>,
@@ -56,11 +95,14 @@ async function runAttempt(
56
95
  allowReissue: boolean,
57
96
  ): Promise<AttemptResult> {
58
97
  const controller = new AbortController();
59
- const signal = mergeAbortSignals(options?.signal, controller.signal);
98
+ const signal = mergeTimeoutSignal(options?.signal, controller.signal);
60
99
  let timedOut = false;
61
100
  let firstMeaningfulEvent = false;
62
101
 
63
102
  const timer = setTimeout(() => {
103
+ // clearTimeout below does not stop a callback already queued on the
104
+ // event loop; firstMeaningfulEvent is a load-bearing guard against the
105
+ // timer firing just after the first body event cleared it.
64
106
  if (writer.closed || firstMeaningfulEvent) return;
65
107
  timedOut = true;
66
108
  onTimeout?.({
@@ -76,7 +118,9 @@ async function runAttempt(
76
118
  let bufferedThinking: AssistantMessageEvent[] = [];
77
119
 
78
120
  try {
79
- const inner = streamSimpleOpenAICompletions(
121
+ // Direct call into the concrete implementation: no dispatch, so a model
122
+ // whose provider registered this wrapper cannot recurse back into it.
123
+ const inner = streamOpenAICompletionsImpl(
80
124
  model as Model<"openai-completions">,
81
125
  context,
82
126
  { ...options, signal },
@@ -96,24 +140,26 @@ async function runAttempt(
96
140
  continue;
97
141
  }
98
142
  if (allowReissue && isRepeatStop(event, context)) {
143
+ // Tear down the inner stream's network connection immediately so it
144
+ // does not drain unpredictably while the caller reissues.
145
+ controller.abort();
99
146
  bufferedThinking = [];
100
147
  return "reissue";
101
148
  }
102
- for (const held of bufferedThinking) writer.push(held);
149
+ flushThinking(writer, bufferedThinking);
103
150
  bufferedThinking = [];
104
151
  writer.push(normalizeErrorEvent(event));
105
152
  if (event.type === "done" || event.type === "error") break;
106
153
  }
107
154
  } catch (err) {
155
+ flushThinking(writer, bufferedThinking);
108
156
  if (timedOut) return "timed-out";
109
157
  throw err;
110
158
  } finally {
111
159
  clearTimeout(timer);
112
160
  }
113
161
 
114
- if (!timedOut) {
115
- for (const held of bufferedThinking) writer.push(held);
116
- }
162
+ flushThinking(writer, bufferedThinking);
117
163
 
118
164
  return timedOut ? "timed-out" : "completed";
119
165
  }
@@ -125,13 +171,32 @@ export function streamOmlxOpenAICompletions(
125
171
  firstDeltaTimeoutMs: number,
126
172
  onTimeout: OnStreamTimeout | undefined,
127
173
  ): AssistantMessageEventStream {
174
+ // Pi dispatches stream handlers by api id, not provider. Registering this
175
+ // wrapper as the openai-completions streamSimple handler (the mechanism pi
176
+ // exposes) replaces the shared entry, so non-oMLX OpenAI-compatible models
177
+ // (groq, zai, glm, ...) also arrive here. Pass them straight through to the
178
+ // concrete implementation with no oMLX-specific timeout/reissue logic, so the
179
+ // extension cannot perturb unrelated providers.
180
+ if (model.provider !== PROVIDER_KEY) {
181
+ return streamOpenAICompletionsImpl(
182
+ model as Model<"openai-completions">,
183
+ context,
184
+ options,
185
+ );
186
+ }
187
+
128
188
  const stream = createAssistantMessageEventStream();
129
189
  const writer = new StreamWriter(stream, model);
130
190
 
131
191
  (async () => {
132
192
  try {
133
- let reissued = false;
134
- for (let attempt = 1; attempt <= FIRST_DELTA_MAX_ATTEMPTS; attempt++) {
193
+ // Separate budgets so a reissue never consumes a timeout attempt and
194
+ // the loop index is never mutated: timeoutAttemptsLeft governs how
195
+ // many timed-out retries remain, reissueLeft governs reissues.
196
+ let timeoutAttemptsLeft = FIRST_DELTA_MAX_ATTEMPTS;
197
+ let reissueLeft = MAX_REISSUES;
198
+ while (true) {
199
+ const attempt = FIRST_DELTA_MAX_ATTEMPTS - timeoutAttemptsLeft + 1;
135
200
  const result = await runAttempt(
136
201
  writer,
137
202
  model,
@@ -140,20 +205,22 @@ export function streamOmlxOpenAICompletions(
140
205
  firstDeltaTimeoutMs,
141
206
  attempt,
142
207
  onTimeout,
143
- !reissued,
208
+ reissueLeft > 0,
144
209
  );
145
210
  if (writer.closed) return;
146
211
 
147
212
  if (result === "reissue") {
148
- reissued = true;
149
- attempt--; // a re-issue doesn't consume a timeout attempt
213
+ reissueLeft--;
150
214
  continue;
151
215
  }
152
216
  if (result === "completed") {
153
217
  writer.end();
154
218
  return;
155
219
  }
156
- if (attempt >= FIRST_DELTA_MAX_ATTEMPTS) {
220
+
221
+ // timed-out
222
+ timeoutAttemptsLeft--;
223
+ if (timeoutAttemptsLeft <= 0) {
157
224
  writer.pushError(
158
225
  firstDeltaTimeoutMessage(
159
226
  firstDeltaTimeoutMs,