pi-omlx-picker 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/stream-events.ts +0 -13
- package/src/stream.ts +85 -18
package/package.json
CHANGED
package/src/stream-events.ts
CHANGED
|
@@ -27,19 +27,6 @@ export function isThinkingEvent(event: AssistantMessageEvent): boolean {
|
|
|
27
27
|
);
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
export function mergeAbortSignals(
|
|
31
|
-
parent: AbortSignal | undefined,
|
|
32
|
-
child: AbortSignal,
|
|
33
|
-
): AbortSignal {
|
|
34
|
-
if (!parent) return child;
|
|
35
|
-
if (parent.aborted) return parent;
|
|
36
|
-
const controller = new AbortController();
|
|
37
|
-
const abort = () => controller.abort();
|
|
38
|
-
parent.addEventListener("abort", abort, { once: true });
|
|
39
|
-
child.addEventListener("abort", abort, { once: true });
|
|
40
|
-
return controller.signal;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
30
|
export function eventPartial(
|
|
44
31
|
event: AssistantMessageEvent,
|
|
45
32
|
model: Model<Api>,
|
package/src/stream.ts
CHANGED
|
@@ -7,18 +7,31 @@ import {
|
|
|
7
7
|
type Model,
|
|
8
8
|
type SimpleStreamOptions,
|
|
9
9
|
} from "@earendil-works/pi-ai";
|
|
10
|
-
|
|
10
|
+
// Resolve the concrete OpenAI Completions stream once, via the lazy API factory
|
|
11
|
+
// re-exported from compat. Calling the compat `streamSimple` dispatcher from
|
|
12
|
+
// inside this wrapper would re-resolve through the api-provider registry, and
|
|
13
|
+
// because this extension registers itself as the openai-completions handler,
|
|
14
|
+
// that routes dispatch -> wrapper -> dispatch -> ... and overflows the stack.
|
|
15
|
+
// The lazy factory returns a closure over the concrete implementation that
|
|
16
|
+
// loads the module on first call and calls it directly — no registry, no
|
|
17
|
+
// re-dispatch. It is captured here at module load, BEFORE the wrapper is
|
|
18
|
+
// registered, so it can never be the wrapper itself.
|
|
19
|
+
//
|
|
20
|
+
// Note: pi's extension loader (jiti) only aliases a fixed set of pi-ai
|
|
21
|
+
// subpaths (root, /compat, /oauth). Importing `@earendil-works/pi-ai/api/...`
|
|
22
|
+
// is not resolvable there, so the concrete module is reached through compat.
|
|
23
|
+
import { openAICompletionsApi } from "@earendil-works/pi-ai/compat";
|
|
24
|
+
import { PROVIDER_KEY } from "./auth-storage.ts";
|
|
11
25
|
import { normalizeErrorEvent } from "./overflow.ts";
|
|
12
26
|
import { isRepeatStop } from "./repeat-stop.ts";
|
|
13
|
-
import {
|
|
14
|
-
isMeaningfulBodyEvent,
|
|
15
|
-
isThinkingEvent,
|
|
16
|
-
mergeAbortSignals,
|
|
17
|
-
} from "./stream-events.ts";
|
|
27
|
+
import { isMeaningfulBodyEvent, isThinkingEvent } from "./stream-events.ts";
|
|
18
28
|
import { StreamWriter } from "./stream-writer.ts";
|
|
19
29
|
|
|
30
|
+
const streamOpenAICompletionsImpl = openAICompletionsApi().streamSimple;
|
|
31
|
+
|
|
20
32
|
const DEFAULT_FIRST_DELTA_TIMEOUT_MS = 120_000;
|
|
21
33
|
const FIRST_DELTA_MAX_ATTEMPTS = 2;
|
|
34
|
+
const MAX_REISSUES = 1;
|
|
22
35
|
|
|
23
36
|
export type StreamTimeoutEvent = {
|
|
24
37
|
model: string;
|
|
@@ -45,6 +58,32 @@ export function resolveFirstDeltaTimeoutMs(): number {
|
|
|
45
58
|
: DEFAULT_FIRST_DELTA_TIMEOUT_MS;
|
|
46
59
|
}
|
|
47
60
|
|
|
61
|
+
// Merge the parent (caller) signal with our own timeout signal. We compose the
|
|
62
|
+
// raw source signals directly via AbortSignal.any rather than chaining through
|
|
63
|
+
// a freshly-created controller per call: chaining previously-merged signals
|
|
64
|
+
// accumulates abort listeners across a long session and, when abort fires,
|
|
65
|
+
// propagates through N recursive .abort() calls that overflow the stack.
|
|
66
|
+
// AbortSignal.any keeps the merged signal detached from either source's
|
|
67
|
+
// listener set, and returns an already-aborted signal if either input is
|
|
68
|
+
// aborted (so a pre-aborted parent propagates immediately). Node >=22
|
|
69
|
+
// guarantees AbortSignal.any is available (engines).
|
|
70
|
+
function mergeTimeoutSignal(
|
|
71
|
+
parent: AbortSignal | undefined,
|
|
72
|
+
own: AbortSignal,
|
|
73
|
+
): AbortSignal {
|
|
74
|
+
if (!parent) return own;
|
|
75
|
+
return AbortSignal.any([parent, own]) as AbortSignal;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Always flush buffered thinking events before leaving runAttempt, including
|
|
79
|
+
// the timed-out and thrown paths, where they used to be dropped silently. (A reissue still discards them on purpose.)
|
|
80
|
+
function flushThinking(
|
|
81
|
+
writer: StreamWriter,
|
|
82
|
+
events: AssistantMessageEvent[],
|
|
83
|
+
): void {
|
|
84
|
+
for (const held of events) writer.push(held);
|
|
85
|
+
}
|
|
86
|
+
|
|
48
87
|
async function runAttempt(
|
|
49
88
|
writer: StreamWriter,
|
|
50
89
|
model: Model<Api>,
|
|
@@ -56,11 +95,14 @@ async function runAttempt(
|
|
|
56
95
|
allowReissue: boolean,
|
|
57
96
|
): Promise<AttemptResult> {
|
|
58
97
|
const controller = new AbortController();
|
|
59
|
-
const signal =
|
|
98
|
+
const signal = mergeTimeoutSignal(options?.signal, controller.signal);
|
|
60
99
|
let timedOut = false;
|
|
61
100
|
let firstMeaningfulEvent = false;
|
|
62
101
|
|
|
63
102
|
const timer = setTimeout(() => {
|
|
103
|
+
// clearTimeout below does not stop a callback already queued on the
|
|
104
|
+
// event loop; firstMeaningfulEvent is a load-bearing guard against the
|
|
105
|
+
// timer firing just after the first body event cleared it.
|
|
64
106
|
if (writer.closed || firstMeaningfulEvent) return;
|
|
65
107
|
timedOut = true;
|
|
66
108
|
onTimeout?.({
|
|
@@ -76,7 +118,9 @@ async function runAttempt(
|
|
|
76
118
|
let bufferedThinking: AssistantMessageEvent[] = [];
|
|
77
119
|
|
|
78
120
|
try {
|
|
79
|
-
|
|
121
|
+
// Direct call into the concrete implementation: no dispatch, so a model
|
|
122
|
+
// whose provider registered this wrapper cannot recurse back into it.
|
|
123
|
+
const inner = streamOpenAICompletionsImpl(
|
|
80
124
|
model as Model<"openai-completions">,
|
|
81
125
|
context,
|
|
82
126
|
{ ...options, signal },
|
|
@@ -96,24 +140,26 @@ async function runAttempt(
|
|
|
96
140
|
continue;
|
|
97
141
|
}
|
|
98
142
|
if (allowReissue && isRepeatStop(event, context)) {
|
|
143
|
+
// Tear down the inner stream's network connection immediately so it
|
|
144
|
+
// does not drain unpredictably while the caller reissues.
|
|
145
|
+
controller.abort();
|
|
99
146
|
bufferedThinking = [];
|
|
100
147
|
return "reissue";
|
|
101
148
|
}
|
|
102
|
-
|
|
149
|
+
flushThinking(writer, bufferedThinking);
|
|
103
150
|
bufferedThinking = [];
|
|
104
151
|
writer.push(normalizeErrorEvent(event));
|
|
105
152
|
if (event.type === "done" || event.type === "error") break;
|
|
106
153
|
}
|
|
107
154
|
} catch (err) {
|
|
155
|
+
flushThinking(writer, bufferedThinking);
|
|
108
156
|
if (timedOut) return "timed-out";
|
|
109
157
|
throw err;
|
|
110
158
|
} finally {
|
|
111
159
|
clearTimeout(timer);
|
|
112
160
|
}
|
|
113
161
|
|
|
114
|
-
|
|
115
|
-
for (const held of bufferedThinking) writer.push(held);
|
|
116
|
-
}
|
|
162
|
+
flushThinking(writer, bufferedThinking);
|
|
117
163
|
|
|
118
164
|
return timedOut ? "timed-out" : "completed";
|
|
119
165
|
}
|
|
@@ -125,13 +171,32 @@ export function streamOmlxOpenAICompletions(
|
|
|
125
171
|
firstDeltaTimeoutMs: number,
|
|
126
172
|
onTimeout: OnStreamTimeout | undefined,
|
|
127
173
|
): AssistantMessageEventStream {
|
|
174
|
+
// Pi dispatches stream handlers by api id, not provider. Registering this
|
|
175
|
+
// wrapper as the openai-completions streamSimple handler (the mechanism pi
|
|
176
|
+
// exposes) replaces the shared entry, so non-oMLX OpenAI-compatible models
|
|
177
|
+
// (groq, zai, glm, ...) also arrive here. Pass them straight through to the
|
|
178
|
+
// concrete implementation with no oMLX-specific timeout/reissue logic, so the
|
|
179
|
+
// extension cannot perturb unrelated providers.
|
|
180
|
+
if (model.provider !== PROVIDER_KEY) {
|
|
181
|
+
return streamOpenAICompletionsImpl(
|
|
182
|
+
model as Model<"openai-completions">,
|
|
183
|
+
context,
|
|
184
|
+
options,
|
|
185
|
+
);
|
|
186
|
+
}
|
|
187
|
+
|
|
128
188
|
const stream = createAssistantMessageEventStream();
|
|
129
189
|
const writer = new StreamWriter(stream, model);
|
|
130
190
|
|
|
131
191
|
(async () => {
|
|
132
192
|
try {
|
|
133
|
-
|
|
134
|
-
|
|
193
|
+
// Separate budgets so a reissue never consumes a timeout attempt and
|
|
194
|
+
// the loop index is never mutated: timeoutAttemptsLeft governs how
|
|
195
|
+
// many attempts may still time out, reissueLeft governs reissues.
|
|
196
|
+
let timeoutAttemptsLeft = FIRST_DELTA_MAX_ATTEMPTS;
|
|
197
|
+
let reissueLeft = MAX_REISSUES;
|
|
198
|
+
while (true) {
|
|
199
|
+
const attempt = FIRST_DELTA_MAX_ATTEMPTS - timeoutAttemptsLeft + 1;
|
|
135
200
|
const result = await runAttempt(
|
|
136
201
|
writer,
|
|
137
202
|
model,
|
|
@@ -140,20 +205,22 @@ export function streamOmlxOpenAICompletions(
|
|
|
140
205
|
firstDeltaTimeoutMs,
|
|
141
206
|
attempt,
|
|
142
207
|
onTimeout,
|
|
143
|
-
|
|
208
|
+
reissueLeft > 0,
|
|
144
209
|
);
|
|
145
210
|
if (writer.closed) return;
|
|
146
211
|
|
|
147
212
|
if (result === "reissue") {
|
|
148
|
-
|
|
149
|
-
attempt--; // a re-issue doesn't consume a timeout attempt
|
|
213
|
+
reissueLeft--;
|
|
150
214
|
continue;
|
|
151
215
|
}
|
|
152
216
|
if (result === "completed") {
|
|
153
217
|
writer.end();
|
|
154
218
|
return;
|
|
155
219
|
}
|
|
156
|
-
|
|
220
|
+
|
|
221
|
+
// timed-out
|
|
222
|
+
timeoutAttemptsLeft--;
|
|
223
|
+
if (timeoutAttemptsLeft <= 0) {
|
|
157
224
|
writer.pushError(
|
|
158
225
|
firstDeltaTimeoutMessage(
|
|
159
226
|
firstDeltaTimeoutMs,
|