@oh-my-pi/pi-utils 14.7.2 → 14.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -2
  2. package/src/stream.ts +132 -64
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-utils",
4
- "version": "14.7.2",
4
+ "version": "14.7.4",
5
5
  "description": "Shared utilities for pi packages",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -38,7 +38,7 @@
38
38
  },
39
39
  "devDependencies": {
40
40
  "@types/bun": "^1.3.13",
41
- "@oh-my-pi/pi-natives": "14.7.2"
41
+ "@oh-my-pi/pi-natives": "14.7.4"
42
42
  },
43
43
  "engines": {
44
44
  "bun": ">=1.3.7"
package/src/stream.ts CHANGED
@@ -76,31 +76,6 @@ export async function* readJsonl<T>(stream: ReadableStream<Uint8Array>, signal?:
76
76
  // SSE (Server-Sent Events)
77
77
  // =============================================================================
78
78
 
79
- /** Byte lookup table: 1 = whitespace, 0 = not. */
80
- const WS = new Uint8Array(256);
81
- WS[0x09] = 1; // tab
82
- WS[0x0a] = 1; // LF
83
- WS[0x0d] = 1; // CR
84
- WS[0x20] = 1; // space
85
-
86
- const createPattern = (prefix: string) => {
87
- const pre = Buffer.from(prefix, "utf-8");
88
- return {
89
- strip(buf: Uint8Array): number | null {
90
- const n = pre.length;
91
- if (buf.length < n) return null;
92
- if (pre.equals(buf.subarray(0, n))) {
93
- return n;
94
- }
95
- return null;
96
- },
97
- };
98
- };
99
-
100
- const PAT_DATA = createPattern("data:");
101
-
102
- const PAT_DONE = createPattern("[DONE]");
103
-
104
79
  class ConcatSink {
105
80
  #space?: Buffer;
106
81
  #length = 0;
@@ -208,11 +183,15 @@ class ConcatSink {
208
183
  }
209
184
  }
210
185
 
211
- const kDoneError = new Error("SSE stream done");
212
-
213
186
  /**
214
187
  * Stream parsed JSON objects from SSE `data:` lines.
215
188
  *
189
+ * Thin wrapper over {@link readSseEvents}: yields one parsed JSON value per
190
+ * dispatched SSE event, skipping events with empty `data` and stopping at the
191
+ * OpenAI-style `[DONE]` sentinel. If your consumer doesn't care about `event:`
192
+ * names or doesn't need a custom parse step, use this; otherwise call
193
+ * `readSseEvents` directly.
194
+ *
216
195
  * @example
217
196
  * ```ts
218
197
  * for await (const obj of readSseJson(response.body!)) {
@@ -221,61 +200,150 @@ const kDoneError = new Error("SSE stream done");
221
200
  * ```
222
201
  */
223
202
  export async function* readSseJson<T>(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<T> {
224
- const lineBuffer = new ConcatSink();
225
- const jsonBuffer = new ConcatSink();
203
+ for await (const sse of readSseEvents(stream, signal)) {
204
+ const data = sse.data;
205
+ if (data === "" || data === "[DONE]") {
206
+ if (data === "[DONE]") return;
207
+ continue;
208
+ }
209
+ yield JSON.parse(data) as T;
210
+ }
211
+ }
226
212
 
227
- // pipeThrough with { signal } makes the stream abort-aware: the pipe
228
- // cancels the source and errors the output when the signal fires,
229
- // so for-await-of exits cleanly without manual reader/listener management.
230
- stream = createAbortableStream(stream, signal);
231
- try {
232
- const processLine = function* (line: Uint8Array) {
233
- // Strip trailing spaces including \r.
234
- let end = line.length;
235
- while (end && WS[line[end - 1]]) {
236
- --end;
237
- }
238
- if (!end) return; // blank line
213
+ /**
214
+ * A single Server-Sent Event dispatched on a blank-line boundary.
215
+ *
216
+ * - `event` is the value of the most recent `event:` field, or `null` if none.
217
+ * - `data` is the concatenation (joined by `\n`) of every `data:` field in the
218
+ * event, exactly as required by the SSE spec.
219
+ * - `raw` is the list of decoded non-empty lines that made up the event,
220
+ * preserved for diagnostic context (error reporting, debugging). The
221
+ * dispatching blank line is not included.
222
+ */
223
+ export interface ServerSentEvent {
224
+ event: string | null;
225
+ data: string;
226
+ raw: string[];
227
+ }
239
228
 
240
- const trimmed = end === line.length ? line : line.subarray(0, end);
229
+ interface SseEventState {
230
+ event: string | null;
231
+ // `data` accumulates across multiple `data:` lines per the SSE spec, joined
232
+ // by `\n`. We keep the running string here and append as lines arrive instead
233
+ // of buffering an array and joining at flush. `null` means "no data: field
234
+ // seen yet" (distinct from a `data:` field with an empty value).
235
+ data: string | null;
236
+ raw: string[];
237
+ }
241
238
 
242
- // Check "data:" prefix and optional space afterwards.
243
- let beg = PAT_DATA.strip(trimmed);
244
- if (beg === null) return;
245
- while (beg < end && WS[trimmed[beg]]) {
246
- ++beg;
247
- }
248
- if (beg >= end) return;
239
+ // Single decoder reused for all line decodes. Safe because lines are split on
240
+ // LF (0x0a) which is always a single-byte ASCII char in UTF-8 and never appears
241
+ // inside a multi-byte sequence — so each line is itself a complete UTF-8 run.
242
+ const SSE_LINE_DECODER = new TextDecoder("utf-8");
249
243
 
250
- // Fast-path: the OpenAI-style done marker isn't JSON.
251
- const donePrefix = PAT_DONE.strip(trimmed.subarray(beg, end));
252
- if (donePrefix !== null && donePrefix === end - beg) {
253
- throw kDoneError;
254
- }
244
+ function decodeSseLineBytes(line: Uint8Array, end: number): string {
245
+ return end === line.length ? SSE_LINE_DECODER.decode(line) : SSE_LINE_DECODER.decode(line.subarray(0, end));
246
+ }
255
247
 
256
- yield* jsonBuffer.pullJSONL<T>(trimmed, beg, end);
257
- };
258
- for await (const chunk of stream) {
248
+ function flushSseEvent(state: SseEventState): ServerSentEvent | null {
249
+ if (state.event === null && state.data === null) return null;
250
+ const event: ServerSentEvent = {
251
+ event: state.event,
252
+ data: state.data ?? "",
253
+ raw: state.raw,
254
+ };
255
+ state.event = null;
256
+ state.data = null;
257
+ state.raw = [];
258
+ return event;
259
+ }
260
+
261
+ function pushSseLine(line: Uint8Array, state: SseEventState): ServerSentEvent | null {
262
+ // `appendAndFlushLines` splits on LF only; strip a trailing CR so CRLF sources
263
+ // don't leak `\r` into field values.
264
+ let end = line.length;
265
+ if (end > 0 && line[end - 1] === 0x0d /* '\r' */) end--;
266
+ if (end === 0) return flushSseEvent(state);
267
+
268
+ // Comment line: keep in `raw` for diagnostic context, skip parsing.
269
+ if (line[0] === 0x3a /* ':' */) {
270
+ state.raw.push(decodeSseLineBytes(line, end));
271
+ return null;
272
+ }
273
+
274
+ const text = decodeSseLineBytes(line, end);
275
+ state.raw.push(text);
276
+
277
+ const colon = text.indexOf(":");
278
+ const fieldName = colon === -1 ? text : text.slice(0, colon);
279
+ let value = colon === -1 ? "" : text.slice(colon + 1);
280
+ if (value.charCodeAt(0) === 0x20 /* ' ' */) value = value.slice(1);
281
+
282
+ if (fieldName === "event") {
283
+ state.event = value;
284
+ } else if (fieldName === "data") {
285
+ if (state.data === null) {
286
+ state.data = value;
287
+ } else {
288
+ state.data += "\n";
289
+ state.data += value;
290
+ }
291
+ }
292
+ // `id` and `retry` are intentionally ignored — the providers we consume
293
+ // don't use them, and the underlying transport handles reconnects itself.
294
+ return null;
295
+ }
296
+
297
+ /**
298
+ * Stream raw Server-Sent Events from an HTTP response body.
299
+ *
300
+ * Yields one `ServerSentEvent` per blank-line dispatch. The consumer is
301
+ * responsible for parsing `data` (e.g. JSON, plain text, error envelope).
302
+ * Use `readSseJson` instead when every event is a single `data:` JSON object
303
+ * and you don't need access to the `event:` field.
304
+ *
305
+ * Internally backed by a Buffer-based line reader (`ConcatSink`) so chunk
306
+ * concatenation is O(n) and never triggers per-line string slicing of the
307
+ * accumulated buffer.
308
+ *
309
+ * @example
310
+ * ```ts
311
+ * for await (const sse of readSseEvents(response.body!)) {
312
+ * if (sse.event === "ping") continue;
313
+ * const obj = JSON.parse(sse.data);
314
+ * }
315
+ * ```
316
+ */
317
+ export async function* readSseEvents(
318
+ stream: ReadableStream<Uint8Array>,
319
+ signal?: AbortSignal,
320
+ ): AsyncGenerator<ServerSentEvent> {
321
+ const lineBuffer = new ConcatSink();
322
+ const state: SseEventState = { event: null, data: null, raw: [] };
323
+ const source = createAbortableStream(stream, signal);
324
+ try {
325
+ for await (const chunk of source) {
259
326
  for (const line of lineBuffer.appendAndFlushLines(chunk)) {
260
- yield* processLine(line);
327
+ const event = pushSseLine(line, state);
328
+ if (event) yield event;
261
329
  }
262
330
  }
331
+ // Treat any trailing partial line (no terminating LF) as a complete line.
263
332
  if (!lineBuffer.isEmpty) {
264
333
  const tail = lineBuffer.flush();
265
334
  if (tail) {
266
335
  lineBuffer.clear();
267
- yield* processLine(tail);
336
+ const event = pushSseLine(tail, state);
337
+ if (event) yield event;
268
338
  }
269
339
  }
340
+ // Real services don't always close on a blank line — flush any pending event.
341
+ const trailing = flushSseEvent(state);
342
+ if (trailing) yield trailing;
270
343
  } catch (err) {
271
- if (err === kDoneError) return;
272
- // Abort errors are expected — just stop the generator.
273
344
  if (signal?.aborted) return;
274
345
  throw err;
275
346
  }
276
- if (!jsonBuffer.isEmpty) {
277
- throw new Error("SSE stream ended unexpectedly");
278
- }
279
347
  }
280
348
 
281
349
  /**