@oh-my-pi/pi-utils 11.2.2 → 11.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/stream.ts +307 -139
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-utils",
3
- "version": "11.2.2",
3
+ "version": "11.2.3",
4
4
  "description": "Shared utilities for pi packages",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
package/src/stream.ts CHANGED
@@ -1,5 +1,3 @@
1
- import { ArrayBufferSink } from "bun";
2
-
3
1
  /**
4
2
  * Sanitize binary output for display/storage.
5
3
  * Removes characters that crash string-width or cause display issues:
@@ -42,58 +40,70 @@ export function sanitizeText(text: string): string {
42
40
  return sanitizeBinaryOutput(Bun.stripANSI(text)).replace(/\r/g, "");
43
41
  }
44
42
 
45
- /**
46
- * Create a transform stream that splits lines.
47
- */
48
- export function createSplitterStream<T>(options: {
49
- newLine?: boolean;
50
- mapFn: (chunk: Uint8Array) => T;
51
- }): TransformStream<Uint8Array, T> {
52
- const { newLine = false, mapFn } = options;
53
- const LF = 0x0a;
54
- const sink = new Bun.ArrayBufferSink();
55
- sink.start({ asUint8Array: true, stream: true, highWaterMark: 4096 });
56
- let pending = false; // whether the sink has unflushed data
57
-
58
- return new TransformStream<Uint8Array, T>({
59
- transform(chunk, ctrl) {
60
- let pos = 0;
61
-
62
- while (pos < chunk.length) {
63
- const nl = chunk.indexOf(LF, pos);
64
- if (nl === -1) {
65
- sink.write(chunk.subarray(pos));
66
- pending = true;
67
- break;
68
- }
69
-
70
- const slice = chunk.subarray(pos, newLine ? nl + 1 : nl);
43
+ const LF = 0x0a;
71
44
 
72
- if (pending) {
73
- if (slice.length > 0) sink.write(slice);
74
- ctrl.enqueue(mapFn(sink.flush() as Uint8Array));
75
- pending = false;
76
- } else {
77
- ctrl.enqueue(mapFn(slice));
78
- }
79
- pos = nl + 1;
45
+ export async function* readLines(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<Uint8Array> {
46
+ const buffer = new ConcatSink();
47
+ const source = signal ? stream.pipeThrough(new TransformStream(), { signal }) : stream;
48
+ try {
49
+ for await (const chunk of source) {
50
+ for (const line of buffer.appendAndFlushLines(chunk)) {
51
+ yield line;
80
52
  }
81
- },
82
- flush(ctrl) {
83
- if (pending) {
84
- const tail = sink.end() as Uint8Array;
85
- if (tail.length > 0) ctrl.enqueue(mapFn(tail));
53
+ }
54
+ if (!buffer.isEmpty) {
55
+ const tail = buffer.flush();
56
+ if (tail) {
57
+ buffer.clear();
58
+ yield tail;
86
59
  }
87
- },
88
- });
60
+ }
61
+ } catch (err) {
62
+ // Abort errors are expected — just stop the generator.
63
+ if (signal?.aborted) return;
64
+ throw err;
65
+ }
89
66
  }
90
67
 
91
- export function createTextLineSplitter(sanitize = false): TransformStream<Uint8Array, string> {
92
- const dec = new TextDecoder("utf-8", { ignoreBOM: true, fatal: true });
93
- if (sanitize) {
94
- return createSplitterStream({ mapFn: chunk => sanitizeText(dec.decode(chunk)) });
68
+ export async function* readJsonl<T>(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<T> {
69
+ const buffer = new ConcatSink();
70
+ const source = signal ? stream.pipeThrough(new TransformStream(), { signal }) : stream;
71
+ try {
72
+ const yieldBuffer: T[] = [];
73
+ for await (const chunk of source) {
74
+ buffer.appendAndConsume(chunk, 0, chunk.length, (payload, beg, end) => {
75
+ const { values, error, read, done } = Bun.JSONL.parseChunk(payload, beg, end);
76
+ if (values.length > 0) {
77
+ yieldBuffer.push(...(values as T[]));
78
+ }
79
+ if (error) throw error;
80
+ if (done) return 0;
81
+ return end - read;
82
+ });
83
+ if (yieldBuffer.length > 0) {
84
+ yield* yieldBuffer;
85
+ yieldBuffer.length = 0;
86
+ }
87
+ }
88
+ if (!buffer.isEmpty) {
89
+ const tail = buffer.flush();
90
+ if (tail) {
91
+ buffer.clear();
92
+ const { values, error, done } = Bun.JSONL.parseChunk(tail, 0, tail.length);
93
+ if (values.length > 0) {
94
+ yield* values as T[];
95
+ }
96
+ if (error) throw error;
97
+ if (!done) {
98
+ throw new Error("JSONL stream ended unexpectedly");
99
+ }
100
+ }
101
+ }
102
+ } catch (err) {
103
+ // Abort errors are expected — just stop the generator.
104
+ if (signal?.aborted) return;
105
+ throw err;
95
106
  }
96
- return createSplitterStream({ mapFn: dec.decode.bind(dec) });
97
107
  }
98
108
 
99
109
  /**
@@ -118,28 +128,169 @@ export function createTextDecoderStream(): TransformStream<Uint8Array, string> {
118
128
  // SSE (Server-Sent Events)
119
129
  // =============================================================================
120
130
 
121
- const LF = 0x0a;
122
- const CR = 0x0d;
123
- const SPACE = 0x20;
131
+ class Bitmap {
132
+ private bits: Uint32Array;
133
+ constructor(n: number) {
134
+ this.bits = new Uint32Array((n + 31) >>> 5);
135
+ }
136
+
137
+ set(i: number, value: boolean) {
138
+ const index = i >>> 5;
139
+ const mask = 1 << (i & 31);
140
+ if (value) {
141
+ this.bits[index] |= mask;
142
+ } else {
143
+ this.bits[index] &= ~mask;
144
+ }
145
+ }
146
+ get(i: number) {
147
+ const index = i >>> 5;
148
+ const mask = 1 << (i & 31);
149
+ const word = this.bits[index];
150
+ return word !== undefined && (word & mask) !== 0;
151
+ }
152
+ }
153
+
154
+ const WHITESPACE = new Bitmap(256);
155
+ for (let i = 0; i <= 0x7f; i++) {
156
+ const c = String.fromCharCode(i);
157
+ switch (c) {
158
+ case " ":
159
+ case "\t":
160
+ case "\n":
161
+ case "\r":
162
+ WHITESPACE.set(i, true);
163
+ break;
164
+ default:
165
+ WHITESPACE.set(i, !c.trim());
166
+ break;
167
+ }
168
+ }
169
+
170
+ const createPattern = (prefix: string) => {
171
+ const pre = Buffer.from(prefix, "utf-8");
172
+ return {
173
+ strip(buf: Uint8Array): number | null {
174
+ const n = pre.length;
175
+ if (buf.length < n) return null;
176
+ if (pre.equals(buf.subarray(0, n))) {
177
+ return n;
178
+ }
179
+ return null;
180
+ },
181
+ };
182
+ };
183
+
184
+ const PAT_DATA = createPattern("data:");
185
+
186
+ const PAT_DONE = createPattern("[DONE]");
187
+
188
+ class ConcatSink {
189
+ #space?: Buffer;
190
+ #length = 0;
124
191
 
125
- // "data:" = [0x64, 0x61, 0x74, 0x61, 0x3a]
126
- const DATA_0 = 0x64; // d
127
- const DATA_1 = 0x61; // a
128
- const DATA_2 = 0x74; // t
129
- const DATA_3 = 0x61; // a
130
- const DATA_4 = 0x3a; // :
192
+ #ensureCapacity(size: number): Buffer {
193
+ const space = this.#space;
194
+ if (space && space.length >= size) return space;
195
+ const nextSize = space ? Math.max(size, space.length * 2) : size;
196
+ const next = Buffer.allocUnsafe(nextSize);
197
+ if (space && this.#length > 0) {
198
+ space.copy(next, 0, 0, this.#length);
199
+ }
200
+ this.#space = next;
201
+ return next;
202
+ }
203
+
204
+ append(chunk: Uint8Array) {
205
+ const n = chunk.length;
206
+ if (!n) return;
207
+ const offset = this.#length;
208
+ const space = this.#ensureCapacity(offset + n);
209
+ space.set(chunk, offset);
210
+ this.#length += n;
211
+ }
212
+
213
+ reset(chunk: Uint8Array) {
214
+ const n = chunk.length;
215
+ if (!n) {
216
+ this.#length = 0;
217
+ return;
218
+ }
219
+ const space = this.#ensureCapacity(n);
220
+ space.set(chunk, 0);
221
+ this.#length = n;
222
+ }
223
+
224
+ get isEmpty(): boolean {
225
+ return this.#length === 0;
226
+ }
131
227
 
132
- // "[DONE]" = [0x5b, 0x44, 0x4f, 0x4e, 0x45, 0x5d]
133
- const DONE = Uint8Array.from([0x5b, 0x44, 0x4f, 0x4e, 0x45, 0x5d]);
228
+ flush(): Uint8Array | undefined {
229
+ if (!this.#length) return undefined;
230
+ return this.#space!.subarray(0, this.#length);
231
+ }
134
232
 
135
- function isDone(buf: Uint8Array, start: number, end: number): boolean {
136
- if (end - start !== 6) return false;
137
- for (let i = 0; i < 6; i++) {
138
- if (buf[start + i] !== DONE[i]) return false;
233
+ clear() {
234
+ this.#length = 0;
235
+ }
236
+
237
+ *appendAndFlushLines(chunk: Uint8Array) {
238
+ let pos = 0;
239
+ while (pos < chunk.length) {
240
+ const nl = chunk.indexOf(LF, pos);
241
+ if (nl === -1) {
242
+ this.append(chunk.subarray(pos));
243
+ return;
244
+ }
245
+ const suffix = chunk.subarray(pos, nl);
246
+ pos = nl + 1;
247
+ if (this.isEmpty) {
248
+ yield suffix;
249
+ } else {
250
+ this.append(suffix);
251
+ const payload = this.flush();
252
+ if (payload) {
253
+ yield payload;
254
+ this.clear();
255
+ }
256
+ }
257
+ }
258
+ }
259
+
260
+ appendAndConsume(
261
+ chunk: Uint8Array,
262
+ beg: number,
263
+ end: number,
264
+ // (slice) => [remaining length]
265
+ consumer: (payload: Uint8Array, beg: number, end: number) => number,
266
+ ) {
267
+ if (this.isEmpty) {
268
+ const rem = consumer(chunk, beg, end);
269
+ if (!rem) return;
270
+ this.reset(chunk.subarray(end - rem, end));
271
+ return;
272
+ }
273
+
274
+ const offset = this.#length;
275
+ const n = end - beg;
276
+ const total = offset + n;
277
+ const space = this.#ensureCapacity(total);
278
+ space.set(chunk.subarray(beg, end), offset);
279
+ this.#length = total;
280
+ const rem = consumer(space.subarray(0, total), 0, total);
281
+ if (!rem) {
282
+ this.#length = 0;
283
+ return;
284
+ }
285
+ if (rem < total) {
286
+ space.copyWithin(0, total - rem, total);
287
+ }
288
+ this.#length = rem;
139
289
  }
140
- return true;
141
290
  }
142
291
 
292
+ const kDoneError = new Error("SSE stream done");
293
+
143
294
  /**
144
295
  * Stream parsed JSON objects from SSE `data:` lines.
145
296
  *
@@ -150,96 +301,113 @@ function isDone(buf: Uint8Array, start: number, end: number): boolean {
150
301
  * }
151
302
  * ```
152
303
  */
153
- export async function* readSseJson<T>(
154
- stream: ReadableStream<Uint8Array>,
155
- abortSignal?: AbortSignal,
156
- ): AsyncGenerator<T> {
157
- const sink = new ArrayBufferSink();
158
- sink.start({ asUint8Array: true, stream: true, highWaterMark: 4096 });
159
- let pending = false;
304
+ export async function* readSseJson<T>(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<T> {
305
+ const lineBuffer = new ConcatSink();
306
+ const jsonBuffer = new ConcatSink();
160
307
 
161
308
  // pipeThrough with { signal } makes the stream abort-aware: the pipe
162
309
  // cancels the source and errors the output when the signal fires,
163
310
  // so for-await-of exits cleanly without manual reader/listener management.
164
- const source = abortSignal ? stream.pipeThrough(new TransformStream(), { signal: abortSignal }) : stream;
165
-
311
+ stream = signal ? stream.pipeThrough(new TransformStream(), { signal }) : stream;
166
312
  try {
167
- for await (const chunk of source) {
168
- let pos = 0;
169
- while (pos < chunk.length) {
170
- const nl = chunk.indexOf(LF, pos);
171
- if (nl === -1) {
172
- sink.write(chunk.subarray(pos));
173
- pending = true;
174
- break;
175
- }
313
+ const yieldBuffer: T[] = [];
314
+ const processLine = (line: Uint8Array) => {
315
+ // Strip trailing spaces including \r.
316
+ let end = line.length;
317
+ while (end && WHITESPACE.get(line[end - 1])) {
318
+ --end;
319
+ }
320
+ if (!end) return; // blank line
321
+
322
+ const trimmed = end === line.length ? line : line.subarray(0, end);
323
+
324
+ // Check "data:" prefix and optional space afterwards.
325
+ let beg = PAT_DATA.strip(trimmed);
326
+ if (beg === null) return;
327
+ while (beg < end && WHITESPACE.get(trimmed[beg])) {
328
+ ++beg;
329
+ }
330
+ if (beg >= end) return;
176
331
 
177
- let line: Uint8Array;
178
- if (pending) {
179
- if (nl > pos) sink.write(chunk.subarray(pos, nl));
180
- line = sink.flush() as Uint8Array;
181
- pending = false;
182
- } else {
183
- line = chunk.subarray(pos, nl);
332
+ jsonBuffer.appendAndConsume(trimmed, beg, end, (payload, beg, end) => {
333
+ const { values, error, read, done } = Bun.JSONL.parseChunk(payload, beg, end);
334
+ if (values.length > 0) {
335
+ yieldBuffer.push(...(values as T[]));
336
+ }
337
+ if (error) {
338
+ if (PAT_DONE.strip(payload.subarray(beg, end))) {
339
+ throw kDoneError;
340
+ }
341
+ throw error;
342
+ }
343
+ if (done) return 0;
344
+ return end - read;
345
+ });
346
+ };
347
+ for await (const chunk of stream) {
348
+ for (const line of lineBuffer.appendAndFlushLines(chunk)) {
349
+ processLine(line);
350
+ if (yieldBuffer.length > 0) {
351
+ yield* yieldBuffer;
352
+ yieldBuffer.length = 0;
353
+ }
354
+ }
355
+ }
356
+ if (!lineBuffer.isEmpty) {
357
+ const tail = lineBuffer.flush();
358
+ if (tail) {
359
+ lineBuffer.clear();
360
+ processLine(tail);
361
+ if (yieldBuffer.length > 0) {
362
+ yield* yieldBuffer;
363
+ yieldBuffer.length = 0;
184
364
  }
185
- pos = nl + 1;
186
-
187
- // Strip trailing CR, skip blank/short lines.
188
- const len = line.length > 0 && line[line.length - 1] === CR ? line.length - 1 : line.length;
189
- if (len < 6) continue; // "data:" + at least 1 byte
190
-
191
- // Check "data:" prefix.
192
- if (
193
- line[0] !== DATA_0 ||
194
- line[1] !== DATA_1 ||
195
- line[2] !== DATA_2 ||
196
- line[3] !== DATA_3 ||
197
- line[4] !== DATA_4
198
- )
199
- continue;
200
-
201
- // Payload start — skip optional space after colon.
202
- const pStart = line[5] === SPACE ? 6 : 5;
203
- if (pStart >= len) continue;
204
- if (isDone(line, pStart, len)) return;
205
-
206
- // Build payload + \n for JSONL.parse.
207
- const pLen = len - pStart;
208
- const buf = new Uint8Array(pLen + 1);
209
- buf.set(line.subarray(pStart, len));
210
- buf[pLen] = LF;
211
-
212
- const [parsed] = Bun.JSONL.parse(buf);
213
- if (parsed !== undefined) yield parsed as T;
214
365
  }
215
366
  }
216
367
  } catch (err) {
368
+ if (err === kDoneError) return;
217
369
  // Abort errors are expected — just stop the generator.
218
- if (abortSignal?.aborted) return;
370
+ if (signal?.aborted) return;
219
371
  throw err;
220
372
  }
373
+ if (!jsonBuffer.isEmpty) {
374
+ throw new Error("SSE stream ended unexpectedly");
375
+ }
376
+ }
221
377
 
222
- // Trailing line without final newline.
223
- if (pending) {
224
- const tail = sink.end() as Uint8Array;
225
- const len = tail.length > 0 && tail[tail.length - 1] === CR ? tail.length - 1 : tail.length;
226
- if (
227
- len >= 6 &&
228
- tail[0] === DATA_0 &&
229
- tail[1] === DATA_1 &&
230
- tail[2] === DATA_2 &&
231
- tail[3] === DATA_3 &&
232
- tail[4] === DATA_4
233
- ) {
234
- const pStart = tail[5] === SPACE ? 6 : 5;
235
- if (pStart < len && !isDone(tail, pStart, len)) {
236
- const pLen = len - pStart;
237
- const buf = new Uint8Array(pLen + 1);
238
- buf.set(tail.subarray(pStart, len));
239
- buf[pLen] = LF;
240
- const [parsed] = Bun.JSONL.parse(buf);
241
- if (parsed !== undefined) yield parsed as T;
378
+ /**
379
+ * Parse a complete JSONL string, skipping malformed lines instead of throwing.
380
+ *
381
+ * Uses `Bun.JSONL.parseChunk` internally. On parse errors, the malformed
382
+ * region is skipped up to the next newline and parsing continues.
383
+ *
384
+ * @example
385
+ * ```ts
386
+ * const entries = parseJsonlLenient<MyType>(fileContents);
387
+ * ```
388
+ */
389
+ export function parseJsonlLenient<T>(buffer: string): T[] {
390
+ let entries: T[] | undefined;
391
+
392
+ while (buffer.length > 0) {
393
+ const { values, error, read, done } = Bun.JSONL.parseChunk(buffer);
394
+ if (values.length > 0) {
395
+ const ext = values as T[];
396
+ if (!entries) {
397
+ entries = ext;
398
+ } else {
399
+ entries.push(...ext);
242
400
  }
243
401
  }
402
+ if (error) {
403
+ const nextNewline = buffer.indexOf("\n", read);
404
+ if (nextNewline === -1) break;
405
+ buffer = buffer.substring(nextNewline + 1);
406
+ continue;
407
+ }
408
+ if (read === 0) break;
409
+ buffer = buffer.substring(read);
410
+ if (done) break;
244
411
  }
412
+ return entries ?? [];
245
413
  }