@oh-my-pi/pi-utils 11.2.2 → 11.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/stream.ts +307 -139
package/package.json
CHANGED
package/src/stream.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { ArrayBufferSink } from "bun";
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
2
|
* Sanitize binary output for display/storage.
|
|
5
3
|
* Removes characters that crash string-width or cause display issues:
|
|
@@ -42,58 +40,70 @@ export function sanitizeText(text: string): string {
|
|
|
42
40
|
return sanitizeBinaryOutput(Bun.stripANSI(text)).replace(/\r/g, "");
|
|
43
41
|
}
|
|
44
42
|
|
|
45
|
-
|
|
46
|
-
* Create a transform stream that splits lines.
|
|
47
|
-
*/
|
|
48
|
-
export function createSplitterStream<T>(options: {
|
|
49
|
-
newLine?: boolean;
|
|
50
|
-
mapFn: (chunk: Uint8Array) => T;
|
|
51
|
-
}): TransformStream<Uint8Array, T> {
|
|
52
|
-
const { newLine = false, mapFn } = options;
|
|
53
|
-
const LF = 0x0a;
|
|
54
|
-
const sink = new Bun.ArrayBufferSink();
|
|
55
|
-
sink.start({ asUint8Array: true, stream: true, highWaterMark: 4096 });
|
|
56
|
-
let pending = false; // whether the sink has unflushed data
|
|
57
|
-
|
|
58
|
-
return new TransformStream<Uint8Array, T>({
|
|
59
|
-
transform(chunk, ctrl) {
|
|
60
|
-
let pos = 0;
|
|
61
|
-
|
|
62
|
-
while (pos < chunk.length) {
|
|
63
|
-
const nl = chunk.indexOf(LF, pos);
|
|
64
|
-
if (nl === -1) {
|
|
65
|
-
sink.write(chunk.subarray(pos));
|
|
66
|
-
pending = true;
|
|
67
|
-
break;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
const slice = chunk.subarray(pos, newLine ? nl + 1 : nl);
|
|
43
|
+
// Byte value of ASCII line feed (`\n`); the delimiter searched for when splitting chunks into lines.
const LF = 0x0a;
|
|
71
44
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
pos = nl + 1;
|
|
45
|
+
/**
 * Split a byte stream into lines delimited by `\n`.
 *
 * Each yielded array holds one line without its trailing line feed (a
 * preceding `\r`, if any, is left in place). Bytes remaining after the last
 * line feed are yielded as a final line when the stream ends.
 *
 * Yielded arrays may be views over a stream chunk or over an internal
 * reassembly buffer; copy a line if it must outlive the current iteration.
 *
 * @param stream - Source of raw bytes.
 * @param signal - Optional abort signal. Once it has fired, any error raised
 *   while reading is swallowed and the generator simply finishes.
 * @throws Any stream error raised while the signal is not aborted.
 */
export async function* readLines(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<Uint8Array> {
	const pending = new ConcatSink();
	// Piping through an identity transform with { signal } makes iteration abort-aware.
	const input = signal ? stream.pipeThrough(new TransformStream(), { signal }) : stream;
	try {
		for await (const chunk of input) {
			yield* pending.appendAndFlushLines(chunk);
		}
		// Whatever is still buffered is an unterminated final line.
		const rest = pending.flush();
		if (rest) {
			pending.clear();
			yield rest;
		}
	} catch (err) {
		// Abort errors are expected — just stop the generator.
		if (!signal?.aborted) throw err;
	}
}
|
|
90
67
|
|
|
91
|
-
export function
|
|
92
|
-
const
|
|
93
|
-
|
|
94
|
-
|
|
68
|
+
/**
 * Incrementally parse a JSONL byte stream and yield each decoded value.
 *
 * Chunks are handed to `Bun.JSONL.parseChunk`; bytes the parser has not yet
 * consumed are carried over and re-parsed together with the next chunk.
 *
 * Values that were parsed successfully are always yielded before a parse
 * error is raised, so the output does not depend on how the input happened to
 * be split into chunks.
 *
 * @param stream - Source of raw JSONL bytes.
 * @param signal - Optional abort signal. Once it has fired, any error raised
 *   while reading is swallowed and the generator simply finishes.
 * @throws The parser's error for malformed input, or an `Error` when the
 *   stream ends in the middle of a value.
 */
export async function* readJsonl<T>(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<T> {
	const buffer = new ConcatSink();
	const source = signal ? stream.pipeThrough(new TransformStream(), { signal }) : stream;
	try {
		const yieldBuffer: T[] = [];
		// Parse failure recorded by the consumer callback; raised only after
		// the values parsed ahead of it have been delivered.
		let parseError: unknown;
		for await (const chunk of source) {
			buffer.appendAndConsume(chunk, 0, chunk.length, (payload, beg, end) => {
				const { values, error, read, done } = Bun.JSONL.parseChunk(payload, beg, end);
				for (const value of values as T[]) {
					yieldBuffer.push(value);
				}
				if (error) {
					parseError = error;
					return 0; // nothing worth retaining; the generator is about to fail
				}
				if (done) return 0;
				// Keep the unconsumed remainder for the next chunk.
				return end - read;
			});
			if (yieldBuffer.length > 0) {
				yield* yieldBuffer;
				yieldBuffer.length = 0;
			}
			if (parseError !== undefined) throw parseError;
		}
		if (!buffer.isEmpty) {
			const tail = buffer.flush();
			if (tail) {
				buffer.clear();
				const { values, error, done } = Bun.JSONL.parseChunk(tail, 0, tail.length);
				if (values.length > 0) {
					yield* values as T[];
				}
				if (error) throw error;
				if (!done) {
					throw new Error("JSONL stream ended unexpectedly");
				}
			}
		}
	} catch (err) {
		// Abort errors are expected — just stop the generator.
		if (signal?.aborted) return;
		throw err;
	}
}
|
|
98
108
|
|
|
99
109
|
/**
|
|
@@ -118,28 +128,169 @@ export function createTextDecoderStream(): TransformStream<Uint8Array, string> {
|
|
|
118
128
|
// SSE (Server-Sent Events)
|
|
119
129
|
// =============================================================================
|
|
120
130
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
131
|
+
/**
 * Fixed-size set of boolean flags packed 32 per word.
 *
 * Indices outside the allocated range are tolerated: writes to them are
 * dropped by the typed array and reads report `false`.
 */
class Bitmap {
	private bits: Uint32Array;

	/** @param n - Number of flags to make room for. */
	constructor(n: number) {
		// Round up to a whole number of 32-bit words.
		this.bits = new Uint32Array((n + 31) >>> 5);
	}

	/** Turn flag `i` on (`value === true`) or off. */
	set(i: number, value: boolean) {
		const slot = i >>> 5;
		const bit = 1 << (i & 31);
		const current = this.bits[slot];
		this.bits[slot] = value ? current | bit : current & ~bit;
	}

	/** Report whether flag `i` is on. */
	get(i: number) {
		const current = this.bits[i >>> 5];
		if (current === undefined) return false;
		return (current & (1 << (i & 31))) !== 0;
	}
}
|
|
153
|
+
|
|
154
|
+
// Lookup table over byte values: a flag is set for every ASCII code point
// (0x00–0x7f) that counts as whitespace. Bytes 0x80–0xff are never flagged.
const WHITESPACE = new Bitmap(256);
for (let code = 0; code <= 0x7f; code++) {
	const ch = String.fromCharCode(code);
	// Space, tab, LF and CR are listed explicitly; anything else qualifies
	// when String.prototype.trim() removes it.
	const isBlank = ch === " " || ch === "\t" || ch === "\n" || ch === "\r";
	WHITESPACE.set(code, isBlank || !ch.trim());
}
|
|
169
|
+
|
|
170
|
+
/**
 * Build a matcher for a fixed byte prefix.
 *
 * @param prefix - Text whose UTF-8 encoding must appear at the start of a buffer.
 * @returns An object whose `strip(buf)` returns the prefix length in bytes
 *   (the offset at which the remainder begins) when `buf` starts with the
 *   prefix, and `null` otherwise.
 */
const createPattern = (prefix: string) => {
	const needle = Buffer.from(prefix, "utf-8");
	return {
		strip(buf: Uint8Array): number | null {
			const size = needle.length;
			const matches = buf.length >= size && needle.equals(buf.subarray(0, size));
			return matches ? size : null;
		},
	};
};
|
|
183
|
+
|
|
184
|
+
// Matches the `data:` field prefix at the start of an SSE line.
const PAT_DATA = createPattern("data:");
|
|
185
|
+
|
|
186
|
+
// Matches the `[DONE]` sentinel payload that marks the end of an SSE stream.
const PAT_DONE = createPattern("[DONE]");
|
|
187
|
+
|
|
188
|
+
/**
 * Growable byte accumulator used to stitch together data that arrives split
 * across stream chunks.
 *
 * The backing storage is reused between writes, so views handed out by
 * `flush()` (and the payload passed to an `appendAndConsume` consumer) are
 * only valid until the next write to the sink.
 */
class ConcatSink {
	#space?: Buffer;
	#length = 0;

	/**
	 * Return backing storage able to hold at least `size` bytes. Growth at
	 * least doubles the capacity, and bytes already buffered are carried over.
	 */
	#ensureCapacity(size: number): Buffer {
		const space = this.#space;
		if (space && space.length >= size) return space;
		const nextSize = space ? Math.max(size, space.length * 2) : size;
		const next = Buffer.allocUnsafe(nextSize);
		if (space && this.#length > 0) {
			space.copy(next, 0, 0, this.#length);
		}
		this.#space = next;
		return next;
	}

	/** Copy `chunk` onto the end of the buffered bytes. */
	append(chunk: Uint8Array) {
		const n = chunk.length;
		if (!n) return;
		const offset = this.#length;
		const space = this.#ensureCapacity(offset + n);
		space.set(chunk, offset);
		this.#length += n;
	}

	/** Discard the buffered bytes and replace them with a copy of `chunk`. */
	reset(chunk: Uint8Array) {
		const n = chunk.length;
		if (!n) {
			this.#length = 0;
			return;
		}
		const space = this.#ensureCapacity(n);
		space.set(chunk, 0);
		this.#length = n;
	}

	/** Whether no bytes are currently buffered. */
	get isEmpty(): boolean {
		return this.#length === 0;
	}

	/**
	 * Return a view over the buffered bytes, or `undefined` when empty.
	 * Nothing is removed, and the view aliases the backing storage: it is
	 * overwritten by later writes.
	 */
	flush(): Uint8Array | undefined {
		if (!this.#length) return undefined;
		return this.#space!.subarray(0, this.#length);
	}

	/** Drop the buffered bytes, keeping the allocated storage for reuse. */
	clear() {
		this.#length = 0;
	}

	/**
	 * Feed `chunk` and yield every line (without its `\n`) that it completes.
	 *
	 * A line lying entirely inside `chunk` is yielded as a zero-copy view of
	 * `chunk`. A line that started in an earlier chunk is yielded as a
	 * detached copy, because the storage it was assembled in is reused for
	 * the next partial line and a view would be overwritten under the caller.
	 * Bytes after the last `\n` stay buffered for the next call.
	 */
	*appendAndFlushLines(chunk: Uint8Array): Generator<Uint8Array> {
		let pos = 0;
		while (pos < chunk.length) {
			const nl = chunk.indexOf(LF, pos);
			if (nl === -1) {
				// No terminator yet: keep the partial line for later.
				this.append(chunk.subarray(pos));
				return;
			}
			const suffix = chunk.subarray(pos, nl);
			pos = nl + 1;
			if (this.isEmpty) {
				yield suffix;
				continue;
			}
			this.append(suffix);
			// Detach the completed line and empty the sink before handing
			// control to the caller.
			const line = Buffer.from(this.#space!.subarray(0, this.#length));
			this.#length = 0;
			yield line;
		}
	}

	/**
	 * Present the buffered bytes followed by `chunk[beg, end)` to `consumer`
	 * and retain whatever it leaves unconsumed.
	 *
	 * @param chunk - Incoming data.
	 * @param beg - Start offset (inclusive) of the new data within `chunk`.
	 * @param end - End offset (exclusive) of the new data within `chunk`.
	 * @param consumer - Called once with `(payload, beg, end)`; returns how
	 *   many trailing bytes of that range it did not consume. Those bytes
	 *   become the sink's new content; a result of `0` empties the sink.
	 */
	appendAndConsume(
		chunk: Uint8Array,
		beg: number,
		end: number,
		// (slice) => [remaining length]
		consumer: (payload: Uint8Array, beg: number, end: number) => number,
	) {
		if (this.isEmpty) {
			// Fast path: let the consumer read straight from the chunk and
			// copy only the unconsumed tail.
			const rem = consumer(chunk, beg, end);
			if (!rem) return;
			this.reset(chunk.subarray(end - rem, end));
			return;
		}

		const offset = this.#length;
		const n = end - beg;
		const total = offset + n;
		const space = this.#ensureCapacity(total);
		space.set(chunk.subarray(beg, end), offset);
		this.#length = total;
		const rem = consumer(space.subarray(0, total), 0, total);
		if (!rem) {
			this.#length = 0;
			return;
		}
		if (rem < total) {
			// Slide the unconsumed tail to the front of the storage.
			space.copyWithin(0, total - rem, total);
		}
		this.#length = rem;
	}
}
|
|
142
291
|
|
|
292
|
+
// Shared sentinel thrown when the `[DONE]` payload is seen; the SSE reader
// recognises it by identity (`err === kDoneError`) and ends without an error.
const kDoneError = new Error("SSE stream done");
|
|
293
|
+
|
|
143
294
|
/**
|
|
144
295
|
* Stream parsed JSON objects from SSE `data:` lines.
|
|
145
296
|
*
|
|
@@ -150,96 +301,113 @@ function isDone(buf: Uint8Array, start: number, end: number): boolean {
|
|
|
150
301
|
* }
|
|
151
302
|
* ```
|
|
152
303
|
*/
|
|
153
|
-
export async function* readSseJson<T>(
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
): AsyncGenerator<T> {
|
|
157
|
-
const sink = new ArrayBufferSink();
|
|
158
|
-
sink.start({ asUint8Array: true, stream: true, highWaterMark: 4096 });
|
|
159
|
-
let pending = false;
|
|
304
|
+
export async function* readSseJson<T>(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<T> {
	// Only `data:` lines are parsed; every other line is ignored. JSON that is
	// incomplete at the end of one data line is continued by the next one.
	// A `[DONE]` payload ends the generator; values parsed before it, or
	// before a parse error, are yielded first.

	// Reassembles lines that straddle chunk boundaries.
	const lineBuffer = new ConcatSink();
	// Holds JSON bytes the parser has not consumed yet.
	const jsonBuffer = new ConcatSink();

	// pipeThrough with { signal } makes the stream abort-aware: the pipe
	// cancels the source and errors the output when the signal fires,
	// so for-await-of exits cleanly without manual reader/listener management.
	stream = signal ? stream.pipeThrough(new TransformStream(), { signal }) : stream;
	try {
		const yieldBuffer: T[] = [];
		// Failure recorded by processLine (parse error or the done sentinel).
		// It is raised only after yieldBuffer has been drained, so values
		// parsed ahead of it on the same line are not lost.
		let failure: unknown;

		const processLine = (line: Uint8Array) => {
			// Strip trailing spaces including \r.
			let end = line.length;
			while (end && WHITESPACE.get(line[end - 1])) {
				--end;
			}
			if (!end) return; // blank line

			const trimmed = end === line.length ? line : line.subarray(0, end);

			// Check "data:" prefix and skip whitespace after the colon.
			let beg = PAT_DATA.strip(trimmed);
			if (beg === null) return;
			while (beg < end && WHITESPACE.get(trimmed[beg])) {
				++beg;
			}
			if (beg >= end) return;

			jsonBuffer.appendAndConsume(trimmed, beg, end, (payload, beg, end) => {
				const { values, error, read, done } = Bun.JSONL.parseChunk(payload, beg, end);
				for (const value of values as T[]) {
					yieldBuffer.push(value);
				}
				if (error) {
					// "[DONE]" is not JSON, so it surfaces as a parse error.
					failure = PAT_DONE.strip(payload.subarray(beg, end)) !== null ? kDoneError : error;
					return 0; // nothing worth retaining; the generator is about to stop
				}
				if (done) return 0;
				// Keep the unconsumed remainder for the next data line.
				return end - read;
			});
		};

		for await (const chunk of stream) {
			for (const line of lineBuffer.appendAndFlushLines(chunk)) {
				processLine(line);
				if (yieldBuffer.length > 0) {
					yield* yieldBuffer;
					yieldBuffer.length = 0;
				}
				if (failure !== undefined) throw failure;
			}
		}
		// A final line without a terminating newline is still processed.
		const tail = lineBuffer.flush();
		if (tail) {
			lineBuffer.clear();
			processLine(tail);
			if (yieldBuffer.length > 0) {
				yield* yieldBuffer;
				yieldBuffer.length = 0;
			}
			if (failure !== undefined) throw failure;
		}
	} catch (err) {
		if (err === kDoneError) return;
		// Abort errors are expected — just stop the generator.
		if (signal?.aborted) return;
		throw err;
	}
	// The stream ended while a JSON value was still incomplete.
	if (!jsonBuffer.isEmpty) {
		throw new Error("SSE stream ended unexpectedly");
	}
}
|
|
221
377
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
378
|
+
/**
 * Parse a complete JSONL string, skipping malformed lines instead of throwing.
 *
 * Uses `Bun.JSONL.parseChunk` internally. On parse errors, the malformed
 * region is skipped up to the next newline and parsing continues. A malformed
 * region with no newline after it ends parsing; values collected so far are
 * returned.
 *
 * @param buffer - Full JSONL text.
 * @returns The successfully parsed values in input order (empty when none).
 *
 * @example
 * ```ts
 * const entries = parseJsonlLenient<MyType>(fileContents);
 * ```
 */
export function parseJsonlLenient<T>(buffer: string): T[] {
	let entries: T[] | undefined;
	// Unparsed remainder of the input; the parameter itself is left untouched.
	let rest = buffer;

	while (rest.length > 0) {
		const { values, error, read, done } = Bun.JSONL.parseChunk(rest);
		if (values.length > 0) {
			const batch = values as T[];
			if (!entries) {
				// Adopt the first batch as the result array to avoid copying it.
				entries = batch;
			} else {
				// Append element by element: `push(...batch)` passes every
				// element as a call argument and can throw a RangeError when a
				// batch is very large.
				for (const value of batch) {
					entries.push(value);
				}
			}
		}
		if (error) {
			// Resume after the line break that follows the parsed prefix.
			const nextNewline = rest.indexOf("\n", read);
			if (nextNewline === -1) break;
			rest = rest.substring(nextNewline + 1);
			continue;
		}
		if (read === 0) break; // no progress possible
		rest = rest.substring(read);
		if (done) break;
	}
	return entries ?? [];
}
|