@oh-my-pi/pi-utils 11.8.3 → 11.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/stream.ts +116 -4
package/package.json
CHANGED
package/src/stream.ts
CHANGED
|
@@ -51,6 +51,118 @@ export function sanitizeText(text: string): string {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
// Byte values of the two line-terminator characters the JSONL helpers look for.
const LF = 0x0a; // "\n"
const CR = 0x0d; // "\r"

// One module-level decoder (TextDecoder defaults to UTF-8) reused for every decode call.
const decoder = new TextDecoder();
|
|
56
|
+
|
|
57
|
+
/**
 * Outcome of parsing one chunk of JSONL input.
 */
type JsonlChunkResult = {
	/** Values that were parsed successfully, in input order. */
	values: unknown[];
	/** The failure that stopped parsing; the fallback parsers in this file report `null` when there is none. */
	error: unknown;
	/** Offset into the input up to which data was consumed. */
	read: number;
	/** `true` when the whole input was consumed, `false` when unparsed data remains. */
	done: boolean;
};
|
|
63
|
+
|
|
64
|
+
/**
 * Reports whether the current runtime exposes a callable `Bun.JSONL.parseChunk`.
 *
 * Each level is probed with `typeof`, so the check does not throw when the
 * `Bun` global or its `JSONL` namespace is missing.
 *
 * @returns `true` only when `Bun.JSONL.parseChunk` is a function.
 */
function hasBunJsonlParseChunk(): boolean {
	if (typeof Bun === "undefined") return false;
	if (typeof Bun.JSONL === "undefined") return false;
	return typeof Bun.JSONL.parseChunk === "function";
}
|
|
67
|
+
|
|
68
|
+
/**
 * Decodes one JSONL line and parses it as JSON.
 *
 * @param lineBytes - Bytes of a single line, without its terminating LF.
 * @returns The parsed value, or `undefined` when the line contains only whitespace.
 * @throws Whatever `JSON.parse` throws when the line is not valid JSON.
 */
function parseJsonLine(lineBytes: Uint8Array): unknown {
	// Drop a single trailing CR so CRLF-terminated lines are treated like LF-terminated ones.
	const endsWithCr = lineBytes.length > 0 && lineBytes[lineBytes.length - 1] === CR;
	const content = endsWithCr ? lineBytes.subarray(0, lineBytes.length - 1) : lineBytes;

	const text = decoder.decode(content).trim();
	// `undefined` is the "blank line" marker; JSON.parse itself never returns it.
	return text.length === 0 ? undefined : JSON.parse(text);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Line-based JSONL chunk parser for byte input, used when the native Bun
 * parser is not available.
 *
 * Every LF-terminated line inside `[beg, end)` is parsed with `parseJsonLine`;
 * blank lines are skipped. Parsing stops at the first line that fails to parse.
 * Bytes after the last LF are parsed too if they already form valid JSON;
 * otherwise they are left unconsumed so the caller can retry with more data.
 *
 * @param bytes - Buffer holding the JSONL data.
 * @param beg - Offset of the first byte to examine (defaults to 0).
 * @param end - Offset one past the last byte to examine (defaults to `bytes.length`).
 * @returns Parsed values plus `read` (offset into `bytes` up to which input was
 *   consumed), `done` (whether everything up to `end` was consumed) and `error`
 *   (the exception from a failing LF-terminated line, otherwise `null`).
 */
function parseJsonlChunkFallbackBytes(bytes: Uint8Array, beg = 0, end = bytes.length): JsonlChunkResult {
	const parsedValues: unknown[] = [];
	// Offset where the line currently being scanned begins.
	let cursor = beg;

	for (let pos = beg; pos < end; pos++) {
		if (bytes[pos] === LF) {
			let value: unknown;
			try {
				value = parseJsonLine(bytes.subarray(cursor, pos));
			} catch (error) {
				// Report the failure and leave `read` at the start of the offending line.
				return { values: parsedValues, error, read: cursor, done: false };
			}
			if (value !== undefined) parsedValues.push(value);
			cursor = pos + 1;
		}
	}

	// Whatever follows the last LF; treated as empty when the scan already reached `end`.
	const tailText = cursor >= end ? "" : decoder.decode(bytes.subarray(cursor, end)).trim();
	if (tailText.length === 0) {
		return { values: parsedValues, error: null, read: end, done: true };
	}

	let tailValue: unknown;
	try {
		tailValue = JSON.parse(tailText);
	} catch {
		// In streaming mode this is usually a partial line/object, so it is not reported as an error.
		return { values: parsedValues, error: null, read: cursor, done: false };
	}
	parsedValues.push(tailValue);
	return { values: parsedValues, error: null, read: end, done: true };
}
|
|
111
|
+
|
|
112
|
+
/**
 * Line-based JSONL chunk parser for string input, used when the native Bun
 * parser is not available.
 *
 * Every "\n"-terminated line is trimmed and parsed; blank lines are skipped.
 * Parsing stops at the first line that fails to parse. Text after the last
 * newline is parsed too if it already forms valid JSON; otherwise it is left
 * unconsumed so the caller can retry once more data has arrived.
 *
 * @param buffer - JSONL text to parse.
 * @returns Parsed values plus `read` (index into `buffer` up to which input was
 *   consumed), `done` (whether the whole buffer was consumed) and `error`
 *   (the exception from a failing newline-terminated line, otherwise `null`).
 */
function parseJsonlChunkFallbackString(buffer: string): JsonlChunkResult {
	const values: unknown[] = [];
	let lineStart = 0;

	// Jump from newline to newline with indexOf instead of inspecting every character.
	for (let newline = buffer.indexOf("\n"); newline !== -1; newline = buffer.indexOf("\n", lineStart)) {
		// trim() also removes the "\r" of a CRLF terminator, so no separate CR handling is needed.
		const line = buffer.slice(lineStart, newline).trim();
		if (line.length > 0) {
			try {
				values.push(JSON.parse(line));
			} catch (error) {
				// Report the failure and leave `read` at the start of the offending line.
				return { values, error, read: lineStart, done: false };
			}
		}
		lineStart = newline + 1;
	}

	// Text after the last newline; empty when the buffer ended with a newline.
	const tail = buffer.slice(lineStart).trim();
	if (tail.length === 0) {
		return { values, error: null, read: buffer.length, done: true };
	}

	try {
		values.push(JSON.parse(tail));
		return { values, error: null, read: buffer.length, done: true };
	} catch {
		// An unparsable tail is usually a partial line in streaming mode, so it is not an error.
		return { values, error: null, read: lineStart, done: false };
	}
}
|
|
146
|
+
|
|
147
|
+
/**
 * Parses a chunk of JSONL with `Bun.JSONL.parseChunk` when the runtime
 * provides it, and with the line-based fallback parsers otherwise.
 *
 * @param input - JSONL data, either raw bytes or text.
 * @param beg - Byte input only: offset of the first byte to parse (native path defaults it to 0).
 * @param end - Byte input only: offset one past the last byte to parse (native path defaults it to `input.length`).
 * @returns The `values`, `error`, `read` and `done` fields produced by whichever parser ran.
 */
function parseJsonlChunkCompat(input: Uint8Array, beg?: number, end?: number): JsonlChunkResult;
function parseJsonlChunkCompat(input: string): JsonlChunkResult;
function parseJsonlChunkCompat(input: Uint8Array | string, beg?: number, end?: number): JsonlChunkResult {
	// No native parser: dispatch to the fallback that matches the input type.
	if (!hasBunJsonlParseChunk()) {
		return typeof input === "string"
			? parseJsonlChunkFallbackString(input)
			: parseJsonlChunkFallbackBytes(input, beg, end);
	}

	const native =
		typeof input === "string"
			? Bun.JSONL.parseChunk(input)
			: Bun.JSONL.parseChunk(input, beg ?? 0, end ?? input.length);

	// Copy only the four known fields so the result always has the JsonlChunkResult shape.
	return { values: native.values, error: native.error, read: native.read, done: native.done };
}
|
|
54
166
|
|
|
55
167
|
export async function* readLines(stream: ReadableStream<Uint8Array>, signal?: AbortSignal): AsyncGenerator<Uint8Array> {
|
|
56
168
|
const buffer = new ConcatSink();
|
|
@@ -86,7 +198,7 @@ export async function* readJsonl<T>(stream: ReadableStream<Uint8Array>, signal?:
|
|
|
86
198
|
const tail = buffer.flush();
|
|
87
199
|
if (tail) {
|
|
88
200
|
buffer.clear();
|
|
89
|
-
const { values, error, done } =
|
|
201
|
+
const { values, error, done } = parseJsonlChunkCompat(tail, 0, tail.length);
|
|
90
202
|
if (values.length > 0) {
|
|
91
203
|
yield* values as T[];
|
|
92
204
|
}
|
|
@@ -205,7 +317,7 @@ class ConcatSink {
|
|
|
205
317
|
}
|
|
206
318
|
*pullJSONL<T>(chunk: Uint8Array, beg: number, end: number) {
|
|
207
319
|
if (this.isEmpty) {
|
|
208
|
-
const { values, error, read, done } =
|
|
320
|
+
const { values, error, read, done } = parseJsonlChunkCompat(chunk, beg, end);
|
|
209
321
|
if (values.length > 0) {
|
|
210
322
|
yield* values as T[];
|
|
211
323
|
}
|
|
@@ -222,7 +334,7 @@ class ConcatSink {
|
|
|
222
334
|
space.set(chunk.subarray(beg, end), offset);
|
|
223
335
|
this.#length = total;
|
|
224
336
|
|
|
225
|
-
const { values, error, read, done } =
|
|
337
|
+
const { values, error, read, done } = parseJsonlChunkCompat(space.subarray(0, total), 0, total);
|
|
226
338
|
if (values.length > 0) {
|
|
227
339
|
yield* values as T[];
|
|
228
340
|
}
|
|
@@ -324,7 +436,7 @@ export function parseJsonlLenient<T>(buffer: string): T[] {
|
|
|
324
436
|
let entries: T[] | undefined;
|
|
325
437
|
|
|
326
438
|
while (buffer.length > 0) {
|
|
327
|
-
const { values, error, read, done } =
|
|
439
|
+
const { values, error, read, done } = parseJsonlChunkCompat(buffer);
|
|
328
440
|
if (values.length > 0) {
|
|
329
441
|
const ext = values as T[];
|
|
330
442
|
if (!entries) {
|