valrs 0.1.0

@@ -0,0 +1,692 @@
+ /**
+  * Streaming validation for large JSON arrays and NDJSON data.
+  *
+  * Provides O(1) memory validation for arbitrarily large files by processing
+  * items one at a time as they arrive from the stream.
+  *
+  * @example
+  * ```typescript
+  * import { v, stream, streamLines } from 'valrs';
+  *
+  * // Stream JSON array with validation
+  * const response = await fetch('/users.json');
+  * for await (const user of stream(v.array(User), response.body!)) {
+  *   console.log(user); // Each user validated as it arrives
+  * }
+  *
+  * // Stream NDJSON (newline-delimited JSON)
+  * for await (const line of streamLines(User, ndjsonStream)) {
+  *   process.send(line);
+  * }
+  * ```
+  */
+ import { ValArray } from './schema';
+ import { ValError } from './error';
+ // ============================================================================
+ // Byte Size Parsing
+ // ============================================================================
+ /**
+  * Parses a byte size string like '100MB' or '1GB' into bytes.
+  */
+ function parseByteSize(size) {
+     if (typeof size === 'number') {
+         return size;
+     }
+     const match = size.match(/^(\d+(?:\.\d+)?)\s*(B|KB|MB|GB|TB)?$/i);
+     if (!match) {
+         throw new Error(`Invalid byte size format: ${size}`);
+     }
+     const value = parseFloat(match[1] ?? '0');
+     const unit = (match[2] ?? 'B').toUpperCase();
+     const multipliers = {
+         B: 1,
+         KB: 1024,
+         MB: 1024 * 1024,
+         GB: 1024 * 1024 * 1024,
+         TB: 1024 * 1024 * 1024 * 1024,
+     };
+     return value * (multipliers[unit] ?? 1);
+ }
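+ // Illustrative expectations (editor's sketch based on the 1024-based
+ // multiplier table above; not from the package docs):
+ //   parseByteSize('100MB') // => 104857600
+ //   parseByteSize('1.5KB') // => 1536
+ //   parseByteSize(2048)    // => 2048 (numbers pass through unchanged)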
+ /**
+  * Parses a duration string like '30s' or '5m' into milliseconds.
+  */
+ function parseDuration(duration) {
+     if (typeof duration === 'number') {
+         return duration;
+     }
+     const match = duration.match(/^(\d+(?:\.\d+)?)\s*(ms|s|m|h)?$/i);
+     if (!match) {
+         throw new Error(`Invalid duration format: ${duration}`);
+     }
+     const value = parseFloat(match[1] ?? '0');
+     const unit = (match[2] ?? 'ms').toLowerCase();
+     const multipliers = {
+         ms: 1,
+         s: 1000,
+         m: 60 * 1000,
+         h: 60 * 60 * 1000,
+     };
+     return value * (multipliers[unit] ?? 1);
+ }
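+ // Illustrative expectations (editor's sketch, same pattern as above):
+ //   parseDuration('30s') // => 30000
+ //   parseDuration('5m')  // => 300000
+ //   parseDuration(250)   // => 250 (numbers pass through unchanged)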
+ // ============================================================================
+ // JSON Array Streaming Parser
+ // ============================================================================
+ /**
+  * State for the JSON array parser state machine.
+  */
+ var ParserState;
+ (function (ParserState) {
+     /** Looking for opening bracket '['. */
+     ParserState[ParserState["LOOKING_FOR_ARRAY_START"] = 0] = "LOOKING_FOR_ARRAY_START";
+     /** Looking for first item or closing bracket. */
+     ParserState[ParserState["LOOKING_FOR_ITEM_OR_END"] = 1] = "LOOKING_FOR_ITEM_OR_END";
+     /** Parsing an item. */
+     ParserState[ParserState["PARSING_ITEM"] = 2] = "PARSING_ITEM";
+     /** Looking for comma or closing bracket. */
+     ParserState[ParserState["LOOKING_FOR_COMMA_OR_END"] = 3] = "LOOKING_FOR_COMMA_OR_END";
+     /** Array parsing complete. */
+     ParserState[ParserState["DONE"] = 4] = "DONE";
+ })(ParserState || (ParserState = {}));
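+ // Illustrative state trace for the input '[1, 2]' (editor's sketch, derived
+ // from the transitions implemented below):
+ //   '[' : LOOKING_FOR_ARRAY_START -> LOOKING_FOR_ITEM_OR_END
+ //   '1' : -> PARSING_ITEM; ',' at depth 0 emits "1" -> LOOKING_FOR_ITEM_OR_END
+ //   '2' : -> PARSING_ITEM; ']' closes the array, emits "2" -> DONE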
+ /**
+  * Incremental JSON array parser that yields complete items.
+  *
+  * Uses a state machine to detect array boundaries and item separators,
+  * buffering partial JSON until complete items are detected.
+  */
+ class JsonArrayParser {
+     state = ParserState.LOOKING_FOR_ARRAY_START;
+     buffer = '';
+     depth = 0;
+     inString = false;
+     escapeNext = false;
+     itemStartIndex = 0;
+     /** Where to continue parsing from (to avoid reprocessing) */
+     continuationIndex = 0;
+     items = [];
+     /**
+      * Feeds a chunk of text to the parser and returns any complete items.
+      *
+      * @param chunk - A chunk of JSON text
+      * @returns Array of complete JSON item strings
+      */
+     feed(chunk) {
+         this.buffer += chunk;
+         this.items = [];
+         this.parse();
+         return this.items;
+     }
+     /**
+      * Returns true if the parser has finished processing the array.
+      */
+     isDone() {
+         return this.state === ParserState.DONE;
+     }
+     /**
+      * Returns any remaining buffer content (for error reporting).
+      */
+     getRemainingBuffer() {
+         return this.buffer;
+     }
+     parse() {
+         // Start from where we left off, not from 0
+         let i = this.continuationIndex;
+         const len = this.buffer.length;
+         while (i < len && this.state !== ParserState.DONE) {
+             const char = this.buffer[i];
+             switch (this.state) {
+                 case ParserState.LOOKING_FOR_ARRAY_START:
+                     if (char === '[') {
+                         this.state = ParserState.LOOKING_FOR_ITEM_OR_END;
+                     }
+                     else if (char !== undefined && !/\s/.test(char)) {
+                         throw new Error(`Expected '[' at position ${i}, got '${char}'`);
+                     }
+                     i++;
+                     break;
+                 case ParserState.LOOKING_FOR_ITEM_OR_END:
+                     if (char === ']') {
+                         this.state = ParserState.DONE;
+                         i++;
+                     }
+                     else if (char !== undefined && !/\s/.test(char)) {
+                         this.state = ParserState.PARSING_ITEM;
+                         this.itemStartIndex = i;
+                         this.depth = 0;
+                         this.inString = false;
+                         this.escapeNext = false;
+                         // Don't increment i, we need to process this character
+                     }
+                     else {
+                         i++;
+                     }
+                     break;
+                 case ParserState.PARSING_ITEM:
+                     i = this.parseItemCharacters(i, len);
+                     break;
+                 case ParserState.LOOKING_FOR_COMMA_OR_END:
+                     if (char === ',') {
+                         this.state = ParserState.LOOKING_FOR_ITEM_OR_END;
+                         i++;
+                     }
+                     else if (char === ']') {
+                         this.state = ParserState.DONE;
+                         i++;
+                     }
+                     else if (char !== undefined && !/\s/.test(char)) {
+                         throw new Error(`Expected ',' or ']' at position ${i}, got '${char}'`);
+                     }
+                     else {
+                         i++;
+                     }
+                     break;
+             }
+         }
+         // Trim processed content from buffer, keeping any remaining item content
+         if (this.state === ParserState.PARSING_ITEM) {
+             // Keep from itemStartIndex onwards, adjust continuation index
+             const offset = this.itemStartIndex;
+             this.buffer = this.buffer.slice(offset);
+             this.itemStartIndex = 0;
+             // Continue from where we left off, adjusted for the slice
+             this.continuationIndex = i - offset;
+         }
+         else if (this.state === ParserState.DONE) {
+             // Parsing complete
+             this.buffer = '';
+             this.continuationIndex = 0;
+         }
+         else {
+             // Discard processed content
+             this.buffer = this.buffer.slice(i);
+             this.continuationIndex = 0;
+         }
+     }
+     parseItemCharacters(startIndex, len) {
+         let i = startIndex;
+         while (i < len) {
+             const char = this.buffer[i];
+             if (char === undefined)
+                 break;
+             if (this.escapeNext) {
+                 this.escapeNext = false;
+                 i++;
+                 continue;
+             }
+             if (this.inString) {
+                 if (char === '\\') {
+                     this.escapeNext = true;
+                 }
+                 else if (char === '"') {
+                     this.inString = false;
+                 }
+                 i++;
+                 continue;
+             }
+             // Not in string
+             if (char === '"') {
+                 this.inString = true;
+                 i++;
+                 continue;
+             }
+             if (char === '{' || char === '[') {
+                 this.depth++;
+                 i++;
+                 continue;
+             }
+             if (char === '}' || char === ']') {
+                 this.depth--;
+                 i++;
+                 // Check if we've closed all nested structures
+                 if (this.depth < 0) {
+                     // This is the array's closing bracket - we went too far
+                     // The item ended before this character
+                     const itemJson = this.buffer.slice(this.itemStartIndex, i - 1).trim();
+                     if (itemJson.length > 0) {
+                         this.items.push(itemJson);
+                     }
+                     this.state = ParserState.DONE;
+                     return i;
+                 }
+                 if (this.depth === 0) {
+                     // Completed a nested structure, but might have more primitive content
+                     continue;
+                 }
+                 continue;
+             }
+             if (char === ',' && this.depth === 0) {
+                 // End of current item
+                 const itemJson = this.buffer.slice(this.itemStartIndex, i).trim();
+                 if (itemJson.length > 0) {
+                     this.items.push(itemJson);
+                 }
+                 this.state = ParserState.LOOKING_FOR_ITEM_OR_END;
+                 return i + 1;
+             }
+             i++;
+         }
+         return i;
+     }
+ }
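+ // Minimal feed() sketch with a chunk boundary in the middle of a string
+ // (editor's illustration with hypothetical chunks, not from the package docs):
+ //   const p = new JsonArrayParser();
+ //   p.feed('[{"id":1},{"id'); // => ['{"id":1}']
+ //   p.feed('":2}]');          // => ['{"id":2}']
+ //   p.isDone();               // => true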
+ /**
+  * Converts various stream types to an async iterable of strings.
+  */
+ async function* streamToAsyncIterable(input) {
+     const decoder = new TextDecoder();
+     // Handle ReadableStream
+     if ('getReader' in input) {
+         const reader = input.getReader();
+         try {
+             while (true) {
+                 const { done, value } = await reader.read();
+                 if (done)
+                     break;
+                 if (typeof value === 'string') {
+                     yield value;
+                 }
+                 else {
+                     yield decoder.decode(value, { stream: true });
+                 }
+             }
+             // Flush any remaining bytes
+             const remaining = decoder.decode();
+             if (remaining) {
+                 yield remaining;
+             }
+         }
+         finally {
+             reader.releaseLock();
+         }
+         return;
+     }
+     // Handle AsyncIterable (Node.js Readable streams, generators, etc.)
+     for await (const chunk of input) {
+         if (typeof chunk === 'string') {
+             yield chunk;
+         }
+         else {
+             yield decoder.decode(chunk, { stream: true });
+         }
+     }
+     // Flush any remaining bytes
+     const remaining = decoder.decode();
+     if (remaining) {
+         yield remaining;
+     }
+ }
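+ // Accepted inputs (editor's illustration): a Web ReadableStream such as
+ // response.body, a Node.js Readable (an AsyncIterable of Buffers), or any
+ // async generator yielding strings or Uint8Arrays.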
+ // ============================================================================
+ // Main Streaming Functions
+ // ============================================================================
+ /**
+  * Streams a JSON array and validates each item with the provided schema.
+  *
+  * Processes items with O(1) memory regardless of array size. Items are
+  * validated and yielded as they arrive from the stream.
+  *
+  * @template T - The validated item type
+  * @param schema - Array schema with element schema for validation
+  * @param input - Input stream (Web ReadableStream, Node.js stream, or async iterable)
+  * @param options - Streaming options for limits and error handling
+  * @returns Async iterable of validated items
+  *
+  * @example
+  * ```typescript
+  * // Stream from fetch response
+  * const response = await fetch('/users.json');
+  * for await (const user of stream(v.array(User), response.body!)) {
+  *   console.log(user);
+  * }
+  *
+  * // Stream with options
+  * for await (const user of stream(v.array(User), inputStream, {
+  *   maxItems: 10000,
+  *   maxBytes: '100MB',
+  *   onError: 'skip',
+  * })) {
+  *   process.send(user);
+  * }
+  *
+  * // Collect to array
+  * const users = await stream(v.array(User), response.body!).toArray();
+  * ```
+  */
+ export function stream(schema, input, options = {}) {
+     const elementSchema = schema.element;
+     const maxItems = options.maxItems ?? Infinity;
+     const maxBytes = options.maxBytes !== undefined ? parseByteSize(options.maxBytes) : Infinity;
+     const timeout = options.timeout !== undefined ? parseDuration(options.timeout) : Infinity;
+     const onError = options.onError ?? 'throw';
+     const errors = [];
+     let itemCount = 0;
+     let byteCount = 0;
+     let timeoutId;
+     let timedOut = false;
+     async function* generateItems() {
+         const parser = new JsonArrayParser();
+         const startTime = Date.now();
+         // Set up timeout if specified
+         if (timeout !== Infinity) {
+             timeoutId = setTimeout(() => {
+                 timedOut = true;
+             }, timeout);
+         }
+         try {
+             for await (const chunk of streamToAsyncIterable(input)) {
+                 // Check timeout
+                 if (timedOut) {
+                     throw new Error(`Stream processing timed out after ${Date.now() - startTime}ms`);
+                 }
+                 // Track bytes
+                 byteCount += new TextEncoder().encode(chunk).length;
+                 if (byteCount > maxBytes) {
+                     throw new Error(`Stream exceeded maximum byte limit of ${maxBytes} bytes`);
+                 }
+                 // Feed chunk to parser
+                 const items = parser.feed(chunk);
+                 // Validate and yield each complete item
+                 for (const itemJson of items) {
+                     if (itemCount >= maxItems) {
+                         return;
+                     }
+                     try {
+                         const parsed = JSON.parse(itemJson);
+                         const result = elementSchema['~standard'].validate(parsed);
+                         if (result.issues !== undefined) {
+                             const error = new ValError(result.issues);
+                             if (onError === 'throw') {
+                                 throw error;
+                             }
+                             else if (onError === 'collect') {
+                                 errors.push({ index: itemCount, error, rawValue: parsed });
+                             }
+                             // 'skip' - just don't yield
+                         }
+                         else {
+                             yield result.value;
+                         }
+                     }
+                     catch (err) {
+                         if (err instanceof SyntaxError) {
+                             const valError = new ValError([
+                                 { message: `Invalid JSON at index ${itemCount}: ${err.message}` },
+                             ]);
+                             if (onError === 'throw') {
+                                 throw valError;
+                             }
+                             else if (onError === 'collect') {
+                                 errors.push({ index: itemCount, error: valError });
+                             }
+                         }
+                         else {
+                             throw err;
+                         }
+                     }
+                     itemCount++;
+                 }
+                 // Check if parser is done
+                 if (parser.isDone()) {
+                     break;
+                 }
+             }
+             // Check for incomplete parsing
+             const remaining = parser.getRemainingBuffer().trim();
+             if (!parser.isDone() && remaining.length > 0) {
+                 throw new Error(`Incomplete JSON array. Remaining buffer: "${remaining.slice(0, 100)}..."`);
+             }
+         }
+         finally {
+             if (timeoutId !== undefined) {
+                 clearTimeout(timeoutId);
+             }
+         }
+     }
+     const iterator = generateItems();
+     const result = {
+         errors,
+         async toArray() {
+             const items = [];
+             for await (const item of this) {
+                 items.push(item);
+             }
+             return items;
+         },
+         async pipeTo(writable) {
+             const writer = writable.getWriter();
+             try {
+                 for await (const item of this) {
+                     await writer.write(item);
+                 }
+                 await writer.close();
+             }
+             catch (err) {
+                 await writer.abort(err);
+                 throw err;
+             }
+         },
+         [Symbol.asyncIterator]() {
+             return iterator;
+         },
+     };
+     return result;
+ }
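+ // Error-collection sketch (editor's illustration; schema and source names
+ // are hypothetical, the errors/toArray API is the one returned above):
+ //   const s = stream(v.array(User), source, { onError: 'collect' });
+ //   const users = await s.toArray();  // invalid items are skipped
+ //   console.log(s.errors);            // [{ index, error, rawValue }, ...]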
+ /**
+  * Streams NDJSON (newline-delimited JSON) and validates each line.
+  *
+  * Each line is parsed as a separate JSON value and validated against
+  * the provided schema.
+  *
+  * @template T - The validated item type
+  * @param schema - Schema for validating each line
+  * @param input - Input stream
+  * @param options - Streaming options
+  * @returns Async iterable of validated items
+  *
+  * @example
+  * ```typescript
+  * for await (const event of streamLines(EventSchema, logStream)) {
+  *   processEvent(event);
+  * }
+  * ```
+  */
+ export function streamLines(schema, input, options = {}) {
+     const maxItems = options.maxItems ?? Infinity;
+     const maxBytes = options.maxBytes !== undefined ? parseByteSize(options.maxBytes) : Infinity;
+     const timeout = options.timeout !== undefined ? parseDuration(options.timeout) : Infinity;
+     const onError = options.onError ?? 'throw';
+     const errors = [];
+     let itemCount = 0;
+     let byteCount = 0;
+     let timeoutId;
+     let timedOut = false;
+     async function* generateItems() {
+         let buffer = '';
+         const startTime = Date.now();
+         // Set up timeout if specified
+         if (timeout !== Infinity) {
+             timeoutId = setTimeout(() => {
+                 timedOut = true;
+             }, timeout);
+         }
+         try {
+             for await (const chunk of streamToAsyncIterable(input)) {
+                 // Check timeout
+                 if (timedOut) {
+                     throw new Error(`Stream processing timed out after ${Date.now() - startTime}ms`);
+                 }
+                 // Track bytes
+                 byteCount += new TextEncoder().encode(chunk).length;
+                 if (byteCount > maxBytes) {
+                     throw new Error(`Stream exceeded maximum byte limit of ${maxBytes} bytes`);
+                 }
+                 buffer += chunk;
+                 // Process complete lines
+                 let newlineIndex;
+                 while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
+                     if (itemCount >= maxItems) {
+                         return;
+                     }
+                     const line = buffer.slice(0, newlineIndex).trim();
+                     buffer = buffer.slice(newlineIndex + 1);
+                     // Skip empty lines
+                     if (line.length === 0) {
+                         continue;
+                     }
+                     try {
+                         const parsed = JSON.parse(line);
+                         const result = schema['~standard'].validate(parsed);
+                         if (result.issues !== undefined) {
+                             const error = new ValError(result.issues);
+                             if (onError === 'throw') {
+                                 throw error;
+                             }
+                             else if (onError === 'collect') {
+                                 errors.push({ index: itemCount, error, rawValue: parsed });
+                             }
+                             // 'skip' - just don't yield
+                         }
+                         else {
+                             yield result.value;
+                         }
+                     }
+                     catch (err) {
+                         if (err instanceof SyntaxError) {
+                             const valError = new ValError([
+                                 { message: `Invalid JSON at line ${itemCount}: ${err.message}` },
+                             ]);
+                             if (onError === 'throw') {
+                                 throw valError;
+                             }
+                             else if (onError === 'collect') {
+                                 errors.push({ index: itemCount, error: valError });
+                             }
+                         }
+                         else {
+                             throw err;
+                         }
+                     }
+                     itemCount++;
+                 }
+             }
+             // Process any remaining content
+             const remaining = buffer.trim();
+             if (remaining.length > 0 && itemCount < maxItems) {
+                 try {
+                     const parsed = JSON.parse(remaining);
+                     const result = schema['~standard'].validate(parsed);
+                     if (result.issues !== undefined) {
+                         const error = new ValError(result.issues);
+                         if (onError === 'throw') {
+                             throw error;
+                         }
+                         else if (onError === 'collect') {
+                             errors.push({ index: itemCount, error, rawValue: parsed });
+                         }
+                     }
+                     else {
+                         yield result.value;
+                     }
+                 }
+                 catch (err) {
+                     if (err instanceof SyntaxError) {
+                         const valError = new ValError([
+                             { message: `Invalid JSON at line ${itemCount}: ${err.message}` },
+                         ]);
+                         if (onError === 'throw') {
+                             throw valError;
+                         }
+                         else if (onError === 'collect') {
+                             errors.push({ index: itemCount, error: valError });
+                         }
+                     }
+                     else {
+                         throw err;
+                     }
+                 }
+             }
+         }
+         finally {
+             if (timeoutId !== undefined) {
+                 clearTimeout(timeoutId);
+             }
+         }
+     }
+     const iterator = generateItems();
+     const result = {
+         errors,
+         async toArray() {
+             const items = [];
+             for await (const item of this) {
+                 items.push(item);
+             }
+             return items;
+         },
+         async pipeTo(writable) {
+             const writer = writable.getWriter();
+             try {
+                 for await (const item of this) {
+                     await writer.write(item);
+                 }
+                 await writer.close();
+             }
+             catch (err) {
+                 await writer.abort(err);
+                 throw err;
+             }
+         },
+         [Symbol.asyncIterator]() {
+             return iterator;
+         },
+     };
+     return result;
+ }
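+ // NDJSON sketch (editor's illustration with hypothetical input): given
+ // '{"id":1}\n{"id":2}\n{"id":3}', streamLines yields three validated items;
+ // a final line without a trailing '\n' is still parsed once the stream ends
+ // (the "remaining content" branch above).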
+ // ============================================================================
+ // Convenience Helpers
+ // ============================================================================
+ /**
+  * Creates a mock readable stream from an array of chunks.
+  *
+  * Useful for testing streaming functionality.
+  *
+  * @param chunks - Array of string chunks
+  * @returns A ReadableStream that yields the chunks
+  *
+  * @example
+  * ```typescript
+  * const mockStream = createMockStream([
+  *   '[{"id": 1},',
+  *   '{"id": 2}]',
+  * ]);
+  *
+  * for await (const item of stream(v.array(schema), mockStream)) {
+  *   console.log(item);
+  * }
+  * ```
+  */
+ export function createMockStream(chunks) {
+     let index = 0;
+     return new ReadableStream({
+         pull(controller) {
+             if (index < chunks.length) {
+                 controller.enqueue(chunks[index]);
+                 index++;
+             }
+             else {
+                 controller.close();
+             }
+         },
+     });
+ }
678
+ /**
679
+ * Creates a mock readable stream from a complete JSON string, split into chunks.
680
+ *
681
+ * @param json - Complete JSON string
682
+ * @param chunkSize - Size of each chunk in characters (default: 64)
683
+ * @returns A ReadableStream that yields the chunks
684
+ */
685
+ export function createChunkedStream(json, chunkSize = 64) {
686
+ const chunks = [];
687
+ for (let i = 0; i < json.length; i += chunkSize) {
688
+ chunks.push(json.slice(i, i + chunkSize));
689
+ }
690
+ return createMockStream(chunks);
691
+ }
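+ // Testing sketch (editor's illustration; bigArray and schema are hypothetical):
+ //   const source = createChunkedStream(JSON.stringify(bigArray));
+ //   const items = await stream(v.array(schema), source).toArray();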
+ //# sourceMappingURL=streaming.js.map