web-csv-toolbox 0.14.0-next-386eebeaafe5857e28c876345c14c9fe5f1a3774 → 0.14.0-next-978b88933762ecc27270ce746b80a3fa7ed8c4f7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -48
- package/dist/CSVLexer.js +8 -5
- package/dist/CSVLexer.js.map +1 -1
- package/dist/CSVLexerTransformer.d.ts +10 -12
- package/dist/CSVLexerTransformer.js +12 -16
- package/dist/CSVLexerTransformer.js.map +1 -1
- package/dist/CSVRecordAssembler.js +14 -4
- package/dist/CSVRecordAssembler.js.map +1 -1
- package/dist/CSVRecordAssemblerTransformer.d.ts +8 -14
- package/dist/CSVRecordAssemblerTransformer.js +10 -16
- package/dist/CSVRecordAssemblerTransformer.js.map +1 -1
- package/dist/assertCommonOptions.d.ts +1 -1
- package/dist/assertCommonOptions.js.map +1 -1
- package/dist/common/errors.d.ts +32 -0
- package/dist/common/errors.js +18 -0
- package/dist/common/errors.js.map +1 -1
- package/dist/common/types.d.ts +292 -66
- package/dist/constants.d.ts +12 -0
- package/dist/constants.js +2 -1
- package/dist/constants.js.map +1 -1
- package/dist/execution/EnginePresets.d.ts +52 -12
- package/dist/execution/EnginePresets.js +1 -1
- package/dist/execution/EnginePresets.js.map +1 -1
- package/dist/execution/InternalEngineConfig.js +40 -18
- package/dist/execution/InternalEngineConfig.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorker.node.js +3 -4
- package/dist/execution/worker/parseBinaryInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorker.web.js +3 -4
- package/dist/execution/worker/parseBinaryInWorker.web.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.js +3 -4
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.js +3 -4
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.js.map +1 -1
- package/dist/execution/worker/parseStreamInWorker.node.js +3 -4
- package/dist/execution/worker/parseStreamInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseStreamInWorker.web.js +3 -4
- package/dist/execution/worker/parseStreamInWorker.web.js.map +1 -1
- package/dist/execution/worker/parseStringInWorker.node.js +3 -4
- package/dist/execution/worker/parseStringInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseStringInWorker.web.js +3 -4
- package/dist/execution/worker/parseStringInWorker.web.js.map +1 -1
- package/dist/execution/worker/parseStringInWorkerWASM.node.js +3 -4
- package/dist/execution/worker/parseStringInWorkerWASM.node.js.map +1 -1
- package/dist/execution/worker/parseStringInWorkerWASM.web.js +3 -4
- package/dist/execution/worker/parseStringInWorkerWASM.web.js.map +1 -1
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js +3 -4
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js +3 -4
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js.map +1 -1
- package/dist/getCharsetValidation.constants.node.d.ts +11 -0
- package/dist/getCharsetValidation.constants.node.js +53 -0
- package/dist/getCharsetValidation.constants.node.js.map +1 -0
- package/dist/getCharsetValidation.constants.web.d.ts +36 -0
- package/dist/getCharsetValidation.constants.web.js +53 -0
- package/dist/getCharsetValidation.constants.web.js.map +1 -0
- package/dist/getOptionsFromFile.d.ts +14 -0
- package/dist/getOptionsFromFile.js +12 -0
- package/dist/getOptionsFromFile.js.map +1 -0
- package/dist/getOptionsFromResponse.js +17 -1
- package/dist/getOptionsFromResponse.js.map +1 -1
- package/dist/parseBlob.js +9 -1
- package/dist/parseBlob.js.map +1 -1
- package/dist/parseFile.d.ts +3 -2
- package/dist/parseFile.js +7 -3
- package/dist/parseFile.js.map +1 -1
- package/dist/parseFileToArray.d.ts +27 -0
- package/dist/parseFileToArray.js +12 -0
- package/dist/parseFileToArray.js.map +1 -0
- package/dist/parseFileToStream.d.ts +33 -0
- package/dist/parseFileToStream.js +10 -0
- package/dist/parseFileToStream.js.map +1 -0
- package/dist/utils/convertBinaryToString.js +17 -4
- package/dist/utils/convertBinaryToString.js.map +1 -1
- package/dist/utils/parseMime.js +3 -1
- package/dist/utils/parseMime.js.map +1 -1
- package/dist/utils/types.d.ts +21 -10
- package/dist/web-csv-toolbox.d.ts +3 -0
- package/dist/web-csv-toolbox.js +3 -0
- package/dist/web-csv-toolbox.js.map +1 -1
- package/package.json +7 -1
package/README.md
CHANGED
@@ -407,7 +407,7 @@ catering to users who need more detailed and fine-tuned functionality.
 - **`function parseBlob(blob[, options])`**: [📑](https://kamiazya.github.io/web-csv-toolbox/functions/parseBlob-1.html)
   - Parse CSV data from `Blob` or `File` objects.
 - **`function parseFile(file[, options])`**: [📑](https://kamiazya.github.io/web-csv-toolbox/functions/parseFile-1.html)
-  -
+  - Parse `File` objects with automatic filename tracking in error messages.
 - **`function parseStream(stream[, options])`**: [📑](https://kamiazya.github.io/web-csv-toolbox/functions/parseStream-1.html)
   - Stream-based parsing for larger or continuous data.
 - **`function parseStringStream(stream[, options])`**: [📑](https://kamiazya.github.io/web-csv-toolbox/functions/parseStringStream-1.html)
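Note: the filename tracking documented above can be exercised end to end. A minimal sketch (the inline `File` construction and the `data.csv` name are illustrative, not from the package):

```typescript
import { parseFile } from "web-csv-toolbox";

// In a browser, this File would typically come from an <input type="file">.
const file = new File(["name,age\nAlice,42\n"], "data.csv", { type: "text/csv" });

for await (const record of parseFile(file)) {
  console.log(record); // { name: "Alice", age: "42" }
}
// If parsing fails, the thrown error's message is expected to reference
// "data.csv", per the filename-tracking change above.
```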
@@ -440,28 +440,21 @@ new CSVRecordAssemblerTransformer(options?, writableStrategy?, readableStrategy?
 **Default queuing strategies (starting points, not benchmarked):**
 ```typescript
 // CSVLexerTransformer defaults
-
-
-
-
-
-
-  highWaterMark: 1024
-
-  checkInterval: 100 // Check backpressure every 100 tokens
-}
+new CSVLexerTransformer(
+  { backpressureCheckInterval: 100 }, // Check every 100 tokens
+  {
+    highWaterMark: 65536, // 64KB of characters
+    size: (chunk) => chunk.length, // Count by string length
+  },
+  new CountQueuingStrategy({ highWaterMark: 1024 }) // 1024 tokens
+)
 
 // CSVRecordAssemblerTransformer defaults
-
-
-
-
-
-readableStrategy: {
-  highWaterMark: 256, // 256 records
-  size: () => 1, // Each record counts as 1
-  checkInterval: 10 // Check backpressure every 10 records
-}
+new CSVRecordAssemblerTransformer(
+  { backpressureCheckInterval: 10 }, // Check every 10 records
+  new CountQueuingStrategy({ highWaterMark: 1024 }), // 1024 tokens
+  new CountQueuingStrategy({ highWaterMark: 256 }) // 256 records
+)
 ```
 
 **Key Features:**
@@ -477,8 +470,8 @@ readableStrategy: {
 - Prevents blocking the main thread
 - Critical for browser UI responsiveness
 
-🔧 **Tunable Check Interval:**
-- `
+🔧 **Tunable Backpressure Check Interval:**
+- `backpressureCheckInterval` (in options): How often to check for backpressure (count-based)
 - Lower values (5-25): More responsive, slight overhead
 - Higher values (100-500): Less overhead, slower response
 - Customize based on downstream consumer speed
@@ -486,9 +479,9 @@
 > ⚠️ **Important**: These defaults are theoretical starting points based on data flow characteristics, **not empirical benchmarks**. Optimal values vary by runtime (browser/Node.js/Deno), file size, memory constraints, and CPU performance. **Profile your specific use case** to find the best values.
 
 **When to customize:**
-- 🚀 **High-throughput servers**: Higher `highWaterMark` (128KB+, 2048+ tokens), higher `
-- 📱 **Memory-constrained environments**: Lower `highWaterMark` (16KB, 256 tokens), lower `
-- 🐌 **Slow consumers** (DB writes, API calls): Lower `highWaterMark`, lower `
+- 🚀 **High-throughput servers**: Higher `highWaterMark` (128KB+, 2048+ tokens), higher `backpressureCheckInterval` (200-500)
+- 📱 **Memory-constrained environments**: Lower `highWaterMark` (16KB, 256 tokens), lower `backpressureCheckInterval` (10-25)
+- 🐌 **Slow consumers** (DB writes, API calls): Lower `highWaterMark`, lower `backpressureCheckInterval` for responsive backpressure
 - 🏃 **Fast processing**: Higher values to reduce overhead
 
 **Example - High-throughput server:**
@@ -499,30 +492,17 @@ const response = await fetch('large-dataset.csv');
 await response.body
   .pipeThrough(new TextDecoderStream())
   .pipeThrough(new CSVLexerTransformer(
-    {},
+    { backpressureCheckInterval: 200 }, // Less frequent checks
     {
-      highWaterMark: 131072,
+      highWaterMark: 131072, // 128KB
       size: (chunk) => chunk.length,
-      checkInterval: 200 // Less frequent checks
     },
-    {
-      highWaterMark: 2048, // 2048 tokens
-      size: (tokens) => tokens.length,
-      checkInterval: 100
-    }
+    new CountQueuingStrategy({ highWaterMark: 2048 }) // 2048 tokens
   ))
   .pipeThrough(new CSVRecordAssemblerTransformer(
-    {},
-    {
-
-      size: (tokens) => tokens.length,
-      checkInterval: 20
-    },
-    {
-      highWaterMark: 512, // 512 records
-      size: () => 1,
-      checkInterval: 10
-    }
+    { backpressureCheckInterval: 20 }, // Less frequent checks
+    new CountQueuingStrategy({ highWaterMark: 2048 }), // 2048 tokens
+    new CountQueuingStrategy({ highWaterMark: 512 }) // 512 records
   ))
   .pipeTo(yourRecordProcessor);
 ```
@@ -532,9 +512,9 @@ await response.body
 await csvStream
   .pipeThrough(new CSVLexerTransformer()) // Use defaults
   .pipeThrough(new CSVRecordAssemblerTransformer(
-    {},
-    { highWaterMark: 512
-    { highWaterMark: 64
+    { backpressureCheckInterval: 2 }, // Very responsive
+    new CountQueuingStrategy({ highWaterMark: 512 }),
+    new CountQueuingStrategy({ highWaterMark: 64 })
   ))
   .pipeTo(new WritableStream({
     async write(record) {
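Note: taken together, these README hunks document one API migration: `checkInterval` moves out of the queuing strategies and into the transformer options as `backpressureCheckInterval`, while the strategy arguments become plain Web Streams `QueuingStrategy` objects. A before/after sketch of that migration, using only values shown in the hunks above:

```typescript
import { CSVLexerTransformer, CSVRecordAssemblerTransformer } from "web-csv-toolbox";

// Before (older pre-release builds): checkInterval lived inside each strategy object.
// new CSVLexerTransformer({}, { highWaterMark: 65536, size: (c) => c.length, checkInterval: 100 });

// After: backpressureCheckInterval is a transformer option; strategies are standard.
const lexer = new CSVLexerTransformer(
  { backpressureCheckInterval: 100 },                      // check every 100 tokens
  { highWaterMark: 65536, size: (chunk) => chunk.length }, // writable side: count characters
  new CountQueuingStrategy({ highWaterMark: 1024 })        // readable side: count tokens
);

const assembler = new CSVRecordAssemblerTransformer(
  { backpressureCheckInterval: 10 },                 // check every 10 records
  new CountQueuingStrategy({ highWaterMark: 1024 }), // writable side: tokens
  new CountQueuingStrategy({ highWaterMark: 256 })   // readable side: records
);
```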
package/dist/CSVLexer.js
CHANGED
@@ -20,6 +20,7 @@ class CSVLexer {
   };
   #rowNumber = 1;
   #signal;
+  #source;
   /**
    * Constructs a new CSVLexer instance.
    * @param options - The common options for the lexer.
@@ -29,21 +30,21 @@
       delimiter = DEFAULT_DELIMITER,
       quotation = DEFAULT_QUOTATION,
       maxBufferSize = DEFAULT_MAX_BUFFER_SIZE,
-      signal
+      signal,
+      source
     } = options;
     assertCommonOptions({ delimiter, quotation, maxBufferSize });
     this.#delimiter = delimiter;
     this.#quotation = quotation;
     this.#fieldDelimiterLength = delimiter.length;
     this.#maxBufferSize = maxBufferSize;
+    this.#source = source;
+    this.#signal = signal;
     const d = escapeRegExp(delimiter);
     const q = escapeRegExp(quotation);
     this.#matcher = new RegExp(
       `^(?:(?!${q})(?!${d})(?![\\r\\n]))([\\S\\s\\uFEFF\\xA0]+?)(?=${q}|${d}|\\r|\\n|$)`
     );
-    if (signal) {
-      this.#signal = signal;
-    }
   }
   /**
    * Lexes the given chunk of CSV data.
@@ -208,7 +209,9 @@
     } while (cur !== void 0);
     if (this.#flush) {
       throw new ParseError("Unexpected EOF while parsing quoted field.", {
-        position: { ...this.#cursor }
+        position: { ...this.#cursor },
+        rowNumber: this.#rowNumber,
+        source: this.#source
       });
     }
     return null;
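Note: for context, the new `source` option and the enriched `ParseError` interact as follows; a sketch using the low-level lexer (the `"users.csv"` label and the unterminated-quote input are illustrative):

```typescript
import { CSVLexer } from "web-csv-toolbox";

const lexer = new CSVLexer({ source: "users.csv" }); // label carried into parse errors

try {
  // No { stream: true }, so the lexer flushes; the unterminated quoted
  // field triggers the "Unexpected EOF" branch shown in the hunk above.
  for (const token of lexer.lex('name\n"Alice')) {
    console.log(token);
  }
} catch (error) {
  // The ParseError is now constructed with position, rowNumber, and source.
  console.error(error);
}
```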
package/dist/CSVLexer.js.map
CHANGED
@@ -1 +1 @@
-(single-line minified source map; prior contents omitted)
+(single-line minified source map, regenerated for the updated CSVLexer.ts, whose embedded source now accepts a `source` option and attaches `rowNumber` and `source` to the ParseError thrown on unexpected EOF; full contents omitted)
package/dist/CSVLexerTransformer.d.ts
CHANGED
@@ -1,14 +1,14 @@
 import { CSVLexer } from './CSVLexer.ts';
-import { CSVLexerTransformerOptions,
+import { CSVLexerTransformerOptions, Token } from './common/types.ts';
 import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from './constants.ts';
 /**
  * A transform stream that converts a stream of strings into a stream of tokens.
  *
  * @category Low-level API
  *
- * @param options - CSV-specific options (delimiter, quotation, etc.)
- * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 65536, size: chunk => chunk.length
- * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 1024, size: () => 1
+ * @param options - CSV-specific options (delimiter, quotation, checkInterval, etc.)
+ * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 65536, size: chunk => chunk.length }`)
+ * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 1024, size: () => 1 }`)
  *
  * @remarks
  * Follows the Web Streams API pattern where queuing strategies are passed as
@@ -52,17 +52,15 @@ import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from './constants.ts';
  * @example Custom queuing strategies with backpressure tuning
  * ```ts
  * const transformer = new CSVLexerTransformer(
- *   {
+ *   {
+ *     delimiter: ',',
+ *     backpressureCheckInterval: 50 // Check backpressure every 50 tokens
+ *   },
  *   {
  *     highWaterMark: 131072, // 128KB of characters
  *     size: (chunk) => chunk.length, // Count by character length
- *     checkInterval: 200 // Check backpressure every 200 tokens
  *   },
- *   {
- *     highWaterMark: 2048, // 2048 tokens
- *     size: () => 1, // Each token counts as 1
- *     checkInterval: 50 // Check backpressure every 50 tokens
- *   }
+ *   new CountQueuingStrategy({ highWaterMark: 2048 }) // 2048 tokens
 * );
 *
 * await fetch('large-file.csv')
@@ -80,5 +78,5 @@ export declare class CSVLexerTransformer<Delimiter extends string = DEFAULT_DELI
      * @internal
      */
     protected yieldToEventLoop(): Promise<void>;
-    constructor(options?: CSVLexerTransformerOptions<Delimiter, Quotation>, writableStrategy?:
+    constructor(options?: CSVLexerTransformerOptions<Delimiter, Quotation>, writableStrategy?: QueuingStrategy<string>, readableStrategy?: QueuingStrategy<Token>);
 }
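Note: because the declared strategy parameters are the structural `QueuingStrategy<string>` and `QueuingStrategy<Token>` types, either a built-in strategy class or a plain object with `highWaterMark` and `size` will type-check. A small sketch (the values are arbitrary):

```typescript
import { CSVLexerTransformer } from "web-csv-toolbox";

// A plain object (counting characters) for the writable side and a built-in
// CountQueuingStrategy (counting tokens) for the readable side both satisfy
// the QueuingStrategy interfaces in the new constructor signature.
const transformer = new CSVLexerTransformer(
  {},
  { highWaterMark: 32768, size: (chunk: string) => chunk.length },
  new CountQueuingStrategy({ highWaterMark: 2048 })
);
```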
package/dist/CSVLexerTransformer.js
CHANGED
@@ -1,5 +1,15 @@
 import { CSVLexer } from './CSVLexer.js';
 
+const DEFAULT_WRITABLE_STRATEGY = {
+  highWaterMark: 65536,
+  // 64KB worth of characters
+  size: (chunk) => chunk.length
+  // Count by string length
+};
+const DEFAULT_READABLE_STRATEGY = new CountQueuingStrategy({
+  highWaterMark: 1024
+  // 1024 tokens
+});
 class CSVLexerTransformer extends TransformStream {
   lexer;
   /**
@@ -10,23 +20,9 @@ class CSVLexerTransformer extends TransformStream {
   async yieldToEventLoop() {
     await new Promise((resolve) => setTimeout(resolve, 0));
   }
-  constructor(options = {}, writableStrategy = {
-    highWaterMark: 65536,
-    // 64KB worth of characters
-    size: (chunk) => chunk.length,
-    // Count by string length (character count)
-    checkInterval: 100
-    // Check backpressure every 100 tokens
-  }, readableStrategy = {
-    highWaterMark: 1024,
-    // 1024 tokens
-    size: () => 1,
-    // Each token counts as 1
-    checkInterval: 100
-    // Check backpressure every 100 tokens
-  }) {
+  constructor(options = {}, writableStrategy = DEFAULT_WRITABLE_STRATEGY, readableStrategy = DEFAULT_READABLE_STRATEGY) {
     const lexer = new CSVLexer(options);
-    const checkInterval =
+    const checkInterval = options.backpressureCheckInterval ?? 100;
     super(
       {
         transform: async (chunk, controller) => {
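Note: the rewritten constructor centralizes the backpressure handling: every `backpressureCheckInterval` tokens, the transform consults `controller.desiredSize` and, when it drops to 0 or below, awaits `yieldToEventLoop()`. A stand-alone sketch of that pattern (simplified; the comma-splitting loop is a stand-in for `lexer.lex(chunk, { stream: true })`, not the library's exact code):

```typescript
const checkInterval = 100; // plays the role of options.backpressureCheckInterval ?? 100

const stream = new TransformStream<string, string>({
  async transform(chunk, controller) {
    let count = 0;
    for (const item of chunk.split(",")) { // stand-in for the token loop
      controller.enqueue(item);
      count++;
      // Periodically check whether the downstream queue is full.
      if (
        count % checkInterval === 0 &&
        controller.desiredSize !== null &&
        controller.desiredSize <= 0
      ) {
        // Yield to the event loop so the consumer can drain the queue.
        await new Promise((resolve) => setTimeout(resolve, 0));
      }
    }
  },
});
```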
package/dist/CSVLexerTransformer.js.map
CHANGED
@@ -1 +1 @@
-(single-line minified source map; prior contents truncated in this rendering)
+(single-line minified source map, regenerated for the updated CSVLexerTransformer.ts with module-level default strategies and the `backpressureCheckInterval` option; full contents omitted)
package/dist/CSVRecordAssembler.js
CHANGED
@@ -10,6 +10,8 @@ class CSVRecordAssembler {
   #signal;
   #maxFieldCount;
   #skipEmptyLines;
+  #currentRowNumber;
+  #source;
   constructor(options = {}) {
     const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;
     if (!(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) || Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc))) {
@@ -19,6 +21,7 @@
     }
     this.#maxFieldCount = mfc;
     this.#skipEmptyLines = options.skipEmptyLines ?? false;
+    this.#source = options.source;
     if (options.header !== void 0 && Array.isArray(options.header)) {
       this.#setHeader(options.header);
     }
@@ -58,6 +61,9 @@
    */
   *#processToken(token) {
     this.#signal?.throwIfAborted();
+    if (token.location) {
+      this.#currentRowNumber = token.location.rowNumber;
+    }
     switch (token.type) {
       case FieldDelimiter:
         this.#fieldIndex++;
@@ -119,22 +125,26 @@ class CSVRecordAssembler {
   #checkFieldCount() {
     if (this.#fieldIndex + 1 > this.#maxFieldCount) {
       throw new RangeError(
-        `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}`
+        `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}${this.#currentRowNumber ? ` at row ${this.#currentRowNumber}` : ""}${this.#source ? ` in ${JSON.stringify(this.#source)}` : ""}`
       );
     }
   }
   #setHeader(header) {
     if (header.length > this.#maxFieldCount) {
       throw new RangeError(
-        `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}`
+        `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}${this.#source ? ` in ${JSON.stringify(this.#source)}` : ""}`
       );
     }
     this.#header = header;
     if (this.#header.length === 0) {
-      throw new ParseError("The header must not be empty.");
+      throw new ParseError("The header must not be empty.", {
+        source: this.#source
+      });
     }
     if (new Set(this.#header).size !== this.#header.length) {
-      throw new ParseError("The header must not contain duplicate fields.");
+      throw new ParseError("The header must not contain duplicate fields.", {
+        source: this.#source
+      });
     }
   }
 }
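Taken together, these hunks make field-count and header errors report where they happened. A sketch of the observable behavior, assuming `CSVLexer` and `CSVRecordAssembler` are both exported from the package root and that `lex()` accepts a string chunk, as the embedded sources below show; the CSV text and label are illustrative:

```ts
import { CSVLexer, CSVRecordAssembler } from "web-csv-toolbox";

const lexer = new CSVLexer();
const assembler = new CSVRecordAssembler({
  maxFieldCount: 2,
  source: "report.csv", // hypothetical label
});

try {
  // Three fields in the first row against maxFieldCount: 2 trips the
  // #checkFieldCount() guard shown above.
  for (const record of assembler.assemble(lexer.lex("a,b,c\n"))) {
    console.log(record);
  }
} catch (error) {
  // Per the new template literal, the message should read roughly:
  // RangeError: Field count (3) exceeded maximum allowed count of 2 at row 1 in "report.csv"
  console.error(error);
}
```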
@@ -1 +1 @@
-{"version":3,"file":"CSVRecordAssembler.js","sources":["../src/CSVRecordAssembler.ts"],"sourcesContent":["import { FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default maximum field count per record (100,000 fields).\n */\nconst DEFAULT_MAX_FIELD_COUNT = 100_000;\n\n/**\n * Options for the CSVRecordAssembler.assemble method.\n */\nexport interface CSVRecordAssemblerAssembleOptions {\n /**\n * If true, indicates that more tokens are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Record Assembler.\n *\n * CSVRecordAssembler assembles tokens into CSV records.\n */\nexport class CSVRecordAssembler<Header extends ReadonlyArray<string>> {\n #fieldIndex = 0;\n #row: string[] = [];\n #header: Header | undefined;\n #dirty = false;\n #signal?: AbortSignal;\n #maxFieldCount: number;\n #skipEmptyLines: boolean;\n\n constructor(options: CSVRecordAssemblerOptions<Header> = {}) {\n const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;\n // Validate maxFieldCount\n if (\n !(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc)))\n ) {\n throw new RangeError(\n \"maxFieldCount must be a positive integer or Number.POSITIVE_INFINITY\",\n );\n }\n this.#maxFieldCount = mfc;\n this.#skipEmptyLines = options.skipEmptyLines ?? false;\n if (options.header !== undefined && Array.isArray(options.header)) {\n this.#setHeader(options.header);\n }\n if (options.signal) {\n this.#signal = options.signal;\n }\n }\n\n /**\n * Assembles tokens into CSV records.\n * @param input - A single token or an iterable of tokens. Omit to flush remaining data.\n * @param options - Assembler options.\n * @returns An iterable iterator of CSV records.\n */\n public *assemble(\n input?: Token | Iterable<Token>,\n options?: CSVRecordAssemblerAssembleOptions,\n ): IterableIterator<CSVRecord<Header>> {\n const stream = options?.stream ?? false;\n\n if (input !== undefined) {\n // Check if input is iterable (has Symbol.iterator)\n if (this.#isIterable(input)) {\n for (const token of input) {\n yield* this.#processToken(token);\n }\n } else {\n // Single token\n yield* this.#processToken(input);\n }\n }\n\n if (!stream) {\n yield* this.#flush();\n }\n }\n\n /**\n * Checks if a value is iterable.\n */\n #isIterable(value: any): value is Iterable<Token> {\n return value != null && typeof value[Symbol.iterator] === \"function\";\n }\n\n /**\n * Processes a single token and yields a record if one is completed.\n */\n *#processToken(token: Token): IterableIterator<CSVRecord<Header>> {\n this.#signal?.throwIfAborted();\n\n switch (token.type) {\n case FieldDelimiter:\n this.#fieldIndex++;\n this.#checkFieldCount();\n this.#dirty = true;\n break;\n case RecordDelimiter:\n if (this.#header === undefined) {\n this.#setHeader(this.#row as unknown as Header);\n } else {\n if (this.#dirty) {\n // SAFETY: Object.fromEntries() is safe from prototype pollution.\n // See CSVRecordAssembler.prototype-safety.test.ts for details.\n yield Object.fromEntries(\n this.#header\n .map((header, index) => [header, index] as const)\n .filter(([header]) => header)\n .map(([header, index]) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n } else {\n if (!this.#skipEmptyLines) {\n // SAFETY: Object.fromEntries() is safe from prototype pollution.\n // See CSVRecordAssembler.prototype-safety.test.ts for details.\n yield Object.fromEntries(\n this.#header\n .filter((header) => header)\n .map((header) => [header, \"\"]),\n ) as CSVRecord<Header>;\n }\n }\n }\n // Reset the row fields buffer.\n this.#fieldIndex = 0;\n this.#row = new Array(this.#header?.length).fill(\"\");\n this.#dirty = false;\n break;\n default:\n this.#dirty = true;\n this.#row[this.#fieldIndex] = token.value;\n break;\n }\n }\n\n /**\n * Flushes any remaining buffered data as a final record.\n *\n * @remarks\n * Prototype Pollution Safety:\n * This method uses Object.fromEntries() to create record objects from CSV data.\n * Object.fromEntries() is safe from prototype pollution because it creates\n * own properties (not prototype properties) even when keys like \"__proto__\",\n * \"constructor\", or \"prototype\" are used.\n *\n * For example, Object.fromEntries([[\"__proto__\", \"value\"]]) creates an object\n * with an own property \"__proto__\" set to \"value\", which does NOT pollute\n * Object.prototype and does NOT affect other objects.\n *\n * This safety is verified by regression tests in:\n * CSVRecordAssembler.prototype-safety.test.ts\n */\n *#flush(): IterableIterator<CSVRecord<Header>> {\n if (this.#header !== undefined) {\n if (this.#dirty) {\n // SAFETY: Object.fromEntries() creates own properties, preventing prototype pollution\n // even when CSV headers contain dangerous property names like __proto__, constructor, etc.\n // See CSVRecordAssembler.prototype-safety.test.ts for verification tests.\n yield Object.fromEntries(\n this.#header\n .map((header, index) => [header, index] as const)\n .filter(([header]) => header)\n .map(([header, index]) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n }\n }\n }\n\n #checkFieldCount(): void {\n if (this.#fieldIndex + 1 > this.#maxFieldCount) {\n throw new RangeError(\n `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n }\n\n #setHeader(header: Header) {\n if (header.length > this.#maxFieldCount) {\n throw new RangeError(\n `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n this.#header = header;\n if (this.#header.length === 0) {\n throw new ParseError(\"The header must not be empty.\");\n }\n if (new Set(this.#header).size !== this.#header.length) {\n throw new ParseError(\"The header must not contain duplicate fields.\");\n }\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,uBAAA,GAA0B,GAAA;AAkBzB,MAAM,kBAAA,CAAyD;AAAA,EACpE,WAAA,GAAc,CAAA;AAAA,EACd,OAAiB,EAAC;AAAA,EAClB,OAAA;AAAA,EACA,MAAA,GAAS,KAAA;AAAA,EACT,OAAA;AAAA,EACA,cAAA;AAAA,EACA,eAAA;AAAA,EAEA,WAAA,CAAY,OAAA,GAA6C,EAAC,EAAG;AAC3D,IAAA,MAAM,GAAA,GAAM,QAAQ,aAAA,IAAiB,uBAAA;AAErC,IAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,MAAA,MAAM,IAAI,UAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AACA,IAAA,IAAA,CAAK,cAAA,GAAiB,GAAA;AACtB,IAAA,IAAA,CAAK,eAAA,GAAkB,QAAQ,cAAA,IAAkB,KAAA;AACjD,IAAA,IAAI,QAAQ,MAAA,KAAW,MAAA,IAAa,MAAM,OAAA,CAAQ,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjE,MAAA,IAAA,CAAK,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,IAChC;AACA,IAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,MAAA,IAAA,CAAK,UAAU,OAAA,CAAQ,MAAA;AAAA,IACzB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,CAAQ,QAAA,CACN,KAAA,EACA,OAAA,EACqC;AACrC,IAAA,MAAM,MAAA,GAAS,SAAS,MAAA,IAAU,KAAA;AAElC,IAAA,IAAI,UAAU,MAAA,EAAW;AAEvB,MAAA,IAAI,IAAA,CAAK,WAAA,CAAY,KAAK,CAAA,EAAG;AAC3B,QAAA,KAAA,MAAW,SAAS,KAAA,EAAO;AACzB,UAAA,OAAO,IAAA,CAAK,cAAc,KAAK,CAAA;AAAA,QACjC;AAAA,MACF,CAAA,MAAO;AAEL,QAAA,OAAO,IAAA,CAAK,cAAc,KAAK,CAAA;AAAA,MACjC;AAAA,IACF;AAEA,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,OAAO,KAAK,MAAA,EAAO;AAAA,IACrB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,YAAY,KAAA,EAAsC;AAChD,IAAA,OAAO,SAAS,IAAA,IAAQ,OAAO,KAAA,CAAM,MAAA,CAAO,QAAQ,CAAA,KAAM,UAAA;AAAA,EAC5D;AAAA;AAAA;AAAA;AAAA,EAKA,CAAC,cAAc,KAAA,EAAmD;AAChE,IAAA,IAAA,CAAK,SAAS,cAAA,EAAe;AAE7B,IAAA,QAAQ,MAAM,IAAA;AAAM,MAClB,KAAK,cAAA;AACH,QAAA,IAAA,CAAK,WAAA,EAAA;AACL,QAAA,IAAA,CAAK,gBAAA,EAAiB;AACtB,QAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,QAAA;AAAA,MACF,KAAK,eAAA;AACH,QAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,UAAA,IAAA,CAAK,UAAA,CAAW,KAAK,IAAyB,CAAA;AAAA,QAChD,CAAA,MAAO;AACL,UAAA,IAAI,KAAK,MAAA,EAAQ;AAGf,YAAA,MAAM,MAAA,CAAO,WAAA;AAAA,cACX,IAAA,CAAK,OAAA,CACF,GAAA,CAAI,CAAC,MAAA,EAAQ,KAAA,KAAU,CAAC,MAAA,EAAQ,KAAK,CAAU,CAAA,CAC/C,MAAA,CAAO,CAAC,CAAC,MAAM,CAAA,KAAM,MAAM,CAAA,CAC3B,GAAA,CAAI,CAAC,CAAC,QAAQ,KAAK,CAAA,KAAM,CAAC,MAAA,EAAQ,IAAA,CAAK,IAAA,CAAK,EAAA,CAAG,KAAK,CAAC,CAAC;AAAA,aAC3D;AAAA,UACF,CAAA,MAAO;AACL,YAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AAGzB,cAAA,MAAM,MAAA,CAAO,WAAA;AAAA,gBACX,IAAA,CAAK,OAAA,CACF,MAAA,CAAO,CAAC,MAAA,KAAW,MAAM,CAAA,CACzB,GAAA,CAAI,CAAC,MAAA,KAAW,CAAC,MAAA,EAAQ,EAAE,CAAC;AAAA,eACjC;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAEA,QAAA,IAAA,CAAK,WAAA,GAAc,CAAA;AACnB,QAAA,IAAA,CAAK,IAAA,GAAO,IAAI,KAAA,CAAM,IAAA,CAAK,SAAS,MAAM,CAAA,CAAE,KAAK,EAAE,CAAA;AACnD,QAAA,IAAA,CAAK,MAAA,GAAS,KAAA;AACd,QAAA;AAAA,MACF;AACE,QAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,QAAA,IAAA,CAAK,IAAA,CAAK,IAAA,CAAK,WAAW,CAAA,GAAI,KAAA,CAAM,KAAA;AACpC,QAAA;AAAA;AACJ,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBA,CAAC,MAAA,GAA8C;AAC7C,IAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AC9B,MAAA,IAAI,KAAK,MAAA,EAAQ;AAIf,QAAA,MAAM,MAAA,CAAO,WAAA;AAAA,UACX,IAAA,CAAK,OAAA,CACF,GAAA,CAAI,CAAC,MAAA,EAAQ,KAAA,KAAU,CAAC,MAAA,EAAQ,KAAK,CAAU,CAAA,CAC/C,MAAA,CAAO,CAAC,CAAC,MAAM,CAAA,KAAM,MAAM,CAAA,CAC3B,GAAA,CAAI,CAAC,CAAC,QAAQ,KAAK,CAAA,KAAM,CAAC,MAAA,EAAQ,IAAA,CAAK,IAAA,CAAK,EAAA,CAAG,KAAK,CAAC,CAAC;AAAA,SAC3D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,gBAAA,GAAyB;AACvB,IAAA,IAAI,IAAA,CAAK,WAAA,GAAc,CAAA,GAAI,IAAA,CAAK,cAAA,EAAgB;AAC9C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAA,CAAK,WAAA,GAAc,CAAC,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAA,EAAgB;AACzB,IAAA,IAAI,MAAA,CAAO,MAAA,GAAS,IAAA,CAAK,cAAA,EAAgB;AACvC,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,CAAA,oBAAA,EAAuB,MAAA,CAAO,MAAM,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA,IACF;AACA,IAAA,IAAA,CAAK,OAAA,GAAU,MAAA;AACf,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,WAAW,+BAA+B,CAAA;AAAA,IACtD;AACA,IAAA,IAAI,IAAI,IAAI,IAAA,CAAK,OAAO,EAAE,IAAA,KAAS,IAAA,CAAK,QAAQ,MAAA,EAAQ;AACtD,MAAA,MAAM,IAAI,WAAW,+CAA+C,CAAA;AAAA,IACtE;AAAA,EACF;AACF;;;;"}
+{"version":3,"file":"CSVRecordAssembler.js","sources":["../src/CSVRecordAssembler.ts"],"sourcesContent":["import { FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default maximum field count per record (100,000 fields).\n */\nconst DEFAULT_MAX_FIELD_COUNT = 100_000;\n\n/**\n * Options for the CSVRecordAssembler.assemble method.\n */\nexport interface CSVRecordAssemblerAssembleOptions {\n /**\n * If true, indicates that more tokens are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Record Assembler.\n *\n * CSVRecordAssembler assembles tokens into CSV records.\n */\nexport class CSVRecordAssembler<Header extends ReadonlyArray<string>> {\n #fieldIndex = 0;\n #row: string[] = [];\n #header: Header | undefined;\n #dirty = false;\n #signal?: AbortSignal;\n #maxFieldCount: number;\n #skipEmptyLines: boolean;\n #currentRowNumber?: number;\n #source?: string;\n\n constructor(options: CSVRecordAssemblerOptions<Header> = {}) {\n const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;\n // Validate maxFieldCount\n if (\n !(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc)))\n ) {\n throw new RangeError(\n \"maxFieldCount must be a positive integer or Number.POSITIVE_INFINITY\",\n );\n }\n this.#maxFieldCount = mfc;\n this.#skipEmptyLines = options.skipEmptyLines ?? false;\n this.#source = options.source;\n if (options.header !== undefined && Array.isArray(options.header)) {\n this.#setHeader(options.header);\n }\n if (options.signal) {\n this.#signal = options.signal;\n }\n }\n\n /**\n * Assembles tokens into CSV records.\n * @param input - A single token or an iterable of tokens. Omit to flush remaining data.\n * @param options - Assembler options.\n * @returns An iterable iterator of CSV records.\n */\n public *assemble(\n input?: Token | Iterable<Token>,\n options?: CSVRecordAssemblerAssembleOptions,\n ): IterableIterator<CSVRecord<Header>> {\n const stream = options?.stream ?? false;\n\n if (input !== undefined) {\n // Check if input is iterable (has Symbol.iterator)\n if (this.#isIterable(input)) {\n for (const token of input) {\n yield* this.#processToken(token);\n }\n } else {\n // Single token\n yield* this.#processToken(input);\n }\n }\n\n if (!stream) {\n yield* this.#flush();\n }\n }\n\n /**\n * Checks if a value is iterable.\n */\n #isIterable(value: any): value is Iterable<Token> {\n return value != null && typeof value[Symbol.iterator] === \"function\";\n }\n\n /**\n * Processes a single token and yields a record if one is completed.\n */\n *#processToken(token: Token): IterableIterator<CSVRecord<Header>> {\n this.#signal?.throwIfAborted();\n\n // Track the current record number for error reporting\n if (token.location) {\n this.#currentRowNumber = token.location.rowNumber;\n }\n\n switch (token.type) {\n case FieldDelimiter:\n this.#fieldIndex++;\n this.#checkFieldCount();\n this.#dirty = true;\n break;\n case RecordDelimiter:\n if (this.#header === undefined) {\n this.#setHeader(this.#row as unknown as Header);\n } else {\n if (this.#dirty) {\n // SAFETY: Object.fromEntries() is safe from prototype pollution.\n // See CSVRecordAssembler.prototype-safety.test.ts for details.\n yield Object.fromEntries(\n this.#header\n .map((header, index) => [header, index] as const)\n .filter(([header]) => header)\n .map(([header, index]) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n } else {\n if (!this.#skipEmptyLines) {\n // SAFETY: Object.fromEntries() is safe from prototype pollution.\n // See CSVRecordAssembler.prototype-safety.test.ts for details.\n yield Object.fromEntries(\n this.#header\n .filter((header) => header)\n .map((header) => [header, \"\"]),\n ) as CSVRecord<Header>;\n }\n }\n }\n // Reset the row fields buffer.\n this.#fieldIndex = 0;\n this.#row = new Array(this.#header?.length).fill(\"\");\n this.#dirty = false;\n break;\n default:\n this.#dirty = true;\n this.#row[this.#fieldIndex] = token.value;\n break;\n }\n }\n\n /**\n * Flushes any remaining buffered data as a final record.\n *\n * @remarks\n * Prototype Pollution Safety:\n * This method uses Object.fromEntries() to create record objects from CSV data.\n * Object.fromEntries() is safe from prototype pollution because it creates\n * own properties (not prototype properties) even when keys like \"__proto__\",\n * \"constructor\", or \"prototype\" are used.\n *\n * For example, Object.fromEntries([[\"__proto__\", \"value\"]]) creates an object\n * with an own property \"__proto__\" set to \"value\", which does NOT pollute\n * Object.prototype and does NOT affect other objects.\n *\n * This safety is verified by regression tests in:\n * CSVRecordAssembler.prototype-safety.test.ts\n */\n *#flush(): IterableIterator<CSVRecord<Header>> {\n if (this.#header !== undefined) {\n if (this.#dirty) {\n // SAFETY: Object.fromEntries() creates own properties, preventing prototype pollution\n // even when CSV headers contain dangerous property names like __proto__, constructor, etc.\n // See CSVRecordAssembler.prototype-safety.test.ts for verification tests.\n yield Object.fromEntries(\n this.#header\n .map((header, index) => [header, index] as const)\n .filter(([header]) => header)\n .map(([header, index]) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n }\n }\n }\n\n #checkFieldCount(): void {\n if (this.#fieldIndex + 1 > this.#maxFieldCount) {\n throw new RangeError(\n `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}${\n this.#currentRowNumber ? ` at row ${this.#currentRowNumber}` : \"\"\n }${this.#source ? ` in ${JSON.stringify(this.#source)}` : \"\"}`,\n );\n }\n }\n\n #setHeader(header: Header) {\n if (header.length > this.#maxFieldCount) {\n throw new RangeError(\n `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}${\n this.#source ? ` in ${JSON.stringify(this.#source)}` : \"\"\n }`,\n );\n }\n this.#header = header;\n if (this.#header.length === 0) {\n throw new ParseError(\"The header must not be empty.\", {\n source: this.#source,\n });\n }\n if (new Set(this.#header).size !== this.#header.length) {\n throw new ParseError(\"The header must not contain duplicate fields.\", {\n source: this.#source,\n });\n }\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,uBAAA,GAA0B,GAAA;AAkBzB,MAAM,kBAAA,CAAyD;AAAA,EACpE,WAAA,GAAc,CAAA;AAAA,EACd,OAAiB,EAAC;AAAA,EAClB,OAAA;AAAA,EACA,MAAA,GAAS,KAAA;AAAA,EACT,OAAA;AAAA,EACA,cAAA;AAAA,EACA,eAAA;AAAA,EACA,iBAAA;AAAA,EACA,OAAA;AAAA,EAEA,WAAA,CAAY,OAAA,GAA6C,EAAC,EAAG;AAC3D,IAAA,MAAM,GAAA,GAAM,QAAQ,aAAA,IAAiB,uBAAA;AAErC,IAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,MAAA,MAAM,IAAI,UAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AACA,IAAA,IAAA,CAAK,cAAA,GAAiB,GAAA;AACtB,IAAA,IAAA,CAAK,eAAA,GAAkB,QAAQ,cAAA,IAAkB,KAAA;AACjD,IAAA,IAAA,CAAK,UAAU,OAAA,CAAQ,MAAA;AACvB,IAAA,IAAI,QAAQ,MAAA,KAAW,MAAA,IAAa,MAAM,OAAA,CAAQ,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjE,MAAA,IAAA,CAAK,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,IAChC;AACA,IAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,MAAA,IAAA,CAAK,UAAU,OAAA,CAAQ,MAAA;AAAA,IACzB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,CAAQ,QAAA,CACN,KAAA,EACA,OAAA,EACqC;AACrC,IAAA,MAAM,MAAA,GAAS,SAAS,MAAA,IAAU,KAAA;AAElC,IAAA,IAAI,UAAU,MAAA,EAAW;AAEvB,MAAA,IAAI,IAAA,CAAK,WAAA,CAAY,KAAK,CAAA,EAAG;AC3B,QAAA,KAAA,MAAW,SAAS,KAAA,EAAO;AACzB,UAAA,OAAO,IAAA,CAAK,cAAc,KAAK,CAAA;AAAA,QACjC;AAAA,MACF,CAAA,MAAO;AAEL,QAAA,OAAO,IAAA,CAAK,cAAc,KAAK,CAAA;AAAA,MACjC;AAAA,IACF;AAEA,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,OAAO,KAAK,MAAA,EAAO;AAAA,IACrB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,YAAY,KAAA,EAAsC;AAChD,IAAA,OAAO,SAAS,IAAA,IAAQ,OAAO,KAAA,CAAM,MAAA,CAAO,QAAQ,CAAA,KAAM,UAAA;AAAA,EAC5D;AAAA;AAAA;AAAA;AAAA,EAKA,CAAC,cAAc,KAAA,EAAmD;AAChE,IAAA,IAAA,CAAK,SAAS,cAAA,EAAe;AAG7B,IAAA,IAAI,MAAM,QAAA,EAAU;AAClB,MAAA,IAAA,CAAK,iBAAA,GAAoB,MAAM,QAAA,CAAS,SAAA;AAAA,IAC1C;AAEA,IAAA,QAAQ,MAAM,IAAA;AAAM,MAClB,KAAK,cAAA;AACH,QAAA,IAAA,CAAK,WAAA,EAAA;AACL,QAAA,IAAA,CAAK,gBAAA,EAAiB;AACtB,QAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,QAAA;AAAA,MACF,KAAK,eAAA;AACH,QAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,UAAA,IAAA,CAAK,UAAA,CAAW,KAAK,IAAyB,CAAA;AAAA,QAChD,CAAA,MAAO;AACL,UAAA,IAAI,KAAK,MAAA,EAAQ;AAGf,YAAA,MAAM,MAAA,CAAO,WAAA;AAAA,cACX,IAAA,CAAK,OAAA,CACF,GAAA,CAAI,CAAC,MAAA,EAAQ,KAAA,KAAU,CAAC,MAAA,EAAQ,KAAK,CAAU,CAAA,CAC/C,MAAA,CAAO,CAAC,CAAC,MAAM,CAAA,KAAM,MAAM,CAAA,CAC3B,GAAA,CAAI,CAAC,CAAC,QAAQ,KAAK,CAAA,KAAM,CAAC,MAAA,EAAQ,IAAA,CAAK,IAAA,CAAK,EAAA,CAAG,KAAK,CAAC,CAAC;AAAA,aAC3D;AAAA,UACF,CAAA,MAAO;AACL,YAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AAGzB,cAAA,MAAM,MAAA,CAAO,WAAA;AAAA,gBACX,IAAA,CAAK,OAAA,CACF,MAAA,CAAO,CAAC,MAAA,KAAW,MAAM,CAAA,CACzB,GAAA,CAAI,CAAC,MAAA,KAAW,CAAC,MAAA,EAAQ,EAAE,CAAC;AAAA,eACjC;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAEA,QAAA,IAAA,CAAK,WAAA,GAAc,CAAA;AACnB,QAAA,IAAA,CAAK,IAAA,GAAO,IAAI,KAAA,CAAM,IAAA,CAAK,SAAS,MAAM,CAAA,CAAE,KAAK,EAAE,CAAA;AACnD,QAAA,IAAA,CAAK,MAAA,GAAS,KAAA;AACd,QAAA;AAAA,MACF;AACE,QAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,QAAA,IAAA,CAAK,IAAA,CAAK,IAAA,CAAK,WAAW,CAAA,GAAI,KAAA,CAAM,KAAA;AACpC,QAAA;AAAA;AACJ,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBA,CAAC,MAAA,GAA8C;AAC7C,IAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,MAAA,IAAI,KAAK,MAAA,EAAQ;AAIf,QAAA,MAAM,MAAA,CAAO,WAAA;AAAA,UACX,IAAA,CAAK,OAAA,CACF,GAAA,CAAI,CAAC,MAAA,EAAQ,KAAA,KAAU,CAAC,MAAA,EAAQ,KAAK,CAAU,CAAA,CAC/C,MAAA,CAAO,CAAC,CAAC,MAAM,CAAA,KAAM,MAAM,CAAA,CAC3B,GAAA,CAAI,CAAC,CAAC,QAAQ,KAAK,CAAA,KAAM,CAAC,MAAA,EAAQ,IAAA,CAAK,IAAA,CAAK,EAAA,CAAG,KAAK,CAAC,CAAC;AAAA,SAC3D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,gBAAA,GAAyB;AACvB,IAAA,IAAI,IAAA,CAAK,WAAA,GAAc,CAAA,GAAI,IAAA,CAAK,cAAA,EAAgB;AAC9C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,CAAA,aAAA,EAAgB,IAAA,CAAK,WAAA,GAAc,CAAC,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA,EAC5F,IAAA,CAAK,iBAAA,GAAoB,CAAA,QAAA,EAAW,IAAA,CAAK,iBAAiB,CAAA,CAAA,GAAK,EACjE,CAAA,EAAG,IAAA,CAAK,OAAA,GAAU,CAAA,IAAA,EAAO,IAAA,CAAK,SAAA,CAAU,IAAA,CAAK,OAAO,CAAC,CAAA,CAAA,GAAK,EAAE,CAAA;AAAA,OAC9D;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAA,EAAgB;AACzB,IAAA,IAAI,MAAA,CAAO,MAAA,GAAS,IAAA,CAAK,cAAA,EAAgB;AACvC,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,uBAAuB,MAAA,CAAO,MAAM,CAAA,oCAAA,EAAuC,IAAA,CAAK,cAAc,CAAA,EAC5F,IAAA,CAAK,OAAA,GAAU,CAAA,IAAA,EAAO,KAAK,SAAA,CAAU,IAAA,CAAK,OAAO,CAAC,KAAK,EACzD,CAAA;AAAA,OACF;AAAA,IACF;AACA,IAAA,IAAA,CAAK,OAAA,GAAU,MAAA;AACf,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,WAAW,+BAAA,EAAiC;AAAA,QACpD,QAAQ,IAAA,CAAK;AAAA,OACd,CAAA;AAAA,IACH;AACA,IAAA,IAAI,IAAI,IAAI,IAAA,CAAK,OAAO,EAAE,IAAA,KAAS,IAAA,CAAK,QAAQ,MAAA,EAAQ;AACtD,MAAA,MAAM,IAAI,WAAW,+CAAA,EAAiD;AAAA,QACpE,QAAQ,IAAA,CAAK;AAAA,OACd,CAAA;AAAA,IACH;AAAA,EACF;AACF;;;;"}
@@ -1,12 +1,12 @@
 import { CSVRecordAssembler } from './CSVRecordAssembler.ts';
-import { CSVRecord, CSVRecordAssemblerOptions, ExtendedQueuingStrategy, Token } from './common/types.ts';
+import { CSVRecord, CSVRecordAssemblerOptions, Token } from './common/types.ts';
 /**
  * A transform stream that converts a stream of tokens into a stream of CSV records.
  *
  * @template Header The type of the header row.
- * @param options - CSV-specific options (header, maxFieldCount, etc.)
- * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: () => 1
- * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1
+ * @param options - CSV-specific options (header, maxFieldCount, checkInterval, etc.)
+ * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: () => 1 }`)
+ * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1 }`)
  *
  * @category Low-level API
  *
@@ -65,17 +65,11 @@ import { CSVRecord, CSVRecordAssemblerOptions, ExtendedQueuingStrategy, Token }
  * @example Custom queuing strategies with backpressure tuning
  * ```ts
  * const transformer = new CSVRecordAssemblerTransformer(
- *   {},
  *   {
- *
- *     size: () => 1, // Each token counts as 1
- *     checkInterval: 20 // Check backpressure every 20 records
+ *     backpressureCheckInterval: 20 // Check backpressure every 20 records
  *   },
- *   {
- *
- *     size: () => 1, // Each record counts as 1
- *     checkInterval: 5 // Check backpressure every 5 records
- *   }
+ *   new CountQueuingStrategy({ highWaterMark: 2048 }), // 2048 tokens
+ *   new CountQueuingStrategy({ highWaterMark: 512 }) // 512 records
 * );
 *
 * await tokenStream
@@ -91,5 +85,5 @@ export declare class CSVRecordAssemblerTransformer<Header extends ReadonlyArray<
      * @internal
      */
     protected yieldToEventLoop(): Promise<void>;
-    constructor(options?: CSVRecordAssemblerOptions<Header>, writableStrategy?:
+    constructor(options?: CSVRecordAssemblerOptions<Header>, writableStrategy?: QueuingStrategy<Token>, readableStrategy?: QueuingStrategy<CSVRecord<Header>>);
 }
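With the parameters narrowed to the standard `QueuingStrategy` type, plain WHATWG strategy objects now type-check directly. A small sketch under that assumption (high-water marks mirror the JSDoc example above):

```ts
import { CSVRecordAssemblerTransformer } from "web-csv-toolbox";

// Standard strategies satisfy the declared parameter types; the former
// extended shape carrying checkInterval is no longer part of the signature.
const transformer = new CSVRecordAssemblerTransformer(
  { maxFieldCount: 1000 },
  new CountQueuingStrategy({ highWaterMark: 2048 }), // tokens
  new CountQueuingStrategy({ highWaterMark: 512 }), // records
);
```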
@@ -1,5 +1,13 @@
 import { CSVRecordAssembler } from './CSVRecordAssembler.js';
 
+const DEFAULT_WRITABLE_STRATEGY = new CountQueuingStrategy({
+  highWaterMark: 1024
+  // 1024 tokens
+});
+const DEFAULT_READABLE_STRATEGY = new CountQueuingStrategy({
+  highWaterMark: 256
+  // 256 records
+});
 class CSVRecordAssemblerTransformer extends TransformStream {
   assembler;
   /**
@@ -10,23 +18,9 @@ class CSVRecordAssemblerTransformer extends TransformStream {
   async yieldToEventLoop() {
     await new Promise((resolve) => setTimeout(resolve, 0));
   }
-  constructor(options = {}, writableStrategy = {
-    highWaterMark: 1024,
-    // 1024 tokens
-    size: () => 1,
-    // Each token counts as 1
-    checkInterval: 10
-    // Check backpressure every 10 records
-  }, readableStrategy = {
-    highWaterMark: 256,
-    // 256 records
-    size: () => 1,
-    // Each record counts as 1
-    checkInterval: 10
-    // Check backpressure every 10 records
-  }) {
+  constructor(options = {}, writableStrategy = DEFAULT_WRITABLE_STRATEGY, readableStrategy = DEFAULT_READABLE_STRATEGY) {
     const assembler = new CSVRecordAssembler(options);
-    const checkInterval =
+    const checkInterval = options.backpressureCheckInterval ?? 10;
     super(
       {
         transform: async (token, controller) => {
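In effect, the backpressure polling knob moves from the queuing strategies into the CSV options. A sketch of the new call site (the interval value is illustrative; the option name and defaults are as shown in the hunks above):

```ts
import { CSVRecordAssemblerTransformer } from "web-csv-toolbox";

// One option now controls how often controller.desiredSize is polled
// while records are enqueued (default: every 10 records).
const transformer = new CSVRecordAssemblerTransformer({
  backpressureCheckInterval: 20, // check every 20 records
});
// Omitted strategies fall back to the new module-level defaults:
// 1024 tokens on the writable side, 256 records on the readable side.
```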