web-csv-toolbox 0.13.0-next-7d51d5285be9cffa5103de58469d8de0c98959d7 → 0.13.0-next-b21b6d89a7a3f18dcbf79ec04ffefde0d7ff4c4c
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/dist/CSVLexer.js.map +1 -1
- package/dist/CSVLexerTransformer.js.map +1 -1
- package/dist/CSVRecordAssembler.js.map +1 -1
- package/dist/CSVRecordAssemblerTransformer.js.map +1 -1
- package/dist/_virtual/web_csv_toolbox_wasm_bg.wasm.js +1 -1
- package/dist/assertCommonOptions.js.map +1 -1
- package/dist/common/constants.js.map +1 -1
- package/dist/common/errors.js.map +1 -1
- package/dist/common/types.d.ts +311 -14
- package/dist/commonParseErrorHandling.js.map +1 -1
- package/dist/constants.js.map +1 -1
- package/dist/createWorker.node.d.ts +2 -0
- package/dist/createWorker.web.d.ts +2 -0
- package/dist/execution/EnginePresets.d.ts +143 -0
- package/dist/execution/EnginePresets.js +129 -0
- package/dist/execution/EnginePresets.js.map +1 -0
- package/dist/execution/InternalEngineConfig.d.ts +89 -0
- package/dist/execution/InternalEngineConfig.js +175 -0
- package/dist/execution/InternalEngineConfig.js.map +1 -0
- package/dist/execution/main/parseBinaryInMain.d.ts +12 -0
- package/dist/execution/main/parseStreamInMain.d.ts +12 -0
- package/dist/execution/main/parseStringInMain.d.ts +12 -0
- package/dist/execution/main/parseUint8ArrayStreamInMain.d.ts +12 -0
- package/dist/execution/wasm/parseBinaryInWASM.d.ts +18 -0
- package/dist/execution/wasm/parseBinaryInWASM.js +15 -0
- package/dist/execution/wasm/parseBinaryInWASM.js.map +1 -0
- package/dist/execution/wasm/parseStringInWASM.d.ts +16 -0
- package/dist/execution/worker/helpers/ReusableWorkerPool.d.ts +152 -0
- package/dist/execution/worker/helpers/ReusableWorkerPool.js +238 -0
- package/dist/execution/worker/helpers/ReusableWorkerPool.js.map +1 -0
- package/dist/execution/worker/helpers/TransientWorkerPool.d.ts +89 -0
- package/dist/execution/worker/helpers/WorkerManager.d.ts +27 -0
- package/dist/execution/worker/helpers/WorkerPool.d.ts +50 -0
- package/dist/execution/worker/helpers/WorkerSession.d.ts +78 -0
- package/dist/execution/worker/helpers/WorkerSession.js +58 -0
- package/dist/execution/worker/helpers/WorkerSession.js.map +1 -0
- package/dist/execution/worker/helpers/createWorker.node.d.ts +8 -0
- package/dist/execution/worker/helpers/createWorker.node.js +15 -0
- package/dist/execution/worker/helpers/createWorker.node.js.map +1 -0
- package/dist/execution/worker/helpers/createWorker.web.d.ts +8 -0
- package/dist/execution/worker/helpers/createWorker.web.js +11 -0
- package/dist/execution/worker/helpers/createWorker.web.js.map +1 -0
- package/dist/execution/worker/helpers/worker.node.d.ts +1 -0
- package/dist/execution/worker/helpers/worker.node.js +11 -0
- package/dist/execution/worker/helpers/worker.node.js.map +1 -0
- package/dist/execution/worker/helpers/worker.shared.d.ts +90 -0
- package/dist/execution/worker/helpers/worker.shared.js +241 -0
- package/dist/execution/worker/helpers/worker.shared.js.map +1 -0
- package/dist/execution/worker/helpers/worker.web.d.ts +1 -0
- package/dist/execution/worker/helpers/worker.web.js +16 -0
- package/dist/execution/worker/helpers/worker.web.js.map +1 -0
- package/dist/execution/worker/parseBinaryInWorker.node.d.ts +8 -0
- package/dist/execution/worker/parseBinaryInWorker.node.js +24 -0
- package/dist/execution/worker/parseBinaryInWorker.node.js.map +1 -0
- package/dist/execution/worker/parseBinaryInWorker.web.d.ts +8 -0
- package/dist/execution/worker/parseBinaryInWorker.web.js +24 -0
- package/dist/execution/worker/parseBinaryInWorker.web.js.map +1 -0
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.d.ts +8 -0
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.js +24 -0
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.js.map +1 -0
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.d.ts +8 -0
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.js +24 -0
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.js.map +1 -0
- package/dist/execution/worker/parseStreamInWorker.node.d.ts +15 -0
- package/dist/execution/worker/parseStreamInWorker.node.js +26 -0
- package/dist/execution/worker/parseStreamInWorker.node.js.map +1 -0
- package/dist/execution/worker/parseStreamInWorker.web.d.ts +12 -0
- package/dist/execution/worker/parseStreamInWorker.web.js +25 -0
- package/dist/execution/worker/parseStreamInWorker.web.js.map +1 -0
- package/dist/execution/worker/parseStringInWorker.node.d.ts +11 -0
- package/dist/execution/worker/parseStringInWorker.node.js +24 -0
- package/dist/execution/worker/parseStringInWorker.node.js.map +1 -0
- package/dist/execution/worker/parseStringInWorker.web.d.ts +11 -0
- package/dist/execution/worker/parseStringInWorker.web.js +24 -0
- package/dist/execution/worker/parseStringInWorker.web.js.map +1 -0
- package/dist/execution/worker/parseStringInWorkerWASM.node.d.ts +8 -0
- package/dist/execution/worker/parseStringInWorkerWASM.node.js +24 -0
- package/dist/execution/worker/parseStringInWorkerWASM.node.js.map +1 -0
- package/dist/execution/worker/parseStringInWorkerWASM.web.d.ts +8 -0
- package/dist/execution/worker/parseStringInWorkerWASM.web.js +24 -0
- package/dist/execution/worker/parseStringInWorkerWASM.web.js.map +1 -0
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.d.ts +12 -0
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js +26 -0
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js.map +1 -0
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.d.ts +9 -0
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js +25 -0
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js.map +1 -0
- package/dist/execution/worker/strategies/MessageStreamingStrategy.d.ts +17 -0
- package/dist/execution/worker/strategies/MessageStreamingStrategy.js +58 -0
- package/dist/execution/worker/strategies/MessageStreamingStrategy.js.map +1 -0
- package/dist/execution/worker/strategies/TransferableStreamStrategy.d.ts +25 -0
- package/dist/execution/worker/strategies/TransferableStreamStrategy.js +159 -0
- package/dist/execution/worker/strategies/TransferableStreamStrategy.js.map +1 -0
- package/dist/execution/worker/strategies/WorkerStrategy.d.ts +27 -0
- package/dist/execution/worker/strategies/WorkerStrategySelector.d.ts +43 -0
- package/dist/execution/worker/strategies/WorkerStrategySelector.js +89 -0
- package/dist/execution/worker/strategies/WorkerStrategySelector.js.map +1 -0
- package/dist/execution/worker/utils/messageHandler.d.ts +21 -0
- package/dist/execution/worker/utils/messageHandler.js +109 -0
- package/dist/execution/worker/utils/messageHandler.js.map +1 -0
- package/dist/execution/worker/utils/serializeOptions.d.ts +9 -0
- package/dist/execution/worker/utils/serializeOptions.js +14 -0
- package/dist/execution/worker/utils/serializeOptions.js.map +1 -0
- package/dist/execution/worker/utils/streamCollector.node.d.ts +14 -0
- package/dist/execution/worker/utils/streamCollector.node.js +78 -0
- package/dist/execution/worker/utils/streamCollector.node.js.map +1 -0
- package/dist/execution/worker/utils/workerUtils.d.ts +14 -0
- package/dist/execution/worker/utils/workerUtils.js +25 -0
- package/dist/execution/worker/utils/workerUtils.js.map +1 -0
- package/dist/getOptionsFromResponse.constants.node.d.ts +10 -0
- package/dist/getOptionsFromResponse.constants.node.js +8 -0
- package/dist/getOptionsFromResponse.constants.node.js.map +1 -0
- package/dist/getOptionsFromResponse.constants.web.d.ts +30 -0
- package/dist/getOptionsFromResponse.constants.web.js +7 -0
- package/dist/getOptionsFromResponse.constants.web.js.map +1 -0
- package/dist/getOptionsFromResponse.d.ts +2 -1
- package/dist/getOptionsFromResponse.js +5 -9
- package/dist/getOptionsFromResponse.js.map +1 -1
- package/dist/loadWASM.js.map +1 -1
- package/dist/loadWASM.web.js.map +1 -1
- package/dist/parse.d.ts +1 -1
- package/dist/parse.js +29 -5
- package/dist/parse.js.map +1 -1
- package/dist/parseBinary.d.ts +2 -1
- package/dist/parseBinary.js +32 -3
- package/dist/parseBinary.js.map +1 -1
- package/dist/parseBinaryInWorker.node.d.ts +2 -0
- package/dist/parseBinaryInWorker.web.d.ts +2 -0
- package/dist/parseBinaryInWorkerWASM.node.d.ts +2 -0
- package/dist/parseBinaryInWorkerWASM.web.d.ts +2 -0
- package/dist/parseBinaryToArraySync.d.ts +2 -1
- package/dist/parseBinaryToArraySync.js.map +1 -1
- package/dist/parseBinaryToIterableIterator.d.ts +2 -1
- package/dist/parseBinaryToIterableIterator.js.map +1 -1
- package/dist/parseBinaryToStream.d.ts +2 -1
- package/dist/parseBinaryToStream.js.map +1 -1
- package/dist/parseResponse.d.ts +1 -1
- package/dist/parseResponse.js +15 -8
- package/dist/parseResponse.js.map +1 -1
- package/dist/parseResponseToStream.d.ts +2 -1
- package/dist/parseResponseToStream.js.map +1 -1
- package/dist/parseStreamInWorker.node.d.ts +2 -0
- package/dist/parseStreamInWorker.web.d.ts +2 -0
- package/dist/parseString.d.ts +31 -0
- package/dist/parseString.js +27 -1
- package/dist/parseString.js.map +1 -1
- package/dist/parseStringInWorker.node.d.ts +2 -0
- package/dist/parseStringInWorker.web.d.ts +2 -0
- package/dist/parseStringInWorkerWASM.node.d.ts +2 -0
- package/dist/parseStringInWorkerWASM.web.d.ts +2 -0
- package/dist/parseStringStream.d.ts +43 -1
- package/dist/parseStringStream.js +24 -3
- package/dist/parseStringStream.js.map +1 -1
- package/dist/parseStringStreamToStream.js.map +1 -1
- package/dist/parseStringToArraySync.js.map +1 -1
- package/dist/parseStringToArraySyncWASM.js.map +1 -1
- package/dist/parseStringToIterableIterator.js.map +1 -1
- package/dist/parseStringToStream.js.map +1 -1
- package/dist/parseUint8ArrayStream.d.ts +4 -3
- package/dist/parseUint8ArrayStream.js +24 -3
- package/dist/parseUint8ArrayStream.js.map +1 -1
- package/dist/parseUint8ArrayStreamInWorker.node.d.ts +2 -0
- package/dist/parseUint8ArrayStreamInWorker.web.d.ts +2 -0
- package/dist/parseUint8ArrayStreamToStream.d.ts +2 -1
- package/dist/parseUint8ArrayStreamToStream.js +11 -5
- package/dist/parseUint8ArrayStreamToStream.js.map +1 -1
- package/dist/utils/convertBinaryToString.js.map +1 -1
- package/dist/utils/convertIterableIteratorToAsync.js.map +1 -1
- package/dist/utils/convertStreamToAsyncIterableIterator.js +2 -2
- package/dist/utils/convertStreamToAsyncIterableIterator.js.map +1 -1
- package/dist/utils/convertThisAsyncIterableIteratorToArray.d.ts +1 -1
- package/dist/utils/convertThisAsyncIterableIteratorToArray.js.map +1 -1
- package/dist/utils/escapeRegExp.js.map +1 -1
- package/dist/utils/parseMime.js.map +1 -1
- package/dist/utils/pipeline.js.map +1 -1
- package/dist/web-csv-toolbox.d.ts +4 -0
- package/dist/web-csv-toolbox.js +3 -0
- package/dist/web-csv-toolbox.js.map +1 -1
- package/dist/web_csv_toolbox_wasm_bg.wasm +0 -0
- package/dist/worker.node.d.ts +1 -0
- package/dist/worker.web.d.ts +1 -0
- package/package.json +53 -10
package/README.md
CHANGED
|
@@ -302,13 +302,13 @@ try {
|
|
|
302
302
|
|
|
303
303
|
### Works on Browser
|
|
304
304
|
|
|
305
|
-
| OS | Chrome |
|
|
305
|
+
| OS | Chrome | Firefox | Default |
|
|
306
306
|
| ------- | ------ | ------- | ------------- |
|
|
307
307
|
| Windows | ✅ | ✅ | ✅ (Edge) |
|
|
308
|
-
|
|
|
308
|
+
| macOS | ✅ | ✅ | ⬜ (Safari *) |
|
|
309
309
|
| Linux | ✅ | ✅ | - |
|
|
310
310
|
|
|
311
|
-
> **\*
|
|
311
|
+
> **\* Safari**: Basic functionality is expected to work, but it is not yet automatically tested in our CI environment.
|
|
312
312
|
|
|
313
313
|
### Others
|
|
314
314
|
|
|
@@ -750,7 +750,7 @@ try {
|
|
|
750
750
|
|
|
751
751
|
## Star ⭐
|
|
752
752
|
|
|
753
|
-
The easiest way to contribute is to use the library and star [repository](https://github.com/kamiazya/web-csv-toolbox/).
|
|
753
|
+
The easiest way to contribute is to use the library and star the [repository](https://github.com/kamiazya/web-csv-toolbox/).
|
|
754
754
|
|
|
755
755
|
### Questions 💭
|
|
756
756
|
|
|
@@ -758,7 +758,7 @@ Feel free to ask questions on [GitHub Discussions](https://github.com/kamiazya/w
|
|
|
758
758
|
|
|
759
759
|
### Report bugs / request additional features 💡
|
|
760
760
|
|
|
761
|
-
Please
|
|
761
|
+
Please create an issue at [GitHub Issues](https://github.com/kamiazya/web-csv-toolbox/issues/new/choose).
|
|
762
762
|
|
|
763
763
|
### Financial Support 💸
|
|
764
764
|
|
|
@@ -768,7 +768,7 @@ Please support [kamiazya](https://github.com/sponsors/kamiazya).
|
|
|
768
768
|
|
|
769
769
|
## License ⚖️
|
|
770
770
|
|
|
771
|
-
This software is released under the MIT License, see [LICENSE](https://github.com/kamiazya/web-csv-toolbox
|
|
771
|
+
This software is released under the MIT License, see [LICENSE](https://github.com/kamiazya/web-csv-toolbox/blob/main/LICENSE).
|
|
772
772
|
|
|
773
773
|
|
|
774
774
|
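[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fkamiazya%2Fweb-csv-toolbox.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2Fkamiazya%2Fweb-csv-toolbox?ref=badge_large)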
|
package/dist/CSVLexer.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CSVLexer.js","sources":["../src/CSVLexer.ts"],"sourcesContent":["import { assertCommonOptions } from \"./assertCommonOptions.ts\";\nimport { Field, FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n AbortSignalOptions,\n CommonOptions,\n Position,\n RecordDelimiterToken,\n Token,\n} from \"./common/types.ts\";\nimport { CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, LF } from \"./constants.ts\";\nimport { escapeRegExp } from \"./utils/escapeRegExp.ts\";\n\n/**\n * Default maximum buffer size in characters (UTF-16 code units).\n * Approximately 10MB for ASCII text, but may vary for non-ASCII characters.\n */\nexport const DEFAULT_MAX_BUFFER_SIZE = 10 * 1024 * 1024;\n\n/**\n * Options for the CSVLexer.lex method.\n */\nexport interface CSVLexerLexOptions {\n /**\n * If true, indicates that more chunks are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Lexer.\n *\n * CSVLexer tokenizes CSV data into fields and records.\n */\nexport class CSVLexer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = DEFAULT_QUOTATION,\n> {\n #delimiter: string;\n #quotation: string;\n #buffer = \"\";\n #flush = false;\n #matcher: RegExp;\n #fieldDelimiterLength: number;\n #maxBufferSize: number;\n\n #cursor: Position = {\n line: 1,\n column: 1,\n offset: 0,\n };\n #rowNumber = 1;\n\n #signal?: AbortSignal;\n\n /**\n * Constructs a new CSVLexer instance.\n * @param options - The common options for the lexer.\n */\n constructor(\n options: CommonOptions<Delimiter, Quotation> & AbortSignalOptions = {},\n ) {\n const {\n delimiter = DEFAULT_DELIMITER,\n quotation = DEFAULT_QUOTATION,\n maxBufferSize = DEFAULT_MAX_BUFFER_SIZE,\n signal,\n } = options;\n assertCommonOptions({ delimiter, quotation, maxBufferSize });\n this.#delimiter = delimiter;\n this.#quotation = quotation;\n this.#fieldDelimiterLength = 
delimiter.length;\n this.#maxBufferSize = maxBufferSize;\n const d = escapeRegExp(delimiter);\n const q = escapeRegExp(quotation);\n this.#matcher = new RegExp(\n `^(?:(?!${q})(?!${d})(?![\\\\r\\\\n]))([\\\\S\\\\s\\\\uFEFF\\\\xA0]+?)(?=${q}|${d}|\\\\r|\\\\n|$)`,\n );\n if (signal) {\n this.#signal = signal;\n }\n }\n\n /**\n * Lexes the given chunk of CSV data.\n * @param chunk - The chunk of CSV data to be lexed. Omit to flush remaining data.\n * @param options - Lexer options.\n * @returns An iterable iterator of tokens.\n */\n public lex(\n chunk?: string,\n options?: CSVLexerLexOptions,\n ): IterableIterator<Token> {\n const stream = options?.stream ?? false;\n\n if (!stream) {\n this.#flush = true;\n }\n if (chunk !== undefined && chunk.length !== 0) {\n this.#buffer += chunk;\n this.#checkBufferSize();\n }\n\n return this.#tokens();\n }\n\n /**\n * Generates tokens from the buffered CSV data.\n * @yields Tokens from the buffered CSV data.\n */\n *#tokens(): Generator<Token> {\n if (this.#flush) {\n // Trim the last CRLF or LF\n if (this.#buffer.endsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(0, -2 /* -CRLF.length */);\n } else if (this.#buffer.endsWith(LF)) {\n this.#buffer = this.#buffer.slice(0, -1 /* -LF.length */);\n }\n }\n let token: Token | null;\n while ((token = this.#nextToken())) {\n yield token;\n }\n }\n\n /**\n * Checks if the buffer size exceeds the maximum allowed size.\n * @throws {RangeError} If the buffer size exceeds the maximum.\n */\n #checkBufferSize(): void {\n if (this.#buffer.length > this.#maxBufferSize) {\n throw new RangeError(\n `Buffer size (${this.#buffer.length} characters) exceeded maximum allowed size of ${this.#maxBufferSize} characters`,\n );\n }\n }\n\n /**\n * Retrieves the next token from the buffered CSV data.\n * @returns The next token or null if there are no more tokens.\n */\n #nextToken(): Token | null {\n this.#signal?.throwIfAborted();\n if (this.#buffer.length === 0) {\n return null;\n }\n // Buffer is 
Record Delimiter, defer to the next iteration.\n if (\n this.#flush === false &&\n (this.#buffer === CRLF || this.#buffer === LF)\n ) {\n return null;\n }\n\n // Check for CRLF\n if (this.#buffer.startsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(2);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 2; // CRLF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: CRLF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for LF\n if (this.#buffer.startsWith(LF)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 1; // LF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: LF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for Delimiter\n if (this.#buffer.startsWith(this.#delimiter)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += this.#fieldDelimiterLength;\n this.#cursor.offset += this.#fieldDelimiterLength;\n return {\n type: FieldDelimiter,\n value: this.#delimiter,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Check for Quoted String\n if (this.#buffer.startsWith(this.#quotation)) {\n /**\n * Extract Quoted field.\n *\n * The following code is equivalent to the following:\n *\n * If the next character is a quote:\n * - If the character after that is a quote, then append a quote to the value and skip two characters.\n * - Otherwise, return the quoted string.\n * Otherwise, append the character to the value and skip one character.\n *\n * ```plaintext\n * | `i` | `i + 1` | `i + 2` |\n * |------------|------------|----------|\n * | cur | next | | => 
Variable names\n * | #quotation | #quotation | | => Escaped quote\n * | #quotation | (EOF) | | => Closing quote\n * | #quotation | undefined | | => End of buffer\n * | undefined | | | => End of buffer\n * ```\n */\n let value = \"\";\n let offset = 1; // Skip the opening quote\n let column = 2; // Skip the opening quote\n let line = 0;\n\n // Define variables\n let cur: string = this.#buffer[offset];\n let next: string | undefined = this.#buffer[offset + 1];\n do {\n // If the current character is a quote, check the next characters for closing quotes.\n if (cur === this.#quotation) {\n // If the cur character is a quote and the next character is a quote,\n // then append a quote to the value and skip two characters.\n if (next === this.#quotation) {\n // Append a quote to the value and skip two characters.\n value += this.#quotation;\n offset += 2;\n cur = this.#buffer[offset];\n next = this.#buffer[offset + 1];\n\n // Update the diff\n column += 2;\n continue;\n }\n\n // If the cur character is a quote and the next character is undefined,\n // then return null.\n if (next === undefined && this.#flush === false) {\n return null;\n }\n\n // Otherwise, return the quoted string.\n // Update the buffer and return the token\n offset++;\n this.#buffer = this.#buffer.slice(offset);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += column;\n this.#cursor.offset += offset;\n this.#cursor.line += line;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Append the character to the value.\n value += cur;\n\n // Prepare for the next iteration\n if (cur === LF) {\n // If the current character is a LF,\n // then increment the line number and reset the column number.\n line++;\n column = 1;\n } else {\n // Otherwise, increment the column number and offset.\n column++;\n }\n\n offset++;\n cur = next;\n next = this.#buffer[offset + 1];\n } while (cur !== undefined);\n\n if 
(this.#flush) {\n throw new ParseError(\"Unexpected EOF while parsing quoted field.\", {\n position: { ...this.#cursor },\n });\n }\n return null;\n }\n\n // Check for Unquoted String\n const match = this.#matcher.exec(this.#buffer);\n if (match) {\n // If we're flushing and the match doesn't consume the entire buffer,\n // then return null\n if (this.#flush === false && match[0].length === this.#buffer.length) {\n return null;\n }\n const value = match[1];\n this.#buffer = this.#buffer.slice(value.length);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += value.length;\n this.#cursor.offset += value.length;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Otherwise, return null\n return null;\n }\n}\n"],"names":[],"mappings":";;;;;;AAiBa,MAAA,uBAAA,GAA0B,KAAK,IAAO,GAAA;AAkB5C,MAAM,QAGX,CAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,OAAU,GAAA,EAAA;AAAA,EACV,MAAS,GAAA,KAAA;AAAA,EACT,QAAA;AAAA,EACA,qBAAA;AAAA,EACA,cAAA;AAAA,EAEA,OAAoB,GAAA;AAAA,IAClB,IAAM,EAAA,CAAA;AAAA,IACN,MAAQ,EAAA,CAAA;AAAA,IACR,MAAQ,EAAA;AAAA,GACV;AAAA,EACA,UAAa,GAAA,CAAA;AAAA,EAEb,OAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,WAAA,CACE,OAAoE,GAAA,EACpE,EAAA;AACA,IAAM,MAAA;AAAA,MACJ,SAAY,GAAA,iBAAA;AAAA,MACZ,SAAY,GAAA,iBAAA;AAAA,MACZ,aAAgB,GAAA,uBAAA;AAAA,MAChB;AAAA,KACE,GAAA,OAAA;AACJ,IAAA,mBAAA,CAAoB,EAAE,SAAA,EAAW,SAAW,EAAA,aAAA,EAAe,CAAA;AAC3D,IAAA,IAAA,CAAK,UAAa,GAAA,SAAA;AAClB,IAAA,IAAA,CAAK,UAAa,GAAA,SAAA;AAClB,IAAA,IAAA,CAAK,wBAAwB,SAAU,CAAA,MAAA;AACvC,IAAA,IAAA,CAAK,cAAiB,GAAA,aAAA;AACtB,IAAM,MAAA,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAM,MAAA,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAA,IAAA,CAAK,WAAW,IAAI,MAAA;AAAA,MAClB,UAAU,CAAC,CAAA,IAAA,EAAO,CAAC,CAA4C,yCAAA,EAAA,CAAC,IAAI,CAAC,CAAA,WAAA;AAAA,KACvE;AACA,IAAA,IAAI,MAAQ,EAAA;AACV,MAAA,IAAA,CAAK,OAAU,GAAA,MAAA;AAAA;AACjB;AACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQO,GAAA,CACL,OACA,OACyB,EAAA;AACzB,IAAM,MAAA,MAAA,GAAS,SAAS,MAAU,IAAA,KAAA;AAElC,IAAA,IAAI,CAAC,MA
AQ,EAAA;AACX,MAAA,IAAA,CAAK,MAAS,GAAA,IAAA;AAAA;AAEhB,IAAA,IAAI,KAAU,KAAA,MAAA,IAAa,KAAM,CAAA,MAAA,KAAW,CAAG,EAAA;AAC7C,MAAA,IAAA,CAAK,OAAW,IAAA,KAAA;AAChB,MAAA,IAAA,CAAK,gBAAiB,EAAA;AAAA;AAGxB,IAAA,OAAO,KAAK,OAAQ,EAAA;AAAA;AACtB;AAAA;AAAA;AAAA;AAAA,EAMA,CAAC,OAA4B,GAAA;AAC3B,IAAA,IAAI,KAAK,MAAQ,EAAA;AAEf,MAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,QAAS,CAAA,IAAI,CAAG,EAAA;AAC/B,QAAK,IAAA,CAAA,OAAA,GAAU,KAAK,OAAQ,CAAA,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAqB;AAAA,OACjD,MAAA,IAAA,IAAA,CAAK,OAAQ,CAAA,QAAA,CAAS,EAAE,CAAG,EAAA;AACpC,QAAK,IAAA,CAAA,OAAA,GAAU,KAAK,OAAQ,CAAA,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAmB;AAAA;AAC1D;AAEF,IAAI,IAAA,KAAA;AACJ,IAAQ,OAAA,KAAA,GAAQ,IAAK,CAAA,UAAA,EAAe,EAAA;AAClC,MAAM,MAAA,KAAA;AAAA;AACR;AACF;AAAA;AAAA;AAAA;AAAA,EAMA,gBAAyB,GAAA;AACvB,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,MAAS,GAAA,IAAA,CAAK,cAAgB,EAAA;AAC7C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAK,CAAA,OAAA,CAAQ,MAAM,CAAA,8CAAA,EAAiD,KAAK,cAAc,CAAA,WAAA;AAAA,OACzG;AAAA;AACF;AACF;AAAA;AAAA;AAAA;AAAA,EAMA,UAA2B,GAAA;AACzB,IAAA,IAAA,CAAK,SAAS,cAAe,EAAA;AAC7B,IAAI,IAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,KAAW,CAAG,EAAA;AAC7B,MAAO,OAAA,IAAA;AAAA;AAGT,IACE,IAAA,IAAA,CAAK,WAAW,KACf,KAAA,IAAA,CAAK,YAAY,IAAQ,IAAA,IAAA,CAAK,YAAY,EAC3C,CAAA,EAAA;AACA,MAAO,OAAA,IAAA;AAAA;AAIT,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,IAAI,CAAG,EAAA;AACjC,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAA,IAAA,CAAK,OAAQ,CAAA,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAS,GAAA,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,CAAA;AACvB,MAAA,MAAM,KAA8B,GAAA;AAAA,QAClC,IAAM,EAAA,eAAA;AAAA,QACN,KAAO,EAAA,IAAA;AAAA,QACP,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA,UAAA;AAAA;AAClB,OACF;AACA,MAAO,OAAA,KAAA;AAAA;AAIT,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,EAAE,CAAG,EAAA;AAC/B,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAA,IAAA,CAAK,OAAQ,CA
AA,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAS,GAAA,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,CAAA;AACvB,MAAA,MAAM,KAA8B,GAAA;AAAA,QAClC,IAAM,EAAA,eAAA;AAAA,QACN,KAAO,EAAA,EAAA;AAAA,QACP,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA,UAAA;AAAA;AAClB,OACF;AACA,MAAO,OAAA,KAAA;AAAA;AAIT,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,IAAA,CAAK,UAAU,CAAG,EAAA;AAC5C,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,IAAK,CAAA,qBAAA;AAC5B,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,IAAK,CAAA,qBAAA;AAC5B,MAAO,OAAA;AAAA,QACL,IAAM,EAAA,cAAA;AAAA,QACN,OAAO,IAAK,CAAA,UAAA;AAAA,QACZ,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA;AAAA;AAClB,OACF;AAAA;AAIF,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,IAAA,CAAK,UAAU,CAAG,EAAA;AAqB5C,MAAA,IAAI,KAAQ,GAAA,EAAA;AACZ,MAAA,IAAI,MAAS,GAAA,CAAA;AACb,MAAA,IAAI,MAAS,GAAA,CAAA;AACb,MAAA,IAAI,IAAO,GAAA,CAAA;AAGX,MAAI,IAAA,GAAA,GAAc,IAAK,CAAA,OAAA,CAAQ,MAAM,CAAA;AACrC,MAAA,IAAI,IAA2B,GAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,GAAS,CAAC,CAAA;AACtD,MAAG,GAAA;AAED,QAAI,IAAA,GAAA,KAAQ,KAAK,UAAY,EAAA;AAG3B,UAAI,IAAA,IAAA,KAAS,KAAK,UAAY,EAAA;AAE5B,YAAA,KAAA,IAAS,IAAK,CAAA,UAAA;AACd,YAAU,MAAA,IAAA,CAAA;AACV,YAAM,GAAA,GAAA,IAAA,CAAK,QAAQ,MAAM,CAAA;AACzB,YAAO,IAAA,GAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,GAAS,CAAC,CAAA;AAG9B,YAAU,MAAA,IAAA,CAAA;AACV,YAAA;AAAA;AAKF,UAAA,IAAI,IAAS,KAAA,MAAA,IAAa,IAAK,CAAA,MAAA,KAAW,KAAO,EAAA;AAC/C,YAAO,OAAA,IAAA;AAAA;AAKT,UAAA,MAAA,EAAA;AACA,UAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,MAAM,CAAA;AACxC,UAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,UAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,IAAQ,IAAA,IAAA;AACrB,UAAO,OAAA;AAAA,YACL,IAAM,EAAA,KAAA;AAAA,YACN,KAAA;AAAA,YACA,QAAU,EAAA;AAAA,cACR,KAAA;AAAA,cACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,cACvB,WAAW,IAAK,CAAA;AAAA;AAClB,WACF;AAA
A;AAIF,QAAS,KAAA,IAAA,GAAA;AAGT,QAAA,IAAI,QAAQ,EAAI,EAAA;AAGd,UAAA,IAAA,EAAA;AACA,UAAS,MAAA,GAAA,CAAA;AAAA,SACJ,MAAA;AAEL,UAAA,MAAA,EAAA;AAAA;AAGF,QAAA,MAAA,EAAA;AACA,QAAM,GAAA,GAAA,IAAA;AACN,QAAO,IAAA,GAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,GAAS,CAAC,CAAA;AAAA,eACvB,GAAQ,KAAA,MAAA;AAEjB,MAAA,IAAI,KAAK,MAAQ,EAAA;AACf,QAAM,MAAA,IAAI,WAAW,4CAA8C,EAAA;AAAA,UACjE,QAAU,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ;AAAA,SAC7B,CAAA;AAAA;AAEH,MAAO,OAAA,IAAA;AAAA;AAIT,IAAA,MAAM,KAAQ,GAAA,IAAA,CAAK,QAAS,CAAA,IAAA,CAAK,KAAK,OAAO,CAAA;AAC7C,IAAA,IAAI,KAAO,EAAA;AAGT,MAAI,IAAA,IAAA,CAAK,WAAW,KAAS,IAAA,KAAA,CAAM,CAAC,CAAE,CAAA,MAAA,KAAW,IAAK,CAAA,OAAA,CAAQ,MAAQ,EAAA;AACpE,QAAO,OAAA,IAAA;AAAA;AAET,MAAM,MAAA,KAAA,GAAQ,MAAM,CAAC,CAAA;AACrB,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,MAAM,MAAM,CAAA;AAC9C,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,KAAM,CAAA,MAAA;AAC7B,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,KAAM,CAAA,MAAA;AAC7B,MAAO,OAAA;AAAA,QACL,IAAM,EAAA,KAAA;AAAA,QACN,KAAA;AAAA,QACA,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA;AAAA;AAClB,OACF;AAAA;AAIF,IAAO,OAAA,IAAA;AAAA;AAEX;;;;"}
|
|
1
|
+
{"version":3,"file":"CSVLexer.js","sources":["../src/CSVLexer.ts"],"sourcesContent":["import { assertCommonOptions } from \"./assertCommonOptions.ts\";\nimport { Field, FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n AbortSignalOptions,\n CommonOptions,\n Position,\n RecordDelimiterToken,\n Token,\n} from \"./common/types.ts\";\nimport { CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, LF } from \"./constants.ts\";\nimport { escapeRegExp } from \"./utils/escapeRegExp.ts\";\n\n/**\n * Default maximum buffer size in characters (UTF-16 code units).\n * Approximately 10MB for ASCII text, but may vary for non-ASCII characters.\n */\nexport const DEFAULT_MAX_BUFFER_SIZE = 10 * 1024 * 1024;\n\n/**\n * Options for the CSVLexer.lex method.\n */\nexport interface CSVLexerLexOptions {\n /**\n * If true, indicates that more chunks are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Lexer.\n *\n * CSVLexer tokenizes CSV data into fields and records.\n */\nexport class CSVLexer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = DEFAULT_QUOTATION,\n> {\n #delimiter: string;\n #quotation: string;\n #buffer = \"\";\n #flush = false;\n #matcher: RegExp;\n #fieldDelimiterLength: number;\n #maxBufferSize: number;\n\n #cursor: Position = {\n line: 1,\n column: 1,\n offset: 0,\n };\n #rowNumber = 1;\n\n #signal?: AbortSignal;\n\n /**\n * Constructs a new CSVLexer instance.\n * @param options - The common options for the lexer.\n */\n constructor(\n options: CommonOptions<Delimiter, Quotation> & AbortSignalOptions = {},\n ) {\n const {\n delimiter = DEFAULT_DELIMITER,\n quotation = DEFAULT_QUOTATION,\n maxBufferSize = DEFAULT_MAX_BUFFER_SIZE,\n signal,\n } = options;\n assertCommonOptions({ delimiter, quotation, maxBufferSize });\n this.#delimiter = delimiter;\n this.#quotation = quotation;\n this.#fieldDelimiterLength = 
delimiter.length;\n this.#maxBufferSize = maxBufferSize;\n const d = escapeRegExp(delimiter);\n const q = escapeRegExp(quotation);\n this.#matcher = new RegExp(\n `^(?:(?!${q})(?!${d})(?![\\\\r\\\\n]))([\\\\S\\\\s\\\\uFEFF\\\\xA0]+?)(?=${q}|${d}|\\\\r|\\\\n|$)`,\n );\n if (signal) {\n this.#signal = signal;\n }\n }\n\n /**\n * Lexes the given chunk of CSV data.\n * @param chunk - The chunk of CSV data to be lexed. Omit to flush remaining data.\n * @param options - Lexer options.\n * @returns An iterable iterator of tokens.\n */\n public lex(\n chunk?: string,\n options?: CSVLexerLexOptions,\n ): IterableIterator<Token> {\n const stream = options?.stream ?? false;\n\n if (!stream) {\n this.#flush = true;\n }\n if (chunk !== undefined && chunk.length !== 0) {\n this.#buffer += chunk;\n this.#checkBufferSize();\n }\n\n return this.#tokens();\n }\n\n /**\n * Generates tokens from the buffered CSV data.\n * @yields Tokens from the buffered CSV data.\n */\n *#tokens(): Generator<Token> {\n if (this.#flush) {\n // Trim the last CRLF or LF\n if (this.#buffer.endsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(0, -2 /* -CRLF.length */);\n } else if (this.#buffer.endsWith(LF)) {\n this.#buffer = this.#buffer.slice(0, -1 /* -LF.length */);\n }\n }\n let token: Token | null;\n while ((token = this.#nextToken())) {\n yield token;\n }\n }\n\n /**\n * Checks if the buffer size exceeds the maximum allowed size.\n * @throws {RangeError} If the buffer size exceeds the maximum.\n */\n #checkBufferSize(): void {\n if (this.#buffer.length > this.#maxBufferSize) {\n throw new RangeError(\n `Buffer size (${this.#buffer.length} characters) exceeded maximum allowed size of ${this.#maxBufferSize} characters`,\n );\n }\n }\n\n /**\n * Retrieves the next token from the buffered CSV data.\n * @returns The next token or null if there are no more tokens.\n */\n #nextToken(): Token | null {\n this.#signal?.throwIfAborted();\n if (this.#buffer.length === 0) {\n return null;\n }\n // Buffer is 
Record Delimiter, defer to the next iteration.\n if (\n this.#flush === false &&\n (this.#buffer === CRLF || this.#buffer === LF)\n ) {\n return null;\n }\n\n // Check for CRLF\n if (this.#buffer.startsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(2);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 2; // CRLF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: CRLF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for LF\n if (this.#buffer.startsWith(LF)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 1; // LF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: LF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for Delimiter\n if (this.#buffer.startsWith(this.#delimiter)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += this.#fieldDelimiterLength;\n this.#cursor.offset += this.#fieldDelimiterLength;\n return {\n type: FieldDelimiter,\n value: this.#delimiter,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Check for Quoted String\n if (this.#buffer.startsWith(this.#quotation)) {\n /**\n * Extract Quoted field.\n *\n * The following code is equivalent to the following:\n *\n * If the next character is a quote:\n * - If the character after that is a quote, then append a quote to the value and skip two characters.\n * - Otherwise, return the quoted string.\n * Otherwise, append the character to the value and skip one character.\n *\n * ```plaintext\n * | `i` | `i + 1` | `i + 2` |\n * |------------|------------|----------|\n * | cur | next | | => 
Variable names\n * | #quotation | #quotation | | => Escaped quote\n * | #quotation | (EOF) | | => Closing quote\n * | #quotation | undefined | | => End of buffer\n * | undefined | | | => End of buffer\n * ```\n */\n let value = \"\";\n let offset = 1; // Skip the opening quote\n let column = 2; // Skip the opening quote\n let line = 0;\n\n // Define variables\n let cur: string = this.#buffer[offset];\n let next: string | undefined = this.#buffer[offset + 1];\n do {\n // If the current character is a quote, check the next characters for closing quotes.\n if (cur === this.#quotation) {\n // If the cur character is a quote and the next character is a quote,\n // then append a quote to the value and skip two characters.\n if (next === this.#quotation) {\n // Append a quote to the value and skip two characters.\n value += this.#quotation;\n offset += 2;\n cur = this.#buffer[offset];\n next = this.#buffer[offset + 1];\n\n // Update the diff\n column += 2;\n continue;\n }\n\n // If the cur character is a quote and the next character is undefined,\n // then return null.\n if (next === undefined && this.#flush === false) {\n return null;\n }\n\n // Otherwise, return the quoted string.\n // Update the buffer and return the token\n offset++;\n this.#buffer = this.#buffer.slice(offset);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += column;\n this.#cursor.offset += offset;\n this.#cursor.line += line;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Append the character to the value.\n value += cur;\n\n // Prepare for the next iteration\n if (cur === LF) {\n // If the current character is a LF,\n // then increment the line number and reset the column number.\n line++;\n column = 1;\n } else {\n // Otherwise, increment the column number and offset.\n column++;\n }\n\n offset++;\n cur = next;\n next = this.#buffer[offset + 1];\n } while (cur !== undefined);\n\n if 
(this.#flush) {\n throw new ParseError(\"Unexpected EOF while parsing quoted field.\", {\n position: { ...this.#cursor },\n });\n }\n return null;\n }\n\n // Check for Unquoted String\n const match = this.#matcher.exec(this.#buffer);\n if (match) {\n // If we're flushing and the match doesn't consume the entire buffer,\n // then return null\n if (this.#flush === false && match[0].length === this.#buffer.length) {\n return null;\n }\n const value = match[1];\n this.#buffer = this.#buffer.slice(value.length);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += value.length;\n this.#cursor.offset += value.length;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Otherwise, return null\n return null;\n }\n}\n"],"names":[],"mappings":";;;;;;AAiBO,MAAM,uBAAA,GAA0B,KAAK,IAAA,GAAO;AAkB5C,MAAM,QAAA,CAGX;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,OAAA,GAAU,EAAA;AAAA,EACV,MAAA,GAAS,KAAA;AAAA,EACT,QAAA;AAAA,EACA,qBAAA;AAAA,EACA,cAAA;AAAA,EAEA,OAAA,GAAoB;AAAA,IAClB,IAAA,EAAM,CAAA;AAAA,IACN,MAAA,EAAQ,CAAA;AAAA,IACR,MAAA,EAAQ;AAAA,GACV;AAAA,EACA,UAAA,GAAa,CAAA;AAAA,EAEb,OAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,WAAA,CACE,OAAA,GAAoE,EAAC,EACrE;AACA,IAAA,MAAM;AAAA,MACJ,SAAA,GAAY,iBAAA;AAAA,MACZ,SAAA,GAAY,iBAAA;AAAA,MACZ,aAAA,GAAgB,uBAAA;AAAA,MAChB;AAAA,KACF,GAAI,OAAA;AACJ,IAAA,mBAAA,CAAoB,EAAE,SAAA,EAAW,SAAA,EAAW,aAAA,EAAe,CAAA;AAC3D,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,wBAAwB,SAAA,CAAU,MAAA;AACvC,IAAA,IAAA,CAAK,cAAA,GAAiB,aAAA;AACtB,IAAA,MAAM,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAA,MAAM,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAA,IAAA,CAAK,WAAW,IAAI,MAAA;AAAA,MAClB,UAAU,CAAC,CAAA,IAAA,EAAO,CAAC,CAAA,yCAAA,EAA4C,CAAC,IAAI,CAAC,CAAA,WAAA;AAAA,KACvE;AACA,IAAA,IAAI,MAAA,EAAQ;AACV,MAAA,IAAA,CAAK,OAAA,GAAU,MAAA;AAAA,IACjB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQO,GAAA,CACL,OACA,OAAA,EACyB;AACzB,IAAA,MAAM,MAAA,GAAS,SAAS,MAAA,IAAU,KAAA;AAElC,IAAA,IA
AI,CAAC,MAAA,EAAQ;AACX,MAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AAAA,IAChB;AACA,IAAA,IAAI,KAAA,KAAU,MAAA,IAAa,KAAA,CAAM,MAAA,KAAW,CAAA,EAAG;AAC7C,MAAA,IAAA,CAAK,OAAA,IAAW,KAAA;AAChB,MAAA,IAAA,CAAK,gBAAA,EAAiB;AAAA,IACxB;AAEA,IAAA,OAAO,KAAK,OAAA,EAAQ;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,CAAC,OAAA,GAA4B;AAC3B,IAAA,IAAI,KAAK,MAAA,EAAQ;AAEf,MAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,QAAA,CAAS,IAAI,CAAA,EAAG;AAC/B,QAAA,IAAA,CAAK,OAAA,GAAU,KAAK,OAAA,CAAQ,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAqB;AAAA,MAC5D,CAAA,MAAA,IAAW,IAAA,CAAK,OAAA,CAAQ,QAAA,CAAS,EAAE,CAAA,EAAG;AACpC,QAAA,IAAA,CAAK,OAAA,GAAU,KAAK,OAAA,CAAQ,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAmB;AAAA,MAC1D;AAAA,IACF;AACA,IAAA,IAAI,KAAA;AACJ,IAAA,OAAQ,KAAA,GAAQ,IAAA,CAAK,UAAA,EAAW,EAAI;AAClC,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,gBAAA,GAAyB;AACvB,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,IAAA,CAAK,cAAA,EAAgB;AAC7C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAA,CAAK,OAAA,CAAQ,MAAM,CAAA,8CAAA,EAAiD,KAAK,cAAc,CAAA,WAAA;AAAA,OACzG;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,UAAA,GAA2B;AACzB,IAAA,IAAA,CAAK,SAAS,cAAA,EAAe;AAC7B,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,OAAO,IAAA;AAAA,IACT;AAEA,IAAA,IACE,IAAA,CAAK,WAAW,KAAA,KACf,IAAA,CAAK,YAAY,IAAA,IAAQ,IAAA,CAAK,YAAY,EAAA,CAAA,EAC3C;AACA,MAAA,OAAO,IAAA;AAAA,IACT;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,IAAI,CAAA,EAAG;AACjC,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAA,GAAS,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,CAAA;AACvB,MAAA,MAAM,KAAA,GAA8B;AAAA,QAClC,IAAA,EAAM,eAAA;AAAA,QACN,KAAA,EAAO,IAAA;AAAA,QACP,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK,UAAA;AAAA;AAClB,OACF;AACA,MAAA,OAAO,KAAA;AAAA,IACT;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,EAAE,CAAA,EAAG;AAC/B,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA
,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAA,GAAS,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,CAAA;AACvB,MAAA,MAAM,KAAA,GAA8B;AAAA,QAClC,IAAA,EAAM,eAAA;AAAA,QACN,KAAA,EAAO,EAAA;AAAA,QACP,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK,UAAA;AAAA;AAClB,OACF;AACA,MAAA,OAAO,KAAA;AAAA,IACT;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,IAAA,CAAK,UAAU,CAAA,EAAG;AAC5C,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,IAAA,CAAK,qBAAA;AAC5B,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,IAAA,CAAK,qBAAA;AAC5B,MAAA,OAAO;AAAA,QACL,IAAA,EAAM,cAAA;AAAA,QACN,OAAO,IAAA,CAAK,UAAA;AAAA,QACZ,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK;AAAA;AAClB,OACF;AAAA,IACF;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,IAAA,CAAK,UAAU,CAAA,EAAG;AAqB5C,MAAA,IAAI,KAAA,GAAQ,EAAA;AACZ,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,IAAI,IAAA,GAAO,CAAA;AAGX,MAAA,IAAI,GAAA,GAAc,IAAA,CAAK,OAAA,CAAQ,MAAM,CAAA;AACrC,MAAA,IAAI,IAAA,GAA2B,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,CAAC,CAAA;AACtD,MAAA,GAAG;AAED,QAAA,IAAI,GAAA,KAAQ,KAAK,UAAA,EAAY;AAG3B,UAAA,IAAI,IAAA,KAAS,KAAK,UAAA,EAAY;AAE5B,YAAA,KAAA,IAAS,IAAA,CAAK,UAAA;AACd,YAAA,MAAA,IAAU,CAAA;AACV,YAAA,GAAA,GAAM,IAAA,CAAK,QAAQ,MAAM,CAAA;AACzB,YAAA,IAAA,GAAO,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,CAAC,CAAA;AAG9B,YAAA,MAAA,IAAU,CAAA;AACV,YAAA;AAAA,UACF;AAIA,UAAA,IAAI,IAAA,KAAS,MAAA,IAAa,IAAA,CAAK,MAAA,KAAW,KAAA,EAAO;AAC/C,YAAA,OAAO,IAAA;AAAA,UACT;AAIA,UAAA,MAAA,EAAA;AACA,UAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,MAAM,CAAA;AACxC,UAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,UAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,IAAA,IAAQ,IAAA;AACrB,UAAA,OAAO;AAAA,YACL,IAAA,EAAM,KAAA;AAAA,YACN,KAAA;AAAA,YACA,QAAA,EAAU;AAAA,cACR,KAAA;
AAAA,cACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,cACvB,WAAW,IAAA,CAAK;AAAA;AAClB,WACF;AAAA,QACF;AAGA,QAAA,KAAA,IAAS,GAAA;AAGT,QAAA,IAAI,QAAQ,EAAA,EAAI;AAGd,UAAA,IAAA,EAAA;AACA,UAAA,MAAA,GAAS,CAAA;AAAA,QACX,CAAA,MAAO;AAEL,UAAA,MAAA,EAAA;AAAA,QACF;AAEA,QAAA,MAAA,EAAA;AACA,QAAA,GAAA,GAAM,IAAA;AACN,QAAA,IAAA,GAAO,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,CAAC,CAAA;AAAA,MAChC,SAAS,GAAA,KAAQ,MAAA;AAEjB,MAAA,IAAI,KAAK,MAAA,EAAQ;AACf,QAAA,MAAM,IAAI,WAAW,4CAAA,EAA8C;AAAA,UACjE,QAAA,EAAU,EAAE,GAAG,IAAA,CAAK,OAAA;AAAQ,SAC7B,CAAA;AAAA,MACH;AACA,MAAA,OAAO,IAAA;AAAA,IACT;AAGA,IAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,QAAA,CAAS,IAAA,CAAK,KAAK,OAAO,CAAA;AAC7C,IAAA,IAAI,KAAA,EAAO;AAGT,MAAA,IAAI,IAAA,CAAK,WAAW,KAAA,IAAS,KAAA,CAAM,CAAC,CAAA,CAAE,MAAA,KAAW,IAAA,CAAK,OAAA,CAAQ,MAAA,EAAQ;AACpE,QAAA,OAAO,IAAA;AAAA,MACT;AACA,MAAA,MAAM,KAAA,GAAQ,MAAM,CAAC,CAAA;AACrB,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,MAAM,MAAM,CAAA;AAC9C,MAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,KAAA,CAAM,MAAA;AAC7B,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,KAAA,CAAM,MAAA;AAC7B,MAAA,OAAO;AAAA,QACL,IAAA,EAAM,KAAA;AAAA,QACN,KAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK;AAAA;AAClB,OACF;AAAA,IACF;AAGA,IAAA,OAAO,IAAA;AAAA,EACT;AACF;;;;"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CSVLexerTransformer.js","sources":["../src/CSVLexerTransformer.ts"],"sourcesContent":["import { CSVLexer } from \"./CSVLexer.ts\";\nimport type {\n CSVLexerTransformerOptions,\n ExtendedQueuingStrategy,\n Token,\n} from \"./common/types.ts\";\nimport type { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from \"./constants.ts\";\n\n/**\n * A transform stream that converts a stream of strings into a stream of tokens.\n *\n * @category Low-level API\n *\n * @param options - CSV-specific options (delimiter, quotation, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 65536, size: chunk => chunk.length, checkInterval: 100 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 100 }`)\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts by string length (characters). Default highWaterMark is 65536 characters (≈64KB).\n * - Readable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Basic usage\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeTo(new WritableStream({ write(tokens) {\n * for (const token of tokens) {\n * console.log(token);\n * }\n * }}));\n * // { type: Field, value: \"name\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"age\", location: {...} }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * // { type: Field, value: \"Alice\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"20\" }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVLexerTransformer(\n * { delimiter: ',' },\n * {\n * highWaterMark: 131072, // 128KB of characters\n * size: (chunk) => chunk.length, // Count by character length\n * checkInterval: 200 // Check backpressure every 200 tokens\n * },\n * {\n * highWaterMark: 2048, // 2048 tokens\n * size: (tokens) => tokens.length, // Count by token count\n * checkInterval: 50 // Check backpressure every 50 tokens\n * }\n * );\n *\n * await fetch('large-file.csv')\n * .then(res => res.body)\n * .pipeThrough(new TextDecoderStream())\n * .pipeThrough(transformer)\n * .pipeTo(yourProcessor);\n * ```\n */\nexport class CSVLexerTransformer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = 
DEFAULT_QUOTATION,\n> extends TransformStream<string, Token[]> {\n public readonly lexer: CSVLexer<Delimiter, Quotation>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVLexerTransformerOptions<Delimiter, Quotation> = {},\n writableStrategy: ExtendedQueuingStrategy<string> = {\n highWaterMark: 65536, // 64KB worth of characters\n size: (chunk) => chunk.length, // Count by string length (character count)\n checkInterval: 100, // Check backpressure every 100 tokens\n },\n readableStrategy: ExtendedQueuingStrategy<Token[]> = {\n highWaterMark: 1024, // 1024 tokens\n size: (tokens) => tokens.length, // Count by number of tokens in array\n checkInterval: 100, // Check backpressure every 100 tokens\n },\n ) {\n const lexer = new CSVLexer(options);\n const checkInterval =\n writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 
100;\n\n super(\n {\n transform: async (chunk, controller) => {\n if (chunk.length !== 0) {\n try {\n const tokens: Token[] = [];\n for (const token of lexer.lex(chunk, { stream: true })) {\n tokens.push(token);\n\n // Check backpressure periodically based on checkInterval\n if (\n tokens.length % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n\n if (tokens.length > 0) {\n controller.enqueue(tokens);\n }\n } catch (error) {\n controller.error(error);\n }\n }\n },\n flush: async (controller) => {\n try {\n const tokens: Token[] = [];\n for (const token of lexer.lex()) {\n tokens.push(token);\n\n // Check backpressure periodically based on checkInterval\n if (\n tokens.length % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n\n if (tokens.length > 0) {\n controller.enqueue(tokens);\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.lexer = lexer;\n }\n}\n"],"names":[],"mappings":";;AAiFO,MAAM,4BAGH,
|
|
1
|
+
{"version":3,"file":"CSVLexerTransformer.js","sources":["../src/CSVLexerTransformer.ts"],"sourcesContent":["import { CSVLexer } from \"./CSVLexer.ts\";\nimport type {\n CSVLexerTransformerOptions,\n ExtendedQueuingStrategy,\n Token,\n} from \"./common/types.ts\";\nimport type { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from \"./constants.ts\";\n\n/**\n * A transform stream that converts a stream of strings into a stream of tokens.\n *\n * @category Low-level API\n *\n * @param options - CSV-specific options (delimiter, quotation, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 65536, size: chunk => chunk.length, checkInterval: 100 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 100 }`)\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts by string length (characters). Default highWaterMark is 65536 characters (≈64KB).\n * - Readable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Basic usage\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeTo(new WritableStream({ write(tokens) {\n * for (const token of tokens) {\n * console.log(token);\n * }\n * }}));\n * // { type: Field, value: \"name\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"age\", location: {...} }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * // { type: Field, value: \"Alice\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"20\" }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVLexerTransformer(\n * { delimiter: ',' },\n * {\n * highWaterMark: 131072, // 128KB of characters\n * size: (chunk) => chunk.length, // Count by character length\n * checkInterval: 200 // Check backpressure every 200 tokens\n * },\n * {\n * highWaterMark: 2048, // 2048 tokens\n * size: (tokens) => tokens.length, // Count by token count\n * checkInterval: 50 // Check backpressure every 50 tokens\n * }\n * );\n *\n * await fetch('large-file.csv')\n * .then(res => res.body)\n * .pipeThrough(new TextDecoderStream())\n * .pipeThrough(transformer)\n * .pipeTo(yourProcessor);\n * ```\n */\nexport class CSVLexerTransformer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = 
DEFAULT_QUOTATION,\n> extends TransformStream<string, Token[]> {\n public readonly lexer: CSVLexer<Delimiter, Quotation>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVLexerTransformerOptions<Delimiter, Quotation> = {},\n writableStrategy: ExtendedQueuingStrategy<string> = {\n highWaterMark: 65536, // 64KB worth of characters\n size: (chunk) => chunk.length, // Count by string length (character count)\n checkInterval: 100, // Check backpressure every 100 tokens\n },\n readableStrategy: ExtendedQueuingStrategy<Token[]> = {\n highWaterMark: 1024, // 1024 tokens\n size: (tokens) => tokens.length, // Count by number of tokens in array\n checkInterval: 100, // Check backpressure every 100 tokens\n },\n ) {\n const lexer = new CSVLexer(options);\n const checkInterval =\n writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 
100;\n\n super(\n {\n transform: async (chunk, controller) => {\n if (chunk.length !== 0) {\n try {\n const tokens: Token[] = [];\n for (const token of lexer.lex(chunk, { stream: true })) {\n tokens.push(token);\n\n // Check backpressure periodically based on checkInterval\n if (\n tokens.length % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n\n if (tokens.length > 0) {\n controller.enqueue(tokens);\n }\n } catch (error) {\n controller.error(error);\n }\n }\n },\n flush: async (controller) => {\n try {\n const tokens: Token[] = [];\n for (const token of lexer.lex()) {\n tokens.push(token);\n\n // Check backpressure periodically based on checkInterval\n if (\n tokens.length % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n\n if (tokens.length > 0) {\n controller.enqueue(tokens);\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.lexer = lexer;\n 
}\n}\n"],"names":[],"mappings":";;AAiFO,MAAM,4BAGH,eAAA,CAAiC;AAAA,EACzB,KAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOhB,MAAgB,gBAAA,GAAkC;AAChD,IAAA,MAAM,IAAI,OAAA,CAAQ,CAAC,YAAY,UAAA,CAAW,OAAA,EAAS,CAAC,CAAC,CAAA;AAAA,EACvD;AAAA,EAEA,WAAA,CACE,OAAA,GAA4D,EAAC,EAC7D,gBAAA,GAAoD;AAAA,IAClD,aAAA,EAAe,KAAA;AAAA;AAAA,IACf,IAAA,EAAM,CAAC,KAAA,KAAU,KAAA,CAAM,MAAA;AAAA;AAAA,IACvB,aAAA,EAAe;AAAA;AAAA,KAEjB,gBAAA,GAAqD;AAAA,IACnD,aAAA,EAAe,IAAA;AAAA;AAAA,IACf,IAAA,EAAM,CAAC,MAAA,KAAW,MAAA,CAAO,MAAA;AAAA;AAAA,IACzB,aAAA,EAAe;AAAA;AAAA,GACjB,EACA;AACA,IAAA,MAAM,KAAA,GAAQ,IAAI,QAAA,CAAS,OAAO,CAAA;AAClC,IAAA,MAAM,aAAA,GACJ,gBAAA,CAAiB,aAAA,IAAiB,gBAAA,CAAiB,aAAA,IAAiB,GAAA;AAEtE,IAAA,KAAA;AAAA,MACE;AAAA,QACE,SAAA,EAAW,OAAO,KAAA,EAAO,UAAA,KAAe;AACtC,UAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,YAAA,IAAI;AACF,cAAA,MAAM,SAAkB,EAAC;AACzB,cAAA,KAAA,MAAW,KAAA,IAAS,MAAM,GAAA,CAAI,KAAA,EAAO,EAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAG;AACtD,gBAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAGjB,gBAAA,IACE,MAAA,CAAO,SAAS,aAAA,KAAkB,CAAA,IAClC,WAAW,WAAA,KAAgB,IAAA,IAC3B,UAAA,CAAW,WAAA,IAAe,CAAA,EAC1B;AAEA,kBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,gBAC9B;AAAA,cACF;AAEA,cAAA,IAAI,MAAA,CAAO,SAAS,CAAA,EAAG;AACrB,gBAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,cAC3B;AAAA,YACF,SAAS,KAAA,EAAO;AACd,cAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,YACxB;AAAA,UACF;AAAA,QACF,CAAA;AAAA,QACA,KAAA,EAAO,OAAO,UAAA,KAAe;AAC3B,UAAA,IAAI;AACF,YAAA,MAAM,SAAkB,EAAC;AACzB,YAAA,KAAA,MAAW,KAAA,IAAS,KAAA,CAAM,GAAA,EAAI,EAAG;AAC/B,cAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAGjB,cAAA,IACE,MAAA,CAAO,SAAS,aAAA,KAAkB,CAAA,IAClC,WAAW,WAAA,KAAgB,IAAA,IAC3B,UAAA,CAAW,WAAA,IAAe,CAAA,EAC1B;AACA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAEA,YAAA,IAAI,MAAA,CAAO,SAAS,CAAA,EAAG;AACrB,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,YAC3B;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF;AAAA,OACF;AAAA,MACA,gBAAA;AAAA,MACA;AAAA,KACF;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,KAAA;AAAA,EACf;AACF;;;;"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CSVRecordAssembler.js","sources":["../src/CSVRecordAssembler.ts"],"sourcesContent":["import { FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default maximum field count per record (100,000 fields).\n */\nconst DEFAULT_MAX_FIELD_COUNT = 100_000;\n\n/**\n * Options for the CSVRecordAssembler.assemble method.\n */\nexport interface CSVRecordAssemblerAssembleOptions {\n /**\n * If true, indicates that more tokens are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Record Assembler.\n *\n * CSVRecordAssembler assembles tokens into CSV records.\n */\nexport class CSVRecordAssembler<Header extends ReadonlyArray<string>> {\n #fieldIndex = 0;\n #row: string[] = [];\n #header: Header | undefined;\n #dirty = false;\n #signal?: AbortSignal;\n #maxFieldCount: number;\n #skipEmptyLines: boolean;\n\n constructor(options: CSVRecordAssemblerOptions<Header> = {}) {\n const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;\n // Validate maxFieldCount\n if (\n !(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc)))\n ) {\n throw new RangeError(\n \"maxFieldCount must be a positive integer or Number.POSITIVE_INFINITY\",\n );\n }\n this.#maxFieldCount = mfc;\n this.#skipEmptyLines = options.skipEmptyLines ?? false;\n if (options.header !== undefined && Array.isArray(options.header)) {\n this.#setHeader(options.header);\n }\n if (options.signal) {\n this.#signal = options.signal;\n }\n }\n\n /**\n * Assembles tokens into CSV records.\n * @param tokens - The tokens to assemble. 
Omit to flush remaining data.\n * @param options - Assembler options.\n * @returns An iterable iterator of CSV records.\n */\n public *assemble(\n tokens?: Iterable<Token>,\n options?: CSVRecordAssemblerAssembleOptions,\n ): IterableIterator<CSVRecord<Header>> {\n const stream = options?.stream ?? false;\n\n if (tokens !== undefined) {\n for (const token of tokens) {\n this.#signal?.throwIfAborted();\n switch (token.type) {\n case FieldDelimiter:\n this.#fieldIndex++;\n this.#checkFieldCount();\n this.#dirty = true;\n break;\n case RecordDelimiter:\n if (this.#header === undefined) {\n this.#setHeader(this.#row as unknown as Header);\n } else {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header.map((header, index) => [\n header,\n this.#row.at(index),\n ]),\n ) as unknown as CSVRecord<Header>;\n } else {\n if (this.#skipEmptyLines) {\n continue;\n }\n yield Object.fromEntries(\n this.#header.map((header) => [header, \"\"]),\n ) as CSVRecord<Header>;\n }\n }\n // Reset the row fields buffer.\n this.#fieldIndex = 0;\n this.#row = new Array(this.#header?.length).fill(\"\");\n this.#dirty = false;\n break;\n default:\n this.#dirty = true;\n this.#row[this.#fieldIndex] = token.value;\n break;\n }\n }\n }\n\n if (!stream) {\n if (this.#header !== undefined) {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header\n .filter((v) => v)\n .map((header, index) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n }\n }\n }\n }\n\n #checkFieldCount(): void {\n if (this.#fieldIndex + 1 > this.#maxFieldCount) {\n throw new RangeError(\n `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n }\n\n #setHeader(header: Header) {\n if (header.length > this.#maxFieldCount) {\n throw new RangeError(\n `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n this.#header = header;\n if (this.#header.length === 0) {\n throw new 
ParseError(\"The header must not be empty.\");\n }\n if (new Set(this.#header).size !== this.#header.length) {\n throw new ParseError(\"The header must not contain duplicate fields.\");\n }\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,
|
|
1
|
+
{"version":3,"file":"CSVRecordAssembler.js","sources":["../src/CSVRecordAssembler.ts"],"sourcesContent":["import { FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default maximum field count per record (100,000 fields).\n */\nconst DEFAULT_MAX_FIELD_COUNT = 100_000;\n\n/**\n * Options for the CSVRecordAssembler.assemble method.\n */\nexport interface CSVRecordAssemblerAssembleOptions {\n /**\n * If true, indicates that more tokens are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Record Assembler.\n *\n * CSVRecordAssembler assembles tokens into CSV records.\n */\nexport class CSVRecordAssembler<Header extends ReadonlyArray<string>> {\n #fieldIndex = 0;\n #row: string[] = [];\n #header: Header | undefined;\n #dirty = false;\n #signal?: AbortSignal;\n #maxFieldCount: number;\n #skipEmptyLines: boolean;\n\n constructor(options: CSVRecordAssemblerOptions<Header> = {}) {\n const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;\n // Validate maxFieldCount\n if (\n !(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc)))\n ) {\n throw new RangeError(\n \"maxFieldCount must be a positive integer or Number.POSITIVE_INFINITY\",\n );\n }\n this.#maxFieldCount = mfc;\n this.#skipEmptyLines = options.skipEmptyLines ?? false;\n if (options.header !== undefined && Array.isArray(options.header)) {\n this.#setHeader(options.header);\n }\n if (options.signal) {\n this.#signal = options.signal;\n }\n }\n\n /**\n * Assembles tokens into CSV records.\n * @param tokens - The tokens to assemble. 
Omit to flush remaining data.\n * @param options - Assembler options.\n * @returns An iterable iterator of CSV records.\n */\n public *assemble(\n tokens?: Iterable<Token>,\n options?: CSVRecordAssemblerAssembleOptions,\n ): IterableIterator<CSVRecord<Header>> {\n const stream = options?.stream ?? false;\n\n if (tokens !== undefined) {\n for (const token of tokens) {\n this.#signal?.throwIfAborted();\n switch (token.type) {\n case FieldDelimiter:\n this.#fieldIndex++;\n this.#checkFieldCount();\n this.#dirty = true;\n break;\n case RecordDelimiter:\n if (this.#header === undefined) {\n this.#setHeader(this.#row as unknown as Header);\n } else {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header.map((header, index) => [\n header,\n this.#row.at(index),\n ]),\n ) as unknown as CSVRecord<Header>;\n } else {\n if (this.#skipEmptyLines) {\n continue;\n }\n yield Object.fromEntries(\n this.#header.map((header) => [header, \"\"]),\n ) as CSVRecord<Header>;\n }\n }\n // Reset the row fields buffer.\n this.#fieldIndex = 0;\n this.#row = new Array(this.#header?.length).fill(\"\");\n this.#dirty = false;\n break;\n default:\n this.#dirty = true;\n this.#row[this.#fieldIndex] = token.value;\n break;\n }\n }\n }\n\n if (!stream) {\n if (this.#header !== undefined) {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header\n .filter((v) => v)\n .map((header, index) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n }\n }\n }\n }\n\n #checkFieldCount(): void {\n if (this.#fieldIndex + 1 > this.#maxFieldCount) {\n throw new RangeError(\n `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n }\n\n #setHeader(header: Header) {\n if (header.length > this.#maxFieldCount) {\n throw new RangeError(\n `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n this.#header = header;\n if (this.#header.length === 0) {\n throw new 
ParseError(\"The header must not be empty.\");\n }\n if (new Set(this.#header).size !== this.#header.length) {\n throw new ParseError(\"The header must not contain duplicate fields.\");\n }\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,uBAAA,GAA0B,GAAA;AAkBzB,MAAM,kBAAA,CAAyD;AAAA,EACpE,WAAA,GAAc,CAAA;AAAA,EACd,OAAiB,EAAC;AAAA,EAClB,OAAA;AAAA,EACA,MAAA,GAAS,KAAA;AAAA,EACT,OAAA;AAAA,EACA,cAAA;AAAA,EACA,eAAA;AAAA,EAEA,WAAA,CAAY,OAAA,GAA6C,EAAC,EAAG;AAC3D,IAAA,MAAM,GAAA,GAAM,QAAQ,aAAA,IAAiB,uBAAA;AAErC,IAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,MAAA,MAAM,IAAI,UAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AACA,IAAA,IAAA,CAAK,cAAA,GAAiB,GAAA;AACtB,IAAA,IAAA,CAAK,eAAA,GAAkB,QAAQ,cAAA,IAAkB,KAAA;AACjD,IAAA,IAAI,QAAQ,MAAA,KAAW,MAAA,IAAa,MAAM,OAAA,CAAQ,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjE,MAAA,IAAA,CAAK,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,IAChC;AACA,IAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,MAAA,IAAA,CAAK,UAAU,OAAA,CAAQ,MAAA;AAAA,IACzB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,CAAQ,QAAA,CACN,MAAA,EACA,OAAA,EACqC;AACrC,IAAA,MAAM,MAAA,GAAS,SAAS,MAAA,IAAU,KAAA;AAElC,IAAA,IAAI,WAAW,MAAA,EAAW;AACxB,MAAA,KAAA,MAAW,SAAS,MAAA,EAAQ;AAC1B,QAAA,IAAA,CAAK,SAAS,cAAA,EAAe;AAC7B,QAAA,QAAQ,MAAM,IAAA;AAAM,UAClB,KAAK,cAAA;AACH,YAAA,IAAA,CAAK,WAAA,EAAA;AACL,YAAA,IAAA,CAAK,gBAAA,EAAiB;AACtB,YAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,YAAA;AAAA,UACF,KAAK,eAAA;AACH,YAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,cAAA,IAAA,CAAK,UAAA,CAAW,KAAK,IAAyB,CAAA;AAAA,YAChD,CAAA,MAAO;AACL,cAAA,IAAI,KAAK,MAAA,EAAQ;AACf,gBAAA,MAAM,MAAA,CAAO,WAAA;AAAA,kBACX,IAAA,CAAK,OAAA,CAAQ,GAAA,CAAI,CAAC,QAAQ,KAAA,KAAU;AAAA,oBAClC,MAAA;AAAA,oBACA,IAAA,CAAK,IAAA,CAAK,EAAA,CAAG,KAAK;AAAA,mBACnB;AAAA,iBACH;AAAA,cACF,CAAA,MAAO;AACL,gBAAA,IAAI,KAAK,eAAA,EAAiB;AACxB,kBAAA;AAAA,gBACF;AACA,gBAAA,MAAM,MAAA,CAAO,WAAA;AAAA,kBACX,IAAA,CAAK,QAAQ,GAAA,CAAI,CAAC,WAAW,CAAC,MAAA,EAAQ,EAAE,CAAC;AAAA,iBAC3C;AAAA,cACF;AAAA,YACF;AAEA,YAAA,IAAA,CA
AK,WAAA,GAAc,CAAA;AACnB,YAAA,IAAA,CAAK,IAAA,GAAO,IAAI,KAAA,CAAM,IAAA,CAAK,SAAS,MAAM,CAAA,CAAE,KAAK,EAAE,CAAA;AACnD,YAAA,IAAA,CAAK,MAAA,GAAS,KAAA;AACd,YAAA;AAAA,UACF;AACE,YAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,YAAA,IAAA,CAAK,IAAA,CAAK,IAAA,CAAK,WAAW,CAAA,GAAI,KAAA,CAAM,KAAA;AACpC,YAAA;AAAA;AACJ,MACF;AAAA,IACF;AAEA,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,QAAA,IAAI,KAAK,MAAA,EAAQ;AACf,UAAA,MAAM,MAAA,CAAO,WAAA;AAAA,YACX,KAAK,OAAA,CACF,MAAA,CAAO,CAAC,CAAA,KAAM,CAAC,EACf,GAAA,CAAI,CAAC,MAAA,EAAQ,KAAA,KAAU,CAAC,MAAA,EAAQ,IAAA,CAAK,KAAK,EAAA,CAAG,KAAK,CAAC,CAAC;AAAA,WACzD;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,gBAAA,GAAyB;AACvB,IAAA,IAAI,IAAA,CAAK,WAAA,GAAc,CAAA,GAAI,IAAA,CAAK,cAAA,EAAgB;AAC9C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAA,CAAK,WAAA,GAAc,CAAC,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAA,EAAgB;AACzB,IAAA,IAAI,MAAA,CAAO,MAAA,GAAS,IAAA,CAAK,cAAA,EAAgB;AACvC,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,CAAA,oBAAA,EAAuB,MAAA,CAAO,MAAM,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA,IACF;AACA,IAAA,IAAA,CAAK,OAAA,GAAU,MAAA;AACf,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,WAAW,+BAA+B,CAAA;AAAA,IACtD;AACA,IAAA,IAAI,IAAI,IAAI,IAAA,CAAK,OAAO,EAAE,IAAA,KAAS,IAAA,CAAK,QAAQ,MAAA,EAAQ;AACtD,MAAA,MAAM,IAAI,WAAW,+CAA+C,CAAA;AAAA,IACtE;AAAA,EACF;AACF;;;;"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CSVRecordAssemblerTransformer.js","sources":["../src/CSVRecordAssemblerTransformer.ts"],"sourcesContent":["import { CSVRecordAssembler } from \"./CSVRecordAssembler.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n ExtendedQueuingStrategy,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * A transform stream that converts a stream of tokens into a stream of CSV records.\n *\n * @template Header The type of the header row.\n * @param options - CSV-specific options (header, maxFieldCount, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 10 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1, checkInterval: 10 }`)\n *\n * @category Low-level API\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.\n * - Readable side: Counts each record as 1. Default highWaterMark is 256 records.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Parse a CSV with headers by data\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer())\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Parse a CSV with headers by options\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer({ header: [\"name\", \"age\"] }))\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVRecordAssemblerTransformer(\n * {},\n * {\n * highWaterMark: 2048, // 2048 tokens\n * size: (tokens) => tokens.length, // Count by token count\n * checkInterval: 20 // Check backpressure every 20 records\n * },\n * {\n * highWaterMark: 512, // 512 records\n * size: () => 1, // Each record counts as 1\n * checkInterval: 5 // Check backpressure every 5 
records\n * }\n * );\n *\n * await tokenStream\n * .pipeThrough(transformer)\n * .pipeTo(yourRecordProcessor);\n * ```\n */\nexport class CSVRecordAssemblerTransformer<\n Header extends ReadonlyArray<string>,\n> extends TransformStream<Token[], CSVRecord<Header>> {\n public readonly assembler: CSVRecordAssembler<Header>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVRecordAssemblerOptions<Header> = {},\n writableStrategy: ExtendedQueuingStrategy<Token[]> = {\n highWaterMark: 1024, // 1024 tokens\n size: (tokens) => tokens.length, // Count by number of tokens in array\n checkInterval: 10, // Check backpressure every 10 records\n },\n readableStrategy: ExtendedQueuingStrategy<CSVRecord<Header>> = {\n highWaterMark: 256, // 256 records\n size: () => 1, // Each record counts as 1\n checkInterval: 10, // Check backpressure every 10 records\n },\n ) {\n const assembler = new CSVRecordAssembler(options);\n const checkInterval =\n writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 
10;\n\n super(\n {\n transform: async (tokens, controller) => {\n try {\n let recordCount = 0;\n for (const record of assembler.assemble(tokens, { stream: true })) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n flush: async (controller) => {\n try {\n let recordCount = 0;\n for (const record of assembler.assemble()) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.assembler = assembler;\n }\n}\n"],"names":[],"mappings":";;AA2FO,MAAM,sCAEH,
|
|
1
|
+
{"version":3,"file":"CSVRecordAssemblerTransformer.js","sources":["../src/CSVRecordAssemblerTransformer.ts"],"sourcesContent":["import { CSVRecordAssembler } from \"./CSVRecordAssembler.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n ExtendedQueuingStrategy,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * A transform stream that converts a stream of tokens into a stream of CSV records.\n *\n * @template Header The type of the header row.\n * @param options - CSV-specific options (header, maxFieldCount, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 10 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1, checkInterval: 10 }`)\n *\n * @category Low-level API\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.\n * - Readable side: Counts each record as 1. Default highWaterMark is 256 records.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Parse a CSV with headers by data\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer())\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Parse a CSV with headers by options\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer({ header: [\"name\", \"age\"] }))\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVRecordAssemblerTransformer(\n * {},\n * {\n * highWaterMark: 2048, // 2048 tokens\n * size: (tokens) => tokens.length, // Count by token count\n * checkInterval: 20 // Check backpressure every 20 records\n * },\n * {\n * highWaterMark: 512, // 512 records\n * size: () => 1, // Each record counts as 1\n * checkInterval: 5 // Check backpressure every 5 
records\n * }\n * );\n *\n * await tokenStream\n * .pipeThrough(transformer)\n * .pipeTo(yourRecordProcessor);\n * ```\n */\nexport class CSVRecordAssemblerTransformer<\n Header extends ReadonlyArray<string>,\n> extends TransformStream<Token[], CSVRecord<Header>> {\n public readonly assembler: CSVRecordAssembler<Header>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVRecordAssemblerOptions<Header> = {},\n writableStrategy: ExtendedQueuingStrategy<Token[]> = {\n highWaterMark: 1024, // 1024 tokens\n size: (tokens) => tokens.length, // Count by number of tokens in array\n checkInterval: 10, // Check backpressure every 10 records\n },\n readableStrategy: ExtendedQueuingStrategy<CSVRecord<Header>> = {\n highWaterMark: 256, // 256 records\n size: () => 1, // Each record counts as 1\n checkInterval: 10, // Check backpressure every 10 records\n },\n ) {\n const assembler = new CSVRecordAssembler(options);\n const checkInterval =\n writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 
10;\n\n super(\n {\n transform: async (tokens, controller) => {\n try {\n let recordCount = 0;\n for (const record of assembler.assemble(tokens, { stream: true })) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n flush: async (controller) => {\n try {\n let recordCount = 0;\n for (const record of assembler.assemble()) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.assembler = assembler;\n 
}\n}\n"],"names":[],"mappings":";;AA2FO,MAAM,sCAEH,eAAA,CAA4C;AAAA,EACpC,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOhB,MAAgB,gBAAA,GAAkC;AAChD,IAAA,MAAM,IAAI,OAAA,CAAQ,CAAC,YAAY,UAAA,CAAW,OAAA,EAAS,CAAC,CAAC,CAAA;AAAA,EACvD;AAAA,EAEA,WAAA,CACE,OAAA,GAA6C,EAAC,EAC9C,gBAAA,GAAqD;AAAA,IACnD,aAAA,EAAe,IAAA;AAAA;AAAA,IACf,IAAA,EAAM,CAAC,MAAA,KAAW,MAAA,CAAO,MAAA;AAAA;AAAA,IACzB,aAAA,EAAe;AAAA;AAAA,KAEjB,gBAAA,GAA+D;AAAA,IAC7D,aAAA,EAAe,GAAA;AAAA;AAAA,IACf,MAAM,MAAM,CAAA;AAAA;AAAA,IACZ,aAAA,EAAe;AAAA;AAAA,GACjB,EACA;AACA,IAAA,MAAM,SAAA,GAAY,IAAI,kBAAA,CAAmB,OAAO,CAAA;AAChD,IAAA,MAAM,aAAA,GACJ,gBAAA,CAAiB,aAAA,IAAiB,gBAAA,CAAiB,aAAA,IAAiB,EAAA;AAEtE,IAAA,KAAA;AAAA,MACE;AAAA,QACE,SAAA,EAAW,OAAO,MAAA,EAAQ,UAAA,KAAe;AACvC,UAAA,IAAI;AACF,YAAA,IAAI,WAAA,GAAc,CAAA;AAClB,YAAA,KAAA,MAAW,MAAA,IAAU,UAAU,QAAA,CAAS,MAAA,EAAQ,EAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAG;AACjE,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AACzB,cAAA,WAAA,EAAA;AAGA,cAAA,IACE,WAAA,GAAc,kBAAkB,CAAA,IAChC,UAAA,CAAW,gBAAgB,IAAA,IAC3B,UAAA,CAAW,eAAe,CAAA,EAC1B;AAEA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF,CAAA;AAAA,QACA,KAAA,EAAO,OAAO,UAAA,KAAe;AAC3B,UAAA,IAAI;AACF,YAAA,IAAI,WAAA,GAAc,CAAA;AAClB,YAAA,KAAA,MAAW,MAAA,IAAU,SAAA,CAAU,QAAA,EAAS,EAAG;AACzC,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AACzB,cAAA,WAAA,EAAA;AAGA,cAAA,IACE,WAAA,GAAc,kBAAkB,CAAA,IAChC,UAAA,CAAW,gBAAgB,IAAA,IAC3B,UAAA,CAAW,eAAe,CAAA,EAC1B;AACA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF;AAAA,OACF;AAAA,MACA,gBAAA;AAAA,MACA;AAAA,KACF;AACA,IAAA,IAAA,CAAK,SAAA,GAAY,SAAA;AAAA,EACnB;AACF;;;;"}
|