web-csv-toolbox 0.13.0-next-bd865d6ddb1cf9691d7b9a83d0790651f074dd47 → 0.13.0-next-b21b6d89a7a3f18dcbf79ec04ffefde0d7ff4c4c

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. package/README.md +132 -6
  2. package/dist/CSVLexer.js.map +1 -1
  3. package/dist/CSVLexerTransformer.d.ts +52 -3
  4. package/dist/CSVLexerTransformer.js +58 -13
  5. package/dist/CSVLexerTransformer.js.map +1 -1
  6. package/dist/CSVRecordAssembler.js.map +1 -1
  7. package/dist/CSVRecordAssemblerTransformer.d.ts +49 -3
  8. package/dist/CSVRecordAssemblerTransformer.js +55 -18
  9. package/dist/CSVRecordAssemblerTransformer.js.map +1 -1
  10. package/dist/_virtual/web_csv_toolbox_wasm_bg.wasm.js +1 -1
  11. package/dist/assertCommonOptions.js.map +1 -1
  12. package/dist/common/constants.js.map +1 -1
  13. package/dist/common/errors.js.map +1 -1
  14. package/dist/common/types.d.ts +336 -14
  15. package/dist/commonParseErrorHandling.js.map +1 -1
  16. package/dist/constants.js.map +1 -1
  17. package/dist/createWorker.node.d.ts +2 -0
  18. package/dist/createWorker.web.d.ts +2 -0
  19. package/dist/execution/EnginePresets.d.ts +143 -0
  20. package/dist/execution/EnginePresets.js +129 -0
  21. package/dist/execution/EnginePresets.js.map +1 -0
  22. package/dist/execution/InternalEngineConfig.d.ts +89 -0
  23. package/dist/execution/InternalEngineConfig.js +175 -0
  24. package/dist/execution/InternalEngineConfig.js.map +1 -0
  25. package/dist/execution/main/parseBinaryInMain.d.ts +12 -0
  26. package/dist/execution/main/parseStreamInMain.d.ts +12 -0
  27. package/dist/execution/main/parseStringInMain.d.ts +12 -0
  28. package/dist/execution/main/parseUint8ArrayStreamInMain.d.ts +12 -0
  29. package/dist/execution/wasm/parseBinaryInWASM.d.ts +18 -0
  30. package/dist/execution/wasm/parseBinaryInWASM.js +15 -0
  31. package/dist/execution/wasm/parseBinaryInWASM.js.map +1 -0
  32. package/dist/execution/wasm/parseStringInWASM.d.ts +16 -0
  33. package/dist/execution/worker/helpers/ReusableWorkerPool.d.ts +152 -0
  34. package/dist/execution/worker/helpers/ReusableWorkerPool.js +238 -0
  35. package/dist/execution/worker/helpers/ReusableWorkerPool.js.map +1 -0
  36. package/dist/execution/worker/helpers/TransientWorkerPool.d.ts +89 -0
  37. package/dist/execution/worker/helpers/WorkerManager.d.ts +27 -0
  38. package/dist/execution/worker/helpers/WorkerPool.d.ts +50 -0
  39. package/dist/execution/worker/helpers/WorkerSession.d.ts +78 -0
  40. package/dist/execution/worker/helpers/WorkerSession.js +58 -0
  41. package/dist/execution/worker/helpers/WorkerSession.js.map +1 -0
  42. package/dist/execution/worker/helpers/createWorker.node.d.ts +8 -0
  43. package/dist/execution/worker/helpers/createWorker.node.js +15 -0
  44. package/dist/execution/worker/helpers/createWorker.node.js.map +1 -0
  45. package/dist/execution/worker/helpers/createWorker.web.d.ts +8 -0
  46. package/dist/execution/worker/helpers/createWorker.web.js +11 -0
  47. package/dist/execution/worker/helpers/createWorker.web.js.map +1 -0
  48. package/dist/execution/worker/helpers/worker.node.d.ts +1 -0
  49. package/dist/execution/worker/helpers/worker.node.js +11 -0
  50. package/dist/execution/worker/helpers/worker.node.js.map +1 -0
  51. package/dist/execution/worker/helpers/worker.shared.d.ts +90 -0
  52. package/dist/execution/worker/helpers/worker.shared.js +241 -0
  53. package/dist/execution/worker/helpers/worker.shared.js.map +1 -0
  54. package/dist/execution/worker/helpers/worker.web.d.ts +1 -0
  55. package/dist/execution/worker/helpers/worker.web.js +16 -0
  56. package/dist/execution/worker/helpers/worker.web.js.map +1 -0
  57. package/dist/execution/worker/parseBinaryInWorker.node.d.ts +8 -0
  58. package/dist/execution/worker/parseBinaryInWorker.node.js +24 -0
  59. package/dist/execution/worker/parseBinaryInWorker.node.js.map +1 -0
  60. package/dist/execution/worker/parseBinaryInWorker.web.d.ts +8 -0
  61. package/dist/execution/worker/parseBinaryInWorker.web.js +24 -0
  62. package/dist/execution/worker/parseBinaryInWorker.web.js.map +1 -0
  63. package/dist/execution/worker/parseBinaryInWorkerWASM.node.d.ts +8 -0
  64. package/dist/execution/worker/parseBinaryInWorkerWASM.node.js +24 -0
  65. package/dist/execution/worker/parseBinaryInWorkerWASM.node.js.map +1 -0
  66. package/dist/execution/worker/parseBinaryInWorkerWASM.web.d.ts +8 -0
  67. package/dist/execution/worker/parseBinaryInWorkerWASM.web.js +24 -0
  68. package/dist/execution/worker/parseBinaryInWorkerWASM.web.js.map +1 -0
  69. package/dist/execution/worker/parseStreamInWorker.node.d.ts +15 -0
  70. package/dist/execution/worker/parseStreamInWorker.node.js +26 -0
  71. package/dist/execution/worker/parseStreamInWorker.node.js.map +1 -0
  72. package/dist/execution/worker/parseStreamInWorker.web.d.ts +12 -0
  73. package/dist/execution/worker/parseStreamInWorker.web.js +25 -0
  74. package/dist/execution/worker/parseStreamInWorker.web.js.map +1 -0
  75. package/dist/execution/worker/parseStringInWorker.node.d.ts +11 -0
  76. package/dist/execution/worker/parseStringInWorker.node.js +24 -0
  77. package/dist/execution/worker/parseStringInWorker.node.js.map +1 -0
  78. package/dist/execution/worker/parseStringInWorker.web.d.ts +11 -0
  79. package/dist/execution/worker/parseStringInWorker.web.js +24 -0
  80. package/dist/execution/worker/parseStringInWorker.web.js.map +1 -0
  81. package/dist/execution/worker/parseStringInWorkerWASM.node.d.ts +8 -0
  82. package/dist/execution/worker/parseStringInWorkerWASM.node.js +24 -0
  83. package/dist/execution/worker/parseStringInWorkerWASM.node.js.map +1 -0
  84. package/dist/execution/worker/parseStringInWorkerWASM.web.d.ts +8 -0
  85. package/dist/execution/worker/parseStringInWorkerWASM.web.js +24 -0
  86. package/dist/execution/worker/parseStringInWorkerWASM.web.js.map +1 -0
  87. package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.d.ts +12 -0
  88. package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js +26 -0
  89. package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js.map +1 -0
  90. package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.d.ts +9 -0
  91. package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js +25 -0
  92. package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js.map +1 -0
  93. package/dist/execution/worker/strategies/MessageStreamingStrategy.d.ts +17 -0
  94. package/dist/execution/worker/strategies/MessageStreamingStrategy.js +58 -0
  95. package/dist/execution/worker/strategies/MessageStreamingStrategy.js.map +1 -0
  96. package/dist/execution/worker/strategies/TransferableStreamStrategy.d.ts +25 -0
  97. package/dist/execution/worker/strategies/TransferableStreamStrategy.js +159 -0
  98. package/dist/execution/worker/strategies/TransferableStreamStrategy.js.map +1 -0
  99. package/dist/execution/worker/strategies/WorkerStrategy.d.ts +27 -0
  100. package/dist/execution/worker/strategies/WorkerStrategySelector.d.ts +43 -0
  101. package/dist/execution/worker/strategies/WorkerStrategySelector.js +89 -0
  102. package/dist/execution/worker/strategies/WorkerStrategySelector.js.map +1 -0
  103. package/dist/execution/worker/utils/messageHandler.d.ts +21 -0
  104. package/dist/execution/worker/utils/messageHandler.js +109 -0
  105. package/dist/execution/worker/utils/messageHandler.js.map +1 -0
  106. package/dist/execution/worker/utils/serializeOptions.d.ts +9 -0
  107. package/dist/execution/worker/utils/serializeOptions.js +14 -0
  108. package/dist/execution/worker/utils/serializeOptions.js.map +1 -0
  109. package/dist/execution/worker/utils/streamCollector.node.d.ts +14 -0
  110. package/dist/execution/worker/utils/streamCollector.node.js +78 -0
  111. package/dist/execution/worker/utils/streamCollector.node.js.map +1 -0
  112. package/dist/execution/worker/utils/workerUtils.d.ts +14 -0
  113. package/dist/execution/worker/utils/workerUtils.js +25 -0
  114. package/dist/execution/worker/utils/workerUtils.js.map +1 -0
  115. package/dist/getOptionsFromResponse.constants.node.d.ts +10 -0
  116. package/dist/getOptionsFromResponse.constants.node.js +8 -0
  117. package/dist/getOptionsFromResponse.constants.node.js.map +1 -0
  118. package/dist/getOptionsFromResponse.constants.web.d.ts +30 -0
  119. package/dist/getOptionsFromResponse.constants.web.js +7 -0
  120. package/dist/getOptionsFromResponse.constants.web.js.map +1 -0
  121. package/dist/getOptionsFromResponse.d.ts +2 -1
  122. package/dist/getOptionsFromResponse.js +5 -9
  123. package/dist/getOptionsFromResponse.js.map +1 -1
  124. package/dist/loadWASM.js.map +1 -1
  125. package/dist/loadWASM.web.js.map +1 -1
  126. package/dist/parse.d.ts +1 -1
  127. package/dist/parse.js +29 -5
  128. package/dist/parse.js.map +1 -1
  129. package/dist/parseBinary.d.ts +2 -1
  130. package/dist/parseBinary.js +32 -3
  131. package/dist/parseBinary.js.map +1 -1
  132. package/dist/parseBinaryInWorker.node.d.ts +2 -0
  133. package/dist/parseBinaryInWorker.web.d.ts +2 -0
  134. package/dist/parseBinaryInWorkerWASM.node.d.ts +2 -0
  135. package/dist/parseBinaryInWorkerWASM.web.d.ts +2 -0
  136. package/dist/parseBinaryToArraySync.d.ts +2 -1
  137. package/dist/parseBinaryToArraySync.js.map +1 -1
  138. package/dist/parseBinaryToIterableIterator.d.ts +2 -1
  139. package/dist/parseBinaryToIterableIterator.js.map +1 -1
  140. package/dist/parseBinaryToStream.d.ts +2 -1
  141. package/dist/parseBinaryToStream.js.map +1 -1
  142. package/dist/parseResponse.d.ts +1 -1
  143. package/dist/parseResponse.js +15 -8
  144. package/dist/parseResponse.js.map +1 -1
  145. package/dist/parseResponseToStream.d.ts +2 -1
  146. package/dist/parseResponseToStream.js.map +1 -1
  147. package/dist/parseStreamInWorker.node.d.ts +2 -0
  148. package/dist/parseStreamInWorker.web.d.ts +2 -0
  149. package/dist/parseString.d.ts +31 -0
  150. package/dist/parseString.js +27 -1
  151. package/dist/parseString.js.map +1 -1
  152. package/dist/parseStringInWorker.node.d.ts +2 -0
  153. package/dist/parseStringInWorker.web.d.ts +2 -0
  154. package/dist/parseStringInWorkerWASM.node.d.ts +2 -0
  155. package/dist/parseStringInWorkerWASM.web.d.ts +2 -0
  156. package/dist/parseStringStream.d.ts +43 -1
  157. package/dist/parseStringStream.js +24 -3
  158. package/dist/parseStringStream.js.map +1 -1
  159. package/dist/parseStringStreamToStream.js.map +1 -1
  160. package/dist/parseStringToArraySync.js.map +1 -1
  161. package/dist/parseStringToArraySyncWASM.js.map +1 -1
  162. package/dist/parseStringToIterableIterator.js.map +1 -1
  163. package/dist/parseStringToStream.js.map +1 -1
  164. package/dist/parseUint8ArrayStream.d.ts +4 -3
  165. package/dist/parseUint8ArrayStream.js +24 -3
  166. package/dist/parseUint8ArrayStream.js.map +1 -1
  167. package/dist/parseUint8ArrayStreamInWorker.node.d.ts +2 -0
  168. package/dist/parseUint8ArrayStreamInWorker.web.d.ts +2 -0
  169. package/dist/parseUint8ArrayStreamToStream.d.ts +2 -1
  170. package/dist/parseUint8ArrayStreamToStream.js +11 -5
  171. package/dist/parseUint8ArrayStreamToStream.js.map +1 -1
  172. package/dist/utils/convertBinaryToString.js.map +1 -1
  173. package/dist/utils/convertIterableIteratorToAsync.js.map +1 -1
  174. package/dist/utils/convertStreamToAsyncIterableIterator.js +2 -2
  175. package/dist/utils/convertStreamToAsyncIterableIterator.js.map +1 -1
  176. package/dist/utils/convertThisAsyncIterableIteratorToArray.d.ts +1 -1
  177. package/dist/utils/convertThisAsyncIterableIteratorToArray.js.map +1 -1
  178. package/dist/utils/escapeRegExp.js.map +1 -1
  179. package/dist/utils/parseMime.js.map +1 -1
  180. package/dist/utils/pipeline.js.map +1 -1
  181. package/dist/web-csv-toolbox.d.ts +4 -0
  182. package/dist/web-csv-toolbox.js +3 -0
  183. package/dist/web-csv-toolbox.js.map +1 -1
  184. package/dist/web_csv_toolbox_wasm_bg.wasm +0 -0
  185. package/dist/worker.node.d.ts +1 -0
  186. package/dist/worker.web.d.ts +1 -0
  187. package/package.json +53 -10
package/README.md CHANGED
@@ -302,13 +302,13 @@ try {
302
302
 
303
303
  ### Works on Browser
304
304
 
305
- | OS | Chrome | FireFox | Default |
305
+ | OS | Chrome | Firefox | Default |
306
306
  | ------- | ------ | ------- | ------------- |
307
307
  | Windows | ✅ | ✅ | ✅ (Edge) |
308
- | macos | ✅ | ✅ | ⬜ (Safari *) |
308
+ | macOS | ✅ | ✅ | ⬜ (Safari *) |
309
309
  | Linux | ✅ | ✅ | - |
310
310
 
311
- > **\* To Be Tested**: [I couldn't launch Safari in headless mode](https://github.com/vitest-dev/vitest/blob/main/packages/browser/src/node/providers/webdriver.ts#L39-L41) on GitHub Actions, so I couldn't verify it, but it probably works.
311
+ > **\* Safari**: Basic functionality is expected to work, but it is not yet automatically tested in our CI environment.
312
312
 
313
313
  ### Others
314
314
 
@@ -357,8 +357,134 @@ ideal for developers looking for in-depth control and flexibility.
357
357
 
358
358
  - **`class CSVLexerTransformer`**: [📑](https://kamiazya.github.io/web-csv-toolbox/classes/CSVLexerTransformer.html)
359
359
  - A TransformStream class for lexical analysis of CSV data.
360
+ - Supports custom queuing strategies for controlling backpressure and memory usage.
360
361
  - **`class CSVRecordAssemblerTransformer`**: [📑](https://kamiazya.github.io/web-csv-toolbox/classes/CSVRecordAssemblerTransformer.html)
361
362
  - Handles the assembly of parsed data into records.
363
+ - Supports custom queuing strategies for controlling backpressure and memory usage.
364
+
365
+ #### Customizing Queuing Strategies
366
+
367
+ Both `CSVLexerTransformer` and `CSVRecordAssemblerTransformer` support custom queuing strategies following the Web Streams API pattern. Strategies are passed as constructor arguments with **data-type-aware size counting** and **configurable backpressure handling**.
368
+
369
+ **Constructor signature:**
370
+ ```typescript
371
+ new CSVLexerTransformer(options?, writableStrategy?, readableStrategy?)
372
+ new CSVRecordAssemblerTransformer(options?, writableStrategy?, readableStrategy?)
373
+ ```
374
+
375
+ **Default queuing strategies (starting points, not benchmarked):**
376
+ ```typescript
377
+ // CSVLexerTransformer defaults
378
+ writableStrategy: {
379
+ highWaterMark: 65536, // 64KB of characters
380
+ size: (chunk) => chunk.length, // Count by string length
381
+ checkInterval: 100 // Check backpressure every 100 tokens
382
+ }
383
+ readableStrategy: {
384
+ highWaterMark: 1024, // 1024 tokens
385
+ size: (tokens) => tokens.length, // Count by number of tokens
386
+ checkInterval: 100 // Check backpressure every 100 tokens
387
+ }
388
+
389
+ // CSVRecordAssemblerTransformer defaults
390
+ writableStrategy: {
391
+ highWaterMark: 1024, // 1024 tokens
392
+ size: (tokens) => tokens.length, // Count by number of tokens
393
+ checkInterval: 10 // Check backpressure every 10 records
394
+ }
395
+ readableStrategy: {
396
+ highWaterMark: 256, // 256 records
397
+ size: () => 1, // Each record counts as 1
398
+ checkInterval: 10 // Check backpressure every 10 records
399
+ }
400
+ ```
401
+
402
+ **Key Features:**
403
+
404
+ 🎯 **Smart Size Counting:**
405
+ - Character-based counting for string inputs (accurate memory tracking)
406
+ - Token-based counting between transformers (smooth pipeline flow)
407
+ - Record-based counting for output (intuitive and predictable)
408
+
409
+ ⚡ **Cooperative Backpressure:**
410
+ - Monitors `controller.desiredSize` during processing
411
+ - Yields to event loop when backpressure detected
412
+ - Prevents blocking the main thread
413
+ - Critical for browser UI responsiveness
414
+
415
+ 🔧 **Tunable Check Interval:**
416
+ - `checkInterval`: How often to check for backpressure
417
+ - Lower values (5-25): More responsive, slight overhead
418
+ - Higher values (100-500): Less overhead, slower response
419
+ - Customize based on downstream consumer speed
420
+
421
+ > ⚠️ **Important**: These defaults are theoretical starting points based on data flow characteristics, **not empirical benchmarks**. Optimal values vary by runtime (browser/Node.js/Deno), file size, memory constraints, and CPU performance. **Profile your specific use case** to find the best values.
422
+
423
+ **When to customize:**
424
+ - 🚀 **High-throughput servers**: Higher `highWaterMark` (128KB+, 2048+ tokens), higher `checkInterval` (200-500)
425
+ - 📱 **Memory-constrained environments**: Lower `highWaterMark` (16KB, 256 tokens), lower `checkInterval` (10-25)
426
+ - 🐌 **Slow consumers** (DB writes, API calls): Lower `highWaterMark`, lower `checkInterval` for responsive backpressure
427
+ - 🏃 **Fast processing**: Higher values to reduce overhead
428
+
429
+ **Example - High-throughput server:**
430
+ ```typescript
431
+ import { CSVLexerTransformer, CSVRecordAssemblerTransformer } from 'web-csv-toolbox';
432
+
433
+ const response = await fetch('large-dataset.csv');
434
+ await response.body
435
+ .pipeThrough(new TextDecoderStream())
436
+ .pipeThrough(new CSVLexerTransformer(
437
+ {},
438
+ {
439
+ highWaterMark: 131072, // 128KB
440
+ size: (chunk) => chunk.length,
441
+ checkInterval: 200 // Less frequent checks
442
+ },
443
+ {
444
+ highWaterMark: 2048, // 2048 tokens
445
+ size: (tokens) => tokens.length,
446
+ checkInterval: 100
447
+ }
448
+ ))
449
+ .pipeThrough(new CSVRecordAssemblerTransformer(
450
+ {},
451
+ {
452
+ highWaterMark: 2048, // 2048 tokens
453
+ size: (tokens) => tokens.length,
454
+ checkInterval: 20
455
+ },
456
+ {
457
+ highWaterMark: 512, // 512 records
458
+ size: () => 1,
459
+ checkInterval: 10
460
+ }
461
+ ))
462
+ .pipeTo(yourRecordProcessor);
463
+ ```
464
+
465
+ **Example - Slow consumer (API writes):**
466
+ ```typescript
467
+ await csvStream
468
+ .pipeThrough(new CSVLexerTransformer()) // Use defaults
469
+ .pipeThrough(new CSVRecordAssemblerTransformer(
470
+ {},
471
+ { highWaterMark: 512, size: (t) => t.length, checkInterval: 5 },
472
+ { highWaterMark: 64, size: () => 1, checkInterval: 2 } // Very responsive
473
+ ))
474
+ .pipeTo(new WritableStream({
475
+ async write(record) {
476
+ await fetch('/api/save', { method: 'POST', body: JSON.stringify(record) });
477
+ }
478
+ }));
479
+ ```
480
+
481
+ **Benchmarking:**
482
+ Use the provided benchmark tool to find optimal values for your use case:
483
+ ```bash
484
+ pnpm --filter web-csv-toolbox-benchmark queuing-strategy
485
+ ```
486
+
487
+ See `benchmark/queuing-strategy.bench.ts` for implementation details.
362
488
 
363
489
  ### Experimental APIs 🧪
364
490
 
@@ -624,7 +750,7 @@ try {
624
750
 
625
751
  ## Star ⭐
626
752
 
627
- The easiest way to contribute is to use the library and star [repository](https://github.com/kamiazya/web-csv-toolbox/).
753
+ The easiest way to contribute is to use the library and star the [repository](https://github.com/kamiazya/web-csv-toolbox/).
628
754
 
629
755
  ### Questions 💭
630
756
 
@@ -632,7 +758,7 @@ Feel free to ask questions on [GitHub Discussions](https://github.com/kamiazya/w
632
758
 
633
759
  ### Report bugs / request additional features 💡
634
760
 
635
- Please register at [GitHub Issues](https://github.com/kamiazya/web-csv-toolbox/issues/new/choose).
761
+ Please create an issue at [GitHub Issues](https://github.com/kamiazya/web-csv-toolbox/issues/new/choose).
636
762
 
637
763
  ### Financial Support 💸
638
764
 
@@ -642,7 +768,7 @@ Please support [kamiazya](https://github.com/sponsors/kamiazya).
642
768
 
643
769
  ## License ⚖️
644
770
 
645
- This software is released under the MIT License, see [LICENSE](https://github.com/kamiazya/web-csv-toolbox?tab=MIT-1-ov-file).
771
+ This software is released under the MIT License, see [LICENSE](https://github.com/kamiazya/web-csv-toolbox/blob/main/LICENSE).
646
772
 
647
773
 
648
774
  [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fkamiazya%2Fweb-csv-toolbox.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2Fkamiazya%2Fweb-csv-toolbox?ref=badge_large)
@@ -1 +1 @@
1
- {"version":3,"file":"CSVLexer.js","sources":["../src/CSVLexer.ts"],"sourcesContent":["import { assertCommonOptions } from \"./assertCommonOptions.ts\";\nimport { Field, FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n AbortSignalOptions,\n CommonOptions,\n Position,\n RecordDelimiterToken,\n Token,\n} from \"./common/types.ts\";\nimport { CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, LF } from \"./constants.ts\";\nimport { escapeRegExp } from \"./utils/escapeRegExp.ts\";\n\n/**\n * Default maximum buffer size in characters (UTF-16 code units).\n * Approximately 10MB for ASCII text, but may vary for non-ASCII characters.\n */\nexport const DEFAULT_MAX_BUFFER_SIZE = 10 * 1024 * 1024;\n\n/**\n * Options for the CSVLexer.lex method.\n */\nexport interface CSVLexerLexOptions {\n /**\n * If true, indicates that more chunks are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Lexer.\n *\n * CSVLexer tokenizes CSV data into fields and records.\n */\nexport class CSVLexer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = DEFAULT_QUOTATION,\n> {\n #delimiter: string;\n #quotation: string;\n #buffer = \"\";\n #flush = false;\n #matcher: RegExp;\n #fieldDelimiterLength: number;\n #maxBufferSize: number;\n\n #cursor: Position = {\n line: 1,\n column: 1,\n offset: 0,\n };\n #rowNumber = 1;\n\n #signal?: AbortSignal;\n\n /**\n * Constructs a new CSVLexer instance.\n * @param options - The common options for the lexer.\n */\n constructor(\n options: CommonOptions<Delimiter, Quotation> & AbortSignalOptions = {},\n ) {\n const {\n delimiter = DEFAULT_DELIMITER,\n quotation = DEFAULT_QUOTATION,\n maxBufferSize = DEFAULT_MAX_BUFFER_SIZE,\n signal,\n } = options;\n assertCommonOptions({ delimiter, quotation, maxBufferSize });\n this.#delimiter = delimiter;\n this.#quotation = quotation;\n this.#fieldDelimiterLength = 
delimiter.length;\n this.#maxBufferSize = maxBufferSize;\n const d = escapeRegExp(delimiter);\n const q = escapeRegExp(quotation);\n this.#matcher = new RegExp(\n `^(?:(?!${q})(?!${d})(?![\\\\r\\\\n]))([\\\\S\\\\s\\\\uFEFF\\\\xA0]+?)(?=${q}|${d}|\\\\r|\\\\n|$)`,\n );\n if (signal) {\n this.#signal = signal;\n }\n }\n\n /**\n * Lexes the given chunk of CSV data.\n * @param chunk - The chunk of CSV data to be lexed. Omit to flush remaining data.\n * @param options - Lexer options.\n * @returns An iterable iterator of tokens.\n */\n public lex(\n chunk?: string,\n options?: CSVLexerLexOptions,\n ): IterableIterator<Token> {\n const stream = options?.stream ?? false;\n\n if (!stream) {\n this.#flush = true;\n }\n if (chunk !== undefined && chunk.length !== 0) {\n this.#buffer += chunk;\n this.#checkBufferSize();\n }\n\n return this.#tokens();\n }\n\n /**\n * Generates tokens from the buffered CSV data.\n * @yields Tokens from the buffered CSV data.\n */\n *#tokens(): Generator<Token> {\n if (this.#flush) {\n // Trim the last CRLF or LF\n if (this.#buffer.endsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(0, -2 /* -CRLF.length */);\n } else if (this.#buffer.endsWith(LF)) {\n this.#buffer = this.#buffer.slice(0, -1 /* -LF.length */);\n }\n }\n let token: Token | null;\n while ((token = this.#nextToken())) {\n yield token;\n }\n }\n\n /**\n * Checks if the buffer size exceeds the maximum allowed size.\n * @throws {RangeError} If the buffer size exceeds the maximum.\n */\n #checkBufferSize(): void {\n if (this.#buffer.length > this.#maxBufferSize) {\n throw new RangeError(\n `Buffer size (${this.#buffer.length} characters) exceeded maximum allowed size of ${this.#maxBufferSize} characters`,\n );\n }\n }\n\n /**\n * Retrieves the next token from the buffered CSV data.\n * @returns The next token or null if there are no more tokens.\n */\n #nextToken(): Token | null {\n this.#signal?.throwIfAborted();\n if (this.#buffer.length === 0) {\n return null;\n }\n // Buffer is 
Record Delimiter, defer to the next iteration.\n if (\n this.#flush === false &&\n (this.#buffer === CRLF || this.#buffer === LF)\n ) {\n return null;\n }\n\n // Check for CRLF\n if (this.#buffer.startsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(2);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 2; // CRLF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: CRLF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for LF\n if (this.#buffer.startsWith(LF)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 1; // LF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: LF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for Delimiter\n if (this.#buffer.startsWith(this.#delimiter)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += this.#fieldDelimiterLength;\n this.#cursor.offset += this.#fieldDelimiterLength;\n return {\n type: FieldDelimiter,\n value: this.#delimiter,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Check for Quoted String\n if (this.#buffer.startsWith(this.#quotation)) {\n /**\n * Extract Quoted field.\n *\n * The following code is equivalent to the following:\n *\n * If the next character is a quote:\n * - If the character after that is a quote, then append a quote to the value and skip two characters.\n * - Otherwise, return the quoted string.\n * Otherwise, append the character to the value and skip one character.\n *\n * ```plaintext\n * | `i` | `i + 1` | `i + 2` |\n * |------------|------------|----------|\n * | cur | next | | => 
Variable names\n * | #quotation | #quotation | | => Escaped quote\n * | #quotation | (EOF) | | => Closing quote\n * | #quotation | undefined | | => End of buffer\n * | undefined | | | => End of buffer\n * ```\n */\n let value = \"\";\n let offset = 1; // Skip the opening quote\n let column = 2; // Skip the opening quote\n let line = 0;\n\n // Define variables\n let cur: string = this.#buffer[offset];\n let next: string | undefined = this.#buffer[offset + 1];\n do {\n // If the current character is a quote, check the next characters for closing quotes.\n if (cur === this.#quotation) {\n // If the cur character is a quote and the next character is a quote,\n // then append a quote to the value and skip two characters.\n if (next === this.#quotation) {\n // Append a quote to the value and skip two characters.\n value += this.#quotation;\n offset += 2;\n cur = this.#buffer[offset];\n next = this.#buffer[offset + 1];\n\n // Update the diff\n column += 2;\n continue;\n }\n\n // If the cur character is a quote and the next character is undefined,\n // then return null.\n if (next === undefined && this.#flush === false) {\n return null;\n }\n\n // Otherwise, return the quoted string.\n // Update the buffer and return the token\n offset++;\n this.#buffer = this.#buffer.slice(offset);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += column;\n this.#cursor.offset += offset;\n this.#cursor.line += line;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Append the character to the value.\n value += cur;\n\n // Prepare for the next iteration\n if (cur === LF) {\n // If the current character is a LF,\n // then increment the line number and reset the column number.\n line++;\n column = 1;\n } else {\n // Otherwise, increment the column number and offset.\n column++;\n }\n\n offset++;\n cur = next;\n next = this.#buffer[offset + 1];\n } while (cur !== undefined);\n\n if 
(this.#flush) {\n throw new ParseError(\"Unexpected EOF while parsing quoted field.\", {\n position: { ...this.#cursor },\n });\n }\n return null;\n }\n\n // Check for Unquoted String\n const match = this.#matcher.exec(this.#buffer);\n if (match) {\n // If we're flushing and the match doesn't consume the entire buffer,\n // then return null\n if (this.#flush === false && match[0].length === this.#buffer.length) {\n return null;\n }\n const value = match[1];\n this.#buffer = this.#buffer.slice(value.length);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += value.length;\n this.#cursor.offset += value.length;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Otherwise, return null\n return null;\n }\n}\n"],"names":[],"mappings":";;;;;;AAiBa,MAAA,uBAAA,GAA0B,KAAK,IAAO,GAAA;AAkB5C,MAAM,QAGX,CAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,OAAU,GAAA,EAAA;AAAA,EACV,MAAS,GAAA,KAAA;AAAA,EACT,QAAA;AAAA,EACA,qBAAA;AAAA,EACA,cAAA;AAAA,EAEA,OAAoB,GAAA;AAAA,IAClB,IAAM,EAAA,CAAA;AAAA,IACN,MAAQ,EAAA,CAAA;AAAA,IACR,MAAQ,EAAA;AAAA,GACV;AAAA,EACA,UAAa,GAAA,CAAA;AAAA,EAEb,OAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,WAAA,CACE,OAAoE,GAAA,EACpE,EAAA;AACA,IAAM,MAAA;AAAA,MACJ,SAAY,GAAA,iBAAA;AAAA,MACZ,SAAY,GAAA,iBAAA;AAAA,MACZ,aAAgB,GAAA,uBAAA;AAAA,MAChB;AAAA,KACE,GAAA,OAAA;AACJ,IAAA,mBAAA,CAAoB,EAAE,SAAA,EAAW,SAAW,EAAA,aAAA,EAAe,CAAA;AAC3D,IAAA,IAAA,CAAK,UAAa,GAAA,SAAA;AAClB,IAAA,IAAA,CAAK,UAAa,GAAA,SAAA;AAClB,IAAA,IAAA,CAAK,wBAAwB,SAAU,CAAA,MAAA;AACvC,IAAA,IAAA,CAAK,cAAiB,GAAA,aAAA;AACtB,IAAM,MAAA,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAM,MAAA,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAA,IAAA,CAAK,WAAW,IAAI,MAAA;AAAA,MAClB,UAAU,CAAC,CAAA,IAAA,EAAO,CAAC,CAA4C,yCAAA,EAAA,CAAC,IAAI,CAAC,CAAA,WAAA;AAAA,KACvE;AACA,IAAA,IAAI,MAAQ,EAAA;AACV,MAAA,IAAA,CAAK,OAAU,GAAA,MAAA;AAAA;AACjB;AACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQO,GAAA,CACL,OACA,OACyB,EAAA;AACzB,IAAM,MAAA,MAAA,GAAS,SAAS,MAAU,IAAA,KAAA;AAElC,IAAA,IAAI,CAAC,MA
AQ,EAAA;AACX,MAAA,IAAA,CAAK,MAAS,GAAA,IAAA;AAAA;AAEhB,IAAA,IAAI,KAAU,KAAA,MAAA,IAAa,KAAM,CAAA,MAAA,KAAW,CAAG,EAAA;AAC7C,MAAA,IAAA,CAAK,OAAW,IAAA,KAAA;AAChB,MAAA,IAAA,CAAK,gBAAiB,EAAA;AAAA;AAGxB,IAAA,OAAO,KAAK,OAAQ,EAAA;AAAA;AACtB;AAAA;AAAA;AAAA;AAAA,EAMA,CAAC,OAA4B,GAAA;AAC3B,IAAA,IAAI,KAAK,MAAQ,EAAA;AAEf,MAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,QAAS,CAAA,IAAI,CAAG,EAAA;AAC/B,QAAK,IAAA,CAAA,OAAA,GAAU,KAAK,OAAQ,CAAA,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAqB;AAAA,OACjD,MAAA,IAAA,IAAA,CAAK,OAAQ,CAAA,QAAA,CAAS,EAAE,CAAG,EAAA;AACpC,QAAK,IAAA,CAAA,OAAA,GAAU,KAAK,OAAQ,CAAA,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAmB;AAAA;AAC1D;AAEF,IAAI,IAAA,KAAA;AACJ,IAAQ,OAAA,KAAA,GAAQ,IAAK,CAAA,UAAA,EAAe,EAAA;AAClC,MAAM,MAAA,KAAA;AAAA;AACR;AACF;AAAA;AAAA;AAAA;AAAA,EAMA,gBAAyB,GAAA;AACvB,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,MAAS,GAAA,IAAA,CAAK,cAAgB,EAAA;AAC7C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAK,CAAA,OAAA,CAAQ,MAAM,CAAA,8CAAA,EAAiD,KAAK,cAAc,CAAA,WAAA;AAAA,OACzG;AAAA;AACF;AACF;AAAA;AAAA;AAAA;AAAA,EAMA,UAA2B,GAAA;AACzB,IAAA,IAAA,CAAK,SAAS,cAAe,EAAA;AAC7B,IAAI,IAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,KAAW,CAAG,EAAA;AAC7B,MAAO,OAAA,IAAA;AAAA;AAGT,IACE,IAAA,IAAA,CAAK,WAAW,KACf,KAAA,IAAA,CAAK,YAAY,IAAQ,IAAA,IAAA,CAAK,YAAY,EAC3C,CAAA,EAAA;AACA,MAAO,OAAA,IAAA;AAAA;AAIT,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,IAAI,CAAG,EAAA;AACjC,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAA,IAAA,CAAK,OAAQ,CAAA,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAS,GAAA,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,CAAA;AACvB,MAAA,MAAM,KAA8B,GAAA;AAAA,QAClC,IAAM,EAAA,eAAA;AAAA,QACN,KAAO,EAAA,IAAA;AAAA,QACP,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA,UAAA;AAAA;AAClB,OACF;AACA,MAAO,OAAA,KAAA;AAAA;AAIT,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,EAAE,CAAG,EAAA;AAC/B,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAA,IAAA,CAAK,OAAQ,CA
AA,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAS,GAAA,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,CAAA;AACvB,MAAA,MAAM,KAA8B,GAAA;AAAA,QAClC,IAAM,EAAA,eAAA;AAAA,QACN,KAAO,EAAA,EAAA;AAAA,QACP,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA,UAAA;AAAA;AAClB,OACF;AACA,MAAO,OAAA,KAAA;AAAA;AAIT,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,IAAA,CAAK,UAAU,CAAG,EAAA;AAC5C,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,IAAK,CAAA,qBAAA;AAC5B,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,IAAK,CAAA,qBAAA;AAC5B,MAAO,OAAA;AAAA,QACL,IAAM,EAAA,cAAA;AAAA,QACN,OAAO,IAAK,CAAA,UAAA;AAAA,QACZ,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA;AAAA;AAClB,OACF;AAAA;AAIF,IAAA,IAAI,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,IAAA,CAAK,UAAU,CAAG,EAAA;AAqB5C,MAAA,IAAI,KAAQ,GAAA,EAAA;AACZ,MAAA,IAAI,MAAS,GAAA,CAAA;AACb,MAAA,IAAI,MAAS,GAAA,CAAA;AACb,MAAA,IAAI,IAAO,GAAA,CAAA;AAGX,MAAI,IAAA,GAAA,GAAc,IAAK,CAAA,OAAA,CAAQ,MAAM,CAAA;AACrC,MAAA,IAAI,IAA2B,GAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,GAAS,CAAC,CAAA;AACtD,MAAG,GAAA;AAED,QAAI,IAAA,GAAA,KAAQ,KAAK,UAAY,EAAA;AAG3B,UAAI,IAAA,IAAA,KAAS,KAAK,UAAY,EAAA;AAE5B,YAAA,KAAA,IAAS,IAAK,CAAA,UAAA;AACd,YAAU,MAAA,IAAA,CAAA;AACV,YAAM,GAAA,GAAA,IAAA,CAAK,QAAQ,MAAM,CAAA;AACzB,YAAO,IAAA,GAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,GAAS,CAAC,CAAA;AAG9B,YAAU,MAAA,IAAA,CAAA;AACV,YAAA;AAAA;AAKF,UAAA,IAAI,IAAS,KAAA,MAAA,IAAa,IAAK,CAAA,MAAA,KAAW,KAAO,EAAA;AAC/C,YAAO,OAAA,IAAA;AAAA;AAKT,UAAA,MAAA,EAAA;AACA,UAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,MAAM,CAAA;AACxC,UAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,UAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,MAAU,IAAA,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,IAAQ,IAAA,IAAA;AACrB,UAAO,OAAA;AAAA,YACL,IAAM,EAAA,KAAA;AAAA,YACN,KAAA;AAAA,YACA,QAAU,EAAA;AAAA,cACR,KAAA;AAAA,cACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,cACvB,WAAW,IAAK,CAAA;AAAA;AAClB,WACF;AAA
A;AAIF,QAAS,KAAA,IAAA,GAAA;AAGT,QAAA,IAAI,QAAQ,EAAI,EAAA;AAGd,UAAA,IAAA,EAAA;AACA,UAAS,MAAA,GAAA,CAAA;AAAA,SACJ,MAAA;AAEL,UAAA,MAAA,EAAA;AAAA;AAGF,QAAA,MAAA,EAAA;AACA,QAAM,GAAA,GAAA,IAAA;AACN,QAAO,IAAA,GAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,GAAS,CAAC,CAAA;AAAA,eACvB,GAAQ,KAAA,MAAA;AAEjB,MAAA,IAAI,KAAK,MAAQ,EAAA;AACf,QAAM,MAAA,IAAI,WAAW,4CAA8C,EAAA;AAAA,UACjE,QAAU,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ;AAAA,SAC7B,CAAA;AAAA;AAEH,MAAO,OAAA,IAAA;AAAA;AAIT,IAAA,MAAM,KAAQ,GAAA,IAAA,CAAK,QAAS,CAAA,IAAA,CAAK,KAAK,OAAO,CAAA;AAC7C,IAAA,IAAI,KAAO,EAAA;AAGT,MAAI,IAAA,IAAA,CAAK,WAAW,KAAS,IAAA,KAAA,CAAM,CAAC,CAAE,CAAA,MAAA,KAAW,IAAK,CAAA,OAAA,CAAQ,MAAQ,EAAA;AACpE,QAAO,OAAA,IAAA;AAAA;AAET,MAAM,MAAA,KAAA,GAAQ,MAAM,CAAC,CAAA;AACrB,MAAA,IAAA,CAAK,OAAU,GAAA,IAAA,CAAK,OAAQ,CAAA,KAAA,CAAM,MAAM,MAAM,CAAA;AAC9C,MAAA,MAAM,KAAkB,GAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAC1C,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,KAAM,CAAA,MAAA;AAC7B,MAAK,IAAA,CAAA,OAAA,CAAQ,UAAU,KAAM,CAAA,MAAA;AAC7B,MAAO,OAAA;AAAA,QACL,IAAM,EAAA,KAAA;AAAA,QACN,KAAA;AAAA,QACA,QAAU,EAAA;AAAA,UACR,KAAA;AAAA,UACA,GAAK,EAAA,EAAE,GAAG,IAAA,CAAK,OAAQ,EAAA;AAAA,UACvB,WAAW,IAAK,CAAA;AAAA;AAClB,OACF;AAAA;AAIF,IAAO,OAAA,IAAA;AAAA;AAEX;;;;"}
1
+ {"version":3,"file":"CSVLexer.js","sources":["../src/CSVLexer.ts"],"sourcesContent":["import { assertCommonOptions } from \"./assertCommonOptions.ts\";\nimport { Field, FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n AbortSignalOptions,\n CommonOptions,\n Position,\n RecordDelimiterToken,\n Token,\n} from \"./common/types.ts\";\nimport { CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, LF } from \"./constants.ts\";\nimport { escapeRegExp } from \"./utils/escapeRegExp.ts\";\n\n/**\n * Default maximum buffer size in characters (UTF-16 code units).\n * Approximately 10MB for ASCII text, but may vary for non-ASCII characters.\n */\nexport const DEFAULT_MAX_BUFFER_SIZE = 10 * 1024 * 1024;\n\n/**\n * Options for the CSVLexer.lex method.\n */\nexport interface CSVLexerLexOptions {\n /**\n * If true, indicates that more chunks are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Lexer.\n *\n * CSVLexer tokenizes CSV data into fields and records.\n */\nexport class CSVLexer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = DEFAULT_QUOTATION,\n> {\n #delimiter: string;\n #quotation: string;\n #buffer = \"\";\n #flush = false;\n #matcher: RegExp;\n #fieldDelimiterLength: number;\n #maxBufferSize: number;\n\n #cursor: Position = {\n line: 1,\n column: 1,\n offset: 0,\n };\n #rowNumber = 1;\n\n #signal?: AbortSignal;\n\n /**\n * Constructs a new CSVLexer instance.\n * @param options - The common options for the lexer.\n */\n constructor(\n options: CommonOptions<Delimiter, Quotation> & AbortSignalOptions = {},\n ) {\n const {\n delimiter = DEFAULT_DELIMITER,\n quotation = DEFAULT_QUOTATION,\n maxBufferSize = DEFAULT_MAX_BUFFER_SIZE,\n signal,\n } = options;\n assertCommonOptions({ delimiter, quotation, maxBufferSize });\n this.#delimiter = delimiter;\n this.#quotation = quotation;\n this.#fieldDelimiterLength = 
delimiter.length;\n this.#maxBufferSize = maxBufferSize;\n const d = escapeRegExp(delimiter);\n const q = escapeRegExp(quotation);\n this.#matcher = new RegExp(\n `^(?:(?!${q})(?!${d})(?![\\\\r\\\\n]))([\\\\S\\\\s\\\\uFEFF\\\\xA0]+?)(?=${q}|${d}|\\\\r|\\\\n|$)`,\n );\n if (signal) {\n this.#signal = signal;\n }\n }\n\n /**\n * Lexes the given chunk of CSV data.\n * @param chunk - The chunk of CSV data to be lexed. Omit to flush remaining data.\n * @param options - Lexer options.\n * @returns An iterable iterator of tokens.\n */\n public lex(\n chunk?: string,\n options?: CSVLexerLexOptions,\n ): IterableIterator<Token> {\n const stream = options?.stream ?? false;\n\n if (!stream) {\n this.#flush = true;\n }\n if (chunk !== undefined && chunk.length !== 0) {\n this.#buffer += chunk;\n this.#checkBufferSize();\n }\n\n return this.#tokens();\n }\n\n /**\n * Generates tokens from the buffered CSV data.\n * @yields Tokens from the buffered CSV data.\n */\n *#tokens(): Generator<Token> {\n if (this.#flush) {\n // Trim the last CRLF or LF\n if (this.#buffer.endsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(0, -2 /* -CRLF.length */);\n } else if (this.#buffer.endsWith(LF)) {\n this.#buffer = this.#buffer.slice(0, -1 /* -LF.length */);\n }\n }\n let token: Token | null;\n while ((token = this.#nextToken())) {\n yield token;\n }\n }\n\n /**\n * Checks if the buffer size exceeds the maximum allowed size.\n * @throws {RangeError} If the buffer size exceeds the maximum.\n */\n #checkBufferSize(): void {\n if (this.#buffer.length > this.#maxBufferSize) {\n throw new RangeError(\n `Buffer size (${this.#buffer.length} characters) exceeded maximum allowed size of ${this.#maxBufferSize} characters`,\n );\n }\n }\n\n /**\n * Retrieves the next token from the buffered CSV data.\n * @returns The next token or null if there are no more tokens.\n */\n #nextToken(): Token | null {\n this.#signal?.throwIfAborted();\n if (this.#buffer.length === 0) {\n return null;\n }\n // Buffer is 
Record Delimiter, defer to the next iteration.\n if (\n this.#flush === false &&\n (this.#buffer === CRLF || this.#buffer === LF)\n ) {\n return null;\n }\n\n // Check for CRLF\n if (this.#buffer.startsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(2);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 2; // CRLF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: CRLF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for LF\n if (this.#buffer.startsWith(LF)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.line++;\n this.#cursor.column = 1;\n this.#cursor.offset += 1; // LF.length\n const token: RecordDelimiterToken = {\n type: RecordDelimiter,\n value: LF,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber++,\n },\n };\n return token;\n }\n\n // Check for Delimiter\n if (this.#buffer.startsWith(this.#delimiter)) {\n this.#buffer = this.#buffer.slice(1);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += this.#fieldDelimiterLength;\n this.#cursor.offset += this.#fieldDelimiterLength;\n return {\n type: FieldDelimiter,\n value: this.#delimiter,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Check for Quoted String\n if (this.#buffer.startsWith(this.#quotation)) {\n /**\n * Extract Quoted field.\n *\n * The following code is equivalent to the following:\n *\n * If the next character is a quote:\n * - If the character after that is a quote, then append a quote to the value and skip two characters.\n * - Otherwise, return the quoted string.\n * Otherwise, append the character to the value and skip one character.\n *\n * ```plaintext\n * | `i` | `i + 1` | `i + 2` |\n * |------------|------------|----------|\n * | cur | next | | => 
Variable names\n * | #quotation | #quotation | | => Escaped quote\n * | #quotation | (EOF) | | => Closing quote\n * | #quotation | undefined | | => End of buffer\n * | undefined | | | => End of buffer\n * ```\n */\n let value = \"\";\n let offset = 1; // Skip the opening quote\n let column = 2; // Skip the opening quote\n let line = 0;\n\n // Define variables\n let cur: string = this.#buffer[offset];\n let next: string | undefined = this.#buffer[offset + 1];\n do {\n // If the current character is a quote, check the next characters for closing quotes.\n if (cur === this.#quotation) {\n // If the cur character is a quote and the next character is a quote,\n // then append a quote to the value and skip two characters.\n if (next === this.#quotation) {\n // Append a quote to the value and skip two characters.\n value += this.#quotation;\n offset += 2;\n cur = this.#buffer[offset];\n next = this.#buffer[offset + 1];\n\n // Update the diff\n column += 2;\n continue;\n }\n\n // If the cur character is a quote and the next character is undefined,\n // then return null.\n if (next === undefined && this.#flush === false) {\n return null;\n }\n\n // Otherwise, return the quoted string.\n // Update the buffer and return the token\n offset++;\n this.#buffer = this.#buffer.slice(offset);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += column;\n this.#cursor.offset += offset;\n this.#cursor.line += line;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Append the character to the value.\n value += cur;\n\n // Prepare for the next iteration\n if (cur === LF) {\n // If the current character is a LF,\n // then increment the line number and reset the column number.\n line++;\n column = 1;\n } else {\n // Otherwise, increment the column number and offset.\n column++;\n }\n\n offset++;\n cur = next;\n next = this.#buffer[offset + 1];\n } while (cur !== undefined);\n\n if 
(this.#flush) {\n throw new ParseError(\"Unexpected EOF while parsing quoted field.\", {\n position: { ...this.#cursor },\n });\n }\n return null;\n }\n\n // Check for Unquoted String\n const match = this.#matcher.exec(this.#buffer);\n if (match) {\n // If we're flushing and the match doesn't consume the entire buffer,\n // then return null\n if (this.#flush === false && match[0].length === this.#buffer.length) {\n return null;\n }\n const value = match[1];\n this.#buffer = this.#buffer.slice(value.length);\n const start: Position = { ...this.#cursor };\n this.#cursor.column += value.length;\n this.#cursor.offset += value.length;\n return {\n type: Field,\n value,\n location: {\n start,\n end: { ...this.#cursor },\n rowNumber: this.#rowNumber,\n },\n };\n }\n\n // Otherwise, return null\n return null;\n }\n}\n"],"names":[],"mappings":";;;;;;AAiBO,MAAM,uBAAA,GAA0B,KAAK,IAAA,GAAO;AAkB5C,MAAM,QAAA,CAGX;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,OAAA,GAAU,EAAA;AAAA,EACV,MAAA,GAAS,KAAA;AAAA,EACT,QAAA;AAAA,EACA,qBAAA;AAAA,EACA,cAAA;AAAA,EAEA,OAAA,GAAoB;AAAA,IAClB,IAAA,EAAM,CAAA;AAAA,IACN,MAAA,EAAQ,CAAA;AAAA,IACR,MAAA,EAAQ;AAAA,GACV;AAAA,EACA,UAAA,GAAa,CAAA;AAAA,EAEb,OAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,WAAA,CACE,OAAA,GAAoE,EAAC,EACrE;AACA,IAAA,MAAM;AAAA,MACJ,SAAA,GAAY,iBAAA;AAAA,MACZ,SAAA,GAAY,iBAAA;AAAA,MACZ,aAAA,GAAgB,uBAAA;AAAA,MAChB;AAAA,KACF,GAAI,OAAA;AACJ,IAAA,mBAAA,CAAoB,EAAE,SAAA,EAAW,SAAA,EAAW,aAAA,EAAe,CAAA;AAC3D,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,wBAAwB,SAAA,CAAU,MAAA;AACvC,IAAA,IAAA,CAAK,cAAA,GAAiB,aAAA;AACtB,IAAA,MAAM,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAA,MAAM,CAAA,GAAI,aAAa,SAAS,CAAA;AAChC,IAAA,IAAA,CAAK,WAAW,IAAI,MAAA;AAAA,MAClB,UAAU,CAAC,CAAA,IAAA,EAAO,CAAC,CAAA,yCAAA,EAA4C,CAAC,IAAI,CAAC,CAAA,WAAA;AAAA,KACvE;AACA,IAAA,IAAI,MAAA,EAAQ;AACV,MAAA,IAAA,CAAK,OAAA,GAAU,MAAA;AAAA,IACjB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQO,GAAA,CACL,OACA,OAAA,EACyB;AACzB,IAAA,MAAM,MAAA,GAAS,SAAS,MAAA,IAAU,KAAA;AAElC,IAAA,IA
AI,CAAC,MAAA,EAAQ;AACX,MAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AAAA,IAChB;AACA,IAAA,IAAI,KAAA,KAAU,MAAA,IAAa,KAAA,CAAM,MAAA,KAAW,CAAA,EAAG;AAC7C,MAAA,IAAA,CAAK,OAAA,IAAW,KAAA;AAChB,MAAA,IAAA,CAAK,gBAAA,EAAiB;AAAA,IACxB;AAEA,IAAA,OAAO,KAAK,OAAA,EAAQ;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,CAAC,OAAA,GAA4B;AAC3B,IAAA,IAAI,KAAK,MAAA,EAAQ;AAEf,MAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,QAAA,CAAS,IAAI,CAAA,EAAG;AAC/B,QAAA,IAAA,CAAK,OAAA,GAAU,KAAK,OAAA,CAAQ,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAqB;AAAA,MAC5D,CAAA,MAAA,IAAW,IAAA,CAAK,OAAA,CAAQ,QAAA,CAAS,EAAE,CAAA,EAAG;AACpC,QAAA,IAAA,CAAK,OAAA,GAAU,KAAK,OAAA,CAAQ,KAAA;AAAA,UAAM,CAAA;AAAA,UAAG;AAAA;AAAA,SAAmB;AAAA,MAC1D;AAAA,IACF;AACA,IAAA,IAAI,KAAA;AACJ,IAAA,OAAQ,KAAA,GAAQ,IAAA,CAAK,UAAA,EAAW,EAAI;AAClC,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,gBAAA,GAAyB;AACvB,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,IAAA,CAAK,cAAA,EAAgB;AAC7C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAA,CAAK,OAAA,CAAQ,MAAM,CAAA,8CAAA,EAAiD,KAAK,cAAc,CAAA,WAAA;AAAA,OACzG;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,UAAA,GAA2B;AACzB,IAAA,IAAA,CAAK,SAAS,cAAA,EAAe;AAC7B,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,OAAO,IAAA;AAAA,IACT;AAEA,IAAA,IACE,IAAA,CAAK,WAAW,KAAA,KACf,IAAA,CAAK,YAAY,IAAA,IAAQ,IAAA,CAAK,YAAY,EAAA,CAAA,EAC3C;AACA,MAAA,OAAO,IAAA;AAAA,IACT;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,IAAI,CAAA,EAAG;AACjC,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAA,GAAS,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,CAAA;AACvB,MAAA,MAAM,KAAA,GAA8B;AAAA,QAClC,IAAA,EAAM,eAAA;AAAA,QACN,KAAA,EAAO,IAAA;AAAA,QACP,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK,UAAA;AAAA;AAClB,OACF;AACA,MAAA,OAAO,KAAA;AAAA,IACT;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,EAAE,CAAA,EAAG;AAC/B,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA
,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,IAAA,EAAA;AACb,MAAA,IAAA,CAAK,QAAQ,MAAA,GAAS,CAAA;AACtB,MAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,CAAA;AACvB,MAAA,MAAM,KAAA,GAA8B;AAAA,QAClC,IAAA,EAAM,eAAA;AAAA,QACN,KAAA,EAAO,EAAA;AAAA,QACP,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK,UAAA;AAAA;AAClB,OACF;AACA,MAAA,OAAO,KAAA;AAAA,IACT;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,IAAA,CAAK,UAAU,CAAA,EAAG;AAC5C,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,CAAC,CAAA;AACnC,MAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,IAAA,CAAK,qBAAA;AAC5B,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,IAAA,CAAK,qBAAA;AAC5B,MAAA,OAAO;AAAA,QACL,IAAA,EAAM,cAAA;AAAA,QACN,OAAO,IAAA,CAAK,UAAA;AAAA,QACZ,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK;AAAA;AAClB,OACF;AAAA,IACF;AAGA,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,UAAA,CAAW,IAAA,CAAK,UAAU,CAAA,EAAG;AAqB5C,MAAA,IAAI,KAAA,GAAQ,EAAA;AACZ,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,IAAI,IAAA,GAAO,CAAA;AAGX,MAAA,IAAI,GAAA,GAAc,IAAA,CAAK,OAAA,CAAQ,MAAM,CAAA;AACrC,MAAA,IAAI,IAAA,GAA2B,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,CAAC,CAAA;AACtD,MAAA,GAAG;AAED,QAAA,IAAI,GAAA,KAAQ,KAAK,UAAA,EAAY;AAG3B,UAAA,IAAI,IAAA,KAAS,KAAK,UAAA,EAAY;AAE5B,YAAA,KAAA,IAAS,IAAA,CAAK,UAAA;AACd,YAAA,MAAA,IAAU,CAAA;AACV,YAAA,GAAA,GAAM,IAAA,CAAK,QAAQ,MAAM,CAAA;AACzB,YAAA,IAAA,GAAO,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,CAAC,CAAA;AAG9B,YAAA,MAAA,IAAU,CAAA;AACV,YAAA;AAAA,UACF;AAIA,UAAA,IAAI,IAAA,KAAS,MAAA,IAAa,IAAA,CAAK,MAAA,KAAW,KAAA,EAAO;AAC/C,YAAA,OAAO,IAAA;AAAA,UACT;AAIA,UAAA,MAAA,EAAA;AACA,UAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,MAAM,CAAA;AACxC,UAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,UAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,MAAA,IAAU,MAAA;AACvB,UAAA,IAAA,CAAK,QAAQ,IAAA,IAAQ,IAAA;AACrB,UAAA,OAAO;AAAA,YACL,IAAA,EAAM,KAAA;AAAA,YACN,KAAA;AAAA,YACA,QAAA,EAAU;AAAA,cACR,KAAA;
AAAA,cACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,cACvB,WAAW,IAAA,CAAK;AAAA;AAClB,WACF;AAAA,QACF;AAGA,QAAA,KAAA,IAAS,GAAA;AAGT,QAAA,IAAI,QAAQ,EAAA,EAAI;AAGd,UAAA,IAAA,EAAA;AACA,UAAA,MAAA,GAAS,CAAA;AAAA,QACX,CAAA,MAAO;AAEL,UAAA,MAAA,EAAA;AAAA,QACF;AAEA,QAAA,MAAA,EAAA;AACA,QAAA,GAAA,GAAM,IAAA;AACN,QAAA,IAAA,GAAO,IAAA,CAAK,OAAA,CAAQ,MAAA,GAAS,CAAC,CAAA;AAAA,MAChC,SAAS,GAAA,KAAQ,MAAA;AAEjB,MAAA,IAAI,KAAK,MAAA,EAAQ;AACf,QAAA,MAAM,IAAI,WAAW,4CAAA,EAA8C;AAAA,UACjE,QAAA,EAAU,EAAE,GAAG,IAAA,CAAK,OAAA;AAAQ,SAC7B,CAAA;AAAA,MACH;AACA,MAAA,OAAO,IAAA;AAAA,IACT;AAGA,IAAA,MAAM,KAAA,GAAQ,IAAA,CAAK,QAAA,CAAS,IAAA,CAAK,KAAK,OAAO,CAAA;AAC7C,IAAA,IAAI,KAAA,EAAO;AAGT,MAAA,IAAI,IAAA,CAAK,WAAW,KAAA,IAAS,KAAA,CAAM,CAAC,CAAA,CAAE,MAAA,KAAW,IAAA,CAAK,OAAA,CAAQ,MAAA,EAAQ;AACpE,QAAA,OAAO,IAAA;AAAA,MACT;AACA,MAAA,MAAM,KAAA,GAAQ,MAAM,CAAC,CAAA;AACrB,MAAA,IAAA,CAAK,OAAA,GAAU,IAAA,CAAK,OAAA,CAAQ,KAAA,CAAM,MAAM,MAAM,CAAA;AAC9C,MAAA,MAAM,KAAA,GAAkB,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAC1C,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,KAAA,CAAM,MAAA;AAC7B,MAAA,IAAA,CAAK,OAAA,CAAQ,UAAU,KAAA,CAAM,MAAA;AAC7B,MAAA,OAAO;AAAA,QACL,IAAA,EAAM,KAAA;AAAA,QACN,KAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,KAAA;AAAA,UACA,GAAA,EAAK,EAAE,GAAG,IAAA,CAAK,OAAA,EAAQ;AAAA,UACvB,WAAW,IAAA,CAAK;AAAA;AAClB,OACF;AAAA,IACF;AAGA,IAAA,OAAO,IAAA;AAAA,EACT;AACF;;;;"}
@@ -1,12 +1,32 @@
1
1
  import { CSVLexer } from './CSVLexer.ts';
2
- import { CSVLexerTransformerOptions, Token } from './common/types.ts';
2
+ import { CSVLexerTransformerOptions, ExtendedQueuingStrategy, Token } from './common/types.ts';
3
3
  import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from './constants.ts';
4
4
  /**
5
5
  * A transform stream that converts a stream of strings into a stream of tokens.
6
6
  *
7
7
  * @category Low-level API
8
8
  *
9
- * @example Parse a CSV with headers by data
9
+ * @param options - CSV-specific options (delimiter, quotation, etc.)
10
+ * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 65536, size: chunk => chunk.length, checkInterval: 100 }`)
11
+ * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 100 }`)
12
+ *
13
+ * @remarks
14
+ * Follows the Web Streams API pattern where queuing strategies are passed as
15
+ * constructor arguments, similar to the standard `TransformStream`.
16
+ *
17
+ * **Default Queuing Strategy:**
18
+ * - Writable side: Counts by string length (characters). Default highWaterMark is 65536 characters (≈64KB).
19
+ * - Readable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.
20
+ *
21
+ * **Backpressure Handling:**
22
+ * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure
23
+ * is detected (desiredSize ≤ 0). This prevents blocking the main thread during heavy processing
24
+ * and allows the downstream consumer to catch up.
25
+ *
26
+ * These defaults are starting points based on data flow characteristics, not empirical benchmarks.
27
+ * Optimal values depend on your runtime environment, data size, and performance requirements.
28
+ *
29
+ * @example Basic usage
10
30
  * ```ts
11
31
  * new ReadableStream({
12
32
  * start(controller) {
@@ -30,8 +50,37 @@ import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from './constants.ts';
30
50
  * // { type: Field, value: "20" }
31
51
  * // { type: RecordDelimiter, value: "\r\n", location: {...} }
32
52
  * ```
53
+ *
54
+ * @example Custom queuing strategies with backpressure tuning
55
+ * ```ts
56
+ * const transformer = new CSVLexerTransformer(
57
+ * { delimiter: ',' },
58
+ * {
59
+ * highWaterMark: 131072, // 128KB of characters
60
+ * size: (chunk) => chunk.length, // Count by character length
61
+ * checkInterval: 200 // Check backpressure every 200 tokens
62
+ * },
63
+ * {
64
+ * highWaterMark: 2048, // 2048 tokens
65
+ * size: (tokens) => tokens.length, // Count by token count
66
+ * checkInterval: 50 // Check backpressure every 50 tokens
67
+ * }
68
+ * );
69
+ *
70
+ * await fetch('large-file.csv')
71
+ * .then(res => res.body)
72
+ * .pipeThrough(new TextDecoderStream())
73
+ * .pipeThrough(transformer)
74
+ * .pipeTo(yourProcessor);
75
+ * ```
33
76
  */
34
77
  export declare class CSVLexerTransformer<Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> extends TransformStream<string, Token[]> {
35
78
  readonly lexer: CSVLexer<Delimiter, Quotation>;
36
- constructor(options?: CSVLexerTransformerOptions<Delimiter, Quotation>);
79
+ /**
80
+ * Yields to the event loop to allow backpressure handling.
81
+ * Can be overridden for testing purposes.
82
+ * @internal
83
+ */
84
+ protected yieldToEventLoop(): Promise<void>;
85
+ constructor(options?: CSVLexerTransformerOptions<Delimiter, Quotation>, writableStrategy?: ExtendedQueuingStrategy<string>, readableStrategy?: ExtendedQueuingStrategy<Token[]>);
37
86
  }
@@ -2,26 +2,71 @@ import { CSVLexer } from './CSVLexer.js';
2
2
 
3
3
  class CSVLexerTransformer extends TransformStream {
4
4
  lexer;
5
- constructor(options = {}) {
5
+ /**
6
+ * Yields to the event loop to allow backpressure handling.
7
+ * Can be overridden for testing purposes.
8
+ * @internal
9
+ */
10
+ async yieldToEventLoop() {
11
+ await new Promise((resolve) => setTimeout(resolve, 0));
12
+ }
13
+ constructor(options = {}, writableStrategy = {
14
+ highWaterMark: 65536,
15
+ // 64KB worth of characters
16
+ size: (chunk) => chunk.length,
17
+ // Count by string length (character count)
18
+ checkInterval: 100
19
+ // Check backpressure every 100 tokens
20
+ }, readableStrategy = {
21
+ highWaterMark: 1024,
22
+ // 1024 tokens
23
+ size: (tokens) => tokens.length,
24
+ // Count by number of tokens in array
25
+ checkInterval: 100
26
+ // Check backpressure every 100 tokens
27
+ }) {
6
28
  const lexer = new CSVLexer(options);
7
- super({
8
- transform: (chunk, controller) => {
9
- if (chunk.length !== 0) {
29
+ const checkInterval = writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 100;
30
+ super(
31
+ {
32
+ transform: async (chunk, controller) => {
33
+ if (chunk.length !== 0) {
34
+ try {
35
+ const tokens = [];
36
+ for (const token of lexer.lex(chunk, { stream: true })) {
37
+ tokens.push(token);
38
+ if (tokens.length % checkInterval === 0 && controller.desiredSize !== null && controller.desiredSize <= 0) {
39
+ await this.yieldToEventLoop();
40
+ }
41
+ }
42
+ if (tokens.length > 0) {
43
+ controller.enqueue(tokens);
44
+ }
45
+ } catch (error) {
46
+ controller.error(error);
47
+ }
48
+ }
49
+ },
50
+ flush: async (controller) => {
10
51
  try {
11
- controller.enqueue([...lexer.lex(chunk, { stream: true })]);
52
+ const tokens = [];
53
+ for (const token of lexer.lex()) {
54
+ tokens.push(token);
55
+ if (tokens.length % checkInterval === 0 && controller.desiredSize !== null && controller.desiredSize <= 0) {
56
+ await this.yieldToEventLoop();
57
+ }
58
+ }
59
+ if (tokens.length > 0) {
60
+ controller.enqueue(tokens);
61
+ }
12
62
  } catch (error) {
13
63
  controller.error(error);
14
64
  }
15
65
  }
16
66
  },
17
- flush: (controller) => {
18
- try {
19
- controller.enqueue([...lexer.lex()]);
20
- } catch (error) {
21
- controller.error(error);
22
- }
23
- }
24
- });
67
+ writableStrategy,
68
+ readableStrategy
69
+ );
25
70
  this.lexer = lexer;
26
71
  }
27
72
  }
@@ -1 +1 @@
1
- {"version":3,"file":"CSVLexerTransformer.js","sources":["../src/CSVLexerTransformer.ts"],"sourcesContent":["import { CSVLexer } from \"./CSVLexer.ts\";\nimport type { CSVLexerTransformerOptions, Token } from \"./common/types.ts\";\nimport type { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from \"./constants.ts\";\n\n/**\n * A transform stream that converts a stream of strings into a stream of tokens.\n *\n * @category Low-level API\n *\n * @example Parse a CSV with headers by data\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeTo(new WritableStream({ write(tokens) {\n * for (const token of tokens) {\n * console.log(token);\n * }\n * }}));\n * // { type: Field, value: \"name\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"age\", location: {...} }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * // { type: Field, value: \"Alice\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"20\" }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * ```\n */\nexport class CSVLexerTransformer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = DEFAULT_QUOTATION,\n> extends TransformStream<string, Token[]> {\n public readonly lexer: CSVLexer<Delimiter, Quotation>;\n constructor(options: CSVLexerTransformerOptions<Delimiter, Quotation> = {}) {\n const lexer = new CSVLexer(options);\n super({\n transform: (chunk, controller) => {\n if (chunk.length !== 0) {\n try {\n controller.enqueue([...lexer.lex(chunk, { stream: true })]);\n } catch (error) {\n controller.error(error);\n }\n }\n },\n flush: (controller) => {\n try {\n controller.enqueue([...lexer.lex()]);\n } catch (error) {\n 
controller.error(error);\n }\n },\n });\n this.lexer = lexer;\n }\n}\n"],"names":[],"mappings":";;AAkCO,MAAM,4BAGH,eAAiC,CAAA;AAAA,EACzB,KAAA;AAAA,EAChB,WAAA,CAAY,OAA4D,GAAA,EAAI,EAAA;AAC1E,IAAM,MAAA,KAAA,GAAQ,IAAI,QAAA,CAAS,OAAO,CAAA;AAClC,IAAM,KAAA,CAAA;AAAA,MACJ,SAAA,EAAW,CAAC,KAAA,EAAO,UAAe,KAAA;AAChC,QAAI,IAAA,KAAA,CAAM,WAAW,CAAG,EAAA;AACtB,UAAI,IAAA;AACF,YAAW,UAAA,CAAA,OAAA,CAAQ,CAAC,GAAG,KAAM,CAAA,GAAA,CAAI,KAAO,EAAA,EAAE,MAAQ,EAAA,IAAA,EAAM,CAAC,CAAC,CAAA;AAAA,mBACnD,KAAO,EAAA;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA;AACxB;AACF,OACF;AAAA,MACA,KAAA,EAAO,CAAC,UAAe,KAAA;AACrB,QAAI,IAAA;AACF,UAAA,UAAA,CAAW,QAAQ,CAAC,GAAG,KAAM,CAAA,GAAA,EAAK,CAAC,CAAA;AAAA,iBAC5B,KAAO,EAAA;AACd,UAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA;AACxB;AACF,KACD,CAAA;AACD,IAAA,IAAA,CAAK,KAAQ,GAAA,KAAA;AAAA;AAEjB;;;;"}
1
+ {"version":3,"file":"CSVLexerTransformer.js","sources":["../src/CSVLexerTransformer.ts"],"sourcesContent":["import { CSVLexer } from \"./CSVLexer.ts\";\nimport type {\n CSVLexerTransformerOptions,\n ExtendedQueuingStrategy,\n Token,\n} from \"./common/types.ts\";\nimport type { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from \"./constants.ts\";\n\n/**\n * A transform stream that converts a stream of strings into a stream of tokens.\n *\n * @category Low-level API\n *\n * @param options - CSV-specific options (delimiter, quotation, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 65536, size: chunk => chunk.length, checkInterval: 100 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 100 }`)\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts by string length (characters). Default highWaterMark is 65536 characters (≈64KB).\n * - Readable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Basic usage\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeTo(new WritableStream({ write(tokens) {\n * for (const token of tokens) {\n * console.log(token);\n * }\n * }}));\n * // { type: Field, value: \"name\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"age\", location: {...} }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * // { type: Field, value: \"Alice\", location: {...} }\n * // { type: FieldDelimiter, value: \",\", location: {...} }\n * // { type: Field, value: \"20\" }\n * // { type: RecordDelimiter, value: \"\\r\\n\", location: {...} }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVLexerTransformer(\n * { delimiter: ',' },\n * {\n * highWaterMark: 131072, // 128KB of characters\n * size: (chunk) => chunk.length, // Count by character length\n * checkInterval: 200 // Check backpressure every 200 tokens\n * },\n * {\n * highWaterMark: 2048, // 2048 tokens\n * size: (tokens) => tokens.length, // Count by token count\n * checkInterval: 50 // Check backpressure every 50 tokens\n * }\n * );\n *\n * await fetch('large-file.csv')\n * .then(res => res.body)\n * .pipeThrough(new TextDecoderStream())\n * .pipeThrough(transformer)\n * .pipeTo(yourProcessor);\n * ```\n */\nexport class CSVLexerTransformer<\n Delimiter extends string = DEFAULT_DELIMITER,\n Quotation extends string = 
DEFAULT_QUOTATION,\n> extends TransformStream<string, Token[]> {\n public readonly lexer: CSVLexer<Delimiter, Quotation>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVLexerTransformerOptions<Delimiter, Quotation> = {},\n writableStrategy: ExtendedQueuingStrategy<string> = {\n highWaterMark: 65536, // 64KB worth of characters\n size: (chunk) => chunk.length, // Count by string length (character count)\n checkInterval: 100, // Check backpressure every 100 tokens\n },\n readableStrategy: ExtendedQueuingStrategy<Token[]> = {\n highWaterMark: 1024, // 1024 tokens\n size: (tokens) => tokens.length, // Count by number of tokens in array\n checkInterval: 100, // Check backpressure every 100 tokens\n },\n ) {\n const lexer = new CSVLexer(options);\n const checkInterval =\n writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 
100;\n\n super(\n {\n transform: async (chunk, controller) => {\n if (chunk.length !== 0) {\n try {\n const tokens: Token[] = [];\n for (const token of lexer.lex(chunk, { stream: true })) {\n tokens.push(token);\n\n // Check backpressure periodically based on checkInterval\n if (\n tokens.length % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n\n if (tokens.length > 0) {\n controller.enqueue(tokens);\n }\n } catch (error) {\n controller.error(error);\n }\n }\n },\n flush: async (controller) => {\n try {\n const tokens: Token[] = [];\n for (const token of lexer.lex()) {\n tokens.push(token);\n\n // Check backpressure periodically based on checkInterval\n if (\n tokens.length % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n\n if (tokens.length > 0) {\n controller.enqueue(tokens);\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.lexer = lexer;\n 
}\n}\n"],"names":[],"mappings":";;AAiFO,MAAM,4BAGH,eAAA,CAAiC;AAAA,EACzB,KAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOhB,MAAgB,gBAAA,GAAkC;AAChD,IAAA,MAAM,IAAI,OAAA,CAAQ,CAAC,YAAY,UAAA,CAAW,OAAA,EAAS,CAAC,CAAC,CAAA;AAAA,EACvD;AAAA,EAEA,WAAA,CACE,OAAA,GAA4D,EAAC,EAC7D,gBAAA,GAAoD;AAAA,IAClD,aAAA,EAAe,KAAA;AAAA;AAAA,IACf,IAAA,EAAM,CAAC,KAAA,KAAU,KAAA,CAAM,MAAA;AAAA;AAAA,IACvB,aAAA,EAAe;AAAA;AAAA,KAEjB,gBAAA,GAAqD;AAAA,IACnD,aAAA,EAAe,IAAA;AAAA;AAAA,IACf,IAAA,EAAM,CAAC,MAAA,KAAW,MAAA,CAAO,MAAA;AAAA;AAAA,IACzB,aAAA,EAAe;AAAA;AAAA,GACjB,EACA;AACA,IAAA,MAAM,KAAA,GAAQ,IAAI,QAAA,CAAS,OAAO,CAAA;AAClC,IAAA,MAAM,aAAA,GACJ,gBAAA,CAAiB,aAAA,IAAiB,gBAAA,CAAiB,aAAA,IAAiB,GAAA;AAEtE,IAAA,KAAA;AAAA,MACE;AAAA,QACE,SAAA,EAAW,OAAO,KAAA,EAAO,UAAA,KAAe;AACtC,UAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,YAAA,IAAI;AACF,cAAA,MAAM,SAAkB,EAAC;AACzB,cAAA,KAAA,MAAW,KAAA,IAAS,MAAM,GAAA,CAAI,KAAA,EAAO,EAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAG;AACtD,gBAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAGjB,gBAAA,IACE,MAAA,CAAO,SAAS,aAAA,KAAkB,CAAA,IAClC,WAAW,WAAA,KAAgB,IAAA,IAC3B,UAAA,CAAW,WAAA,IAAe,CAAA,EAC1B;AAEA,kBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,gBAC9B;AAAA,cACF;AAEA,cAAA,IAAI,MAAA,CAAO,SAAS,CAAA,EAAG;AACrB,gBAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,cAC3B;AAAA,YACF,SAAS,KAAA,EAAO;AACd,cAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,YACxB;AAAA,UACF;AAAA,QACF,CAAA;AAAA,QACA,KAAA,EAAO,OAAO,UAAA,KAAe;AAC3B,UAAA,IAAI;AACF,YAAA,MAAM,SAAkB,EAAC;AACzB,YAAA,KAAA,MAAW,KAAA,IAAS,KAAA,CAAM,GAAA,EAAI,EAAG;AAC/B,cAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAGjB,cAAA,IACE,MAAA,CAAO,SAAS,aAAA,KAAkB,CAAA,IAClC,WAAW,WAAA,KAAgB,IAAA,IAC3B,UAAA,CAAW,WAAA,IAAe,CAAA,EAC1B;AACA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAEA,YAAA,IAAI,MAAA,CAAO,SAAS,CAAA,EAAG;AACrB,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,YAC3B;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF;AAAA,OACF;AAAA,MACA,gBAAA;AAAA,MACA;AAAA,KACF;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,KAAA;AAAA,EACf;AACF;;;;"}
@@ -1 +1 @@
1
- {"version":3,"file":"CSVRecordAssembler.js","sources":["../src/CSVRecordAssembler.ts"],"sourcesContent":["import { FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default maximum field count per record (100,000 fields).\n */\nconst DEFAULT_MAX_FIELD_COUNT = 100_000;\n\n/**\n * Options for the CSVRecordAssembler.assemble method.\n */\nexport interface CSVRecordAssemblerAssembleOptions {\n /**\n * If true, indicates that more tokens are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Record Assembler.\n *\n * CSVRecordAssembler assembles tokens into CSV records.\n */\nexport class CSVRecordAssembler<Header extends ReadonlyArray<string>> {\n #fieldIndex = 0;\n #row: string[] = [];\n #header: Header | undefined;\n #dirty = false;\n #signal?: AbortSignal;\n #maxFieldCount: number;\n #skipEmptyLines: boolean;\n\n constructor(options: CSVRecordAssemblerOptions<Header> = {}) {\n const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;\n // Validate maxFieldCount\n if (\n !(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc)))\n ) {\n throw new RangeError(\n \"maxFieldCount must be a positive integer or Number.POSITIVE_INFINITY\",\n );\n }\n this.#maxFieldCount = mfc;\n this.#skipEmptyLines = options.skipEmptyLines ?? false;\n if (options.header !== undefined && Array.isArray(options.header)) {\n this.#setHeader(options.header);\n }\n if (options.signal) {\n this.#signal = options.signal;\n }\n }\n\n /**\n * Assembles tokens into CSV records.\n * @param tokens - The tokens to assemble. 
Omit to flush remaining data.\n * @param options - Assembler options.\n * @returns An iterable iterator of CSV records.\n */\n public *assemble(\n tokens?: Iterable<Token>,\n options?: CSVRecordAssemblerAssembleOptions,\n ): IterableIterator<CSVRecord<Header>> {\n const stream = options?.stream ?? false;\n\n if (tokens !== undefined) {\n for (const token of tokens) {\n this.#signal?.throwIfAborted();\n switch (token.type) {\n case FieldDelimiter:\n this.#fieldIndex++;\n this.#checkFieldCount();\n this.#dirty = true;\n break;\n case RecordDelimiter:\n if (this.#header === undefined) {\n this.#setHeader(this.#row as unknown as Header);\n } else {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header.map((header, index) => [\n header,\n this.#row.at(index),\n ]),\n ) as unknown as CSVRecord<Header>;\n } else {\n if (this.#skipEmptyLines) {\n continue;\n }\n yield Object.fromEntries(\n this.#header.map((header) => [header, \"\"]),\n ) as CSVRecord<Header>;\n }\n }\n // Reset the row fields buffer.\n this.#fieldIndex = 0;\n this.#row = new Array(this.#header?.length).fill(\"\");\n this.#dirty = false;\n break;\n default:\n this.#dirty = true;\n this.#row[this.#fieldIndex] = token.value;\n break;\n }\n }\n }\n\n if (!stream) {\n if (this.#header !== undefined) {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header\n .filter((v) => v)\n .map((header, index) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n }\n }\n }\n }\n\n #checkFieldCount(): void {\n if (this.#fieldIndex + 1 > this.#maxFieldCount) {\n throw new RangeError(\n `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n }\n\n #setHeader(header: Header) {\n if (header.length > this.#maxFieldCount) {\n throw new RangeError(\n `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n this.#header = header;\n if (this.#header.length === 0) {\n throw new 
ParseError(\"The header must not be empty.\");\n }\n if (new Set(this.#header).size !== this.#header.length) {\n throw new ParseError(\"The header must not contain duplicate fields.\");\n }\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,uBAA0B,GAAA,GAAA;AAkBzB,MAAM,kBAAyD,CAAA;AAAA,EACpE,WAAc,GAAA,CAAA;AAAA,EACd,OAAiB,EAAC;AAAA,EAClB,OAAA;AAAA,EACA,MAAS,GAAA,KAAA;AAAA,EACT,OAAA;AAAA,EACA,cAAA;AAAA,EACA,eAAA;AAAA,EAEA,WAAA,CAAY,OAA6C,GAAA,EAAI,EAAA;AAC3D,IAAM,MAAA,GAAA,GAAM,QAAQ,aAAiB,IAAA,uBAAA;AAErC,IAAA,IACE,EAAE,MAAO,CAAA,QAAA,CAAS,GAAG,CAAK,IAAA,GAAA,KAAQ,OAAO,iBACxC,CAAA,IAAA,MAAA,CAAO,QAAS,CAAA,GAAG,MAAM,GAAM,GAAA,CAAA,IAAK,CAAC,MAAO,CAAA,SAAA,CAAU,GAAG,CAC1D,CAAA,EAAA;AACA,MAAA,MAAM,IAAI,UAAA;AAAA,QACR;AAAA,OACF;AAAA;AAEF,IAAA,IAAA,CAAK,cAAiB,GAAA,GAAA;AACtB,IAAK,IAAA,CAAA,eAAA,GAAkB,QAAQ,cAAkB,IAAA,KAAA;AACjD,IAAA,IAAI,QAAQ,MAAW,KAAA,MAAA,IAAa,MAAM,OAAQ,CAAA,OAAA,CAAQ,MAAM,CAAG,EAAA;AACjE,MAAK,IAAA,CAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA;AAEhC,IAAA,IAAI,QAAQ,MAAQ,EAAA;AAClB,MAAA,IAAA,CAAK,UAAU,OAAQ,CAAA,MAAA;AAAA;AACzB;AACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,CAAQ,QACN,CAAA,MAAA,EACA,OACqC,EAAA;AACrC,IAAM,MAAA,MAAA,GAAS,SAAS,MAAU,IAAA,KAAA;AAElC,IAAA,IAAI,WAAW,MAAW,EAAA;AACxB,MAAA,KAAA,MAAW,SAAS,MAAQ,EAAA;AAC1B,QAAA,IAAA,CAAK,SAAS,cAAe,EAAA;AAC7B,QAAA,QAAQ,MAAM,IAAM;AAAA,UAClB,KAAK,cAAA;AACH,YAAK,IAAA,CAAA,WAAA,EAAA;AACL,YAAA,IAAA,CAAK,gBAAiB,EAAA;AACtB,YAAA,IAAA,CAAK,MAAS,GAAA,IAAA;AACd,YAAA;AAAA,UACF,KAAK,eAAA;AACH,YAAI,IAAA,IAAA,CAAK,YAAY,MAAW,EAAA;AAC9B,cAAK,IAAA,CAAA,UAAA,CAAW,KAAK,IAAyB,CAAA;AAAA,aACzC,MAAA;AACL,cAAA,IAAI,KAAK,MAAQ,EAAA;AACf,gBAAA,MAAM,MAAO,CAAA,WAAA;AAAA,kBACX,IAAK,CAAA,OAAA,CAAQ,GAAI,CAAA,CAAC,QAAQ,KAAU,KAAA;AAAA,oBAClC,MAAA;AAAA,oBACA,IAAA,CAAK,IAAK,CAAA,EAAA,CAAG,KAAK;AAAA,mBACnB;AAAA,iBACH;AAAA,eACK,MAAA;AACL,gBAAA,IAAI,KAAK,eAAiB,EAAA;AACxB,kBAAA;AAAA;AAEF,gBAAA,MAAM,MAAO,CAAA,WAAA;AAAA,kBACX,IAAA,CAAK,QAAQ,GAAI,CAAA,CAAC,WAAW,CAAC,MAAA,EAAQ,EAAE,CAAC;AAAA,iBAC3C;AAAA;AACF;AAGF,YAAA,IAAA,CAAK,WAAc,GAAA,CAAA;AACnB,YAAK,IAAA,CAAA,IAAA,GA
AO,IAAI,KAAM,CAAA,IAAA,CAAK,SAAS,MAAM,CAAA,CAAE,KAAK,EAAE,CAAA;AACnD,YAAA,IAAA,CAAK,MAAS,GAAA,KAAA;AACd,YAAA;AAAA,UACF;AACE,YAAA,IAAA,CAAK,MAAS,GAAA,IAAA;AACd,YAAA,IAAA,CAAK,IAAK,CAAA,IAAA,CAAK,WAAW,CAAA,GAAI,KAAM,CAAA,KAAA;AACpC,YAAA;AAAA;AACJ;AACF;AAGF,IAAA,IAAI,CAAC,MAAQ,EAAA;AACX,MAAI,IAAA,IAAA,CAAK,YAAY,MAAW,EAAA;AAC9B,QAAA,IAAI,KAAK,MAAQ,EAAA;AACf,UAAA,MAAM,MAAO,CAAA,WAAA;AAAA,YACX,KAAK,OACF,CAAA,MAAA,CAAO,CAAC,CAAM,KAAA,CAAC,EACf,GAAI,CAAA,CAAC,MAAQ,EAAA,KAAA,KAAU,CAAC,MAAQ,EAAA,IAAA,CAAK,KAAK,EAAG,CAAA,KAAK,CAAC,CAAC;AAAA,WACzD;AAAA;AACF;AACF;AACF;AACF,EAEA,gBAAyB,GAAA;AACvB,IAAA,IAAI,IAAK,CAAA,WAAA,GAAc,CAAI,GAAA,IAAA,CAAK,cAAgB,EAAA;AAC9C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAK,CAAA,WAAA,GAAc,CAAC,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA;AACF;AACF,EAEA,WAAW,MAAgB,EAAA;AACzB,IAAI,IAAA,MAAA,CAAO,MAAS,GAAA,IAAA,CAAK,cAAgB,EAAA;AACvC,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,CAAuB,oBAAA,EAAA,MAAA,CAAO,MAAM,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA;AAEF,IAAA,IAAA,CAAK,OAAU,GAAA,MAAA;AACf,IAAI,IAAA,IAAA,CAAK,OAAQ,CAAA,MAAA,KAAW,CAAG,EAAA;AAC7B,MAAM,MAAA,IAAI,WAAW,+BAA+B,CAAA;AAAA;AAEtD,IAAI,IAAA,IAAI,IAAI,IAAK,CAAA,OAAO,EAAE,IAAS,KAAA,IAAA,CAAK,QAAQ,MAAQ,EAAA;AACtD,MAAM,MAAA,IAAI,WAAW,+CAA+C,CAAA;AAAA;AACtE;AAEJ;;;;"}
1
+ {"version":3,"file":"CSVRecordAssembler.js","sources":["../src/CSVRecordAssembler.ts"],"sourcesContent":["import { FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport { ParseError } from \"./common/errors.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default maximum field count per record (100,000 fields).\n */\nconst DEFAULT_MAX_FIELD_COUNT = 100_000;\n\n/**\n * Options for the CSVRecordAssembler.assemble method.\n */\nexport interface CSVRecordAssemblerAssembleOptions {\n /**\n * If true, indicates that more tokens are expected.\n * If false or omitted, flushes remaining data.\n */\n stream?: boolean;\n}\n\n/**\n * CSV Record Assembler.\n *\n * CSVRecordAssembler assembles tokens into CSV records.\n */\nexport class CSVRecordAssembler<Header extends ReadonlyArray<string>> {\n #fieldIndex = 0;\n #row: string[] = [];\n #header: Header | undefined;\n #dirty = false;\n #signal?: AbortSignal;\n #maxFieldCount: number;\n #skipEmptyLines: boolean;\n\n constructor(options: CSVRecordAssemblerOptions<Header> = {}) {\n const mfc = options.maxFieldCount ?? DEFAULT_MAX_FIELD_COUNT;\n // Validate maxFieldCount\n if (\n !(Number.isFinite(mfc) || mfc === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mfc) && (mfc < 1 || !Number.isInteger(mfc)))\n ) {\n throw new RangeError(\n \"maxFieldCount must be a positive integer or Number.POSITIVE_INFINITY\",\n );\n }\n this.#maxFieldCount = mfc;\n this.#skipEmptyLines = options.skipEmptyLines ?? false;\n if (options.header !== undefined && Array.isArray(options.header)) {\n this.#setHeader(options.header);\n }\n if (options.signal) {\n this.#signal = options.signal;\n }\n }\n\n /**\n * Assembles tokens into CSV records.\n * @param tokens - The tokens to assemble. 
Omit to flush remaining data.\n * @param options - Assembler options.\n * @returns An iterable iterator of CSV records.\n */\n public *assemble(\n tokens?: Iterable<Token>,\n options?: CSVRecordAssemblerAssembleOptions,\n ): IterableIterator<CSVRecord<Header>> {\n const stream = options?.stream ?? false;\n\n if (tokens !== undefined) {\n for (const token of tokens) {\n this.#signal?.throwIfAborted();\n switch (token.type) {\n case FieldDelimiter:\n this.#fieldIndex++;\n this.#checkFieldCount();\n this.#dirty = true;\n break;\n case RecordDelimiter:\n if (this.#header === undefined) {\n this.#setHeader(this.#row as unknown as Header);\n } else {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header.map((header, index) => [\n header,\n this.#row.at(index),\n ]),\n ) as unknown as CSVRecord<Header>;\n } else {\n if (this.#skipEmptyLines) {\n continue;\n }\n yield Object.fromEntries(\n this.#header.map((header) => [header, \"\"]),\n ) as CSVRecord<Header>;\n }\n }\n // Reset the row fields buffer.\n this.#fieldIndex = 0;\n this.#row = new Array(this.#header?.length).fill(\"\");\n this.#dirty = false;\n break;\n default:\n this.#dirty = true;\n this.#row[this.#fieldIndex] = token.value;\n break;\n }\n }\n }\n\n if (!stream) {\n if (this.#header !== undefined) {\n if (this.#dirty) {\n yield Object.fromEntries(\n this.#header\n .filter((v) => v)\n .map((header, index) => [header, this.#row.at(index)]),\n ) as unknown as CSVRecord<Header>;\n }\n }\n }\n }\n\n #checkFieldCount(): void {\n if (this.#fieldIndex + 1 > this.#maxFieldCount) {\n throw new RangeError(\n `Field count (${this.#fieldIndex + 1}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n }\n\n #setHeader(header: Header) {\n if (header.length > this.#maxFieldCount) {\n throw new RangeError(\n `Header field count (${header.length}) exceeded maximum allowed count of ${this.#maxFieldCount}`,\n );\n }\n this.#header = header;\n if (this.#header.length === 0) {\n throw new 
ParseError(\"The header must not be empty.\");\n }\n if (new Set(this.#header).size !== this.#header.length) {\n throw new ParseError(\"The header must not contain duplicate fields.\");\n }\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,uBAAA,GAA0B,GAAA;AAkBzB,MAAM,kBAAA,CAAyD;AAAA,EACpE,WAAA,GAAc,CAAA;AAAA,EACd,OAAiB,EAAC;AAAA,EAClB,OAAA;AAAA,EACA,MAAA,GAAS,KAAA;AAAA,EACT,OAAA;AAAA,EACA,cAAA;AAAA,EACA,eAAA;AAAA,EAEA,WAAA,CAAY,OAAA,GAA6C,EAAC,EAAG;AAC3D,IAAA,MAAM,GAAA,GAAM,QAAQ,aAAA,IAAiB,uBAAA;AAErC,IAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,MAAA,MAAM,IAAI,UAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AACA,IAAA,IAAA,CAAK,cAAA,GAAiB,GAAA;AACtB,IAAA,IAAA,CAAK,eAAA,GAAkB,QAAQ,cAAA,IAAkB,KAAA;AACjD,IAAA,IAAI,QAAQ,MAAA,KAAW,MAAA,IAAa,MAAM,OAAA,CAAQ,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjE,MAAA,IAAA,CAAK,UAAA,CAAW,QAAQ,MAAM,CAAA;AAAA,IAChC;AACA,IAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,MAAA,IAAA,CAAK,UAAU,OAAA,CAAQ,MAAA;AAAA,IACzB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,CAAQ,QAAA,CACN,MAAA,EACA,OAAA,EACqC;AACrC,IAAA,MAAM,MAAA,GAAS,SAAS,MAAA,IAAU,KAAA;AAElC,IAAA,IAAI,WAAW,MAAA,EAAW;AACxB,MAAA,KAAA,MAAW,SAAS,MAAA,EAAQ;AAC1B,QAAA,IAAA,CAAK,SAAS,cAAA,EAAe;AAC7B,QAAA,QAAQ,MAAM,IAAA;AAAM,UAClB,KAAK,cAAA;AACH,YAAA,IAAA,CAAK,WAAA,EAAA;AACL,YAAA,IAAA,CAAK,gBAAA,EAAiB;AACtB,YAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,YAAA;AAAA,UACF,KAAK,eAAA;AACH,YAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,cAAA,IAAA,CAAK,UAAA,CAAW,KAAK,IAAyB,CAAA;AAAA,YAChD,CAAA,MAAO;AACL,cAAA,IAAI,KAAK,MAAA,EAAQ;AACf,gBAAA,MAAM,MAAA,CAAO,WAAA;AAAA,kBACX,IAAA,CAAK,OAAA,CAAQ,GAAA,CAAI,CAAC,QAAQ,KAAA,KAAU;AAAA,oBAClC,MAAA;AAAA,oBACA,IAAA,CAAK,IAAA,CAAK,EAAA,CAAG,KAAK;AAAA,mBACnB;AAAA,iBACH;AAAA,cACF,CAAA,MAAO;AACL,gBAAA,IAAI,KAAK,eAAA,EAAiB;AACxB,kBAAA;AAAA,gBACF;AACA,gBAAA,MAAM,MAAA,CAAO,WAAA;AAAA,kBACX,IAAA,CAAK,QAAQ,GAAA,CAAI,CAAC,WAAW,CAAC,MAAA,EAAQ,EAAE,CAAC;AAAA,iBAC3C;AAAA,cACF;AAAA,YACF;AAEA,YAAA,IAAA,CA
AK,WAAA,GAAc,CAAA;AACnB,YAAA,IAAA,CAAK,IAAA,GAAO,IAAI,KAAA,CAAM,IAAA,CAAK,SAAS,MAAM,CAAA,CAAE,KAAK,EAAE,CAAA;AACnD,YAAA,IAAA,CAAK,MAAA,GAAS,KAAA;AACd,YAAA;AAAA,UACF;AACE,YAAA,IAAA,CAAK,MAAA,GAAS,IAAA;AACd,YAAA,IAAA,CAAK,IAAA,CAAK,IAAA,CAAK,WAAW,CAAA,GAAI,KAAA,CAAM,KAAA;AACpC,YAAA;AAAA;AACJ,MACF;AAAA,IACF;AAEA,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,IAAI,IAAA,CAAK,YAAY,MAAA,EAAW;AAC9B,QAAA,IAAI,KAAK,MAAA,EAAQ;AACf,UAAA,MAAM,MAAA,CAAO,WAAA;AAAA,YACX,KAAK,OAAA,CACF,MAAA,CAAO,CAAC,CAAA,KAAM,CAAC,EACf,GAAA,CAAI,CAAC,MAAA,EAAQ,KAAA,KAAU,CAAC,MAAA,EAAQ,IAAA,CAAK,KAAK,EAAA,CAAG,KAAK,CAAC,CAAC;AAAA,WACzD;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,gBAAA,GAAyB;AACvB,IAAA,IAAI,IAAA,CAAK,WAAA,GAAc,CAAA,GAAI,IAAA,CAAK,cAAA,EAAgB;AAC9C,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,gBAAgB,IAAA,CAAK,WAAA,GAAc,CAAC,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAA,EAAgB;AACzB,IAAA,IAAI,MAAA,CAAO,MAAA,GAAS,IAAA,CAAK,cAAA,EAAgB;AACvC,MAAA,MAAM,IAAI,UAAA;AAAA,QACR,CAAA,oBAAA,EAAuB,MAAA,CAAO,MAAM,CAAA,oCAAA,EAAuC,KAAK,cAAc,CAAA;AAAA,OAChG;AAAA,IACF;AACA,IAAA,IAAA,CAAK,OAAA,GAAU,MAAA;AACf,IAAA,IAAI,IAAA,CAAK,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,WAAW,+BAA+B,CAAA;AAAA,IACtD;AACA,IAAA,IAAI,IAAI,IAAI,IAAA,CAAK,OAAO,EAAE,IAAA,KAAS,IAAA,CAAK,QAAQ,MAAA,EAAQ;AACtD,MAAA,MAAM,IAAI,WAAW,+CAA+C,CAAA;AAAA,IACtE;AAAA,EACF;AACF;;;;"}
@@ -1,12 +1,31 @@
1
1
  import { CSVRecordAssembler } from './CSVRecordAssembler.ts';
2
- import { CSVRecord, CSVRecordAssemblerOptions, Token } from './common/types.ts';
2
+ import { CSVRecord, CSVRecordAssemblerOptions, ExtendedQueuingStrategy, Token } from './common/types.ts';
3
3
  /**
4
4
  * A transform stream that converts a stream of tokens into a stream of CSV records.
5
+ *
5
6
  * @template Header The type of the header row.
6
- * @param options The options for the parser.
7
+ * @param options - CSV-specific options (header, maxFieldCount, etc.)
8
+ * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: tokens => tokens.length, checkInterval: 10 }`)
9
+ * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1, checkInterval: 10 }`)
7
10
  *
8
11
  * @category Low-level API
9
12
  *
13
+ * @remarks
14
+ * Follows the Web Streams API pattern where queuing strategies are passed as
15
+ * constructor arguments, similar to the standard `TransformStream`.
16
+ *
17
+ * **Default Queuing Strategy:**
18
+ * - Writable side: Counts by number of tokens in each array. Default highWaterMark is 1024 tokens.
19
+ * - Readable side: Counts each record as 1. Default highWaterMark is 256 records.
20
+ *
21
+ * **Backpressure Handling:**
22
+ * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure
23
+ * is detected (desiredSize ≤ 0). This prevents blocking the main thread during heavy processing
24
+ * and allows the downstream consumer to catch up.
25
+ *
26
+ * These defaults are starting points based on data flow characteristics, not empirical benchmarks.
27
+ * Optimal values depend on your runtime environment, data size, and performance requirements.
28
+ *
10
29
  * @example Parse a CSV with headers by data
11
30
  * ```ts
12
31
  * new ReadableStream({
@@ -42,8 +61,35 @@ import { CSVRecord, CSVRecordAssemblerOptions, Token } from './common/types.ts';
42
61
  * // { name: "Bob", age: "25" }
43
62
  * // { name: "Charlie", age: "30" }
44
63
  * ```
64
+ *
65
+ * @example Custom queuing strategies with backpressure tuning
66
+ * ```ts
67
+ * const transformer = new CSVRecordAssemblerTransformer(
68
+ * {},
69
+ * {
70
+ * highWaterMark: 2048, // 2048 tokens
71
+ * size: (tokens) => tokens.length, // Count by token count
72
+ * checkInterval: 20 // Check backpressure every 20 records
73
+ * },
74
+ * {
75
+ * highWaterMark: 512, // 512 records
76
+ * size: () => 1, // Each record counts as 1
77
+ * checkInterval: 5 // Check backpressure every 5 records
78
+ * }
79
+ * );
80
+ *
81
+ * await tokenStream
82
+ * .pipeThrough(transformer)
83
+ * .pipeTo(yourRecordProcessor);
84
+ * ```
45
85
  */
46
86
  export declare class CSVRecordAssemblerTransformer<Header extends ReadonlyArray<string>> extends TransformStream<Token[], CSVRecord<Header>> {
47
87
  readonly assembler: CSVRecordAssembler<Header>;
48
- constructor(options?: CSVRecordAssemblerOptions<Header>);
88
+ /**
89
+ * Yields to the event loop to allow backpressure handling.
90
+ * Can be overridden for testing purposes.
91
+ * @internal
92
+ */
93
+ protected yieldToEventLoop(): Promise<void>;
94
+ constructor(options?: CSVRecordAssemblerOptions<Header>, writableStrategy?: ExtendedQueuingStrategy<Token[]>, readableStrategy?: ExtendedQueuingStrategy<CSVRecord<Header>>);
49
95
  }