web-csv-toolbox 0.14.0-next-386eebeaafe5857e28c876345c14c9fe5f1a3774 → 0.14.0-next-978b88933762ecc27270ce746b80a3fa7ed8c4f7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +28 -48
  2. package/dist/CSVLexer.js +8 -5
  3. package/dist/CSVLexer.js.map +1 -1
  4. package/dist/CSVLexerTransformer.d.ts +10 -12
  5. package/dist/CSVLexerTransformer.js +12 -16
  6. package/dist/CSVLexerTransformer.js.map +1 -1
  7. package/dist/CSVRecordAssembler.js +14 -4
  8. package/dist/CSVRecordAssembler.js.map +1 -1
  9. package/dist/CSVRecordAssemblerTransformer.d.ts +8 -14
  10. package/dist/CSVRecordAssemblerTransformer.js +10 -16
  11. package/dist/CSVRecordAssemblerTransformer.js.map +1 -1
  12. package/dist/assertCommonOptions.d.ts +1 -1
  13. package/dist/assertCommonOptions.js.map +1 -1
  14. package/dist/common/errors.d.ts +32 -0
  15. package/dist/common/errors.js +18 -0
  16. package/dist/common/errors.js.map +1 -1
  17. package/dist/common/types.d.ts +292 -66
  18. package/dist/constants.d.ts +12 -0
  19. package/dist/constants.js +2 -1
  20. package/dist/constants.js.map +1 -1
  21. package/dist/execution/EnginePresets.d.ts +52 -12
  22. package/dist/execution/EnginePresets.js +1 -1
  23. package/dist/execution/EnginePresets.js.map +1 -1
  24. package/dist/execution/InternalEngineConfig.js +40 -18
  25. package/dist/execution/InternalEngineConfig.js.map +1 -1
  26. package/dist/execution/worker/parseBinaryInWorker.node.js +3 -4
  27. package/dist/execution/worker/parseBinaryInWorker.node.js.map +1 -1
  28. package/dist/execution/worker/parseBinaryInWorker.web.js +3 -4
  29. package/dist/execution/worker/parseBinaryInWorker.web.js.map +1 -1
  30. package/dist/execution/worker/parseBinaryInWorkerWASM.node.js +3 -4
  31. package/dist/execution/worker/parseBinaryInWorkerWASM.node.js.map +1 -1
  32. package/dist/execution/worker/parseBinaryInWorkerWASM.web.js +3 -4
  33. package/dist/execution/worker/parseBinaryInWorkerWASM.web.js.map +1 -1
  34. package/dist/execution/worker/parseStreamInWorker.node.js +3 -4
  35. package/dist/execution/worker/parseStreamInWorker.node.js.map +1 -1
  36. package/dist/execution/worker/parseStreamInWorker.web.js +3 -4
  37. package/dist/execution/worker/parseStreamInWorker.web.js.map +1 -1
  38. package/dist/execution/worker/parseStringInWorker.node.js +3 -4
  39. package/dist/execution/worker/parseStringInWorker.node.js.map +1 -1
  40. package/dist/execution/worker/parseStringInWorker.web.js +3 -4
  41. package/dist/execution/worker/parseStringInWorker.web.js.map +1 -1
  42. package/dist/execution/worker/parseStringInWorkerWASM.node.js +3 -4
  43. package/dist/execution/worker/parseStringInWorkerWASM.node.js.map +1 -1
  44. package/dist/execution/worker/parseStringInWorkerWASM.web.js +3 -4
  45. package/dist/execution/worker/parseStringInWorkerWASM.web.js.map +1 -1
  46. package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js +3 -4
  47. package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js.map +1 -1
  48. package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js +3 -4
  49. package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js.map +1 -1
  50. package/dist/getCharsetValidation.constants.node.d.ts +11 -0
  51. package/dist/getCharsetValidation.constants.node.js +53 -0
  52. package/dist/getCharsetValidation.constants.node.js.map +1 -0
  53. package/dist/getCharsetValidation.constants.web.d.ts +36 -0
  54. package/dist/getCharsetValidation.constants.web.js +53 -0
  55. package/dist/getCharsetValidation.constants.web.js.map +1 -0
  56. package/dist/getOptionsFromFile.d.ts +14 -0
  57. package/dist/getOptionsFromFile.js +12 -0
  58. package/dist/getOptionsFromFile.js.map +1 -0
  59. package/dist/getOptionsFromResponse.js +17 -1
  60. package/dist/getOptionsFromResponse.js.map +1 -1
  61. package/dist/parseBlob.js +9 -1
  62. package/dist/parseBlob.js.map +1 -1
  63. package/dist/parseFile.d.ts +3 -2
  64. package/dist/parseFile.js +7 -3
  65. package/dist/parseFile.js.map +1 -1
  66. package/dist/parseFileToArray.d.ts +27 -0
  67. package/dist/parseFileToArray.js +12 -0
  68. package/dist/parseFileToArray.js.map +1 -0
  69. package/dist/parseFileToStream.d.ts +33 -0
  70. package/dist/parseFileToStream.js +10 -0
  71. package/dist/parseFileToStream.js.map +1 -0
  72. package/dist/utils/convertBinaryToString.js +17 -4
  73. package/dist/utils/convertBinaryToString.js.map +1 -1
  74. package/dist/utils/parseMime.js +3 -1
  75. package/dist/utils/parseMime.js.map +1 -1
  76. package/dist/utils/types.d.ts +21 -10
  77. package/dist/web-csv-toolbox.d.ts +3 -0
  78. package/dist/web-csv-toolbox.js +3 -0
  79. package/dist/web-csv-toolbox.js.map +1 -1
  80. package/package.json +7 -1
@@ -1 +1 @@
1
- {"version":3,"file":"CSVRecordAssemblerTransformer.js","sources":["../src/CSVRecordAssemblerTransformer.ts"],"sourcesContent":["import { CSVRecordAssembler } from \"./CSVRecordAssembler.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n ExtendedQueuingStrategy,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * A transform stream that converts a stream of tokens into a stream of CSV records.\n *\n * @template Header The type of the header row.\n * @param options - CSV-specific options (header, maxFieldCount, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: () => 1, checkInterval: 10 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1, checkInterval: 10 }`)\n *\n * @category Low-level API\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts each token as 1. Default highWaterMark is 1024 tokens.\n * - Readable side: Counts each record as 1. Default highWaterMark is 256 records.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Parse a CSV with headers by data\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer())\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Parse a CSV with headers by options\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer({ header: [\"name\", \"age\"] }))\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVRecordAssemblerTransformer(\n * {},\n * {\n * highWaterMark: 2048, // 2048 tokens\n * size: () => 1, // Each token counts as 1\n * checkInterval: 20 // Check backpressure every 20 records\n * },\n * {\n * highWaterMark: 512, // 512 records\n * size: () => 1, // Each record counts as 1\n * checkInterval: 5 // Check backpressure every 5 records\n * }\n * 
);\n *\n * await tokenStream\n * .pipeThrough(transformer)\n * .pipeTo(yourRecordProcessor);\n * ```\n */\nexport class CSVRecordAssemblerTransformer<\n Header extends ReadonlyArray<string>,\n> extends TransformStream<Token, CSVRecord<Header>> {\n public readonly assembler: CSVRecordAssembler<Header>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVRecordAssemblerOptions<Header> = {},\n writableStrategy: ExtendedQueuingStrategy<Token> = {\n highWaterMark: 1024, // 1024 tokens\n size: () => 1, // Each token counts as 1\n checkInterval: 10, // Check backpressure every 10 records\n },\n readableStrategy: ExtendedQueuingStrategy<CSVRecord<Header>> = {\n highWaterMark: 256, // 256 records\n size: () => 1, // Each record counts as 1\n checkInterval: 10, // Check backpressure every 10 records\n },\n ) {\n const assembler = new CSVRecordAssembler(options);\n const checkInterval =\n writableStrategy.checkInterval ?? readableStrategy.checkInterval ?? 
10;\n\n super(\n {\n transform: async (token, controller) => {\n try {\n let recordCount = 0;\n // Pass single token directly to assemble (no array creation)\n for (const record of assembler.assemble(token, { stream: true })) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n flush: async (controller) => {\n try {\n let recordCount = 0;\n // Call assemble without arguments to flush\n for (const record of assembler.assemble()) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.assembler = assembler;\n 
}\n}\n"],"names":[],"mappings":";;AA2FO,MAAM,sCAEH,eAAA,CAA0C;AAAA,EAClC,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOhB,MAAgB,gBAAA,GAAkC;AAChD,IAAA,MAAM,IAAI,OAAA,CAAQ,CAAC,YAAY,UAAA,CAAW,OAAA,EAAS,CAAC,CAAC,CAAA;AAAA,EACvD;AAAA,EAEA,WAAA,CACE,OAAA,GAA6C,EAAC,EAC9C,gBAAA,GAAmD;AAAA,IACjD,aAAA,EAAe,IAAA;AAAA;AAAA,IACf,MAAM,MAAM,CAAA;AAAA;AAAA,IACZ,aAAA,EAAe;AAAA;AAAA,KAEjB,gBAAA,GAA+D;AAAA,IAC7D,aAAA,EAAe,GAAA;AAAA;AAAA,IACf,MAAM,MAAM,CAAA;AAAA;AAAA,IACZ,aAAA,EAAe;AAAA;AAAA,GACjB,EACA;AACA,IAAA,MAAM,SAAA,GAAY,IAAI,kBAAA,CAAmB,OAAO,CAAA;AAChD,IAAA,MAAM,aAAA,GACJ,gBAAA,CAAiB,aAAA,IAAiB,gBAAA,CAAiB,aAAA,IAAiB,EAAA;AAEtE,IAAA,KAAA;AAAA,MACE;AAAA,QACE,SAAA,EAAW,OAAO,KAAA,EAAO,UAAA,KAAe;AACtC,UAAA,IAAI;AACF,YAAA,IAAI,WAAA,GAAc,CAAA;AAElB,YAAA,KAAA,MAAW,MAAA,IAAU,UAAU,QAAA,CAAS,KAAA,EAAO,EAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAG;AAChE,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AACzB,cAAA,WAAA,EAAA;AAGA,cAAA,IACE,WAAA,GAAc,kBAAkB,CAAA,IAChC,UAAA,CAAW,gBAAgB,IAAA,IAC3B,UAAA,CAAW,eAAe,CAAA,EAC1B;AAEA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF,CAAA;AAAA,QACA,KAAA,EAAO,OAAO,UAAA,KAAe;AAC3B,UAAA,IAAI;AACF,YAAA,IAAI,WAAA,GAAc,CAAA;AAElB,YAAA,KAAA,MAAW,MAAA,IAAU,SAAA,CAAU,QAAA,EAAS,EAAG;AACzC,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AACzB,cAAA,WAAA,EAAA;AAGA,cAAA,IACE,WAAA,GAAc,kBAAkB,CAAA,IAChC,UAAA,CAAW,gBAAgB,IAAA,IAC3B,UAAA,CAAW,eAAe,CAAA,EAC1B;AACA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF;AAAA,OACF;AAAA,MACA,gBAAA;AAAA,MACA;AAAA,KACF;AACA,IAAA,IAAA,CAAK,SAAA,GAAY,SAAA;AAAA,EACnB;AACF;;;;"}
1
+ {"version":3,"file":"CSVRecordAssemblerTransformer.js","sources":["../src/CSVRecordAssemblerTransformer.ts"],"sourcesContent":["import { CSVRecordAssembler } from \"./CSVRecordAssembler.ts\";\nimport type {\n CSVRecord,\n CSVRecordAssemblerOptions,\n Token,\n} from \"./common/types.ts\";\n\n/**\n * Default queuing strategy for the writable side (token input).\n * @internal\n */\nconst DEFAULT_WRITABLE_STRATEGY = new CountQueuingStrategy({\n highWaterMark: 1024, // 1024 tokens\n});\n\n/**\n * Default queuing strategy for the readable side (record output).\n * @internal\n */\nconst DEFAULT_READABLE_STRATEGY = new CountQueuingStrategy({\n highWaterMark: 256, // 256 records\n});\n\n/**\n * A transform stream that converts a stream of tokens into a stream of CSV records.\n *\n * @template Header The type of the header row.\n * @param options - CSV-specific options (header, maxFieldCount, checkInterval, etc.)\n * @param writableStrategy - Strategy for the writable side (default: `{ highWaterMark: 1024, size: () => 1 }`)\n * @param readableStrategy - Strategy for the readable side (default: `{ highWaterMark: 256, size: () => 1 }`)\n *\n * @category Low-level API\n *\n * @remarks\n * Follows the Web Streams API pattern where queuing strategies are passed as\n * constructor arguments, similar to the standard `TransformStream`.\n *\n * **Default Queuing Strategy:**\n * - Writable side: Counts each token as 1. Default highWaterMark is 1024 tokens.\n * - Readable side: Counts each record as 1. Default highWaterMark is 256 records.\n *\n * **Backpressure Handling:**\n * The transformer monitors `controller.desiredSize` and yields to the event loop when backpressure\n * is detected (desiredSize ≤ 0). 
This prevents blocking the main thread during heavy processing\n * and allows the downstream consumer to catch up.\n *\n * These defaults are starting points based on data flow characteristics, not empirical benchmarks.\n * Optimal values depend on your runtime environment, data size, and performance requirements.\n *\n * @example Parse a CSV with headers by data\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"name,age\\r\\n\");\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer())\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Parse a CSV with headers by options\n * ```ts\n * new ReadableStream({\n * start(controller) {\n * controller.enqueue(\"Alice,20\\r\\n\");\n * controller.enqueue(\"Bob,25\\r\\n\");\n * controller.enqueue(\"Charlie,30\\r\\n\");\n * controller.close();\n * }\n * })\n * .pipeThrough(new CSVLexerTransformer())\n * .pipeThrough(new CSVRecordAssemblerTransformer({ header: [\"name\", \"age\"] }))\n * .pipeTo(new WritableStream({ write(row) { console.log(row); }}));\n * // { name: \"Alice\", age: \"20\" }\n * // { name: \"Bob\", age: \"25\" }\n * // { name: \"Charlie\", age: \"30\" }\n * ```\n *\n * @example Custom queuing strategies with backpressure tuning\n * ```ts\n * const transformer = new CSVRecordAssemblerTransformer(\n * {\n * backpressureCheckInterval: 20 // Check backpressure every 20 records\n * },\n * new CountQueuingStrategy({ highWaterMark: 2048 }), // 2048 tokens\n * new CountQueuingStrategy({ highWaterMark: 512 }) // 512 records\n * );\n *\n * await tokenStream\n * .pipeThrough(transformer)\n * .pipeTo(yourRecordProcessor);\n * 
```\n */\nexport class CSVRecordAssemblerTransformer<\n Header extends ReadonlyArray<string>,\n> extends TransformStream<Token, CSVRecord<Header>> {\n public readonly assembler: CSVRecordAssembler<Header>;\n\n /**\n * Yields to the event loop to allow backpressure handling.\n * Can be overridden for testing purposes.\n * @internal\n */\n protected async yieldToEventLoop(): Promise<void> {\n await new Promise((resolve) => setTimeout(resolve, 0));\n }\n\n constructor(\n options: CSVRecordAssemblerOptions<Header> = {},\n writableStrategy: QueuingStrategy<Token> = DEFAULT_WRITABLE_STRATEGY,\n readableStrategy: QueuingStrategy<\n CSVRecord<Header>\n > = DEFAULT_READABLE_STRATEGY,\n ) {\n const assembler = new CSVRecordAssembler(options);\n const checkInterval = options.backpressureCheckInterval ?? 10;\n\n super(\n {\n transform: async (token, controller) => {\n try {\n let recordCount = 0;\n // Pass single token directly to assemble (no array creation)\n for (const record of assembler.assemble(token, { stream: true })) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n // Yield to event loop when backpressure is detected\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n flush: async (controller) => {\n try {\n let recordCount = 0;\n // Call assemble without arguments to flush\n for (const record of assembler.assemble()) {\n controller.enqueue(record);\n recordCount++;\n\n // Check backpressure periodically based on checkInterval\n if (\n recordCount % checkInterval === 0 &&\n controller.desiredSize !== null &&\n controller.desiredSize <= 0\n ) {\n await this.yieldToEventLoop();\n }\n }\n } catch (error) {\n controller.error(error);\n }\n },\n },\n writableStrategy,\n readableStrategy,\n );\n this.assembler = assembler;\n 
}\n}\n"],"names":[],"mappings":";;AAWA,MAAM,yBAAA,GAA4B,IAAI,oBAAA,CAAqB;AAAA,EACzD,aAAA,EAAe;AAAA;AACjB,CAAC,CAAA;AAMD,MAAM,yBAAA,GAA4B,IAAI,oBAAA,CAAqB;AAAA,EACzD,aAAA,EAAe;AAAA;AACjB,CAAC,CAAA;AA+EM,MAAM,sCAEH,eAAA,CAA0C;AAAA,EAClC,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOhB,MAAgB,gBAAA,GAAkC;AAChD,IAAA,MAAM,IAAI,OAAA,CAAQ,CAAC,YAAY,UAAA,CAAW,OAAA,EAAS,CAAC,CAAC,CAAA;AAAA,EACvD;AAAA,EAEA,YACE,OAAA,GAA6C,IAC7C,gBAAA,GAA2C,yBAAA,EAC3C,mBAEI,yBAAA,EACJ;AACA,IAAA,MAAM,SAAA,GAAY,IAAI,kBAAA,CAAmB,OAAO,CAAA;AAChD,IAAA,MAAM,aAAA,GAAgB,QAAQ,yBAAA,IAA6B,EAAA;AAE3D,IAAA,KAAA;AAAA,MACE;AAAA,QACE,SAAA,EAAW,OAAO,KAAA,EAAO,UAAA,KAAe;AACtC,UAAA,IAAI;AACF,YAAA,IAAI,WAAA,GAAc,CAAA;AAElB,YAAA,KAAA,MAAW,MAAA,IAAU,UAAU,QAAA,CAAS,KAAA,EAAO,EAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAG;AAChE,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AACzB,cAAA,WAAA,EAAA;AAGA,cAAA,IACE,WAAA,GAAc,kBAAkB,CAAA,IAChC,UAAA,CAAW,gBAAgB,IAAA,IAC3B,UAAA,CAAW,eAAe,CAAA,EAC1B;AAEA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF,CAAA;AAAA,QACA,KAAA,EAAO,OAAO,UAAA,KAAe;AAC3B,UAAA,IAAI;AACF,YAAA,IAAI,WAAA,GAAc,CAAA;AAElB,YAAA,KAAA,MAAW,MAAA,IAAU,SAAA,CAAU,QAAA,EAAS,EAAG;AACzC,cAAA,UAAA,CAAW,QAAQ,MAAM,CAAA;AACzB,cAAA,WAAA,EAAA;AAGA,cAAA,IACE,WAAA,GAAc,kBAAkB,CAAA,IAChC,UAAA,CAAW,gBAAgB,IAAA,IAC3B,UAAA,CAAW,eAAe,CAAA,EAC1B;AACA,gBAAA,MAAM,KAAK,gBAAA,EAAiB;AAAA,cAC9B;AAAA,YACF;AAAA,UACF,SAAS,KAAA,EAAO;AACd,YAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,UACxB;AAAA,QACF;AAAA,OACF;AAAA,MACA,gBAAA;AAAA,MACA;AAAA,KACF;AACA,IAAA,IAAA,CAAK,SAAA,GAAY,SAAA;AAAA,EACnB;AACF;;;;"}
@@ -18,4 +18,4 @@ import { CommonOptions } from './common/types.ts';
18
18
  * @throws {RangeError} If any required property is missing or if the delimiter is the same as the quotation.
19
19
  * @throws {TypeError} If any required property is not a string.
20
20
  */
21
- export declare function assertCommonOptions<Delimiter extends string, Quotation extends string>(options: Required<CommonOptions<Delimiter, Quotation>>): asserts options is Required<CommonOptions<Delimiter, Quotation>>;
21
+ export declare function assertCommonOptions<Delimiter extends string, Quotation extends string>(options: Required<Omit<CommonOptions<Delimiter, Quotation>, "source">>): asserts options is Required<Omit<CommonOptions<Delimiter, Quotation>, "source">>;
@@ -1 +1 @@
1
- {"version":3,"file":"assertCommonOptions.js","sources":["../src/assertCommonOptions.ts"],"sourcesContent":["import type { CommonOptions } from \"./common/types.ts\";\nimport { CR, LF } from \"./constants.ts\";\n\n/**\n * Asserts that the provided value is a string and satisfies certain conditions.\n * @param value - The value to be checked.\n * @param name - The name of the option.\n * @throws {RangeError} If the value is empty, longer than 1 byte, or includes CR or LF.\n * @throws {TypeError} If the value is not a string.\n */\nfunction assertOptionValue(\n value: string,\n name: string,\n): asserts value is string {\n if (typeof value === \"string\") {\n switch (true) {\n case value.length === 0:\n throw new RangeError(`${name} must not be empty`);\n case value.length > 1:\n throw new RangeError(`${name} must be a single character`);\n case value === LF:\n case value === CR:\n throw new RangeError(`${name} must not include CR or LF`);\n default:\n break;\n }\n } else {\n throw new TypeError(`${name} must be a string`);\n }\n}\n\n/**\n * Asserts that the provided options object contains all the required properties.\n * Throws an error if any required property is missing\n * or if the delimiter and quotation length is not 1 byte character,\n * or if the delimiter is the same as the quotation.\n *\n * @example\n *\n * ```ts\n * assertCommonOptions({\n * quotation: '\"',\n * delimiter: ',',\n * });\n * ```\n *\n * @param options - The options object to be validated.\n * @throws {RangeError} If any required property is missing or if the delimiter is the same as the quotation.\n * @throws {TypeError} If any required property is not a string.\n */\nexport function assertCommonOptions<\n Delimiter extends string,\n Quotation extends string,\n>(\n options: Required<CommonOptions<Delimiter, Quotation>>,\n): asserts options is Required<CommonOptions<Delimiter, Quotation>> {\n for (const name of [\"delimiter\", \"quotation\"] as const) {\n assertOptionValue(options[name], 
name);\n }\n // @ts-ignore: TS doesn't understand that the values are strings\n if (options.delimiter === options.quotation) {\n throw new RangeError(\n \"delimiter must not be the same as quotation, use different characters\",\n );\n }\n\n // Validate maxBufferSize\n const mbs = options.maxBufferSize;\n if (\n !(Number.isFinite(mbs) || mbs === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mbs) && (mbs < 1 || !Number.isInteger(mbs)))\n ) {\n throw new RangeError(\n \"maxBufferSize must be a positive integer (in characters) or Number.POSITIVE_INFINITY\",\n );\n }\n}\n"],"names":[],"mappings":";;AAUA,SAAS,iBAAA,CACP,OACA,IAAA,EACyB;AACzB,EAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,IAAA,QAAQ,IAAA;AAAM,MACZ,KAAK,MAAM,MAAA,KAAW,CAAA;AACpB,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,kBAAA,CAAoB,CAAA;AAAA,MAClD,KAAK,MAAM,MAAA,GAAS,CAAA;AAClB,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,2BAAA,CAA6B,CAAA;AAAA,MAC3D,KAAK,KAAA,KAAU,EAAA;AAAA,MACf,KAAK,KAAA,KAAU,EAAA;AACb,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,0BAAA,CAA4B,CAAA;AAExD;AACJ,EACF,CAAA,MAAO;AACL,IAAA,MAAM,IAAI,SAAA,CAAU,CAAA,EAAG,IAAI,CAAA,iBAAA,CAAmB,CAAA;AAAA,EAChD;AACF;AAqBO,SAAS,oBAId,OAAA,EACkE;AAClE,EAAA,KAAA,MAAW,IAAA,IAAQ,CAAC,WAAA,EAAa,WAAW,CAAA,EAAY;AACtD,IAAA,iBAAA,CAAkB,OAAA,CAAQ,IAAI,CAAA,EAAG,IAAI,CAAA;AAAA,EACvC;AAEA,EAAA,IAAI,OAAA,CAAQ,SAAA,KAAc,OAAA,CAAQ,SAAA,EAAW;AAC3C,IAAA,MAAM,IAAI,UAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAGA,EAAA,MAAM,MAAM,OAAA,CAAQ,aAAA;AACpB,EAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,IAAA,MAAM,IAAI,UAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AACF;;;;"}
1
+ {"version":3,"file":"assertCommonOptions.js","sources":["../src/assertCommonOptions.ts"],"sourcesContent":["import type { CommonOptions } from \"./common/types.ts\";\nimport { CR, LF } from \"./constants.ts\";\n\n/**\n * Asserts that the provided value is a string and satisfies certain conditions.\n * @param value - The value to be checked.\n * @param name - The name of the option.\n * @throws {RangeError} If the value is empty, longer than 1 byte, or includes CR or LF.\n * @throws {TypeError} If the value is not a string.\n */\nfunction assertOptionValue(\n value: string,\n name: string,\n): asserts value is string {\n if (typeof value === \"string\") {\n switch (true) {\n case value.length === 0:\n throw new RangeError(`${name} must not be empty`);\n case value.length > 1:\n throw new RangeError(`${name} must be a single character`);\n case value === LF:\n case value === CR:\n throw new RangeError(`${name} must not include CR or LF`);\n default:\n break;\n }\n } else {\n throw new TypeError(`${name} must be a string`);\n }\n}\n\n/**\n * Asserts that the provided options object contains all the required properties.\n * Throws an error if any required property is missing\n * or if the delimiter and quotation length is not 1 byte character,\n * or if the delimiter is the same as the quotation.\n *\n * @example\n *\n * ```ts\n * assertCommonOptions({\n * quotation: '\"',\n * delimiter: ',',\n * });\n * ```\n *\n * @param options - The options object to be validated.\n * @throws {RangeError} If any required property is missing or if the delimiter is the same as the quotation.\n * @throws {TypeError} If any required property is not a string.\n */\nexport function assertCommonOptions<\n Delimiter extends string,\n Quotation extends string,\n>(\n options: Required<Omit<CommonOptions<Delimiter, Quotation>, \"source\">>,\n): asserts options is Required<\n Omit<CommonOptions<Delimiter, Quotation>, \"source\">\n> {\n for (const name of [\"delimiter\", \"quotation\"] as 
const) {\n assertOptionValue(options[name], name);\n }\n // @ts-ignore: TS doesn't understand that the values are strings\n if (options.delimiter === options.quotation) {\n throw new RangeError(\n \"delimiter must not be the same as quotation, use different characters\",\n );\n }\n\n // Validate maxBufferSize\n const mbs = options.maxBufferSize;\n if (\n !(Number.isFinite(mbs) || mbs === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mbs) && (mbs < 1 || !Number.isInteger(mbs)))\n ) {\n throw new RangeError(\n \"maxBufferSize must be a positive integer (in characters) or Number.POSITIVE_INFINITY\",\n );\n }\n}\n"],"names":[],"mappings":";;AAUA,SAAS,iBAAA,CACP,OACA,IAAA,EACyB;AACzB,EAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,IAAA,QAAQ,IAAA;AAAM,MACZ,KAAK,MAAM,MAAA,KAAW,CAAA;AACpB,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,kBAAA,CAAoB,CAAA;AAAA,MAClD,KAAK,MAAM,MAAA,GAAS,CAAA;AAClB,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,2BAAA,CAA6B,CAAA;AAAA,MAC3D,KAAK,KAAA,KAAU,EAAA;AAAA,MACf,KAAK,KAAA,KAAU,EAAA;AACb,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,0BAAA,CAA4B,CAAA;AAExD;AACJ,EACF,CAAA,MAAO;AACL,IAAA,MAAM,IAAI,SAAA,CAAU,CAAA,EAAG,IAAI,CAAA,iBAAA,CAAmB,CAAA;AAAA,EAChD;AACF;AAqBO,SAAS,oBAId,OAAA,EAGA;AACA,EAAA,KAAA,MAAW,IAAA,IAAQ,CAAC,WAAA,EAAa,WAAW,CAAA,EAAY;AACtD,IAAA,iBAAA,CAAkB,OAAA,CAAQ,IAAI,CAAA,EAAG,IAAI,CAAA;AAAA,EACvC;AAEA,EAAA,IAAI,OAAA,CAAQ,SAAA,KAAc,OAAA,CAAQ,SAAA,EAAW;AAC3C,IAAA,MAAM,IAAI,UAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAGA,EAAA,MAAM,MAAM,OAAA,CAAQ,aAAA;AACpB,EAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,IAAA,MAAM,IAAI,UAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AACF;;;;"}
@@ -7,6 +7,22 @@ export interface ParseErrorOptions extends ErrorOptions {
7
7
  * The position where the error occurred.
8
8
  */
9
9
  position?: Position;
10
+ /**
11
+ * The row number where the error occurred.
12
+ *
13
+ * @remarks
14
+ * This represents the logical CSV row number (includes header if present),
15
+ * useful for error reporting to users.
16
+ */
17
+ rowNumber?: number;
18
+ /**
19
+ * Source identifier (e.g., filename) for error reporting.
20
+ *
21
+ * @remarks
22
+ * A human-readable identifier for the CSV source to help locate
23
+ * which file or stream caused the error.
24
+ */
25
+ source?: string;
10
26
  }
11
27
  /**
12
28
  * Error class for parse errors.
@@ -23,5 +39,21 @@ export declare class ParseError extends SyntaxError {
23
39
  * The position where the error occurred.
24
40
  */
25
41
  position?: Position;
42
+ /**
43
+ * The row number where the error occurred.
44
+ *
45
+ * @remarks
46
+ * This represents the logical CSV row number (includes header if present),
47
+ * useful for error reporting to users.
48
+ */
49
+ rowNumber?: number;
50
+ /**
51
+ * Source identifier (e.g., filename) for error reporting.
52
+ *
53
+ * @remarks
54
+ * A human-readable identifier for the CSV source to help locate
55
+ * which file or stream caused the error.
56
+ */
57
+ source?: string;
26
58
  constructor(message?: string, options?: ParseErrorOptions);
27
59
  }
@@ -3,10 +3,28 @@ class ParseError extends SyntaxError {
3
3
  * The position where the error occurred.
4
4
  */
5
5
  position;
6
+ /**
7
+ * The row number where the error occurred.
8
+ *
9
+ * @remarks
10
+ * This represents the logical CSV row number (includes header if present),
11
+ * useful for error reporting to users.
12
+ */
13
+ rowNumber;
14
+ /**
15
+ * Source identifier (e.g., filename) for error reporting.
16
+ *
17
+ * @remarks
18
+ * A human-readable identifier for the CSV source to help locate
19
+ * which file or stream caused the error.
20
+ */
21
+ source;
6
22
  constructor(message, options) {
7
23
  super(message, { cause: options?.cause });
8
24
  this.name = "ParseError";
9
25
  this.position = options?.position;
26
+ this.rowNumber = options?.rowNumber;
27
+ this.source = options?.source;
10
28
  }
11
29
  }
12
30
 
@@ -1 +1 @@
1
- {"version":3,"file":"errors.js","sources":["../../src/common/errors.ts"],"sourcesContent":["import type { Position } from \"./types.js\";\n\n/**\n * Options for creating a parse error.\n */\nexport interface ParseErrorOptions extends ErrorOptions {\n /**\n * The position where the error occurred.\n */\n position?: Position;\n}\n\n/**\n * Error class for parse errors.\n *\n * @remarks\n * This error is thrown when a parsing error occurs.\n * {@link ParseError} is a subclass of {@link !SyntaxError}.\n *\n * This is in reference to the specification\n * that the error thrown when a parse error occurs in the {@link !JSON.parse} function is {@link !SyntaxError}.\n */\nexport class ParseError extends SyntaxError {\n /**\n * The position where the error occurred.\n */\n public position?: Position;\n\n constructor(message?: string, options?: ParseErrorOptions) {\n super(message, { cause: options?.cause });\n this.name = \"ParseError\";\n this.position = options?.position;\n }\n}\n"],"names":[],"mappings":"AAsBO,MAAM,mBAAmB,WAAA,CAAY;AAAA;AAAA;AAAA;AAAA,EAInC,QAAA;AAAA,EAEP,WAAA,CAAY,SAAkB,OAAA,EAA6B;AACzD,IAAA,KAAA,CAAM,OAAA,EAAS,EAAE,KAAA,EAAO,OAAA,EAAS,OAAO,CAAA;AACxC,IAAA,IAAA,CAAK,IAAA,GAAO,YAAA;AACZ,IAAA,IAAA,CAAK,WAAW,OAAA,EAAS,QAAA;AAAA,EAC3B;AACF;;;;"}
1
+ {"version":3,"file":"errors.js","sources":["../../src/common/errors.ts"],"sourcesContent":["import type { Position } from \"./types.js\";\n\n/**\n * Options for creating a parse error.\n */\nexport interface ParseErrorOptions extends ErrorOptions {\n /**\n * The position where the error occurred.\n */\n position?: Position;\n /**\n * The row number where the error occurred.\n *\n * @remarks\n * This represents the logical CSV row number (includes header if present),\n * useful for error reporting to users.\n */\n rowNumber?: number;\n /**\n * Source identifier (e.g., filename) for error reporting.\n *\n * @remarks\n * A human-readable identifier for the CSV source to help locate\n * which file or stream caused the error.\n */\n source?: string;\n}\n\n/**\n * Error class for parse errors.\n *\n * @remarks\n * This error is thrown when a parsing error occurs.\n * {@link ParseError} is a subclass of {@link !SyntaxError}.\n *\n * This is in reference to the specification\n * that the error thrown when a parse error occurs in the {@link !JSON.parse} function is {@link !SyntaxError}.\n */\nexport class ParseError extends SyntaxError {\n /**\n * The position where the error occurred.\n */\n public position?: Position;\n /**\n * The row number where the error occurred.\n *\n * @remarks\n * This represents the logical CSV row number (includes header if present),\n * useful for error reporting to users.\n */\n public rowNumber?: number;\n /**\n * Source identifier (e.g., filename) for error reporting.\n *\n * @remarks\n * A human-readable identifier for the CSV source to help locate\n * which file or stream caused the error.\n */\n public source?: string;\n\n constructor(message?: string, options?: ParseErrorOptions) {\n super(message, { cause: options?.cause });\n this.name = \"ParseError\";\n this.position = options?.position;\n this.rowNumber = options?.rowNumber;\n this.source = options?.source;\n 
}\n}\n"],"names":[],"mappings":"AAsCO,MAAM,mBAAmB,WAAA,CAAY;AAAA;AAAA;AAAA;AAAA,EAInC,QAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAA;AAAA,EAEP,WAAA,CAAY,SAAkB,OAAA,EAA6B;AACzD,IAAA,KAAA,CAAM,OAAA,EAAS,EAAE,KAAA,EAAO,OAAA,EAAS,OAAO,CAAA;AACxC,IAAA,IAAA,CAAK,IAAA,GAAO,YAAA;AACZ,IAAA,IAAA,CAAK,WAAW,OAAA,EAAS,QAAA;AACzB,IAAA,IAAA,CAAK,YAAY,OAAA,EAAS,SAAA;AAC1B,IAAA,IAAA,CAAK,SAAS,OAAA,EAAS,MAAA;AAAA,EACzB;AACF;;;;"}
@@ -1,5 +1,6 @@
1
1
  import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from '../constants.ts';
2
- import { Join } from '../utils/types.ts';
2
+ import { WorkerPool } from '../execution/worker/helpers/WorkerPool.ts';
3
+ import { JoinCSVFields } from '../utils/types.ts';
3
4
  import { Field, FieldDelimiter, RecordDelimiter } from './constants.ts';
4
5
  /**
5
6
  * Position object.
@@ -34,12 +35,49 @@ export interface TokenLocation {
34
35
  */
35
36
  end: Position;
36
37
  /**
37
- * Row number.
38
+ * Row number in the CSV (includes header if present).
38
39
  * Starts from 1.
39
40
  *
40
41
  * @remarks
41
- * This represents the logical row number in the CSV,
42
- * counting from 1 for the first row, whether it is a header or not.
42
+ * This represents the logical CSV row number, not the physical line number.
43
+ * A single CSV row may span multiple lines if fields contain newline
44
+ * characters within quotes.
45
+ *
46
+ * **Important distinction**:
47
+ * - `line`: Physical line number (incremented by `\n` characters)
48
+ * - `rowNumber`: Logical CSV row (incremented by record delimiters)
49
+ *
50
+ * The header row (if present) is counted as row 1. `rowNumber` is therefore the
51
+ * record's 1-based ordinal position in the file, which helps locate the record; it matches the editor line number only when no earlier record spans multiple lines.
52
+ *
53
+ * For physical line numbers, use `start.line` or `end.line`.
54
+ *
55
+ * **Primary use case**: Error reporting. This field allows errors to be
56
+ * reported with both physical position (`line`, `column`) and logical
57
+ * row context (`rowNumber`), making it easier for users to locate
58
+ * issues in their CSV data.
59
+ *
60
+ * @example
61
+ * ```csv
62
+ * name,description <- rowNumber: 1 (header)
63
+ * Alice,"Lives in
64
+ * New York" <- rowNumber: 2 (spans line 2-3)
65
+ * Bob,"Works" <- rowNumber: 3 (line 4)
66
+ * ```
67
+ * - Header: `rowNumber: 1`
68
+ * - Alice's row: `start.line: 2, end.line: 3, rowNumber: 2`
69
+ * - Bob's row: `start.line: 4, end.line: 4, rowNumber: 3`
70
+ *
71
+ * @example Error reporting
72
+ * ```ts
73
+ * try {
74
+ * await parseString(csv);
75
+ * } catch (error) {
76
+ * if (error instanceof ParseError) {
77
+ * console.error(`Error at row ${error.rowNumber}, line ${error.position?.line}`);
78
+ * }
79
+ * }
80
+ * ```
43
81
  */
44
82
  rowNumber: number;
45
83
  }
@@ -128,11 +166,37 @@ export interface AbortSignalOptions {
128
166
  */
129
167
  signal?: AbortSignal;
130
168
  }
169
+ /**
170
+ * Source identifier option for error reporting.
171
+ * @category Types
172
+ */
173
+ export interface SourceOption {
174
+ /**
175
+ * Source identifier for error reporting (e.g., filename, description).
176
+ *
177
+ * @remarks
178
+ * This option allows you to specify a human-readable identifier for the CSV source
179
+ * that will be included in error messages. This is particularly useful when parsing
180
+ * multiple files or streams to help identify which source caused an error.
181
+ *
182
+ * **Security Note**: Do not include sensitive information (API keys, tokens, full URLs)
183
+ * in this field as it may be exposed in error messages and logs.
184
+ *
185
+ * @example
186
+ * ```ts
187
+ * parseString(csv, { source: "users.csv" });
188
+ * // Error: Field count exceeded at row 5 in "users.csv"
189
+ * ```
190
+ *
191
+ * @default undefined
192
+ */
193
+ source?: string;
194
+ }
131
195
  /**
132
196
  * CSV Common Options.
133
197
  * @category Types
134
198
  */
135
- export interface CommonOptions<Delimiter extends string, Quotation extends string> {
199
+ export interface CommonOptions<Delimiter extends string, Quotation extends string> extends SourceOption {
136
200
  /**
137
201
  * CSV field delimiter.
138
202
  * If you want to parse TSV, specify `'\t'`.
@@ -291,12 +355,64 @@ export interface BinaryOptions {
291
355
  * ```
292
356
  */
293
357
  allowExperimentalCompressions?: boolean;
358
+ /**
359
+ * Allow non-standard character encodings not in the common charset list.
360
+ *
361
+ * @remarks
362
+ * When `true`, charset values from Content-Type headers that are not in the
363
+ * default supported list will be passed to the runtime's TextDecoder without
364
+ * validation. This allows using character encodings that may not be universally
365
+ * supported across all environments.
366
+ *
367
+ * ### Default Supported Charsets (commonly used)
368
+ *
369
+ * When `false` (default), only commonly used charsets are allowed, including:
370
+ * - **UTF**: `utf-8`, `utf-16le`, `utf-16be`
371
+ * - **ISO-8859**: `iso-8859-1` through `iso-8859-16`
372
+ * - **Windows**: `windows-1250` through `windows-1258`
373
+ * - **Asian**: `shift_jis`, `euc-jp`, `gb18030`, `euc-kr`, etc.
374
+ *
375
+ * ### Security Considerations
376
+ *
377
+ * **Use with caution**: Enabling this bypasses library validation and relies entirely
378
+ * on runtime error handling. Invalid or malicious charset values could cause:
379
+ * - Runtime exceptions from TextDecoder
380
+ * - Unexpected character decoding behavior
381
+ * - Potential security vulnerabilities
382
+ *
383
+ * It's recommended to validate charset values against your expected inputs before
384
+ * enabling this option.
385
+ *
386
+ * @default false
387
+ *
388
+ * @example
389
+ * ```ts
390
+ * // Safe mode (default): Only commonly supported charsets
391
+ * const response = await fetch('data.csv');
392
+ * await parse(response); // charset must be in SUPPORTED_CHARSETS
393
+ *
394
+ * // Allow non-standard charset
395
+ * const customResponse = await fetch('data.csv'); // Content-Type: text/csv; charset=custom-encoding
396
+ * await parse(customResponse, { allowNonStandardCharsets: true });
397
+ * // ⚠️ May throw error if runtime doesn't support the charset
398
+ * ```
399
+ */
400
+ allowNonStandardCharsets?: boolean;
294
401
  }
295
402
  /**
296
403
  * CSV Lexer Transformer Options.
297
404
  * @category Types
298
405
  */
299
406
  export interface CSVLexerTransformerOptions<Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> extends CommonOptions<Delimiter, Quotation>, AbortSignalOptions {
407
+ /**
408
+ * How often to check for backpressure (in number of tokens processed).
409
+ *
410
+ * Lower values = more responsive to backpressure but slight performance overhead.
411
+ * Higher values = less overhead but slower backpressure response.
412
+ *
413
+ * @default 100
414
+ */
415
+ backpressureCheckInterval?: number;
300
416
  }
301
417
  /**
302
418
  * CSV Record Assembler Options.
@@ -309,7 +425,7 @@ export interface CSVLexerTransformerOptions<Delimiter extends string = DEFAULT_D
309
425
  * If you don't specify `header`,
310
426
  * the first record will be treated as a header.
311
427
  */
312
- export interface CSVRecordAssemblerOptions<Header extends ReadonlyArray<string>> extends AbortSignalOptions {
428
+ export interface CSVRecordAssemblerOptions<Header extends ReadonlyArray<string>> extends SourceOption, AbortSignalOptions {
313
429
  /**
314
430
  * CSV header.
315
431
  *
@@ -343,6 +459,15 @@ export interface CSVRecordAssemblerOptions<Header extends ReadonlyArray<string>>
343
459
  * @default false
344
460
  */
345
461
  skipEmptyLines?: boolean;
462
+ /**
463
+ * How often to check for backpressure (in number of records processed).
464
+ *
465
+ * Lower values = more responsive to backpressure but slight performance overhead.
466
+ * Higher values = less overhead but slower backpressure response.
467
+ *
468
+ * @default 10
469
+ */
470
+ backpressureCheckInterval?: number;
346
471
  }
347
472
  /**
348
473
  * Worker communication strategy.
@@ -378,27 +503,168 @@ export interface EngineFallbackInfo {
378
503
  error?: Error;
379
504
  }
380
505
  /**
381
- * Engine configuration for CSV parsing.
506
+ * Backpressure monitoring intervals (count-based).
382
507
  *
383
- * All parsing engine settings are unified in this interface.
508
+ * Controls how frequently the internal parsers check for backpressure
509
+ * during streaming operations, based on the number of tokens/records processed.
384
510
  *
511
+ * @experimental This API may change in future versions based on performance research.
385
512
  * @category Types
386
513
  */
387
- export interface EngineConfig {
514
+ export interface BackpressureCheckInterval {
388
515
  /**
389
- * Execute in Worker thread.
516
+ * Check interval for the lexer stage (number of tokens processed).
517
+ *
518
+ * Lower values provide better responsiveness to backpressure but may have
519
+ * slight performance overhead.
520
+ *
521
+ * @default 100
522
+ */
523
+ lexer?: number;
524
+ /**
525
+ * Check interval for the assembler stage (number of records processed).
526
+ *
527
+ * Lower values provide better responsiveness to backpressure but may have
528
+ * slight performance overhead.
529
+ *
530
+ * @default 10
531
+ */
532
+ assembler?: number;
533
+ }
534
+ /**
535
+ * Internal streaming queuing strategies configuration.
536
+ *
537
+ * Controls the internal queuing behavior of the CSV parser's streaming pipeline.
538
+ * This affects memory usage and backpressure handling for large streaming operations.
539
+ *
540
+ * @remarks
541
+ * The CSV parser uses a two-stage pipeline:
542
+ * 1. **Lexer**: String → Token
543
+ * 2. **Assembler**: Token → CSVRecord
544
+ *
545
+ * Each stage has both writable (input) and readable (output) sides.
546
+ *
547
+ * @experimental This API may change in future versions based on performance research.
548
+ * @category Types
549
+ */
550
+ export interface QueuingStrategyConfig {
551
+ /**
552
+ * Queuing strategy for the lexer's writable side (string input).
553
+ *
554
+ * Controls how string chunks are buffered before being processed by the lexer.
555
+ *
556
+ * @default `{ highWaterMark: 65536 }` (≈64KB of characters)
557
+ */
558
+ lexerWritable?: QueuingStrategy<string>;
559
+ /**
560
+ * Queuing strategy for the lexer's readable side (token output).
561
+ *
562
+ * Controls how tokens are buffered after being produced by the lexer
563
+ * before being consumed by the assembler.
564
+ *
565
+ * @default `{ highWaterMark: 1024 }` (1024 tokens)
566
+ */
567
+ lexerReadable?: QueuingStrategy<Token>;
568
+ /**
569
+ * Queuing strategy for the assembler's writable side (token input).
570
+ *
571
+ * Controls how tokens are buffered before being processed by the assembler.
572
+ * This is the input side of the assembler, receiving tokens from the lexer.
573
+ *
574
+ * @default `{ highWaterMark: 1024 }` (1024 tokens)
575
+ */
576
+ assemblerWritable?: QueuingStrategy<Token>;
577
+ /**
578
+ * Queuing strategy for the assembler's readable side (record output).
579
+ *
580
+ * Controls how CSV records are buffered after being assembled.
581
+ *
582
+ * @default `{ highWaterMark: 256 }` (256 records)
583
+ */
584
+ assemblerReadable?: QueuingStrategy<CSVRecord<any>>;
585
+ }
586
+ /**
587
+ * Base engine configuration shared by all execution modes.
588
+ *
589
+ * @category Types
590
+ */
591
+ interface BaseEngineConfig {
592
+ /**
593
+ * Use WASM implementation.
594
+ *
595
+ * Requires prior initialization with {@link loadWASM}.
390
596
  *
391
597
  * @default false
392
598
  *
393
- * @example Worker execution
599
+ * @example Main thread + WASM
394
600
  * ```ts
395
- * parse(csv, { engine: { worker: true } })
601
+ * import { loadWASM, parse } from 'web-csv-toolbox';
602
+ *
603
+ * await loadWASM();
604
+ * parse(csv, { engine: { wasm: true } })
396
605
  * ```
606
+ *
607
+ * @example Worker + WASM
608
+ * ```ts
609
+ * await loadWASM();
610
+ * parse(csv, { engine: { worker: true, wasm: true } })
611
+ * ```
612
+ */
613
+ wasm?: boolean;
614
+ /**
615
+ * Blob reading strategy threshold (in bytes).
616
+ * Only applicable for `parseBlob()` and `parseFile()`.
617
+ *
618
+ * Determines when to use `blob.arrayBuffer()` vs `blob.stream()`:
619
+ * - Files smaller than threshold: Use `blob.arrayBuffer()` + `parseBinary()`
620
+ * - ✅ Faster for small files
621
+ * - ❌ Loads entire file into memory
622
+ * - Files equal to or larger than threshold: Use `blob.stream()` + `parseUint8ArrayStream()`
623
+ * - ✅ Memory-efficient for large files
624
+ * - ❌ Slight streaming overhead
625
+ *
626
+ * @default 1_048_576 (1MB)
627
+ */
628
+ arrayBufferThreshold?: number;
629
+ /**
630
+ * Backpressure monitoring intervals (count-based: number of tokens/records processed).
631
+ *
632
+ * @default { lexer: 100, assembler: 10 }
633
+ * @experimental
634
+ */
635
+ backpressureCheckInterval?: BackpressureCheckInterval;
636
+ /**
637
+ * Internal streaming queuing strategies.
638
+ *
639
+ * @experimental
640
+ */
641
+ queuingStrategy?: QueuingStrategyConfig;
642
+ }
643
+ /**
644
+ * Engine configuration for main thread execution.
645
+ *
646
+ * @category Types
647
+ */
648
+ export interface MainThreadEngineConfig extends BaseEngineConfig {
649
+ /**
650
+ * Execute in Worker thread.
651
+ *
652
+ * @default false
653
+ */
654
+ worker?: false;
655
+ }
656
+ /**
657
+ * Engine configuration for worker thread execution.
658
+ *
659
+ * @category Types
660
+ */
661
+ export interface WorkerEngineConfig extends BaseEngineConfig {
662
+ /**
663
+ * Execute in Worker thread.
397
664
  */
398
- worker?: boolean;
665
+ worker: true;
399
666
  /**
400
667
  * Custom Worker URL.
401
- * Only applicable when `worker: true`.
402
668
  *
403
669
  * If not provided, uses the bundled worker.
404
670
  *
@@ -418,7 +684,6 @@ export interface EngineConfig {
418
684
  workerURL?: string | URL;
419
685
  /**
420
686
  * Worker pool for managing worker lifecycle.
421
- * Only applicable when `worker: true`.
422
687
  *
423
688
  * When provided, the parsing function will use this pool's worker instance
424
689
  * instead of creating/reusing a module-level singleton worker.
@@ -455,32 +720,9 @@ export interface EngineConfig {
455
720
  * // Worker is reused for both operations
456
721
  * ```
457
722
  */
458
- workerPool?: import('../execution/worker/helpers/WorkerPool.ts').WorkerPool;
459
- /**
460
- * Use WASM implementation.
461
- *
462
- * Requires prior initialization with {@link loadWASM}.
463
- *
464
- * @default false
465
- *
466
- * @example Main thread + WASM
467
- * ```ts
468
- * import { loadWASM, parse } from 'web-csv-toolbox';
469
- *
470
- * await loadWASM();
471
- * parse(csv, { engine: { wasm: true } })
472
- * ```
473
- *
474
- * @example Worker + WASM
475
- * ```ts
476
- * await loadWASM();
477
- * parse(csv, { engine: { worker: true, wasm: true } })
478
- * ```
479
- */
480
- wasm?: boolean;
723
+ workerPool?: WorkerPool;
481
724
  /**
482
725
  * Worker communication strategy.
483
- * Only applicable when `worker: true`.
484
726
  *
485
727
  * - `"message-streaming"` (default): Message-based streaming
486
728
  * - ✅ All browsers including Safari
@@ -520,7 +762,6 @@ export interface EngineConfig {
520
762
  workerStrategy?: WorkerCommunicationStrategy;
521
763
  /**
522
764
  * Strict mode: disable automatic fallback.
523
- * Only applicable when `workerStrategy: "stream-transfer"`.
524
765
  *
525
766
  * When enabled:
526
767
  * - Throws error immediately if stream transfer fails
@@ -578,6 +819,15 @@ export interface EngineConfig {
578
819
  */
579
820
  onFallback?: (info: EngineFallbackInfo) => void;
580
821
  }
822
+ /**
823
+ * Engine configuration for CSV parsing.
824
+ *
825
+ * All parsing engine settings are unified in this type.
826
+ * Uses a discriminated union (on the `worker` property) to ensure type-safe configuration based on worker mode.
827
+ *
828
+ * @category Types
829
+ */
830
+ export type EngineConfig = MainThreadEngineConfig | WorkerEngineConfig;
581
831
  /**
582
832
  * Engine configuration options.
583
833
  *
@@ -646,41 +896,17 @@ export type CSVRecord<Header extends ReadonlyArray<string>> = Record<Header[numb
646
896
  *
647
897
  * @category Types
648
898
  */
649
- export type CSVString<Header extends ReadonlyArray<string> = [], Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> = Header extends readonly [string, ...string[]] ? Join<Header, Delimiter, Quotation> | ReadableStream<Join<Header, Delimiter, Quotation>> : string | ReadableStream<string>;
899
+ export type CSVString<Header extends ReadonlyArray<string> = [], Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> = Header extends readonly [string, ...string[]] ? JoinCSVFields<Header, Delimiter, Quotation> | ReadableStream<JoinCSVFields<Header, Delimiter, Quotation>> : string | ReadableStream<string>;
650
900
  /**
651
901
  * CSV Binary.
652
902
  *
653
903
  * @category Types
654
904
  */
655
905
  export type CSVBinary = ReadableStream<Uint8Array> | Response | Request | Blob | ArrayBuffer | Uint8Array;
656
- /**
657
- * Backpressure monitoring options.
658
- *
659
- * @category Types
660
- */
661
- export interface BackpressureOptions {
662
- /**
663
- * How often to check for backpressure (in number of items processed).
664
- *
665
- * Lower values = more responsive to backpressure but slight performance overhead.
666
- * Higher values = less overhead but slower backpressure response.
667
- *
668
- * Default:
669
- * - CSVLexerTransformer: 100 tokens
670
- * - CSVRecordAssemblerTransformer: 10 records
671
- */
672
- checkInterval?: number;
673
- }
674
- /**
675
- * Extended queuing strategy with backpressure monitoring options.
676
- *
677
- * @category Types
678
- */
679
- export interface ExtendedQueuingStrategy<T> extends QueuingStrategy<T>, BackpressureOptions {
680
- }
681
906
  /**
682
907
  * CSV.
683
908
  *
684
909
  * @category Types
685
910
  */
686
911
  export type CSV<Header extends ReadonlyArray<string> = [], Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> = Header extends [] ? CSVString | CSVBinary : CSVString<Header, Delimiter, Quotation>;
912
+ export {};
@@ -17,3 +17,15 @@ export declare const DEFAULT_DELIMITER = ",";
17
17
  export type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;
18
18
  export declare const DEFAULT_QUOTATION = "\"";
19
19
  export type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;
20
+ /**
21
+ * Default threshold (in bytes) for Blob reading strategy.
22
+ *
23
+ * Files smaller than this use `blob.arrayBuffer()` (faster),
24
+ * files equal to or larger use `blob.stream()` (memory-efficient).
25
+ *
26
+ * This value is determined by benchmarks.
27
+ *
28
+ * @category Constants
29
+ */
30
+ export declare const DEFAULT_ARRAY_BUFFER_THRESHOLD = 1048576;
31
+ export type DEFAULT_ARRAY_BUFFER_THRESHOLD = typeof DEFAULT_ARRAY_BUFFER_THRESHOLD;
package/dist/constants.js CHANGED
@@ -5,6 +5,7 @@ const COMMA = ",";
5
5
  const DOUBLE_QUOTE = '"';
6
6
  const DEFAULT_DELIMITER = COMMA;
7
7
  const DEFAULT_QUOTATION = DOUBLE_QUOTE;
8
+ const DEFAULT_ARRAY_BUFFER_THRESHOLD = 1048576;
8
9
 
9
- export { COMMA, CR, CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, DOUBLE_QUOTE, LF };
10
+ export { COMMA, CR, CRLF, DEFAULT_ARRAY_BUFFER_THRESHOLD, DEFAULT_DELIMITER, DEFAULT_QUOTATION, DOUBLE_QUOTE, LF };
10
11
  //# sourceMappingURL=constants.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"constants.js","sources":["../src/constants.ts"],"sourcesContent":["export const CR = \"\\r\";\nexport type CR = typeof CR;\n\nexport const CRLF = \"\\r\\n\";\nexport type CRLF = typeof CRLF;\n\nexport const LF = \"\\n\";\nexport type LF = typeof LF;\n\nexport type Newline = CRLF | CR | LF;\n\n/**\n * COMMA is a symbol for comma(,).\n */\nexport const COMMA = \",\";\n\n/**\n * DOUBLE_QUOTE is a symbol for double quote(\").\n */\nexport const DOUBLE_QUOTE = '\"';\n\nexport const DEFAULT_DELIMITER = COMMA;\nexport type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;\n\nexport const DEFAULT_QUOTATION = DOUBLE_QUOTE;\nexport type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;\n"],"names":[],"mappings":"AAAO,MAAM,EAAA,GAAK;AAGX,MAAM,IAAA,GAAO;AAGb,MAAM,EAAA,GAAK;AAQX,MAAM,KAAA,GAAQ;AAKd,MAAM,YAAA,GAAe;AAErB,MAAM,iBAAA,GAAoB;AAG1B,MAAM,iBAAA,GAAoB;;;;"}
1
+ {"version":3,"file":"constants.js","sources":["../src/constants.ts"],"sourcesContent":["export const CR = \"\\r\";\nexport type CR = typeof CR;\n\nexport const CRLF = \"\\r\\n\";\nexport type CRLF = typeof CRLF;\n\nexport const LF = \"\\n\";\nexport type LF = typeof LF;\n\nexport type Newline = CRLF | CR | LF;\n\n/**\n * COMMA is a symbol for comma(,).\n */\nexport const COMMA = \",\";\n\n/**\n * DOUBLE_QUOTE is a symbol for double quote(\").\n */\nexport const DOUBLE_QUOTE = '\"';\n\nexport const DEFAULT_DELIMITER = COMMA;\nexport type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;\n\nexport const DEFAULT_QUOTATION = DOUBLE_QUOTE;\nexport type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;\n\n/**\n * Default threshold (in bytes) for Blob reading strategy.\n *\n * Files smaller than this use `blob.arrayBuffer()` (faster),\n * files equal or larger use `blob.stream()` (memory-efficient).\n *\n * This value is determined by benchmarks.\n *\n * @category Constants\n */\nexport const DEFAULT_ARRAY_BUFFER_THRESHOLD = 1048576; // 1MB\nexport type DEFAULT_ARRAY_BUFFER_THRESHOLD =\n typeof DEFAULT_ARRAY_BUFFER_THRESHOLD;\n"],"names":[],"mappings":"AAAO,MAAM,EAAA,GAAK;AAGX,MAAM,IAAA,GAAO;AAGb,MAAM,EAAA,GAAK;AAQX,MAAM,KAAA,GAAQ;AAKd,MAAM,YAAA,GAAe;AAErB,MAAM,iBAAA,GAAoB;AAG1B,MAAM,iBAAA,GAAoB;AAa1B,MAAM,8BAAA,GAAiC;;;;"}