web-csv-toolbox 0.14.0-next-e45bc4d089f1fb259a7596b9862b3b34e717dab7 → 0.14.0-next-74f4486094d18dbaf4e7492f41a2860ba012985a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -48
- package/dist/CSVLexer.js +8 -5
- package/dist/CSVLexer.js.map +1 -1
- package/dist/CSVLexerTransformer.d.ts +10 -12
- package/dist/CSVLexerTransformer.js +12 -16
- package/dist/CSVLexerTransformer.js.map +1 -1
- package/dist/CSVRecordAssembler.js +14 -4
- package/dist/CSVRecordAssembler.js.map +1 -1
- package/dist/CSVRecordAssemblerTransformer.d.ts +8 -14
- package/dist/CSVRecordAssemblerTransformer.js +10 -16
- package/dist/CSVRecordAssemblerTransformer.js.map +1 -1
- package/dist/_virtual/web_csv_toolbox_wasm_bg.wasm.js +1 -1
- package/dist/assertCommonOptions.d.ts +1 -1
- package/dist/assertCommonOptions.js.map +1 -1
- package/dist/common/errors.d.ts +32 -0
- package/dist/common/errors.js +18 -0
- package/dist/common/errors.js.map +1 -1
- package/dist/common/types.d.ts +249 -66
- package/dist/constants.d.ts +12 -0
- package/dist/constants.js +2 -1
- package/dist/constants.js.map +1 -1
- package/dist/execution/EnginePresets.d.ts +52 -12
- package/dist/execution/EnginePresets.js +1 -1
- package/dist/execution/EnginePresets.js.map +1 -1
- package/dist/execution/InternalEngineConfig.js +40 -18
- package/dist/execution/InternalEngineConfig.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorker.node.js +6 -4
- package/dist/execution/worker/parseBinaryInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorker.web.js +6 -4
- package/dist/execution/worker/parseBinaryInWorker.web.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.js +6 -4
- package/dist/execution/worker/parseBinaryInWorkerWASM.node.js.map +1 -1
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.js +6 -4
- package/dist/execution/worker/parseBinaryInWorkerWASM.web.js.map +1 -1
- package/dist/execution/worker/parseStreamInWorker.node.js +6 -4
- package/dist/execution/worker/parseStreamInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseStreamInWorker.web.js +6 -4
- package/dist/execution/worker/parseStreamInWorker.web.js.map +1 -1
- package/dist/execution/worker/parseStringInWorker.node.js +6 -4
- package/dist/execution/worker/parseStringInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseStringInWorker.web.js +6 -4
- package/dist/execution/worker/parseStringInWorker.web.js.map +1 -1
- package/dist/execution/worker/parseStringInWorkerWASM.node.js +6 -4
- package/dist/execution/worker/parseStringInWorkerWASM.node.js.map +1 -1
- package/dist/execution/worker/parseStringInWorkerWASM.web.js +6 -4
- package/dist/execution/worker/parseStringInWorkerWASM.web.js.map +1 -1
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js +6 -4
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.node.js.map +1 -1
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js +6 -4
- package/dist/execution/worker/parseUint8ArrayStreamInWorker.web.js.map +1 -1
- package/dist/getOptionsFromFile.d.ts +14 -0
- package/dist/getOptionsFromFile.js +12 -0
- package/dist/getOptionsFromFile.js.map +1 -0
- package/dist/parseBlob.js +9 -1
- package/dist/parseBlob.js.map +1 -1
- package/dist/parseFile.d.ts +3 -2
- package/dist/parseFile.js +7 -3
- package/dist/parseFile.js.map +1 -1
- package/dist/parseFileToArray.d.ts +27 -0
- package/dist/parseFileToArray.js +12 -0
- package/dist/parseFileToArray.js.map +1 -0
- package/dist/parseFileToStream.d.ts +33 -0
- package/dist/parseFileToStream.js +10 -0
- package/dist/parseFileToStream.js.map +1 -0
- package/dist/utils/types.d.ts +21 -10
- package/dist/web-csv-toolbox.d.ts +3 -0
- package/dist/web-csv-toolbox.js +3 -0
- package/dist/web-csv-toolbox.js.map +1 -1
- package/dist/web_csv_toolbox_wasm_bg.wasm +0 -0
- package/package.json +2 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertCommonOptions.js","sources":["../src/assertCommonOptions.ts"],"sourcesContent":["import type { CommonOptions } from \"./common/types.ts\";\nimport { CR, LF } from \"./constants.ts\";\n\n/**\n * Asserts that the provided value is a string and satisfies certain conditions.\n * @param value - The value to be checked.\n * @param name - The name of the option.\n * @throws {RangeError} If the value is empty, longer than 1 byte, or includes CR or LF.\n * @throws {TypeError} If the value is not a string.\n */\nfunction assertOptionValue(\n value: string,\n name: string,\n): asserts value is string {\n if (typeof value === \"string\") {\n switch (true) {\n case value.length === 0:\n throw new RangeError(`${name} must not be empty`);\n case value.length > 1:\n throw new RangeError(`${name} must be a single character`);\n case value === LF:\n case value === CR:\n throw new RangeError(`${name} must not include CR or LF`);\n default:\n break;\n }\n } else {\n throw new TypeError(`${name} must be a string`);\n }\n}\n\n/**\n * Asserts that the provided options object contains all the required properties.\n * Throws an error if any required property is missing\n * or if the delimiter and quotation length is not 1 byte character,\n * or if the delimiter is the same as the quotation.\n *\n * @example\n *\n * ```ts\n * assertCommonOptions({\n * quotation: '\"',\n * delimiter: ',',\n * });\n * ```\n *\n * @param options - The options object to be validated.\n * @throws {RangeError} If any required property is missing or if the delimiter is the same as the quotation.\n * @throws {TypeError} If any required property is not a string.\n */\nexport function assertCommonOptions<\n Delimiter extends string,\n Quotation extends string,\n>(\n options: Required<CommonOptions<Delimiter, Quotation>>,\n): asserts options is Required<CommonOptions<Delimiter, Quotation
|
|
1
|
+
{"version":3,"file":"assertCommonOptions.js","sources":["../src/assertCommonOptions.ts"],"sourcesContent":["import type { CommonOptions } from \"./common/types.ts\";\nimport { CR, LF } from \"./constants.ts\";\n\n/**\n * Asserts that the provided value is a string and satisfies certain conditions.\n * @param value - The value to be checked.\n * @param name - The name of the option.\n * @throws {RangeError} If the value is empty, longer than 1 byte, or includes CR or LF.\n * @throws {TypeError} If the value is not a string.\n */\nfunction assertOptionValue(\n value: string,\n name: string,\n): asserts value is string {\n if (typeof value === \"string\") {\n switch (true) {\n case value.length === 0:\n throw new RangeError(`${name} must not be empty`);\n case value.length > 1:\n throw new RangeError(`${name} must be a single character`);\n case value === LF:\n case value === CR:\n throw new RangeError(`${name} must not include CR or LF`);\n default:\n break;\n }\n } else {\n throw new TypeError(`${name} must be a string`);\n }\n}\n\n/**\n * Asserts that the provided options object contains all the required properties.\n * Throws an error if any required property is missing\n * or if the delimiter and quotation length is not 1 byte character,\n * or if the delimiter is the same as the quotation.\n *\n * @example\n *\n * ```ts\n * assertCommonOptions({\n * quotation: '\"',\n * delimiter: ',',\n * });\n * ```\n *\n * @param options - The options object to be validated.\n * @throws {RangeError} If any required property is missing or if the delimiter is the same as the quotation.\n * @throws {TypeError} If any required property is not a string.\n */\nexport function assertCommonOptions<\n Delimiter extends string,\n Quotation extends string,\n>(\n options: Required<Omit<CommonOptions<Delimiter, Quotation>, \"source\">>,\n): asserts options is Required<\n Omit<CommonOptions<Delimiter, Quotation>, \"source\">\n> {\n for (const name of [\"delimiter\", \"quotation\"] as const) {\n assertOptionValue(options[name], name);\n }\n // @ts-ignore: TS doesn't understand that the values are strings\n if (options.delimiter === options.quotation) {\n throw new RangeError(\n \"delimiter must not be the same as quotation, use different characters\",\n );\n }\n\n // Validate maxBufferSize\n const mbs = options.maxBufferSize;\n if (\n !(Number.isFinite(mbs) || mbs === Number.POSITIVE_INFINITY) ||\n (Number.isFinite(mbs) && (mbs < 1 || !Number.isInteger(mbs)))\n ) {\n throw new RangeError(\n \"maxBufferSize must be a positive integer (in characters) or Number.POSITIVE_INFINITY\",\n );\n }\n}\n"],"names":[],"mappings":";;AAUA,SAAS,iBAAA,CACP,OACA,IAAA,EACyB;AACzB,EAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,IAAA,QAAQ,IAAA;AAAM,MACZ,KAAK,MAAM,MAAA,KAAW,CAAA;AACpB,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,kBAAA,CAAoB,CAAA;AAAA,MAClD,KAAK,MAAM,MAAA,GAAS,CAAA;AAClB,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,2BAAA,CAA6B,CAAA;AAAA,MAC3D,KAAK,KAAA,KAAU,EAAA;AAAA,MACf,KAAK,KAAA,KAAU,EAAA;AACb,QAAA,MAAM,IAAI,UAAA,CAAW,CAAA,EAAG,IAAI,CAAA,0BAAA,CAA4B,CAAA;AAExD;AACJ,EACF,CAAA,MAAO;AACL,IAAA,MAAM,IAAI,SAAA,CAAU,CAAA,EAAG,IAAI,CAAA,iBAAA,CAAmB,CAAA;AAAA,EAChD;AACF;AAqBO,SAAS,oBAId,OAAA,EAGA;AACA,EAAA,KAAA,MAAW,IAAA,IAAQ,CAAC,WAAA,EAAa,WAAW,CAAA,EAAY;AACtD,IAAA,iBAAA,CAAkB,OAAA,CAAQ,IAAI,CAAA,EAAG,IAAI,CAAA;AAAA,EACvC;AAEA,EAAA,IAAI,OAAA,CAAQ,SAAA,KAAc,OAAA,CAAQ,SAAA,EAAW;AAC3C,IAAA,MAAM,IAAI,UAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAGA,EAAA,MAAM,MAAM,OAAA,CAAQ,aAAA;AACpB,EAAA,IACE,EAAE,MAAA,CAAO,QAAA,CAAS,GAAG,CAAA,IAAK,GAAA,KAAQ,OAAO,iBAAA,CAAA,IACxC,MAAA,CAAO,QAAA,CAAS,GAAG,MAAM,GAAA,GAAM,CAAA,IAAK,CAAC,MAAA,CAAO,SAAA,CAAU,GAAG,CAAA,CAAA,EAC1D;AACA,IAAA,MAAM,IAAI,UAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AACF;;;;"}
|
package/dist/common/errors.d.ts
CHANGED
|
@@ -7,6 +7,22 @@ export interface ParseErrorOptions extends ErrorOptions {
|
|
|
7
7
|
* The position where the error occurred.
|
|
8
8
|
*/
|
|
9
9
|
position?: Position;
|
|
10
|
+
/**
|
|
11
|
+
* The row number where the error occurred.
|
|
12
|
+
*
|
|
13
|
+
* @remarks
|
|
14
|
+
* This represents the logical CSV row number (includes header if present),
|
|
15
|
+
* useful for error reporting to users.
|
|
16
|
+
*/
|
|
17
|
+
rowNumber?: number;
|
|
18
|
+
/**
|
|
19
|
+
* Source identifier (e.g., filename) for error reporting.
|
|
20
|
+
*
|
|
21
|
+
* @remarks
|
|
22
|
+
* A human-readable identifier for the CSV source to help locate
|
|
23
|
+
* which file or stream caused the error.
|
|
24
|
+
*/
|
|
25
|
+
source?: string;
|
|
10
26
|
}
|
|
11
27
|
/**
|
|
12
28
|
* Error class for parse errors.
|
|
@@ -23,5 +39,21 @@ export declare class ParseError extends SyntaxError {
|
|
|
23
39
|
* The position where the error occurred.
|
|
24
40
|
*/
|
|
25
41
|
position?: Position;
|
|
42
|
+
/**
|
|
43
|
+
* The row number where the error occurred.
|
|
44
|
+
*
|
|
45
|
+
* @remarks
|
|
46
|
+
* This represents the logical CSV row number (includes header if present),
|
|
47
|
+
* useful for error reporting to users.
|
|
48
|
+
*/
|
|
49
|
+
rowNumber?: number;
|
|
50
|
+
/**
|
|
51
|
+
* Source identifier (e.g., filename) for error reporting.
|
|
52
|
+
*
|
|
53
|
+
* @remarks
|
|
54
|
+
* A human-readable identifier for the CSV source to help locate
|
|
55
|
+
* which file or stream caused the error.
|
|
56
|
+
*/
|
|
57
|
+
source?: string;
|
|
26
58
|
constructor(message?: string, options?: ParseErrorOptions);
|
|
27
59
|
}
|
package/dist/common/errors.js
CHANGED
|
@@ -3,10 +3,28 @@ class ParseError extends SyntaxError {
|
|
|
3
3
|
* The position where the error occurred.
|
|
4
4
|
*/
|
|
5
5
|
position;
|
|
6
|
+
/**
|
|
7
|
+
* The row number where the error occurred.
|
|
8
|
+
*
|
|
9
|
+
* @remarks
|
|
10
|
+
* This represents the logical CSV row number (includes header if present),
|
|
11
|
+
* useful for error reporting to users.
|
|
12
|
+
*/
|
|
13
|
+
rowNumber;
|
|
14
|
+
/**
|
|
15
|
+
* Source identifier (e.g., filename) for error reporting.
|
|
16
|
+
*
|
|
17
|
+
* @remarks
|
|
18
|
+
* A human-readable identifier for the CSV source to help locate
|
|
19
|
+
* which file or stream caused the error.
|
|
20
|
+
*/
|
|
21
|
+
source;
|
|
6
22
|
constructor(message, options) {
|
|
7
23
|
super(message, { cause: options?.cause });
|
|
8
24
|
this.name = "ParseError";
|
|
9
25
|
this.position = options?.position;
|
|
26
|
+
this.rowNumber = options?.rowNumber;
|
|
27
|
+
this.source = options?.source;
|
|
10
28
|
}
|
|
11
29
|
}
|
|
12
30
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errors.js","sources":["../../src/common/errors.ts"],"sourcesContent":["import type { Position } from \"./types.js\";\n\n/**\n * Options for creating a parse error.\n */\nexport interface ParseErrorOptions extends ErrorOptions {\n /**\n * The position where the error occurred.\n */\n position?: Position;\n}\n\n/**\n * Error class for parse errors.\n *\n * @remarks\n * This error is thrown when a parsing error occurs.\n * {@link ParseError} is a subclass of {@link !SyntaxError}.\n *\n * This is in reference to the specification\n * that the error thrown when a parse error occurs in the {@link !JSON.parse} function is {@link !SyntaxError}.\n */\nexport class ParseError extends SyntaxError {\n /**\n * The position where the error occurred.\n */\n public position?: Position;\n\n constructor(message?: string, options?: ParseErrorOptions) {\n super(message, { cause: options?.cause });\n this.name = \"ParseError\";\n this.position = options?.position;\n }\n}\n"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"errors.js","sources":["../../src/common/errors.ts"],"sourcesContent":["import type { Position } from \"./types.js\";\n\n/**\n * Options for creating a parse error.\n */\nexport interface ParseErrorOptions extends ErrorOptions {\n /**\n * The position where the error occurred.\n */\n position?: Position;\n /**\n * The row number where the error occurred.\n *\n * @remarks\n * This represents the logical CSV row number (includes header if present),\n * useful for error reporting to users.\n */\n rowNumber?: number;\n /**\n * Source identifier (e.g., filename) for error reporting.\n *\n * @remarks\n * A human-readable identifier for the CSV source to help locate\n * which file or stream caused the error.\n */\n source?: string;\n}\n\n/**\n * Error class for parse errors.\n *\n * @remarks\n * This error is thrown when a parsing error occurs.\n * {@link ParseError} is a subclass of {@link !SyntaxError}.\n *\n * This is in reference to the specification\n * that the error thrown when a parse error occurs in the {@link !JSON.parse} function is {@link !SyntaxError}.\n */\nexport class ParseError extends SyntaxError {\n /**\n * The position where the error occurred.\n */\n public position?: Position;\n /**\n * The row number where the error occurred.\n *\n * @remarks\n * This represents the logical CSV row number (includes header if present),\n * useful for error reporting to users.\n */\n public rowNumber?: number;\n /**\n * Source identifier (e.g., filename) for error reporting.\n *\n * @remarks\n * A human-readable identifier for the CSV source to help locate\n * which file or stream caused the error.\n */\n public source?: string;\n\n constructor(message?: string, options?: ParseErrorOptions) {\n super(message, { cause: options?.cause });\n this.name = \"ParseError\";\n this.position = options?.position;\n this.rowNumber = options?.rowNumber;\n this.source = options?.source;\n }\n}\n"],"names":[],"mappings":"AAsCO,MAAM,mBAAmB,WAAA,CAAY;AAAA;AAAA;AAAA;AAAA,EAInC,QAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAA;AAAA,EAEP,WAAA,CAAY,SAAkB,OAAA,EAA6B;AACzD,IAAA,KAAA,CAAM,OAAA,EAAS,EAAE,KAAA,EAAO,OAAA,EAAS,OAAO,CAAA;AACxC,IAAA,IAAA,CAAK,IAAA,GAAO,YAAA;AACZ,IAAA,IAAA,CAAK,WAAW,OAAA,EAAS,QAAA;AACzB,IAAA,IAAA,CAAK,YAAY,OAAA,EAAS,SAAA;AAC1B,IAAA,IAAA,CAAK,SAAS,OAAA,EAAS,MAAA;AAAA,EACzB;AACF;;;;"}
|
package/dist/common/types.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { DEFAULT_DELIMITER, DEFAULT_QUOTATION } from '../constants.ts';
|
|
2
|
-
import {
|
|
2
|
+
import { WorkerPool } from '../execution/worker/helpers/WorkerPool.ts';
|
|
3
|
+
import { JoinCSVFields } from '../utils/types.ts';
|
|
3
4
|
import { Field, FieldDelimiter, RecordDelimiter } from './constants.ts';
|
|
4
5
|
/**
|
|
5
6
|
* Position object.
|
|
@@ -34,12 +35,49 @@ export interface TokenLocation {
|
|
|
34
35
|
*/
|
|
35
36
|
end: Position;
|
|
36
37
|
/**
|
|
37
|
-
* Row number.
|
|
38
|
+
* Row number in the CSV (includes header if present).
|
|
38
39
|
* Starts from 1.
|
|
39
40
|
*
|
|
40
41
|
* @remarks
|
|
41
|
-
* This represents the logical row number
|
|
42
|
-
*
|
|
42
|
+
* This represents the logical CSV row number, not the physical line number.
|
|
43
|
+
* A single CSV row may span multiple lines if fields contain newline
|
|
44
|
+
* characters within quotes.
|
|
45
|
+
*
|
|
46
|
+
* **Important distinction**:
|
|
47
|
+
* - `line`: Physical line number (incremented by `\n` characters)
|
|
48
|
+
* - `rowNumber`: Logical CSV row (incremented by record delimiters)
|
|
49
|
+
*
|
|
50
|
+
* The header row (if present) is counted as row 1. This corresponds to
|
|
51
|
+
* the physical row position in the file, making it easy to locate in editors.
|
|
52
|
+
*
|
|
53
|
+
* For physical line numbers, use `start.line` or `end.line`.
|
|
54
|
+
*
|
|
55
|
+
* **Primary use case**: Error reporting. This field allows errors to be
|
|
56
|
+
* reported with both physical position (`line`, `column`) and logical
|
|
57
|
+
* row context (`rowNumber`), making it easier for users to locate
|
|
58
|
+
* issues in their CSV data.
|
|
59
|
+
*
|
|
60
|
+
* @example
|
|
61
|
+
* ```csv
|
|
62
|
+
* name,description <- rowNumber: 1 (header)
|
|
63
|
+
* Alice,"Lives in
|
|
64
|
+
* New York" <- rowNumber: 2 (spans line 2-3)
|
|
65
|
+
* Bob,"Works" <- rowNumber: 3 (line 4)
|
|
66
|
+
* ```
|
|
67
|
+
* - Header: `rowNumber: 1`
|
|
68
|
+
* - Alice's row: `start.line: 2, end.line: 3, rowNumber: 2`
|
|
69
|
+
* - Bob's row: `start.line: 4, end.line: 4, rowNumber: 3`
|
|
70
|
+
*
|
|
71
|
+
* @example Error reporting
|
|
72
|
+
* ```ts
|
|
73
|
+
* try {
|
|
74
|
+
* await parseString(csv);
|
|
75
|
+
* } catch (error) {
|
|
76
|
+
* if (error instanceof ParseError) {
|
|
77
|
+
* console.error(`Error at row ${error.rowNumber}, line ${error.position?.line}`);
|
|
78
|
+
* }
|
|
79
|
+
* }
|
|
80
|
+
* ```
|
|
43
81
|
*/
|
|
44
82
|
rowNumber: number;
|
|
45
83
|
}
|
|
@@ -128,11 +166,37 @@ export interface AbortSignalOptions {
|
|
|
128
166
|
*/
|
|
129
167
|
signal?: AbortSignal;
|
|
130
168
|
}
|
|
169
|
+
/**
|
|
170
|
+
* Source identifier option for error reporting.
|
|
171
|
+
* @category Types
|
|
172
|
+
*/
|
|
173
|
+
export interface SourceOption {
|
|
174
|
+
/**
|
|
175
|
+
* Source identifier for error reporting (e.g., filename, description).
|
|
176
|
+
*
|
|
177
|
+
* @remarks
|
|
178
|
+
* This option allows you to specify a human-readable identifier for the CSV source
|
|
179
|
+
* that will be included in error messages. This is particularly useful when parsing
|
|
180
|
+
* multiple files or streams to help identify which source caused an error.
|
|
181
|
+
*
|
|
182
|
+
* **Security Note**: Do not include sensitive information (API keys, tokens, full URLs)
|
|
183
|
+
* in this field as it may be exposed in error messages and logs.
|
|
184
|
+
*
|
|
185
|
+
* @example
|
|
186
|
+
* ```ts
|
|
187
|
+
* parseString(csv, { source: "users.csv" });
|
|
188
|
+
* // Error: Field count exceeded at row 5 in "users.csv"
|
|
189
|
+
* ```
|
|
190
|
+
*
|
|
191
|
+
* @default undefined
|
|
192
|
+
*/
|
|
193
|
+
source?: string;
|
|
194
|
+
}
|
|
131
195
|
/**
|
|
132
196
|
* CSV Common Options.
|
|
133
197
|
* @category Types
|
|
134
198
|
*/
|
|
135
|
-
export interface CommonOptions<Delimiter extends string, Quotation extends string> {
|
|
199
|
+
export interface CommonOptions<Delimiter extends string, Quotation extends string> extends SourceOption {
|
|
136
200
|
/**
|
|
137
201
|
* CSV field delimiter.
|
|
138
202
|
* If you want to parse TSV, specify `'\t'`.
|
|
@@ -340,6 +404,15 @@ export interface BinaryOptions {
|
|
|
340
404
|
* @category Types
|
|
341
405
|
*/
|
|
342
406
|
export interface CSVLexerTransformerOptions<Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> extends CommonOptions<Delimiter, Quotation>, AbortSignalOptions {
|
|
407
|
+
/**
|
|
408
|
+
* How often to check for backpressure (in number of tokens processed).
|
|
409
|
+
*
|
|
410
|
+
* Lower values = more responsive to backpressure but slight performance overhead.
|
|
411
|
+
* Higher values = less overhead but slower backpressure response.
|
|
412
|
+
*
|
|
413
|
+
* @default 100
|
|
414
|
+
*/
|
|
415
|
+
backpressureCheckInterval?: number;
|
|
343
416
|
}
|
|
344
417
|
/**
|
|
345
418
|
* CSV Record Assembler Options.
|
|
@@ -352,7 +425,7 @@ export interface CSVLexerTransformerOptions<Delimiter extends string = DEFAULT_D
|
|
|
352
425
|
* If you don't specify `header`,
|
|
353
426
|
* the first record will be treated as a header.
|
|
354
427
|
*/
|
|
355
|
-
export interface CSVRecordAssemblerOptions<Header extends ReadonlyArray<string>> extends AbortSignalOptions {
|
|
428
|
+
export interface CSVRecordAssemblerOptions<Header extends ReadonlyArray<string>> extends SourceOption, AbortSignalOptions {
|
|
356
429
|
/**
|
|
357
430
|
* CSV header.
|
|
358
431
|
*
|
|
@@ -386,6 +459,15 @@ export interface CSVRecordAssemblerOptions<Header extends ReadonlyArray<string>>
|
|
|
386
459
|
* @default false
|
|
387
460
|
*/
|
|
388
461
|
skipEmptyLines?: boolean;
|
|
462
|
+
/**
|
|
463
|
+
* How often to check for backpressure (in number of records processed).
|
|
464
|
+
*
|
|
465
|
+
* Lower values = more responsive to backpressure but slight performance overhead.
|
|
466
|
+
* Higher values = less overhead but slower backpressure response.
|
|
467
|
+
*
|
|
468
|
+
* @default 10
|
|
469
|
+
*/
|
|
470
|
+
backpressureCheckInterval?: number;
|
|
389
471
|
}
|
|
390
472
|
/**
|
|
391
473
|
* Worker communication strategy.
|
|
@@ -421,27 +503,168 @@ export interface EngineFallbackInfo {
|
|
|
421
503
|
error?: Error;
|
|
422
504
|
}
|
|
423
505
|
/**
|
|
424
|
-
*
|
|
506
|
+
* Backpressure monitoring intervals (count-based).
|
|
425
507
|
*
|
|
426
|
-
*
|
|
508
|
+
* Controls how frequently the internal parsers check for backpressure
|
|
509
|
+
* during streaming operations, based on the number of tokens/records processed.
|
|
427
510
|
*
|
|
511
|
+
* @experimental This API may change in future versions based on performance research.
|
|
428
512
|
* @category Types
|
|
429
513
|
*/
|
|
430
|
-
export interface
|
|
514
|
+
export interface BackpressureCheckInterval {
|
|
431
515
|
/**
|
|
432
|
-
*
|
|
516
|
+
* Check interval for the lexer stage (number of tokens processed).
|
|
517
|
+
*
|
|
518
|
+
* Lower values provide better responsiveness to backpressure but may have
|
|
519
|
+
* slight performance overhead.
|
|
520
|
+
*
|
|
521
|
+
* @default 100
|
|
522
|
+
*/
|
|
523
|
+
lexer?: number;
|
|
524
|
+
/**
|
|
525
|
+
* Check interval for the assembler stage (number of records processed).
|
|
526
|
+
*
|
|
527
|
+
* Lower values provide better responsiveness to backpressure but may have
|
|
528
|
+
* slight performance overhead.
|
|
529
|
+
*
|
|
530
|
+
* @default 10
|
|
531
|
+
*/
|
|
532
|
+
assembler?: number;
|
|
533
|
+
}
|
|
534
|
+
/**
|
|
535
|
+
* Internal streaming queuing strategies configuration.
|
|
536
|
+
*
|
|
537
|
+
* Controls the internal queuing behavior of the CSV parser's streaming pipeline.
|
|
538
|
+
* This affects memory usage and backpressure handling for large streaming operations.
|
|
539
|
+
*
|
|
540
|
+
* @remarks
|
|
541
|
+
* The CSV parser uses a two-stage pipeline:
|
|
542
|
+
* 1. **Lexer**: String → Token
|
|
543
|
+
* 2. **Assembler**: Token → CSVRecord
|
|
544
|
+
*
|
|
545
|
+
* Each stage has both writable (input) and readable (output) sides.
|
|
546
|
+
*
|
|
547
|
+
* @experimental This API may change in future versions based on performance research.
|
|
548
|
+
* @category Types
|
|
549
|
+
*/
|
|
550
|
+
export interface QueuingStrategyConfig {
|
|
551
|
+
/**
|
|
552
|
+
* Queuing strategy for the lexer's writable side (string input).
|
|
553
|
+
*
|
|
554
|
+
* Controls how string chunks are buffered before being processed by the lexer.
|
|
555
|
+
*
|
|
556
|
+
* @default `{ highWaterMark: 65536 }` (≈64KB of characters)
|
|
557
|
+
*/
|
|
558
|
+
lexerWritable?: QueuingStrategy<string>;
|
|
559
|
+
/**
|
|
560
|
+
* Queuing strategy for the lexer's readable side (token output).
|
|
561
|
+
*
|
|
562
|
+
* Controls how tokens are buffered after being produced by the lexer
|
|
563
|
+
* before being consumed by the assembler.
|
|
564
|
+
*
|
|
565
|
+
* @default `{ highWaterMark: 1024 }` (1024 tokens)
|
|
566
|
+
*/
|
|
567
|
+
lexerReadable?: QueuingStrategy<Token>;
|
|
568
|
+
/**
|
|
569
|
+
* Queuing strategy for the assembler's writable side (token input).
|
|
570
|
+
*
|
|
571
|
+
* Controls how tokens are buffered before being processed by the assembler.
|
|
572
|
+
* This is the input side of the assembler, receiving tokens from the lexer.
|
|
573
|
+
*
|
|
574
|
+
* @default `{ highWaterMark: 1024 }` (1024 tokens)
|
|
575
|
+
*/
|
|
576
|
+
assemblerWritable?: QueuingStrategy<Token>;
|
|
577
|
+
/**
|
|
578
|
+
* Queuing strategy for the assembler's readable side (record output).
|
|
579
|
+
*
|
|
580
|
+
* Controls how CSV records are buffered after being assembled.
|
|
581
|
+
*
|
|
582
|
+
* @default `{ highWaterMark: 256 }` (256 records)
|
|
583
|
+
*/
|
|
584
|
+
assemblerReadable?: QueuingStrategy<CSVRecord<any>>;
|
|
585
|
+
}
|
|
586
|
+
/**
|
|
587
|
+
* Base engine configuration shared by all execution modes.
|
|
588
|
+
*
|
|
589
|
+
* @category Types
|
|
590
|
+
*/
|
|
591
|
+
interface BaseEngineConfig {
|
|
592
|
+
/**
|
|
593
|
+
* Use WASM implementation.
|
|
594
|
+
*
|
|
595
|
+
* Requires prior initialization with {@link loadWASM}.
|
|
433
596
|
*
|
|
434
597
|
* @default false
|
|
435
598
|
*
|
|
436
|
-
* @example
|
|
599
|
+
* @example Main thread + WASM
|
|
437
600
|
* ```ts
|
|
438
|
-
*
|
|
601
|
+
* import { loadWASM, parse } from 'web-csv-toolbox';
|
|
602
|
+
*
|
|
603
|
+
* await loadWASM();
|
|
604
|
+
* parse(csv, { engine: { wasm: true } })
|
|
605
|
+
* ```
|
|
606
|
+
*
|
|
607
|
+
* @example Worker + WASM
|
|
608
|
+
* ```ts
|
|
609
|
+
* await loadWASM();
|
|
610
|
+
* parse(csv, { engine: { worker: true, wasm: true } })
|
|
439
611
|
* ```
|
|
440
612
|
*/
|
|
441
|
-
|
|
613
|
+
wasm?: boolean;
|
|
614
|
+
/**
|
|
615
|
+
* Blob reading strategy threshold (in bytes).
|
|
616
|
+
* Only applicable for `parseBlob()` and `parseFile()`.
|
|
617
|
+
*
|
|
618
|
+
* Determines when to use `blob.arrayBuffer()` vs `blob.stream()`:
|
|
619
|
+
* - Files smaller than threshold: Use `blob.arrayBuffer()` + `parseBinary()`
|
|
620
|
+
* - ✅ Faster for small files
|
|
621
|
+
* - ❌ Loads entire file into memory
|
|
622
|
+
* - Files equal to or larger than threshold: Use `blob.stream()` + `parseUint8ArrayStream()`
|
|
623
|
+
* - ✅ Memory-efficient for large files
|
|
624
|
+
* - ❌ Slight streaming overhead
|
|
625
|
+
*
|
|
626
|
+
* @default 1_048_576 (1MB)
|
|
627
|
+
*/
|
|
628
|
+
arrayBufferThreshold?: number;
|
|
629
|
+
/**
|
|
630
|
+
* Backpressure monitoring intervals (count-based: number of tokens/records processed).
|
|
631
|
+
*
|
|
632
|
+
* @default { lexer: 100, assembler: 10 }
|
|
633
|
+
* @experimental
|
|
634
|
+
*/
|
|
635
|
+
backpressureCheckInterval?: BackpressureCheckInterval;
|
|
636
|
+
/**
|
|
637
|
+
* Internal streaming queuing strategies.
|
|
638
|
+
*
|
|
639
|
+
* @experimental
|
|
640
|
+
*/
|
|
641
|
+
queuingStrategy?: QueuingStrategyConfig;
|
|
642
|
+
}
|
|
643
|
+
/**
|
|
644
|
+
* Engine configuration for main thread execution.
|
|
645
|
+
*
|
|
646
|
+
* @category Types
|
|
647
|
+
*/
|
|
648
|
+
export interface MainThreadEngineConfig extends BaseEngineConfig {
|
|
649
|
+
/**
|
|
650
|
+
* Execute in Worker thread.
|
|
651
|
+
*
|
|
652
|
+
* @default false
|
|
653
|
+
*/
|
|
654
|
+
worker?: false;
|
|
655
|
+
}
|
|
656
|
+
/**
|
|
657
|
+
* Engine configuration for worker thread execution.
|
|
658
|
+
*
|
|
659
|
+
* @category Types
|
|
660
|
+
*/
|
|
661
|
+
export interface WorkerEngineConfig extends BaseEngineConfig {
|
|
662
|
+
/**
|
|
663
|
+
* Execute in Worker thread.
|
|
664
|
+
*/
|
|
665
|
+
worker: true;
|
|
442
666
|
/**
|
|
443
667
|
* Custom Worker URL.
|
|
444
|
-
* Only applicable when `worker: true`.
|
|
445
668
|
*
|
|
446
669
|
* If not provided, uses the bundled worker.
|
|
447
670
|
*
|
|
@@ -461,7 +684,6 @@ export interface EngineConfig {
|
|
|
461
684
|
workerURL?: string | URL;
|
|
462
685
|
/**
|
|
463
686
|
* Worker pool for managing worker lifecycle.
|
|
464
|
-
* Only applicable when `worker: true`.
|
|
465
687
|
*
|
|
466
688
|
* When provided, the parsing function will use this pool's worker instance
|
|
467
689
|
* instead of creating/reusing a module-level singleton worker.
|
|
@@ -498,32 +720,9 @@ export interface EngineConfig {
|
|
|
498
720
|
* // Worker is reused for both operations
|
|
499
721
|
* ```
|
|
500
722
|
*/
|
|
501
|
-
workerPool?:
|
|
502
|
-
/**
|
|
503
|
-
* Use WASM implementation.
|
|
504
|
-
*
|
|
505
|
-
* Requires prior initialization with {@link loadWASM}.
|
|
506
|
-
*
|
|
507
|
-
* @default false
|
|
508
|
-
*
|
|
509
|
-
* @example Main thread + WASM
|
|
510
|
-
* ```ts
|
|
511
|
-
* import { loadWASM, parse } from 'web-csv-toolbox';
|
|
512
|
-
*
|
|
513
|
-
* await loadWASM();
|
|
514
|
-
* parse(csv, { engine: { wasm: true } })
|
|
515
|
-
* ```
|
|
516
|
-
*
|
|
517
|
-
* @example Worker + WASM
|
|
518
|
-
* ```ts
|
|
519
|
-
* await loadWASM();
|
|
520
|
-
* parse(csv, { engine: { worker: true, wasm: true } })
|
|
521
|
-
* ```
|
|
522
|
-
*/
|
|
523
|
-
wasm?: boolean;
|
|
723
|
+
workerPool?: WorkerPool;
|
|
524
724
|
/**
|
|
525
725
|
* Worker communication strategy.
|
|
526
|
-
* Only applicable when `worker: true`.
|
|
527
726
|
*
|
|
528
727
|
* - `"message-streaming"` (default): Message-based streaming
|
|
529
728
|
* - ✅ All browsers including Safari
|
|
@@ -563,7 +762,6 @@ export interface EngineConfig {
|
|
|
563
762
|
workerStrategy?: WorkerCommunicationStrategy;
|
|
564
763
|
/**
|
|
565
764
|
* Strict mode: disable automatic fallback.
|
|
566
|
-
* Only applicable when `workerStrategy: "stream-transfer"`.
|
|
567
765
|
*
|
|
568
766
|
* When enabled:
|
|
569
767
|
* - Throws error immediately if stream transfer fails
|
|
@@ -621,6 +819,15 @@ export interface EngineConfig {
|
|
|
621
819
|
*/
|
|
622
820
|
onFallback?: (info: EngineFallbackInfo) => void;
|
|
623
821
|
}
|
|
822
|
+
/**
|
|
823
|
+
* Engine configuration for CSV parsing.
|
|
824
|
+
*
|
|
825
|
+
* All parsing engine settings are unified in this type.
|
|
826
|
+
* Use discriminated union to ensure type-safe configuration based on worker mode.
|
|
827
|
+
*
|
|
828
|
+
* @category Types
|
|
829
|
+
*/
|
|
830
|
+
export type EngineConfig = MainThreadEngineConfig | WorkerEngineConfig;
|
|
624
831
|
/**
|
|
625
832
|
* Engine configuration options.
|
|
626
833
|
*
|
|
@@ -689,41 +896,17 @@ export type CSVRecord<Header extends ReadonlyArray<string>> = Record<Header[numb
|
|
|
689
896
|
*
|
|
690
897
|
* @category Types
|
|
691
898
|
*/
|
|
692
|
-
export type CSVString<Header extends ReadonlyArray<string> = [], Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> = Header extends readonly [string, ...string[]] ?
|
|
899
|
+
export type CSVString<Header extends ReadonlyArray<string> = [], Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> = Header extends readonly [string, ...string[]] ? JoinCSVFields<Header, Delimiter, Quotation> | ReadableStream<JoinCSVFields<Header, Delimiter, Quotation>> : string | ReadableStream<string>;
|
|
693
900
|
/**
|
|
694
901
|
* CSV Binary.
|
|
695
902
|
*
|
|
696
903
|
* @category Types
|
|
697
904
|
*/
|
|
698
905
|
export type CSVBinary = ReadableStream<Uint8Array> | Response | Request | Blob | ArrayBuffer | Uint8Array;
|
|
699
|
-
/**
|
|
700
|
-
* Backpressure monitoring options.
|
|
701
|
-
*
|
|
702
|
-
* @category Types
|
|
703
|
-
*/
|
|
704
|
-
export interface BackpressureOptions {
|
|
705
|
-
/**
|
|
706
|
-
* How often to check for backpressure (in number of items processed).
|
|
707
|
-
*
|
|
708
|
-
* Lower values = more responsive to backpressure but slight performance overhead.
|
|
709
|
-
* Higher values = less overhead but slower backpressure response.
|
|
710
|
-
*
|
|
711
|
-
* Default:
|
|
712
|
-
* - CSVLexerTransformer: 100 tokens
|
|
713
|
-
* - CSVRecordAssemblerTransformer: 10 records
|
|
714
|
-
*/
|
|
715
|
-
checkInterval?: number;
|
|
716
|
-
}
|
|
717
|
-
/**
|
|
718
|
-
* Extended queuing strategy with backpressure monitoring options.
|
|
719
|
-
*
|
|
720
|
-
* @category Types
|
|
721
|
-
*/
|
|
722
|
-
export interface ExtendedQueuingStrategy<T> extends QueuingStrategy<T>, BackpressureOptions {
|
|
723
|
-
}
|
|
724
906
|
/**
|
|
725
907
|
* CSV.
|
|
726
908
|
*
|
|
727
909
|
* @category Types
|
|
728
910
|
*/
|
|
729
911
|
export type CSV<Header extends ReadonlyArray<string> = [], Delimiter extends string = DEFAULT_DELIMITER, Quotation extends string = DEFAULT_QUOTATION> = Header extends [] ? CSVString | CSVBinary : CSVString<Header, Delimiter, Quotation>;
|
|
912
|
+
export {};
|
package/dist/constants.d.ts
CHANGED
|
@@ -17,3 +17,15 @@ export declare const DEFAULT_DELIMITER = ",";
|
|
|
17
17
|
export type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;
|
|
18
18
|
export declare const DEFAULT_QUOTATION = "\"";
|
|
19
19
|
export type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;
|
|
20
|
+
/**
|
|
21
|
+
* Default threshold (in bytes) for Blob reading strategy.
|
|
22
|
+
*
|
|
23
|
+
* Files smaller than this use `blob.arrayBuffer()` (faster),
|
|
24
|
+
* files equal or larger use `blob.stream()` (memory-efficient).
|
|
25
|
+
*
|
|
26
|
+
* This value is determined by benchmarks.
|
|
27
|
+
*
|
|
28
|
+
* @category Constants
|
|
29
|
+
*/
|
|
30
|
+
export declare const DEFAULT_ARRAY_BUFFER_THRESHOLD = 1048576;
|
|
31
|
+
export type DEFAULT_ARRAY_BUFFER_THRESHOLD = typeof DEFAULT_ARRAY_BUFFER_THRESHOLD;
|
package/dist/constants.js
CHANGED
|
@@ -5,6 +5,7 @@ const COMMA = ",";
|
|
|
5
5
|
const DOUBLE_QUOTE = '"';
|
|
6
6
|
const DEFAULT_DELIMITER = COMMA;
|
|
7
7
|
const DEFAULT_QUOTATION = DOUBLE_QUOTE;
|
|
8
|
+
const DEFAULT_ARRAY_BUFFER_THRESHOLD = 1048576;
|
|
8
9
|
|
|
9
|
-
export { COMMA, CR, CRLF, DEFAULT_DELIMITER, DEFAULT_QUOTATION, DOUBLE_QUOTE, LF };
|
|
10
|
+
export { COMMA, CR, CRLF, DEFAULT_ARRAY_BUFFER_THRESHOLD, DEFAULT_DELIMITER, DEFAULT_QUOTATION, DOUBLE_QUOTE, LF };
|
|
10
11
|
//# sourceMappingURL=constants.js.map
|
package/dist/constants.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.js","sources":["../src/constants.ts"],"sourcesContent":["export const CR = \"\\r\";\nexport type CR = typeof CR;\n\nexport const CRLF = \"\\r\\n\";\nexport type CRLF = typeof CRLF;\n\nexport const LF = \"\\n\";\nexport type LF = typeof LF;\n\nexport type Newline = CRLF | CR | LF;\n\n/**\n * COMMA is a symbol for comma(,).\n */\nexport const COMMA = \",\";\n\n/**\n * DOUBLE_QUOTE is a symbol for double quote(\").\n */\nexport const DOUBLE_QUOTE = '\"';\n\nexport const DEFAULT_DELIMITER = COMMA;\nexport type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;\n\nexport const DEFAULT_QUOTATION = DOUBLE_QUOTE;\nexport type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;\n"],"names":[],"mappings":"AAAO,MAAM,EAAA,GAAK;AAGX,MAAM,IAAA,GAAO;AAGb,MAAM,EAAA,GAAK;AAQX,MAAM,KAAA,GAAQ;AAKd,MAAM,YAAA,GAAe;AAErB,MAAM,iBAAA,GAAoB;AAG1B,MAAM,iBAAA,GAAoB;;;;"}
|
|
1
|
+
{"version":3,"file":"constants.js","sources":["../src/constants.ts"],"sourcesContent":["export const CR = \"\\r\";\nexport type CR = typeof CR;\n\nexport const CRLF = \"\\r\\n\";\nexport type CRLF = typeof CRLF;\n\nexport const LF = \"\\n\";\nexport type LF = typeof LF;\n\nexport type Newline = CRLF | CR | LF;\n\n/**\n * COMMA is a symbol for comma(,).\n */\nexport const COMMA = \",\";\n\n/**\n * DOUBLE_QUOTE is a symbol for double quote(\").\n */\nexport const DOUBLE_QUOTE = '\"';\n\nexport const DEFAULT_DELIMITER = COMMA;\nexport type DEFAULT_DELIMITER = typeof DEFAULT_DELIMITER;\n\nexport const DEFAULT_QUOTATION = DOUBLE_QUOTE;\nexport type DEFAULT_QUOTATION = typeof DEFAULT_QUOTATION;\n\n/**\n * Default threshold (in bytes) for Blob reading strategy.\n *\n * Files smaller than this use `blob.arrayBuffer()` (faster),\n * files equal or larger use `blob.stream()` (memory-efficient).\n *\n * This value is determined by benchmarks.\n *\n * @category Constants\n */\nexport const DEFAULT_ARRAY_BUFFER_THRESHOLD = 1048576; // 1MB\nexport type DEFAULT_ARRAY_BUFFER_THRESHOLD =\n typeof DEFAULT_ARRAY_BUFFER_THRESHOLD;\n"],"names":[],"mappings":"AAAO,MAAM,EAAA,GAAK;AAGX,MAAM,IAAA,GAAO;AAGb,MAAM,EAAA,GAAK;AAQX,MAAM,KAAA,GAAQ;AAKd,MAAM,YAAA,GAAe;AAErB,MAAM,iBAAA,GAAoB;AAG1B,MAAM,iBAAA,GAAoB;AAa1B,MAAM,8BAAA,GAAiC;;;;"}
|