web-csv-toolbox 0.0.1 → 0.0.2

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
package/lib/index.d.ts ADDED
@@ -0,0 +1,337 @@
+ /**
+  * FieldDelimiter is a symbol for the field delimiter of CSV.
+  */
+ declare const FieldDelimiter: unique symbol;
+ /**
+  * RecordDelimiter is a symbol for the record delimiter of CSV.
+  */
+ declare const RecordDelimiter: unique symbol;
+ /**
+  * Field is a symbol for a field of CSV.
+  */
+ declare const Field: unique symbol;
+
+ /**
+  * Token is an atomic unit of a CSV file.
+  * It can be a field, field delimiter, or record delimiter.
+  *
+  * @example
+  * ```ts
+  * const fieldToken: Token = { type: Field, value: "foo" };
+  * const fieldDelimiterToken: Token = { type: FieldDelimiter, value: "," };
+  * const recordDelimiterToken: Token = { type: RecordDelimiter, value: "\n" };
+  * ```
+  */
+ interface Token<T extends TokenType = TokenType> {
+   type: T;
+   value: string;
+ }
+ /**
+  * Type of a token for CSV.
+  */
+ type TokenType = typeof FieldDelimiter | typeof RecordDelimiter | typeof Field;
+ /**
+  * CSV Common Options.
+  */
+ interface CommonOptions {
+   /**
+    * CSV field delimiter.
+    *
+    * @remarks
+    * If you want to parse TSV, specify `'\t'`.
+    *
+    * This library supports multi-character delimiters.
+    * @default ','
+    */
+   demiliter?: string;
+   /**
+    * CSV field quotation.
+    *
+    * @remarks
+    * This library supports multi-character quotations.
+    *
+    * @default '"'
+    */
+   quotation?: string;
+ }
+ /**
+  * CSV Parsing Options for binary.
+  */
+ interface BinaryOptions {
+   /**
+    * If the binary is compressed with a compression algorithm,
+    * the CSV can be parsed by specifying the algorithm.
+    *
+    * @remarks
+    * Make sure your runtime supports stream decompression.
+    * See {@link https://developer.mozilla.org/en-US/docs/Web/API/DecompressionStream#browser_compatibility | DecompressionStream Compatibility}.
+    */
+   decomposition?: CompressionFormat;
+   /**
+    * You can specify the character encoding of the binary.
+    *
+    * @remarks
+    * {@link TextDecoderStream} is used internally.
+    * See {@link https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings | Encoding API Compatibility}
+    * for the encoding formats that can be specified.
+    *
+    * @default 'utf-8'
+    */
+   charset?: string;
+   /**
+    * If the binary has a BOM, you can specify whether to ignore it.
+    *
+    * @remarks
+    * If you specify true, the BOM will be ignored.
+    * If you specify false or omit it, the BOM will be treated as a normal character.
+    * See {@link https://developer.mozilla.org/en-US/docs/Web/API/TextDecoderStream/ignoreBOM | TextDecoderOptions.ignoreBOM} for more information about the BOM.
+    * @default false
+    */
+   ignoreBOM?: boolean;
+   /**
+    * If the binary has an invalid character, you can specify whether to throw an error.
+    *
+    * @remarks
+    * If you specify true, an error will be thrown.
+    * If you specify false or omit it, the invalid character will be replaced with `U+FFFD`.
+    * See {@link https://developer.mozilla.org/en-US/docs/Web/API/TextDecoderStream/fatal | TextDecoderOptions.fatal} for more information about invalid characters.
+    */
+   fatal?: boolean;
+ }
+ /**
+  * Record Assembler Options for CSV.
+  *
+  * @remarks
+  * If you specify `header: ['foo', 'bar']`,
+  * the first record will be treated as a normal record.
+  *
+  * If you don't specify `header`,
+  * the first record will be treated as a header.
+  */
+ interface RecordAssemblerOptions<Header extends ReadonlyArray<string>> {
+   /**
+    * CSV header.
+    *
+    * @remarks
+    * If you specify this option,
+    * the first record will be treated as a normal record.
+    *
+    * If you don't specify this option,
+    * the first record will be treated as a header.
+    *
+    * @default undefined
+    */
+   header?: Header;
+ }
+ /**
+  * Parse options for CSV string.
+  */
+ interface ParseOptions<Header extends ReadonlyArray<string>>
+   extends CommonOptions,
+     RecordAssemblerOptions<Header> {}
+ /**
+  * Parse options for CSV binary.
+  */
+ interface ParseBinaryOptions<Header extends ReadonlyArray<string>>
+   extends ParseOptions<Header>,
+     BinaryOptions {}
+ /**
+  * CSV Record.
+  * @template Header Header of the CSV.
+  *
+  * @example Header is `["foo", "bar"]`
+  * ```ts
+  * const record: CSVRecord<["foo", "bar"]> = {
+  *   foo: "1",
+  *   bar: "2",
+  * };
+  * ```
+  */
+ type CSVRecord<Header extends ReadonlyArray<string>> = Record<
+   Header[number],
+   string
+ >;
+
+ /**
+  * A transform stream that converts a stream of strings into a stream of tokens.
+  *
+  * @example Parse a CSV with headers in the data
+  * ```ts
+  * new ReadableStream({
+  *   start(controller) {
+  *     controller.enqueue("name,age\r\n");
+  *     controller.enqueue("Alice,20\r\n");
+  *     controller.close();
+  *   }
+  * })
+  *   .pipeThrough(new LexerTransformer())
+  *   .pipeTo(new WritableStream({ write(token) { console.log(token); }}));
+  * // { type: Field, value: "name" }
+  * // { type: FieldDelimiter, value: "," }
+  * // { type: Field, value: "age" }
+  * // { type: RecordDelimiter, value: "\r\n" }
+  * // { type: Field, value: "Alice" }
+  * // { type: FieldDelimiter, value: "," }
+  * // { type: Field, value: "20" }
+  * // { type: RecordDelimiter, value: "\r\n" }
+  * ```
+  */
+ declare class LexerTransformer extends TransformStream<string, Token> {
+   #private;
+   get demiliter(): string;
+   get quotation(): string;
+   constructor({ demiliter, quotation }?: CommonOptions);
+   private extractQuotedString;
+ }
+
+ /**
+  * A transform stream that converts a stream of tokens into a stream of records.
+  * @template Header The type of the header row.
+  * @param options The options for the parser.
+  *
+  * @example Parse a CSV with headers in the data
+  * ```ts
+  * new ReadableStream({
+  *   start(controller) {
+  *     controller.enqueue("name,age\r\n");
+  *     controller.enqueue("Alice,20\r\n");
+  *     controller.enqueue("Bob,25\r\n");
+  *     controller.enqueue("Charlie,30\r\n");
+  *     controller.close();
+  *   }
+  * })
+  *   .pipeThrough(new LexerTransformer())
+  *   .pipeThrough(new RecordAssemblerTransformar())
+  *   .pipeTo(new WritableStream({ write(row) { console.log(row); }}));
+  * // { name: "Alice", age: "20" }
+  * // { name: "Bob", age: "25" }
+  * // { name: "Charlie", age: "30" }
+  * ```
+  *
+  * @example Parse a CSV with headers via options
+  * ```ts
+  * new ReadableStream({
+  *   start(controller) {
+  *     controller.enqueue("Alice,20\r\n");
+  *     controller.enqueue("Bob,25\r\n");
+  *     controller.enqueue("Charlie,30\r\n");
+  *     controller.close();
+  *   }
+  * })
+  *   .pipeThrough(new LexerTransformer())
+  *   .pipeThrough(new RecordAssemblerTransformar({ header: ["name", "age"] }))
+  *   .pipeTo(new WritableStream({ write(row) { console.log(row); }}));
+  * // { name: "Alice", age: "20" }
+  * // { name: "Bob", age: "25" }
+  * // { name: "Charlie", age: "30" }
+  * ```
+  */
+ declare class RecordAssemblerTransformar<
+   Header extends ReadonlyArray<string>,
+ > extends TransformStream<Token, Record<Header[number], string | undefined>> {
+   #private;
+   constructor(options?: RecordAssemblerOptions<Header>);
+ }
+
+ /**
+  * Parse CSV string to records.
+  *
+  * @param csv CSV string to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ declare function streamingParse<Header extends ReadonlyArray<string>>(
+   csv: string,
+   options?: ParseOptions<Header>,
+ ): AsyncIterableIterator<CSVRecord<Header>>;
+ declare namespace streamingParse {
+   function toArray<Header extends ReadonlyArray<string>>(
+     stream: ReadableStream<Uint8Array>,
+     options?: ParseOptions<Header>,
+   ): Promise<CSVRecord<Header>[]>;
+ }
+
+ /**
+  * Parse CSV to records.
+  * This function is for parsing a binary stream.
+  *
+  * @remarks
+  * If you want to parse a string, use {@link streamingParse}.
+  * @param stream CSV binary stream to parse
+  * @param options Parsing options. See {@link ParseBinaryOptions}.
+  */
+ declare function parseBinaryStream<Header extends ReadonlyArray<string>>(
+   stream: ReadableStream<Uint8Array>,
+   options?: ParseBinaryOptions<Header>,
+ ): AsyncIterableIterator<CSVRecord<Header>>;
+ declare namespace parseBinaryStream {
+   function toArray<Header extends ReadonlyArray<string>>(
+     stream: ReadableStream<Uint8Array>,
+     options?: ParseBinaryOptions<Header>,
+   ): Promise<CSVRecord<Header>[]>;
+ }
+
+ /**
+  * Parse a CSV string stream to records.
+  *
+  * @param stream CSV string stream to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ declare function parseStringStream<Header extends ReadonlyArray<string>>(
+   stream: ReadableStream<string>,
+   options?: ParseOptions<Header>,
+ ): AsyncIterableIterator<CSVRecord<Header>>;
+ declare namespace parseStringStream {
+   function toArray<Header extends ReadonlyArray<string>>(
+     stream: ReadableStream<Uint8Array>,
+     options?: ParseOptions<Header>,
+   ): Promise<CSVRecord<Header>[]>;
+ }
+
+ /**
+  * Parse CSV to records.
+  *
+  * {@link String}, {@link Uint8Array}, ReadableStream<string | Uint8Array> and Response are supported.
+  *
+  * @remarks
+  * {@link streamingParse}, {@link parseBinaryStream},
+  * {@link parseStringStream} and {@link parseResponse} are used internally.
+  * If you know the type of the input, using them directly performs better.
+  *
+  * If you want to parse a string, use {@link streamingParse}.
+  * If you want to parse a Uint8Array, use {@link parseStream}.
+  * If you want to parse a ReadableStream<string>, use {@link parseStringStream}.
+  * If you want to parse a ReadableStream<Uint8Array>, use {@link parseBinaryStream}.
+  * If you want to parse a Response, use {@link parseResponse}.
+  *
+  * @param csv CSV to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ declare function parse<Header extends ReadonlyArray<string>>(
+   csv: string | ReadableStream<Uint8Array | string> | Response,
+   options?: ParseOptions<Header>,
+ ): AsyncIterableIterator<CSVRecord<Header>>;
+ declare namespace parse {
+   function toArray<Header extends ReadonlyArray<string>>(
+     csv: string | ReadableStream<string | Uint8Array> | Response,
+     options?: ParseOptions<Header>,
+   ): Promise<CSVRecord<Header>[]>;
+ }
+
+ export {
+   type BinaryOptions,
+   type CSVRecord,
+   type CommonOptions,
+   Field,
+   FieldDelimiter,
+   LexerTransformer,
+   type ParseBinaryOptions,
+   type ParseOptions,
+   type RecordAssemblerOptions,
+   RecordAssemblerTransformar,
+   RecordDelimiter,
+   type Token,
+   type TokenType,
+   parse,
+   parseBinaryStream,
+   parseStringStream,
+   streamingParse,
+ };
package/lib/index.js ADDED
@@ -0,0 +1,545 @@
+ /**
+  * FieldDelimiter is a symbol for the field delimiter of CSV.
+  */
+ const FieldDelimiter = Symbol.for("web-streams-csv.FieldDelimiter");
+ /**
+  * RecordDelimiter is a symbol for the record delimiter of CSV.
+  */
+ const RecordDelimiter = Symbol.for("web-streams-csv.RecordDelimiter");
+ /**
+  * Field is a symbol for a field of CSV.
+  */
+ const Field = Symbol.for("web-streams-csv.Field");
+
+ const CR = "\r";
+ const CRLF = "\r\n";
+ const LF = "\n";
+ /**
+  * COMMA is the comma character (,).
+  */
+ const COMMA = ",";
+ /**
+  * DOUBLE_QUATE is the double-quote character (").
+  */
+ const DOUBLE_QUATE = '"';
+
+ /**
+  * Assert that the options are valid.
+  *
+  * @param options The options to assert.
+  */
+ function assertCommonOptions(options) {
+   if (typeof options.quotation === "string" && options.quotation.length === 0) {
+     throw new Error("quotation must not be empty");
+   }
+   if (typeof options.demiliter === "string" && options.demiliter.length === 0) {
+     throw new Error("demiliter must not be empty");
+   }
+   if (options.quotation.includes(LF) || options.quotation.includes(CR)) {
+     throw new Error("quotation must not include CR or LF");
+   }
+   if (options.demiliter.includes(LF) || options.demiliter.includes(CR)) {
+     throw new Error("demiliter must not include CR or LF");
+   }
+   if (
+     options.demiliter.includes(options.quotation) ||
+     options.quotation.includes(options.demiliter)
+   ) {
+     throw new Error(
+       "demiliter and quotation must not include each other as a substring",
+     );
+   }
+ }
+
+ /**
+  * Escape a string for use in a regular expression.
+  *
+  * @see {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping | Regular expressions: Escaping (MDN)}
+  * @param v string to escape
+  * @returns escaped string
+  */
+ function escapeRegExp(v) {
+   return v.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+ }
+
+ /**
+  * A transform stream that converts a stream of strings into a stream of tokens.
+  *
+  * @example Parse a CSV with headers in the data
+  * ```ts
+  * new ReadableStream({
+  *   start(controller) {
+  *     controller.enqueue("name,age\r\n");
+  *     controller.enqueue("Alice,20\r\n");
+  *     controller.close();
+  *   }
+  * })
+  *   .pipeThrough(new LexerTransformer())
+  *   .pipeTo(new WritableStream({ write(token) { console.log(token); }}));
+  * // { type: Field, value: "name" }
+  * // { type: FieldDelimiter, value: "," }
+  * // { type: Field, value: "age" }
+  * // { type: RecordDelimiter, value: "\r\n" }
+  * // { type: Field, value: "Alice" }
+  * // { type: FieldDelimiter, value: "," }
+  * // { type: Field, value: "20" }
+  * // { type: RecordDelimiter, value: "\r\n" }
+  * ```
+  */
+ class LexerTransformer extends TransformStream {
+   #demiliter;
+   #demiliterLength;
+   #quotation;
+   #quotationLength;
+   #matcher;
+   #buffer = "";
+   get demiliter() {
+     return this.#demiliter;
+   }
+   get quotation() {
+     return this.#quotation;
+   }
+   constructor({ demiliter = COMMA, quotation = DOUBLE_QUATE } = {}) {
+     assertCommonOptions({ demiliter, quotation });
+     super({
+       transform: (chunk, controller) => {
+         if (chunk.length !== 0) {
+           this.#buffer += chunk;
+           for (const token of this.#tokens({ flush: false })) {
+             controller.enqueue(token);
+           }
+         }
+       },
+       flush: (controller) => {
+         for (const token of this.#tokens({ flush: true })) {
+           controller.enqueue(token);
+         }
+       },
+     });
+     this.#demiliter = demiliter;
+     this.#demiliterLength = demiliter.length;
+     this.#quotation = quotation;
+     this.#quotationLength = quotation.length;
+     const d = escapeRegExp(demiliter);
+     const q = escapeRegExp(quotation);
+     this.#matcher = new RegExp(
+       `^(?:(?!${q})(?!${d})(?![\\r\\n]))([\\S\\s\\uFEFF\\xA0]+?)(?=${q}|${d}|\\r|\\n|$)`,
+     );
+   }
+   *#tokens({ flush }) {
+     let currentField = null;
+     for (let token; (token = this.#nextToken({ flush })); ) {
+       switch (token.type) {
+         case Field:
+           if (currentField) {
+             currentField.value += token.value;
+           } else {
+             currentField = token;
+           }
+           break;
+         case FieldDelimiter:
+           if (currentField) {
+             yield currentField;
+             currentField = null;
+           }
+           yield token;
+           break;
+         case RecordDelimiter:
+           if (currentField) {
+             yield currentField;
+             currentField = null;
+           }
+           yield token;
+           break;
+       }
+     }
+     if (currentField) {
+       yield currentField;
+     }
+   }
+   #nextToken({ flush = false } = {}) {
+     if (this.#buffer.length === 0) {
+       return null;
+     }
+     // Check for CRLF
+     if (this.#buffer.startsWith(CRLF)) {
+       this.#buffer = this.#buffer.slice(2);
+       return { type: RecordDelimiter, value: CRLF };
+     }
+     // Check for LF
+     if (this.#buffer.startsWith(LF)) {
+       this.#buffer = this.#buffer.slice(1);
+       return { type: RecordDelimiter, value: LF };
+     }
+     // Check for Delimiter
+     if (this.#buffer.startsWith(this.#demiliter)) {
+       this.#buffer = this.#buffer.slice(this.#demiliterLength);
+       return { type: FieldDelimiter, value: this.#demiliter };
+     }
+     // Check for Quoted String
+     if (this.#buffer.startsWith(this.#quotation)) {
+       // If we're not flushing and the buffer ends with the quotation, the
+       // closing quote may be half of an escaped quote pair, so wait for more data.
+       if (flush === false && this.#buffer.endsWith(this.#quotation)) {
+         return null;
+       }
+       return this.extractQuotedString();
+     }
+     // Check for Unquoted String
+     const match = this.#matcher.exec(this.#buffer);
+     if (match) {
+       // If we're not flushing and the match consumes the entire buffer,
+       // the field may continue in the next chunk, so wait for more data.
+       if (flush === false && match[0].length === this.#buffer.length) {
+         return null;
+       }
+       this.#buffer = this.#buffer.slice(match[0].length);
+       return { type: Field, value: match[0] };
+     }
+     // Otherwise, return null
+     return null;
+   }
+   extractQuotedString() {
+     let end = this.#quotationLength; // Skip the opening quote
+     let value = "";
+     while (end < this.#buffer.length) {
+       // Escaped quote
+       if (
+         this.#buffer.slice(end, end + this.#quotationLength) ===
+           this.quotation &&
+         this.#buffer.slice(
+           end + this.#quotationLength,
+           end + this.#quotationLength * 2,
+         ) === this.quotation
+       ) {
+         value += this.quotation;
+         end += this.#quotationLength * 2;
+         continue;
+       }
+       // Closing quote
+       if (
+         this.#buffer.slice(end, end + this.#quotationLength) === this.quotation
+       ) {
+         this.#buffer = this.#buffer.slice(end + this.#quotationLength);
+         return { type: Field, value };
+       }
+       value += this.#buffer[end];
+       end++;
+     }
+     // If we get here, we've reached the end of the buffer without
+     // finding a closing quote, so wait for more data.
+     return null;
+   }
+ }
+
+ /**
+  * A transform stream that converts a stream of tokens into a stream of records.
+  * @template Header The type of the header row.
+  * @param options The options for the parser.
+  *
+  * @example Parse a CSV with headers in the data
+  * ```ts
+  * new ReadableStream({
+  *   start(controller) {
+  *     controller.enqueue("name,age\r\n");
+  *     controller.enqueue("Alice,20\r\n");
+  *     controller.enqueue("Bob,25\r\n");
+  *     controller.enqueue("Charlie,30\r\n");
+  *     controller.close();
+  *   }
+  * })
+  *   .pipeThrough(new LexerTransformer())
+  *   .pipeThrough(new RecordAssemblerTransformar())
+  *   .pipeTo(new WritableStream({ write(row) { console.log(row); }}));
+  * // { name: "Alice", age: "20" }
+  * // { name: "Bob", age: "25" }
+  * // { name: "Charlie", age: "30" }
+  * ```
+  *
+  * @example Parse a CSV with headers via options
+  * ```ts
+  * new ReadableStream({
+  *   start(controller) {
+  *     controller.enqueue("Alice,20\r\n");
+  *     controller.enqueue("Bob,25\r\n");
+  *     controller.enqueue("Charlie,30\r\n");
+  *     controller.close();
+  *   }
+  * })
+  *   .pipeThrough(new LexerTransformer())
+  *   .pipeThrough(new RecordAssemblerTransformar({ header: ["name", "age"] }))
+  *   .pipeTo(new WritableStream({ write(row) { console.log(row); }}));
+  * // { name: "Alice", age: "20" }
+  * // { name: "Bob", age: "25" }
+  * // { name: "Charlie", age: "30" }
+  * ```
+  */
+ class RecordAssemblerTransformar extends TransformStream {
+   #fieldIndex = 0;
+   #row = [];
+   #header;
+   #darty = false;
+   constructor(options = {}) {
+     super({
+       transform: (token, controller) => {
+         switch (token.type) {
+           case Field:
+             this.#darty = true;
+             this.#row[this.#fieldIndex] = token.value;
+             break;
+           case FieldDelimiter:
+             this.#fieldIndex++;
+             break;
+           case RecordDelimiter:
+             if (this.#header === undefined) {
+               this.#setHeader(this.#row);
+             } else {
+               if (this.#darty) {
+                 const record = Object.fromEntries(
+                   this.#header
+                     .filter((v) => v)
+                     .map((header, index) => [header, this.#row.at(index)]),
+                 );
+                 controller.enqueue(record);
+               }
+             }
+             // Reset the row fields buffer.
+             this.#fieldIndex = 0;
+             this.#row = new Array(this.#header?.length);
+             this.#darty = false;
+             break;
+         }
+       },
+       flush: (controller) => {
+         if (this.#fieldIndex !== 0 && this.#header !== undefined) {
+           // Emit the final record if the stream ended without a record delimiter.
+           if (this.#darty) {
+             const record = Object.fromEntries(
+               this.#header
+                 .filter((v) => v)
+                 .map((header, index) => [header, this.#row.at(index)]),
+             );
+             controller.enqueue(record);
+           }
+         }
+       },
+     });
+     if (options.header !== undefined && Array.isArray(options.header)) {
+       this.#setHeader(options.header);
+     }
+   }
+   #setHeader(header) {
+     this.#header = header;
+     if (this.#header.length === 0) {
+       throw new Error("The header must not be empty.");
+     }
+     if (new Set(this.#header).size !== this.#header.length) {
+       throw new Error("The header must not contain duplicate fields.");
+     }
+   }
+ }
+
+ class SingleValueReadableStream extends ReadableStream {
+   constructor(value) {
+     super({
+       start(controller) {
+         controller.enqueue(value);
+         controller.close();
+       },
+     });
+   }
+ }
+
+ async function toArray(...args) {
+   const rows = [];
+   for await (const row of this(...args)) {
+     rows.push(row);
+   }
+   return rows;
+ }
+
+ /**
+  * Parse a CSV string stream to records.
+  *
+  * @param stream CSV string stream to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ async function* parseStringStream(stream, options) {
+   let controller;
+   const readable = new ReadableStream({
+     start: (controller_) => (controller = controller_),
+   });
+   await stream
+     .pipeThrough(new LexerTransformer(options))
+     .pipeThrough(new RecordAssemblerTransformar(options))
+     .pipeTo(
+       new WritableStream({
+         write: (row) => controller.enqueue(row),
+         close: () => controller.close(),
+       }),
+     );
+   const reader = readable.getReader();
+   try {
+     while (true) {
+       const { value, done } = await reader.read();
+       if (done) break;
+       yield value;
+     }
+   } finally {
+     reader.releaseLock();
+   }
+ }
+ (function (parseStringStream) {})(
+   parseStringStream || (parseStringStream = {}),
+ );
+ parseStringStream.toArray = toArray;
+
+ /**
+  * Parse CSV string to records.
+  *
+  * @param csv CSV string to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ async function* streamingParse(csv, options) {
+   yield* parseStringStream(new SingleValueReadableStream(csv), options);
+ }
+ (function (streamingParse) {})(streamingParse || (streamingParse = {}));
+ streamingParse.toArray = toArray;
+
+ /**
+  * Parse CSV to records.
+  * This function is for parsing a binary stream.
+  *
+  * @remarks
+  * If you want to parse a string, use {@link streamingParse}.
+  * @param stream CSV binary stream to parse
+  * @param options Parsing options. See {@link ParseBinaryOptions}.
+  */
+ async function* parseBinaryStream(stream, options) {
+   const { charset, fatal, ignoreBOM, decomposition } = options ?? {};
+   yield* parseStringStream(
+     [
+       // NOTE: if decompression is undefined, it will be ignored.
+       ...(decomposition ? [new DecompressionStream(decomposition)] : []),
+       // NOTE: if charset is undefined, it will be decoded as utf-8.
+       new TextDecoderStream(charset, { fatal, ignoreBOM }),
+     ].reduce((stream, transformer) => stream.pipeThrough(transformer), stream),
+     options,
+   );
+ }
+ (function (parseBinaryStream) {})(
+   parseBinaryStream || (parseBinaryStream = {}),
+ );
+ parseBinaryStream.toArray = toArray;
+
+ function parseMime(contentType) {
+   const [type, ...parameters] = contentType.split(";");
+   const result = {
+     type: type.trim(),
+     parameters: {},
+   };
+   for (const parameter of parameters) {
+     const [key, value] = parameter.split("=");
+     result.parameters[key.trim()] = value.trim();
+   }
+   return result;
+ }
+
+ function parseResponse(response, options) {
+   const { headers } = response;
+   const contentType = headers.get("content-type") ?? "text/csv";
+   const mime = parseMime(contentType);
+   if (mime.type !== "text/csv") {
+     throw new Error(`Invalid mime type: ${contentType}`);
+   }
+   const decomposition = headers.get("content-encoding") ?? undefined;
+   const charset = mime.parameters.charset ?? "utf-8";
+   // TODO: Support header=present and header=absent
+   // const header = mime.parameters.header ?? "present";
+   if (response.body === null) {
+     throw new Error("Response body is null");
+   }
+   return parseBinaryStream(response.body, {
+     decomposition,
+     charset,
+     ...options,
+   });
+ }
+ (function (parseResponse) {})(parseResponse || (parseResponse = {}));
+ parseResponse.toArray = toArray;
+
+ /**
+  * Parse a CSV stream to records.
+  * Streams of string and Uint8Array are supported.
+  *
+  * @remarks
+  * {@link parseStringStream} and {@link parseBinaryStream} are used internally.
+  * If you know the type of the stream, using them directly performs better.
+  *
+  * If you want to parse a stream of strings, use {@link parseStringStream}.
+  * If you want to parse a stream of Uint8Array, use {@link parseBinaryStream}.
+  *
+  * @param stream CSV stream to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ async function* parseStream(stream, options) {
+   const [branch1, branch2] = stream.tee();
+   const reader1 = branch1.getReader();
+   const { value: firstChunk } = await reader1.read();
+   reader1.releaseLock();
+   switch (true) {
+     case typeof firstChunk === "string":
+       yield* parseStringStream(branch2, options);
+       break;
+     case firstChunk instanceof Uint8Array:
+       yield* parseBinaryStream(branch2, options);
+       break;
+   }
+ }
+ (function (parseStream) {})(parseStream || (parseStream = {}));
+ parseStream.toArray = toArray;
+
+ /**
+  * Parse CSV to records.
+  *
+  * {@link String}, {@link Uint8Array}, ReadableStream<string | Uint8Array> and Response are supported.
+  *
+  * @remarks
+  * {@link streamingParse}, {@link parseBinaryStream},
+  * {@link parseStringStream} and {@link parseResponse} are used internally.
+  * If you know the type of the input, using them directly performs better.
+  *
+  * If you want to parse a string, use {@link streamingParse}.
+  * If you want to parse a Uint8Array, use {@link parseStream}.
+  * If you want to parse a ReadableStream<string>, use {@link parseStringStream}.
+  * If you want to parse a ReadableStream<Uint8Array>, use {@link parseBinaryStream}.
+  * If you want to parse a Response, use {@link parseResponse}.
+  *
+  * @param csv CSV to parse
+  * @param options Parsing options. See {@link ParseOptions}.
+  */
+ async function* parse(csv, options) {
+   switch (true) {
+     case typeof csv === "string":
+       yield* streamingParse(csv, options);
+       break;
+     case csv instanceof ReadableStream:
+       yield* parseStream(csv, options);
+       break;
+     case csv instanceof Response:
+       yield* parseResponse(csv, options);
+       break;
+   }
+ }
+ (function (parse) {})(parse || (parse = {}));
+ parse.toArray = toArray;
+
+ export {
+   Field,
+   FieldDelimiter,
+   LexerTransformer,
+   RecordAssemblerTransformar,
+   RecordDelimiter,
+   parse,
+   parseBinaryStream,
+   parseStringStream,
+   streamingParse,
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "web-csv-toolbox",
- "version": "0.0.1",
+ "version": "0.0.2",
  "description": "A CSV Toolbox utilizing Web Standard APIs.",
  "type": "module",
  "main": "lib/index.js",