jtcsv 2.2.7 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -1
- package/bin/jtcsv.js +891 -821
- package/bin/jtcsv.ts +2534 -0
- package/csv-to-json.js +168 -145
- package/dist/jtcsv-core.cjs.js +1407 -0
- package/dist/jtcsv-core.cjs.js.map +1 -0
- package/dist/jtcsv-core.esm.js +1379 -0
- package/dist/jtcsv-core.esm.js.map +1 -0
- package/dist/jtcsv-core.umd.js +1413 -0
- package/dist/jtcsv-core.umd.js.map +1 -0
- package/dist/jtcsv-full.cjs.js +1912 -0
- package/dist/jtcsv-full.cjs.js.map +1 -0
- package/dist/jtcsv-full.esm.js +1880 -0
- package/dist/jtcsv-full.esm.js.map +1 -0
- package/dist/jtcsv-full.umd.js +1918 -0
- package/dist/jtcsv-full.umd.js.map +1 -0
- package/dist/jtcsv-workers.esm.js +759 -0
- package/dist/jtcsv-workers.esm.js.map +1 -0
- package/dist/jtcsv-workers.umd.js +773 -0
- package/dist/jtcsv-workers.umd.js.map +1 -0
- package/dist/jtcsv.cjs.js +61 -19
- package/dist/jtcsv.cjs.js.map +1 -1
- package/dist/jtcsv.esm.js +61 -19
- package/dist/jtcsv.esm.js.map +1 -1
- package/dist/jtcsv.umd.js +61 -19
- package/dist/jtcsv.umd.js.map +1 -1
- package/errors.js +188 -2
- package/examples/advanced/conditional-transformations.js +446 -0
- package/examples/advanced/conditional-transformations.ts +446 -0
- package/examples/advanced/csv-parser.worker.js +89 -0
- package/examples/advanced/csv-parser.worker.ts +89 -0
- package/examples/advanced/nested-objects-example.js +306 -0
- package/examples/advanced/nested-objects-example.ts +306 -0
- package/examples/advanced/performance-optimization.js +504 -0
- package/examples/advanced/performance-optimization.ts +504 -0
- package/examples/advanced/run-demo-server.js +116 -0
- package/examples/advanced/run-demo-server.ts +116 -0
- package/examples/advanced/web-worker-usage.html +874 -0
- package/examples/async-multithreaded-example.ts +335 -0
- package/examples/cli-advanced-usage.md +288 -0
- package/examples/cli-batch-processing.ts +38 -0
- package/examples/cli-tool.js +0 -3
- package/examples/cli-tool.ts +183 -0
- package/examples/error-handling.js +21 -7
- package/examples/error-handling.ts +356 -0
- package/examples/express-api.js +0 -3
- package/examples/express-api.ts +164 -0
- package/examples/large-dataset-example.js +0 -3
- package/examples/large-dataset-example.ts +204 -0
- package/examples/ndjson-processing.js +1 -1
- package/examples/ndjson-processing.ts +456 -0
- package/examples/plugin-excel-exporter.js +3 -4
- package/examples/plugin-excel-exporter.ts +406 -0
- package/examples/react-integration.tsx +637 -0
- package/examples/schema-validation.ts +640 -0
- package/examples/simple-usage.js +254 -254
- package/examples/simple-usage.ts +194 -0
- package/examples/streaming-example.js +4 -5
- package/examples/streaming-example.ts +419 -0
- package/examples/web-workers-advanced.ts +28 -0
- package/index.d.ts +1 -3
- package/index.js +15 -1
- package/json-save.js +9 -3
- package/json-to-csv.js +168 -21
- package/package.json +69 -10
- package/plugins/express-middleware/README.md +21 -2
- package/plugins/express-middleware/example.js +3 -4
- package/plugins/express-middleware/example.ts +135 -0
- package/plugins/express-middleware/index.d.ts +1 -1
- package/plugins/express-middleware/index.js +270 -118
- package/plugins/express-middleware/index.ts +557 -0
- package/plugins/fastify-plugin/index.js +2 -4
- package/plugins/fastify-plugin/index.ts +443 -0
- package/plugins/hono/index.ts +226 -0
- package/plugins/nestjs/index.ts +201 -0
- package/plugins/nextjs-api/examples/ConverterComponent.tsx +386 -0
- package/plugins/nextjs-api/examples/api-convert.js +0 -2
- package/plugins/nextjs-api/examples/api-convert.ts +67 -0
- package/plugins/nextjs-api/index.tsx +339 -0
- package/plugins/nextjs-api/route.js +2 -3
- package/plugins/nextjs-api/route.ts +370 -0
- package/plugins/nuxt/index.ts +94 -0
- package/plugins/nuxt/runtime/composables/useJtcsv.ts +100 -0
- package/plugins/nuxt/runtime/plugin.ts +71 -0
- package/plugins/remix/index.js +1 -1
- package/plugins/remix/index.ts +260 -0
- package/plugins/sveltekit/index.js +1 -1
- package/plugins/sveltekit/index.ts +301 -0
- package/plugins/trpc/index.ts +267 -0
- package/src/browser/browser-functions.ts +402 -0
- package/src/browser/core.js +92 -0
- package/src/browser/core.ts +152 -0
- package/src/browser/csv-to-json-browser.d.ts +3 -0
- package/src/browser/csv-to-json-browser.js +36 -14
- package/src/browser/csv-to-json-browser.ts +264 -0
- package/src/browser/errors-browser.ts +303 -0
- package/src/browser/extensions/plugins.js +92 -0
- package/src/browser/extensions/plugins.ts +93 -0
- package/src/browser/extensions/workers.js +39 -0
- package/src/browser/extensions/workers.ts +39 -0
- package/src/browser/globals.d.ts +5 -0
- package/src/browser/index.ts +192 -0
- package/src/browser/json-to-csv-browser.d.ts +3 -0
- package/src/browser/json-to-csv-browser.js +13 -3
- package/src/browser/json-to-csv-browser.ts +262 -0
- package/src/browser/streams.js +12 -2
- package/src/browser/streams.ts +336 -0
- package/src/browser/workers/csv-parser.worker.ts +377 -0
- package/src/browser/workers/worker-pool.ts +548 -0
- package/src/core/delimiter-cache.js +22 -8
- package/src/core/delimiter-cache.ts +310 -0
- package/src/core/node-optimizations.ts +449 -0
- package/src/core/plugin-system.js +29 -11
- package/src/core/plugin-system.ts +400 -0
- package/src/core/transform-hooks.ts +558 -0
- package/src/engines/fast-path-engine-new.ts +347 -0
- package/src/engines/fast-path-engine.ts +854 -0
- package/src/errors.ts +72 -0
- package/src/formats/ndjson-parser.ts +469 -0
- package/src/formats/tsv-parser.ts +334 -0
- package/src/index-with-plugins.js +16 -9
- package/src/index-with-plugins.ts +395 -0
- package/src/types/index.ts +255 -0
- package/src/utils/bom-utils.js +259 -0
- package/src/utils/bom-utils.ts +373 -0
- package/src/utils/encoding-support.js +124 -0
- package/src/utils/encoding-support.ts +155 -0
- package/src/utils/schema-validator.js +19 -19
- package/src/utils/schema-validator.ts +819 -0
- package/src/utils/transform-loader.js +1 -1
- package/src/utils/transform-loader.ts +389 -0
- package/src/utils/zod-adapter.js +170 -0
- package/src/utils/zod-adapter.ts +280 -0
- package/src/web-server/index.js +10 -10
- package/src/web-server/index.ts +683 -0
- package/src/workers/csv-multithreaded.ts +310 -0
- package/src/workers/csv-parser.worker.ts +227 -0
- package/src/workers/worker-pool.ts +409 -0
- package/stream-csv-to-json.js +26 -8
- package/stream-json-to-csv.js +1 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BOM (Byte Order Mark) Utilities for jtcsv
|
|
3
|
+
*
|
|
4
|
+
* Provides functions to detect and strip BOM characters from UTF-8, UTF-16 LE/BE,
|
|
5
|
+
* and UTF-32 encoded strings/buffers.
|
|
6
|
+
*
|
|
7
|
+
* @module bom-utils
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * BOM signatures for different encodings, keyed by encoding name.
 *
 * Order matters: detectBom() walks this object in insertion order, and the
 * UTF-16LE signature (FF FE) is a prefix of the UTF-32LE one (FF FE 00 00).
 * The 4-byte UTF-32 signatures are therefore listed before the 2-byte
 * UTF-16 ones so that a first-match scan reports UTF-32LE correctly.
 */
const BOM_SIGNATURES = {
  'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),
  'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
  'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF]),
  'utf-16le': Buffer.from([0xFF, 0xFE]),
  'utf-16be': Buffer.from([0xFE, 0xFF])
};
|
|
20
|
+
|
|
21
|
+
/**
 * Detects whether a buffer or string starts with a BOM.
 *
 * When several signatures match (FF FE 00 00 matches both UTF-16LE and
 * UTF-32LE), the longest one wins, independent of the key order of
 * BOM_SIGNATURES.
 *
 * @param {Buffer|string} input - Input to check for BOM
 * @returns {Object|null} Detection result or null if no BOM found
 * @property {string} encoding - Detected encoding ('utf-8', 'utf-16le', etc.)
 * @property {number} bomLength - Length of BOM in bytes
 * @property {boolean} hasBom - Always true on a returned result
 */
function detectBom(input) {
  if (!input) {
    return null;
  }

  let head;
  if (typeof input === 'string') {
    // Every UTF-16 code unit encodes to at least one UTF-8 byte, so the first
    // four code units fully determine the first four bytes. Encoding only
    // that prefix avoids copying the whole (possibly huge) CSV string.
    head = Buffer.from(input.slice(0, 4), 'utf8');
  } else if (Buffer.isBuffer(input)) {
    head = input;
  } else {
    return null;
  }

  let best = null;
  for (const [encoding, signature] of Object.entries(BOM_SIGNATURES)) {
    if (head.length < signature.length) {
      continue;
    }
    if (best !== null && best.bomLength >= signature.length) {
      continue; // an equally long or longer signature already matched
    }
    if (head.subarray(0, signature.length).equals(signature)) {
      best = { encoding, bomLength: signature.length, hasBom: true };
    }
  }

  return best;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Strips a leading BOM from a buffer or string.
 *
 * Strings: a JavaScript string can only carry a BOM as the single code unit
 * U+FEFF, so that code unit is removed directly. The string is not re-encoded,
 * which keeps ill-formed input (lone surrogates) intact and avoids copying.
 *
 * Buffers: the BOM bytes reported by detectBom() are skipped; the result is
 * a view on the same memory, not a copy.
 *
 * @param {Buffer|string} input - Input to strip BOM from
 * @returns {Buffer|string} Input without BOM (falsy and unsupported inputs
 *   are returned unchanged)
 */
function stripBom(input) {
  if (!input) {
    return input;
  }

  if (typeof input === 'string') {
    return input.charCodeAt(0) === 0xFEFF ? input.slice(1) : input;
  }

  if (Buffer.isBuffer(input)) {
    const bomInfo = detectBom(input);
    return bomInfo ? input.subarray(bomInfo.bomLength) : input;
  }

  return input;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Removes a leading BOM from a string without going through a Buffer.
 *
 * Two spellings are recognised, checked in this order, and at most one is
 * removed:
 *   1. the single code unit U+FEFF;
 *   2. the three characters U+00EF U+00BB U+00BF, i.e. the UTF-8 BOM bytes
 *      appearing as individual characters.
 *
 * @param {string} str - String to strip BOM from
 * @returns {string} String without BOM (non-string input is returned as is)
 */
function stripBomFromString(str) {
  if (typeof str !== 'string') {
    return str;
  }

  const bomSpellings = ['\uFEFF', '\u00EF\u00BB\u00BF'];
  for (const prefix of bomSpellings) {
    if (str.startsWith(prefix)) {
      return str.substring(prefix.length);
    }
  }

  return str;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Creates a transform stream that strips a leading BOM from incoming data.
 *
 * A BOM is up to 4 bytes long and the source may deliver those bytes in
 * several small chunks, so the first bytes are held back until at least
 * 4 bytes are available (or the stream ends). Only then is the BOM check run
 * on the combined head. After that, chunks pass through untouched.
 *
 * @returns {Transform} Transform stream
 */
function createBomStripStream() {
  const { Transform } = require('stream');
  const MAX_BOM_LENGTH = 4;

  let pending = [];        // chunks held back until the BOM check can run
  let pendingLength = 0;   // total byte length of `pending`
  let bomChecked = false;

  // Runs the one-time BOM check on the buffered head and emits the remainder.
  function releaseHead(target) {
    bomChecked = true;
    if (pendingLength === 0) {
      return;
    }
    const head = Buffer.concat(pending, pendingLength);
    pending = [];
    pendingLength = 0;

    const bomInfo = detectBom(head);
    const body = bomInfo ? head.subarray(bomInfo.bomLength) : head;
    if (body.length > 0) {
      target.push(body);
    }
  }

  return new Transform({
    transform(chunk, encoding, callback) {
      if (bomChecked) {
        callback(null, chunk);
        return;
      }

      pending.push(chunk);
      pendingLength += chunk.length;
      if (pendingLength >= MAX_BOM_LENGTH) {
        releaseHead(this);
      }
      callback();
    },

    // The stream ended before 4 bytes arrived: check whatever was buffered.
    flush(callback) {
      if (!bomChecked) {
        releaseHead(this);
      }
      callback();
    }
  });
}
|
|
152
|
+
|
|
153
|
+
/**
 * Reads a file and automatically handles a leading BOM.
 *
 * - The BOM bytes, if any, are removed from the returned data.
 * - If `options.encoding` is given, the content is decoded with it.
 * - Otherwise, if a BOM was found, the content is decoded according to the
 *   BOM. Node's Buffer has no 'utf-16be' / 'utf-32*' decoders, so those are
 *   decoded here explicitly instead of being passed to `toString()`.
 * - With neither a BOM nor `options.encoding`, the raw Buffer is returned.
 *
 * @param {string} filePath - Path to file
 * @param {Object} options - Read options
 * @param {string} [options.encoding] - Node encoding that overrides the BOM
 * @returns {Promise<{data: Buffer|string, encoding: string, hadBom: boolean, bomInfo: Object|null}>}
 */
async function readFileWithBomHandling(filePath, options = {}) {
  const fs = require('fs').promises;
  const buffer = await fs.readFile(filePath);

  const bomInfo = detectBom(buffer);
  const body = bomInfo ? buffer.subarray(bomInfo.bomLength) : buffer;

  // Decodes `bytes` (BOM already removed) according to a BOM encoding name.
  const decodeByBom = (bytes, bomEncoding) => {
    switch (bomEncoding) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        // swap16() works in place and needs an even length: use a trimmed copy.
        const evenLength = bytes.length - (bytes.length % 2);
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = bomEncoding === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian ? bytes.readUInt32LE(offset) : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF && (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  let data = body;
  let encoding = options.encoding || 'utf8';

  if (options.encoding) {
    data = body.toString(options.encoding);
  } else if (bomInfo) {
    encoding = bomInfo.encoding;
    data = decodeByBom(body, bomInfo.encoding);
  }

  return {
    data,
    encoding,
    hadBom: !!bomInfo,
    bomInfo: bomInfo || null
  };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Checks if a file has a BOM (synchronous).
 *
 * Only the first 4 bytes of the file are read. The file descriptor is closed
 * in a `finally` block so it is not leaked when the read throws.
 *
 * @param {string} filePath - Path to file
 * @returns {Object|null} BOM info or null (also null when the file holds
 *   fewer than 2 bytes)
 */
function fileHasBomSync(filePath) {
  const fs = require('fs');
  const MAX_BOM_LENGTH = 4;
  const head = Buffer.alloc(MAX_BOM_LENGTH);

  const fd = fs.openSync(filePath, 'r');
  let bytesRead;
  try {
    bytesRead = fs.readSync(fd, head, 0, MAX_BOM_LENGTH, 0);
  } finally {
    fs.closeSync(fd);
  }

  if (bytesRead < 2) {
    return null;
  }

  return detectBom(head.subarray(0, bytesRead));
}
|
|
214
|
+
|
|
215
|
+
/**
 * Normalizes CSV input by stripping the BOM, decoding buffers and (unless
 * disabled) converting CRLF / CR line endings to LF.
 *
 * Buffers that start with a BOM are decoded according to that BOM. Node's
 * Buffer cannot decode 'utf-16be' or 'utf-32*' via `toString()`, so those
 * encodings are decoded explicitly here. Buffers without a BOM are decoded
 * with `options.encoding` (default 'utf8').
 *
 * @param {string|Buffer} csvInput - CSV input
 * @param {Object} options - Processing options
 * @param {string} [options.encoding] - Encoding for buffers without a BOM
 * @param {boolean} [options.normalizeLineEndings] - Pass false to keep CR/CRLF
 * @returns {string} Normalized CSV string ('' for falsy input)
 * @throws {Error} If the input is neither a string nor a Buffer
 */
function normalizeCsvInput(csvInput, options = {}) {
  if (!csvInput) {
    return '';
  }

  // Decodes `bytes` (BOM already removed) according to a BOM encoding name.
  const decodeByBom = (bytes, bomEncoding) => {
    switch (bomEncoding) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        // swap16() works in place and needs an even length: use a trimmed copy.
        const evenLength = bytes.length - (bytes.length % 2);
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = bomEncoding === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian ? bytes.readUInt32LE(offset) : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF && (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  let normalized;

  if (Buffer.isBuffer(csvInput)) {
    const bomInfo = detectBom(csvInput);
    normalized = bomInfo
      ? decodeByBom(csvInput.subarray(bomInfo.bomLength), bomInfo.encoding)
      : csvInput.toString(options.encoding || 'utf8');
  } else if (typeof csvInput === 'string') {
    normalized = stripBomFromString(csvInput);
  } else {
    throw new Error('CSV input must be a string or Buffer');
  }

  // Ensure proper line endings
  if (options.normalizeLineEndings !== false) {
    normalized = normalized.replace(/\r\n|\r/g, '\n');
  }

  return normalized;
}
|
|
249
|
+
|
|
250
|
+
module.exports = {
|
|
251
|
+
detectBom,
|
|
252
|
+
stripBom,
|
|
253
|
+
stripBomFromString,
|
|
254
|
+
createBomStripStream,
|
|
255
|
+
readFileWithBomHandling,
|
|
256
|
+
fileHasBomSync,
|
|
257
|
+
normalizeCsvInput,
|
|
258
|
+
BOM_SIGNATURES
|
|
259
|
+
};
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BOM (Byte Order Mark) Utilities for jtcsv
|
|
3
|
+
*
|
|
4
|
+
* Provides functions to detect and strip BOM characters from UTF-8, UTF-16 LE/BE,
|
|
5
|
+
* and UTF-32 encoded strings/buffers.
|
|
6
|
+
*
|
|
7
|
+
* @module bom-utils
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { Transform } from 'stream';
|
|
11
|
+
import * as fs from 'fs';
|
|
12
|
+
import * as fsPromises from 'fs/promises';
|
|
13
|
+
|
|
14
|
+
/**
 * BOM signatures for different encodings, keyed by encoding name.
 *
 * Order matters: `detectBom` walks this object in insertion order, and the
 * UTF-16LE signature (FF FE) is a prefix of the UTF-32LE one (FF FE 00 00).
 * The 4-byte UTF-32 signatures are therefore listed before the 2-byte
 * UTF-16 ones so that a first-match scan reports UTF-32LE correctly.
 */
export const BOM_SIGNATURES = {
  'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),
  'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
  'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF]),
  'utf-16le': Buffer.from([0xFF, 0xFE]),
  'utf-16be': Buffer.from([0xFE, 0xFF])
} as const;
|
|
24
|
+
|
|
25
|
+
/** Name of an encoding that has an entry in `BOM_SIGNATURES`. */
export type Encoding = keyof typeof BOM_SIGNATURES;

/** Outcome of a successful BOM detection. */
export interface BomDetectionResult {
  /** Encoding announced by the BOM. */
  encoding: Encoding;
  /** Length of the BOM in bytes. */
  bomLength: number;
  /** Whether a BOM was found. */
  hasBom: boolean;
}

/** Result of reading a file with BOM handling. */
export interface ReadFileWithBomResult {
  /** File content without the BOM: decoded text, or the raw bytes. */
  data: Buffer | string;
  /** Encoding reported for `data`. */
  encoding: string;
  /** True when the file started with a BOM. */
  hadBom: boolean;
  /** Detection details, or null when no BOM was present. */
  bomInfo: BomDetectionResult | null;
}

/** Options accepted by the CSV input normalizers. */
export interface NormalizeCsvInputOptions {
  /** Encoding used to decode Buffer input. */
  encoding?: string;
  /** Set to false to keep CR / CRLF line endings untouched. */
  normalizeLineEndings?: boolean;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Detects whether a buffer or string starts with a BOM.
 *
 * When several signatures match (FF FE 00 00 matches both UTF-16LE and
 * UTF-32LE), the longest one wins, independent of the key order of
 * `BOM_SIGNATURES`.
 *
 * @param input - Input to check for BOM
 * @returns Detection result, or null if no BOM was found or the input is
 *   neither a string nor a Buffer
 */
export function detectBom(input: Buffer | string | null | undefined): BomDetectionResult | null {
  if (!input) {
    return null;
  }

  let head: Buffer;
  if (typeof input === 'string') {
    // Every UTF-16 code unit encodes to at least one UTF-8 byte, so the first
    // four code units fully determine the first four bytes. Encoding only
    // that prefix avoids copying the whole (possibly huge) CSV string.
    head = Buffer.from(input.slice(0, 4), 'utf8');
  } else if (Buffer.isBuffer(input)) {
    head = input;
  } else {
    return null;
  }

  let best: BomDetectionResult | null = null;
  for (const [encoding, signature] of Object.entries(BOM_SIGNATURES)) {
    if (head.length < signature.length) {
      continue;
    }
    if (best !== null && best.bomLength >= signature.length) {
      continue; // an equally long or longer signature already matched
    }
    if (head.subarray(0, signature.length).equals(signature)) {
      best = {
        encoding: encoding as Encoding,
        bomLength: signature.length,
        hasBom: true
      };
    }
  }

  return best;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Strips a leading BOM from a buffer or string.
 *
 * Strings: a JavaScript string can only carry a BOM as the single code unit
 * U+FEFF, so that code unit is removed directly. The string is not re-encoded,
 * which keeps ill-formed input (lone surrogates) intact and avoids copying.
 *
 * Buffers: the BOM bytes reported by `detectBom` are skipped; the result is
 * a view on the same memory, not a copy.
 *
 * @param input - Input to strip BOM from
 * @returns Input without BOM; falsy input (null, undefined, '') is returned
 *   unchanged
 */
export function stripBom(input: Buffer | string | null | undefined): Buffer | string {
  if (!input) {
    // Falsy input is handed back as is, as callers of this signature expect.
    return input as any;
  }

  if (typeof input === 'string') {
    return input.charCodeAt(0) === 0xFEFF ? input.slice(1) : input;
  }

  if (Buffer.isBuffer(input)) {
    const bomInfo = detectBom(input);
    return bomInfo ? input.subarray(bomInfo.bomLength) : input;
  }

  return input;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Removes a leading BOM from a string without going through a Buffer.
 *
 * Two spellings are recognised, checked in this order, and at most one is
 * removed:
 *   1. the single code unit U+FEFF;
 *   2. the three characters U+00EF U+00BB U+00BF, i.e. the UTF-8 BOM bytes
 *      appearing as individual characters.
 *
 * @param str - String to strip BOM from
 * @returns String without BOM (a non-string value is returned as is)
 */
export function stripBomFromString(str: string): string {
  if (typeof str !== 'string') {
    return str as any;
  }

  const bomSpellings = ['\uFEFF', '\u00EF\u00BB\u00BF'];
  for (const prefix of bomSpellings) {
    if (str.startsWith(prefix)) {
      return str.substring(prefix.length);
    }
  }

  return str;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Creates a transform stream that strips a leading BOM from incoming data.
 *
 * A BOM is up to 4 bytes long and the source may deliver those bytes in
 * several small chunks, so the first bytes are held back until at least
 * 4 bytes are available (or the stream ends). Only then is the BOM check run
 * on the combined head. After that, chunks pass through untouched.
 *
 * @returns Transform stream
 */
export function createBomStripStream(): Transform {
  const MAX_BOM_LENGTH = 4;

  const pending: Buffer[] = []; // chunks held back until the BOM check can run
  let pendingLength = 0;        // total byte length of `pending`
  let bomChecked = false;

  // Runs the one-time BOM check on the buffered head and emits the remainder.
  const releaseHead = (target: Transform): void => {
    bomChecked = true;
    if (pendingLength === 0) {
      return;
    }
    const head = Buffer.concat(pending, pendingLength);
    pending.length = 0;
    pendingLength = 0;

    const bomInfo = detectBom(head);
    const body = bomInfo ? head.subarray(bomInfo.bomLength) : head;
    if (body.length > 0) {
      target.push(body);
    }
  };

  return new Transform({
    transform(chunk: Buffer, encoding: string, callback: (error?: Error | null, data?: Buffer) => void) {
      if (bomChecked) {
        callback(null, chunk);
        return;
      }

      pending.push(chunk);
      pendingLength += chunk.length;
      if (pendingLength >= MAX_BOM_LENGTH) {
        releaseHead(this);
      }
      callback();
    },

    // The stream ended before 4 bytes arrived: check whatever was buffered.
    flush(callback: (error?: Error | null) => void) {
      if (!bomChecked) {
        releaseHead(this);
      }
      callback();
    }
  });
}
|
|
173
|
+
|
|
174
|
+
/**
 * Reads a file and automatically handles a leading BOM.
 *
 * - The BOM bytes, if any, are removed from the returned data.
 * - If `options.encoding` is given, the content is decoded with it.
 * - Otherwise, if a BOM was found, the content is decoded according to the
 *   BOM. Node's Buffer has no UTF-16BE or UTF-32 decoder, so those are decoded
 *   explicitly here (byte swap / per-code-point read). Decoding them as
 *   'utf16le' or 'utf8' would silently yield garbage.
 * - With neither a BOM nor `options.encoding`, the raw Buffer is returned.
 *
 * The reported `encoding` is the Node name where one exists ('utf8',
 * 'utf16le') and the BOM name otherwise ('utf-16be', 'utf-32le', 'utf-32be').
 *
 * @param filePath - Path to file
 * @param options - Read options; `encoding` overrides the BOM
 * @returns Promise with file data and BOM info
 */
export async function readFileWithBomHandling(
  filePath: string,
  options: { encoding?: BufferEncoding } = {}
): Promise<ReadFileWithBomResult> {
  const buffer = await fsPromises.readFile(filePath);

  const bomInfo = detectBom(buffer);
  const body = bomInfo ? buffer.subarray(bomInfo.bomLength) : buffer;

  // Decodes `bytes` (BOM already removed) according to a BOM encoding name.
  const decodeByBom = (bytes: Buffer, bomEncoding: Encoding): string => {
    switch (bomEncoding) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        // swap16() works in place and needs an even length: use a trimmed copy.
        const evenLength = bytes.length - (bytes.length % 2);
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = bomEncoding === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian ? bytes.readUInt32LE(offset) : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF && (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  let data: Buffer | string = body;
  let encoding: string = options.encoding || 'utf8';

  if (options.encoding) {
    data = body.toString(options.encoding);
  } else if (bomInfo) {
    data = decodeByBom(body, bomInfo.encoding);
    if (bomInfo.encoding === 'utf-8') {
      encoding = 'utf8';
    } else if (bomInfo.encoding === 'utf-16le') {
      encoding = 'utf16le';
    } else {
      encoding = bomInfo.encoding;
    }
  }

  return {
    data,
    encoding,
    hadBom: !!bomInfo,
    bomInfo: bomInfo || null
  };
}
|
|
226
|
+
|
|
227
|
+
/**
 * Checks if a file has a BOM (synchronous).
 *
 * Only the first 4 bytes of the file are read. The file descriptor is closed
 * in a `finally` block so it is not leaked when the read throws.
 *
 * @param filePath - Path to file
 * @returns BOM info, or null (also null when the file holds fewer than
 *   2 bytes)
 */
export function fileHasBomSync(filePath: string): BomDetectionResult | null {
  const MAX_BOM_LENGTH = 4;
  const head = Buffer.alloc(MAX_BOM_LENGTH);

  const fd = fs.openSync(filePath, 'r');
  let bytesRead: number;
  try {
    bytesRead = fs.readSync(fd, head, 0, MAX_BOM_LENGTH, 0);
  } finally {
    fs.closeSync(fd);
  }

  if (bytesRead < 2) {
    return null;
  }

  return detectBom(head.subarray(0, bytesRead));
}
|
|
245
|
+
|
|
246
|
+
/**
 * Normalizes CSV input by stripping the BOM, decoding buffers and (unless
 * disabled) converting CRLF / CR line endings to LF.
 *
 * Buffers that start with a BOM are decoded according to that BOM. Node's
 * Buffer cannot decode 'utf-16be' or 'utf-32*' via `toString()` (it throws
 * on unknown encodings), so those encodings are decoded explicitly here.
 * Buffers without a BOM are decoded with `options.encoding` (default 'utf8').
 *
 * @param csvInput - CSV input
 * @param options - Processing options
 * @returns Normalized CSV string ('' for falsy input)
 * @throws Error if the input is neither a string nor a Buffer
 */
export function normalizeCsvInput(
  csvInput: string | Buffer,
  options: NormalizeCsvInputOptions = {}
): string {
  if (!csvInput) {
    return '';
  }

  // Decodes `bytes` (BOM already removed) according to a BOM encoding name.
  const decodeByBom = (bytes: Buffer, bomEncoding: Encoding): string => {
    switch (bomEncoding) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        // swap16() works in place and needs an even length: use a trimmed copy.
        const evenLength = bytes.length - (bytes.length % 2);
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = bomEncoding === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian ? bytes.readUInt32LE(offset) : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF && (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  let normalized: string;

  if (Buffer.isBuffer(csvInput)) {
    const bomInfo = detectBom(csvInput);
    normalized = bomInfo
      ? decodeByBom(csvInput.subarray(bomInfo.bomLength), bomInfo.encoding)
      : csvInput.toString((options.encoding as BufferEncoding) || 'utf8');
  } else if (typeof csvInput === 'string') {
    normalized = stripBomFromString(csvInput);
  } else {
    throw new Error('CSV input must be a string or Buffer');
  }

  // Ensure proper line endings
  if (options.normalizeLineEndings !== false) {
    normalized = normalized.replace(/\r\n|\r/g, '\n');
  }

  return normalized;
}
|
|
283
|
+
|
|
284
|
+
/**
 * Async version of `normalizeCsvInput` that can also read the CSV from a file.
 *
 * For `{ filePath }` input the file is read through
 * `readFileWithBomHandling`. An encoding is forwarded only when the caller
 * supplied one. Forcing a default there would override the encoding detected
 * from the file's BOM. If the reader hands back raw bytes (no BOM and no
 * encoding), they are decoded as UTF-8.
 *
 * String and Buffer input is delegated to `normalizeCsvInput`.
 *
 * @param csvInput - CSV input as string, Buffer, or `{ filePath }`
 * @param options - Processing options
 * @returns Promise with normalized CSV string
 */
export async function normalizeCsvInputAsync(
  csvInput: string | Buffer | { filePath: string },
  options: NormalizeCsvInputOptions = {}
): Promise<string> {
  // `typeof null === 'object'`, and `'filePath' in null` would throw, so null
  // is excluded explicitly before the property probe.
  if (
    csvInput !== null &&
    typeof csvInput === 'object' &&
    !Buffer.isBuffer(csvInput) &&
    'filePath' in csvInput
  ) {
    const readOptions: { encoding?: BufferEncoding } = options.encoding
      ? { encoding: options.encoding as BufferEncoding }
      : {};
    const result = await readFileWithBomHandling(csvInput.filePath, readOptions);
    const text = typeof result.data === 'string' ? result.data : result.data.toString('utf8');

    // Ensure proper line endings
    return options.normalizeLineEndings !== false
      ? text.replace(/\r\n|\r/g, '\n')
      : text;
  }

  // Handle string or Buffer input
  return normalizeCsvInput(csvInput as string | Buffer, options);
}
|
|
313
|
+
|
|
314
|
+
/**
 * Creates an async iterator that strips a leading BOM from a stream.
 *
 * Buffer chunks: a BOM is up to 4 bytes long and may be split across chunks,
 * so the first bytes are held back until at least 4 bytes are available (or
 * the stream ends) before the BOM check runs on the combined head.
 *
 * String chunks (a stream with an encoding set): the BOM is the single code
 * unit U+FEFF, so exactly one character is removed. Slicing by the BOM's
 * byte length would drop real data. String chunks are yielded as received,
 * even though the declared element type is Buffer.
 *
 * @param stream - Readable stream
 * @returns Async iterator yielding chunks without BOM
 */
export async function* createBomStrippingIterator(
  stream: NodeJS.ReadableStream
): AsyncIterableIterator<Buffer> {
  const MAX_BOM_LENGTH = 4;

  const pending: Buffer[] = []; // Buffer chunks held back until the BOM check
  let pendingLength = 0;        // total byte length of `pending`
  let bomChecked = false;

  // Runs the one-time BOM check on the buffered head; null when nothing is left.
  const takeHead = (): Buffer | null => {
    bomChecked = true;
    if (pendingLength === 0) {
      return null;
    }
    const head = Buffer.concat(pending, pendingLength);
    pending.length = 0;
    pendingLength = 0;

    const bomInfo = detectBom(head);
    const body = bomInfo ? head.subarray(bomInfo.bomLength) : head;
    return body.length > 0 ? body : null;
  };

  for await (const chunk of stream) {
    if (bomChecked) {
      yield chunk as Buffer;
      continue;
    }

    if (typeof chunk === 'string') {
      if (pendingLength > 0) {
        // Defensive: the stream switched from bytes to text mid-way. Release
        // the buffered bytes first so nothing is lost or reordered.
        const held = takeHead();
        if (held) {
          yield held;
        }
        yield chunk as unknown as Buffer;
      } else if (chunk.length === 0) {
        yield chunk as unknown as Buffer; // nothing to inspect yet
      } else {
        bomChecked = true;
        const text = chunk.charCodeAt(0) === 0xFEFF ? chunk.slice(1) : chunk;
        yield text as unknown as Buffer;
      }
      continue;
    }

    pending.push(chunk);
    pendingLength += chunk.length;
    if (pendingLength >= MAX_BOM_LENGTH) {
      const body = takeHead();
      if (body) {
        yield body;
      }
    }
  }

  // The stream ended before 4 bytes arrived: check whatever was buffered.
  if (!bomChecked) {
    const body = takeHead();
    if (body) {
      yield body;
    }
  }
}
|
|
341
|
+
|
|
342
|
+
/**
 * Detects a BOM asynchronously by reading only the first 4 bytes of a file,
 * which makes it suitable for large files.
 *
 * The file handle is closed in a `finally` block so it is not leaked when the
 * read rejects.
 *
 * @param filePath - Path to file
 * @returns Promise with BOM info, or null (also null when the file holds
 *   fewer than 2 bytes)
 */
export async function detectBomAsync(filePath: string): Promise<BomDetectionResult | null> {
  const MAX_BOM_LENGTH = 4;
  const head = Buffer.alloc(MAX_BOM_LENGTH);

  const handle = await fsPromises.open(filePath, 'r');
  let bytesRead: number;
  try {
    ({ bytesRead } = await handle.read(head, 0, MAX_BOM_LENGTH, 0));
  } finally {
    await handle.close();
  }

  if (bytesRead < 2) {
    return null;
  }

  return detectBom(head.subarray(0, bytesRead));
}
|
|
360
|
+
|
|
361
|
+
// Default export: the same public API bundled as a single object.
export default {
  detectBom: detectBom,
  stripBom: stripBom,
  stripBomFromString: stripBomFromString,
  createBomStripStream: createBomStripStream,
  readFileWithBomHandling: readFileWithBomHandling,
  fileHasBomSync: fileHasBomSync,
  normalizeCsvInput: normalizeCsvInput,
  normalizeCsvInputAsync: normalizeCsvInputAsync,
  createBomStrippingIterator: createBomStrippingIterator,
  detectBomAsync: detectBomAsync,
  BOM_SIGNATURES: BOM_SIGNATURES
};
|