jtcsv 2.2.7 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +31 -1
  2. package/bin/jtcsv.js +891 -821
  3. package/bin/jtcsv.ts +2534 -0
  4. package/csv-to-json.js +168 -145
  5. package/dist/jtcsv-core.cjs.js +1407 -0
  6. package/dist/jtcsv-core.cjs.js.map +1 -0
  7. package/dist/jtcsv-core.esm.js +1379 -0
  8. package/dist/jtcsv-core.esm.js.map +1 -0
  9. package/dist/jtcsv-core.umd.js +1413 -0
  10. package/dist/jtcsv-core.umd.js.map +1 -0
  11. package/dist/jtcsv-full.cjs.js +1912 -0
  12. package/dist/jtcsv-full.cjs.js.map +1 -0
  13. package/dist/jtcsv-full.esm.js +1880 -0
  14. package/dist/jtcsv-full.esm.js.map +1 -0
  15. package/dist/jtcsv-full.umd.js +1918 -0
  16. package/dist/jtcsv-full.umd.js.map +1 -0
  17. package/dist/jtcsv-workers.esm.js +759 -0
  18. package/dist/jtcsv-workers.esm.js.map +1 -0
  19. package/dist/jtcsv-workers.umd.js +773 -0
  20. package/dist/jtcsv-workers.umd.js.map +1 -0
  21. package/dist/jtcsv.cjs.js +61 -19
  22. package/dist/jtcsv.cjs.js.map +1 -1
  23. package/dist/jtcsv.esm.js +61 -19
  24. package/dist/jtcsv.esm.js.map +1 -1
  25. package/dist/jtcsv.umd.js +61 -19
  26. package/dist/jtcsv.umd.js.map +1 -1
  27. package/errors.js +188 -2
  28. package/examples/advanced/conditional-transformations.js +446 -0
  29. package/examples/advanced/conditional-transformations.ts +446 -0
  30. package/examples/advanced/csv-parser.worker.js +89 -0
  31. package/examples/advanced/csv-parser.worker.ts +89 -0
  32. package/examples/advanced/nested-objects-example.js +306 -0
  33. package/examples/advanced/nested-objects-example.ts +306 -0
  34. package/examples/advanced/performance-optimization.js +504 -0
  35. package/examples/advanced/performance-optimization.ts +504 -0
  36. package/examples/advanced/run-demo-server.js +116 -0
  37. package/examples/advanced/run-demo-server.ts +116 -0
  38. package/examples/advanced/web-worker-usage.html +874 -0
  39. package/examples/async-multithreaded-example.ts +335 -0
  40. package/examples/cli-advanced-usage.md +288 -0
  41. package/examples/cli-batch-processing.ts +38 -0
  42. package/examples/cli-tool.js +0 -3
  43. package/examples/cli-tool.ts +183 -0
  44. package/examples/error-handling.js +21 -7
  45. package/examples/error-handling.ts +356 -0
  46. package/examples/express-api.js +0 -3
  47. package/examples/express-api.ts +164 -0
  48. package/examples/large-dataset-example.js +0 -3
  49. package/examples/large-dataset-example.ts +204 -0
  50. package/examples/ndjson-processing.js +1 -1
  51. package/examples/ndjson-processing.ts +456 -0
  52. package/examples/plugin-excel-exporter.js +3 -4
  53. package/examples/plugin-excel-exporter.ts +406 -0
  54. package/examples/react-integration.tsx +637 -0
  55. package/examples/schema-validation.ts +640 -0
  56. package/examples/simple-usage.js +254 -254
  57. package/examples/simple-usage.ts +194 -0
  58. package/examples/streaming-example.js +4 -5
  59. package/examples/streaming-example.ts +419 -0
  60. package/examples/web-workers-advanced.ts +28 -0
  61. package/index.d.ts +1 -3
  62. package/index.js +15 -1
  63. package/json-save.js +9 -3
  64. package/json-to-csv.js +168 -21
  65. package/package.json +69 -10
  66. package/plugins/express-middleware/README.md +21 -2
  67. package/plugins/express-middleware/example.js +3 -4
  68. package/plugins/express-middleware/example.ts +135 -0
  69. package/plugins/express-middleware/index.d.ts +1 -1
  70. package/plugins/express-middleware/index.js +270 -118
  71. package/plugins/express-middleware/index.ts +557 -0
  72. package/plugins/fastify-plugin/index.js +2 -4
  73. package/plugins/fastify-plugin/index.ts +443 -0
  74. package/plugins/hono/index.ts +226 -0
  75. package/plugins/nestjs/index.ts +201 -0
  76. package/plugins/nextjs-api/examples/ConverterComponent.tsx +386 -0
  77. package/plugins/nextjs-api/examples/api-convert.js +0 -2
  78. package/plugins/nextjs-api/examples/api-convert.ts +67 -0
  79. package/plugins/nextjs-api/index.tsx +339 -0
  80. package/plugins/nextjs-api/route.js +2 -3
  81. package/plugins/nextjs-api/route.ts +370 -0
  82. package/plugins/nuxt/index.ts +94 -0
  83. package/plugins/nuxt/runtime/composables/useJtcsv.ts +100 -0
  84. package/plugins/nuxt/runtime/plugin.ts +71 -0
  85. package/plugins/remix/index.js +1 -1
  86. package/plugins/remix/index.ts +260 -0
  87. package/plugins/sveltekit/index.js +1 -1
  88. package/plugins/sveltekit/index.ts +301 -0
  89. package/plugins/trpc/index.ts +267 -0
  90. package/src/browser/browser-functions.ts +402 -0
  91. package/src/browser/core.js +92 -0
  92. package/src/browser/core.ts +152 -0
  93. package/src/browser/csv-to-json-browser.d.ts +3 -0
  94. package/src/browser/csv-to-json-browser.js +36 -14
  95. package/src/browser/csv-to-json-browser.ts +264 -0
  96. package/src/browser/errors-browser.ts +303 -0
  97. package/src/browser/extensions/plugins.js +92 -0
  98. package/src/browser/extensions/plugins.ts +93 -0
  99. package/src/browser/extensions/workers.js +39 -0
  100. package/src/browser/extensions/workers.ts +39 -0
  101. package/src/browser/globals.d.ts +5 -0
  102. package/src/browser/index.ts +192 -0
  103. package/src/browser/json-to-csv-browser.d.ts +3 -0
  104. package/src/browser/json-to-csv-browser.js +13 -3
  105. package/src/browser/json-to-csv-browser.ts +262 -0
  106. package/src/browser/streams.js +12 -2
  107. package/src/browser/streams.ts +336 -0
  108. package/src/browser/workers/csv-parser.worker.ts +377 -0
  109. package/src/browser/workers/worker-pool.ts +548 -0
  110. package/src/core/delimiter-cache.js +22 -8
  111. package/src/core/delimiter-cache.ts +310 -0
  112. package/src/core/node-optimizations.ts +449 -0
  113. package/src/core/plugin-system.js +29 -11
  114. package/src/core/plugin-system.ts +400 -0
  115. package/src/core/transform-hooks.ts +558 -0
  116. package/src/engines/fast-path-engine-new.ts +347 -0
  117. package/src/engines/fast-path-engine.ts +854 -0
  118. package/src/errors.ts +72 -0
  119. package/src/formats/ndjson-parser.ts +469 -0
  120. package/src/formats/tsv-parser.ts +334 -0
  121. package/src/index-with-plugins.js +16 -9
  122. package/src/index-with-plugins.ts +395 -0
  123. package/src/types/index.ts +255 -0
  124. package/src/utils/bom-utils.js +259 -0
  125. package/src/utils/bom-utils.ts +373 -0
  126. package/src/utils/encoding-support.js +124 -0
  127. package/src/utils/encoding-support.ts +155 -0
  128. package/src/utils/schema-validator.js +19 -19
  129. package/src/utils/schema-validator.ts +819 -0
  130. package/src/utils/transform-loader.js +1 -1
  131. package/src/utils/transform-loader.ts +389 -0
  132. package/src/utils/zod-adapter.js +170 -0
  133. package/src/utils/zod-adapter.ts +280 -0
  134. package/src/web-server/index.js +10 -10
  135. package/src/web-server/index.ts +683 -0
  136. package/src/workers/csv-multithreaded.ts +310 -0
  137. package/src/workers/csv-parser.worker.ts +227 -0
  138. package/src/workers/worker-pool.ts +409 -0
  139. package/stream-csv-to-json.js +26 -8
  140. package/stream-json-to-csv.js +1 -0
@@ -0,0 +1,259 @@
1
+ /**
2
+ * BOM (Byte Order Mark) Utilities for jtcsv
3
+ *
4
+ * Provides functions to detect and strip BOM characters from UTF-8, UTF-16 LE/BE,
5
+ * and UTF-32 encoded strings/buffers.
6
+ *
7
+ * @module bom-utils
8
+ */
9
+
10
/**
 * BOM signatures for different encodings.
 *
 * Some signatures are prefixes of others (the UTF-32 LE mark starts with the
 * UTF-16 LE mark), so any matcher must try longer signatures first.
 */
const BOM_SIGNATURES = {
  'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),
  'utf-16le': Buffer.from([0xFF, 0xFE]),
  'utf-16be': Buffer.from([0xFE, 0xFF]),
  'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
  'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF])
};

/**
 * Detects if a buffer or string starts with a BOM.
 *
 * Signatures are compared longest-first so that a UTF-32 LE mark
 * (FF FE 00 00) is not misreported as the shorter UTF-16 LE mark (FF FE).
 * The byte sequence FF FE 00 00 is inherently ambiguous (it is also UTF-16 LE
 * text beginning with U+0000); it is reported as UTF-32 LE.
 *
 * @param {Buffer|string} input - Input to check for BOM. Strings are
 *   inspected through the UTF-8 encoding of their leading characters.
 * @returns {Object|null} Detection result or null if no BOM found (also for
 *   falsy input and for values that are neither a string nor a Buffer)
 * @property {string} encoding - Detected encoding ('utf-8', 'utf-16le', etc.)
 * @property {number} bomLength - Length of BOM in bytes
 * @property {boolean} hasBom - Always true on a returned result
 */
function detectBom(input) {
  if (!input) {
    return null;
  }

  let head;
  if (typeof input === 'string') {
    // Only the first bytes are ever compared, so encode just the leading
    // code units instead of the whole (possibly very large) string.
    head = Buffer.from(input.slice(0, 4), 'utf8');
  } else if (Buffer.isBuffer(input)) {
    head = input;
  } else {
    return null;
  }

  // Longest signature first; the sort is stable, so equal-length signatures
  // keep their declaration order.
  const candidates = Object.entries(BOM_SIGNATURES)
    .sort(([, a], [, b]) => b.length - a.length);

  for (const [encoding, signature] of candidates) {
    if (head.length >= signature.length &&
        head.subarray(0, signature.length).equals(signature)) {
      return {
        encoding,
        bomLength: signature.length,
        hasBom: true
      };
    }
  }

  return null;
}
58
+
59
/**
 * Strips BOM from a buffer or string.
 *
 * Buffers are inspected with detectBom and returned without their leading
 * BOM bytes. Strings are already decoded text, so their BOM (if any) is the
 * single leading code unit U+FEFF; it is removed directly, without a
 * Buffer round-trip that would replace lone surrogates with U+FFFD.
 *
 * @param {Buffer|string} input - Input to strip BOM from
 * @returns {Buffer|string} Input without BOM; falsy and unsupported inputs
 *   are returned unchanged
 */
function stripBom(input) {
  if (!input) {
    return input;
  }

  if (typeof input === 'string') {
    return input.charCodeAt(0) === 0xFEFF ? input.slice(1) : input;
  }

  if (Buffer.isBuffer(input)) {
    const bomInfo = detectBom(input);
    return bomInfo ? input.slice(bomInfo.bomLength) : input;
  }

  return input;
}
97
+
98
/**
 * Removes a single leading BOM from decoded text.
 *
 * Two spellings are recognised: the BOM character U+FEFF, and the three
 * characters U+00EF U+00BB U+00BF (the UTF-8 BOM bytes seen as individual
 * characters). At most one of them is removed.
 *
 * @param {string} str - String to strip BOM from
 * @returns {string} String without BOM; non-string values are returned as-is
 */
function stripBomFromString(str) {
  if (typeof str !== 'string') {
    return str;
  }

  let bomChars = 0;
  if (str.startsWith('\uFEFF')) {
    bomChars = 1;
  } else if (str.startsWith('\u00EF\u00BB\u00BF')) {
    bomChars = 3;
  }

  return bomChars === 0 ? str : str.slice(bomChars);
}
124
+
125
/**
 * Creates a transform stream that strips a BOM from the start of the data.
 *
 * The BOM may arrive split across several chunks, so leading bytes are held
 * back while they are still a proper prefix of some BOM signature (at most
 * 4 bytes). As soon as the start of the stream can no longer be a BOM, or
 * enough bytes have arrived to decide, the held bytes are released (minus the
 * BOM) and every later chunk is passed through untouched.
 *
 * @returns {Transform} Transform stream
 */
function createBomStripStream() {
  const { Transform } = require('stream');
  const MAX_BOM_LENGTH = 4;

  let inspected = false;
  let head = Buffer.alloc(0);

  // True while `bytes` is a strict prefix of at least one BOM signature.
  const couldStillBeBom = (bytes) =>
    Object.values(BOM_SIGNATURES).some(
      (signature) =>
        bytes.length < signature.length &&
        signature.subarray(0, bytes.length).equals(bytes)
    );

  // Decides on the buffered start of the stream and emits what follows the BOM.
  const releaseHead = (stream) => {
    inspected = true;
    const bomInfo = detectBom(head);
    const body = bomInfo ? head.subarray(bomInfo.bomLength) : head;
    head = null;
    if (body.length > 0) {
      stream.push(body);
    }
  };

  return new Transform({
    transform(chunk, encoding, callback) {
      if (inspected) {
        callback(null, chunk);
        return;
      }

      head = Buffer.concat([head, chunk]);
      if (head.length >= MAX_BOM_LENGTH || !couldStillBeBom(head)) {
        releaseHead(this);
      }
      callback();
    },

    flush(callback) {
      // The stream ended while bytes were still being held back.
      if (!inspected) {
        releaseHead(this);
      }
      callback();
    }
  });
}
152
+
153
/**
 * Reads a file and automatically handles BOM.
 *
 * The file is read as bytes and a leading BOM, if present, is removed.
 * - When `options.encoding` is given, the remaining bytes are decoded with it.
 * - Otherwise, when a BOM was found, the bytes are decoded according to the
 *   detected encoding. UTF-16 BE and UTF-32 have no native Node.js decoder
 *   and are decoded explicitly here instead of failing with
 *   "Unknown encoding".
 * - Otherwise the raw Buffer is returned and `encoding` is reported as 'utf8'.
 *
 * @param {string} filePath - Path to file
 * @param {Object} options - Read options
 * @param {string} [options.encoding] - Encoding that overrides BOM detection
 * @returns {Promise<{data: Buffer|string, encoding: string, hadBom: boolean, bomInfo: Object|null}>}
 */
async function readFileWithBomHandling(filePath, options = {}) {
  const fs = require('fs').promises;

  // Decodes BOM-less bytes using the encoding name reported by detectBom.
  // Trailing bytes that do not form a complete code unit are ignored.
  const decodeDetected = (bytes, detected) => {
    switch (detected) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        const evenLength = bytes.length - (bytes.length % 2);
        // Swap a copy so the caller's bytes are never modified.
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = detected === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian
            ? bytes.readUInt32LE(offset)
            : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF &&
            (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += String.fromCodePoint(isScalar ? codePoint : 0xFFFD);
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  const buffer = await fs.readFile(filePath);
  const bomInfo = detectBom(buffer);
  const hadBom = !!bomInfo;

  let data = bomInfo ? buffer.slice(bomInfo.bomLength) : buffer;
  let encoding = options.encoding || 'utf8';

  if (options.encoding) {
    // An explicit encoding always wins over the detected one.
    data = data.toString(encoding);
  } else if (bomInfo) {
    encoding = bomInfo.encoding;
    data = decodeDetected(data, bomInfo.encoding);
  }

  return {
    data,
    encoding,
    hadBom,
    bomInfo: bomInfo || null
  };
}
194
+
195
/**
 * Checks if a file has BOM (synchronous).
 *
 * Only the first four bytes of the file are read. The file descriptor is
 * closed even when the read fails.
 *
 * @param {string} filePath - Path to file
 * @returns {Object|null} BOM info or null (also when the file holds fewer
 *   than two bytes)
 */
function fileHasBomSync(filePath) {
  const fs = require('fs');
  const head = Buffer.alloc(4);
  const fd = fs.openSync(filePath, 'r');

  let bytesRead;
  try {
    bytesRead = fs.readSync(fd, head, 0, head.length, 0);
  } finally {
    fs.closeSync(fd);
  }

  if (bytesRead < 2) {
    return null;
  }

  return detectBom(head.slice(0, bytesRead));
}
214
+
215
/**
 * Normalizes CSV input by stripping BOM and ensuring proper encoding.
 *
 * Buffers with a BOM are decoded according to the detected encoding
 * (UTF-16 BE and UTF-32 are decoded explicitly because Node.js has no native
 * decoder for them); buffers without a BOM are decoded with
 * `options.encoding` (default 'utf8'). Strings only have their BOM removed.
 * Unless `options.normalizeLineEndings` is false, CRLF and lone CR are
 * converted to LF.
 *
 * @param {string|Buffer} csvInput - CSV input
 * @param {Object} options - Processing options
 * @param {string} [options.encoding] - Encoding for buffers without a BOM
 * @param {boolean} [options.normalizeLineEndings] - Pass false to keep line endings
 * @returns {string} Normalized CSV string ('' for falsy input)
 * @throws {Error} If the input is neither a string nor a Buffer
 */
function normalizeCsvInput(csvInput, options = {}) {
  if (!csvInput) {
    return '';
  }

  // Decodes BOM-less bytes using the encoding name reported by detectBom.
  // Trailing bytes that do not form a complete code unit are ignored.
  const decodeDetected = (bytes, detected) => {
    switch (detected) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        const evenLength = bytes.length - (bytes.length % 2);
        // Swap a copy so the caller's buffer is never modified.
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = detected === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian
            ? bytes.readUInt32LE(offset)
            : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF &&
            (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += String.fromCodePoint(isScalar ? codePoint : 0xFFFD);
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  let normalized;

  if (Buffer.isBuffer(csvInput)) {
    const bomInfo = detectBom(csvInput);
    normalized = bomInfo
      ? decodeDetected(csvInput.slice(bomInfo.bomLength), bomInfo.encoding)
      : csvInput.toString(options.encoding || 'utf8');
  } else if (typeof csvInput === 'string') {
    normalized = stripBomFromString(csvInput);
  } else {
    throw new Error('CSV input must be a string or Buffer');
  }

  // Ensure proper line endings
  if (options.normalizeLineEndings !== false) {
    normalized = normalized.replace(/\r\n|\r/g, '\n');
  }

  return normalized;
}
249
+
250
+ module.exports = {
251
+ detectBom,
252
+ stripBom,
253
+ stripBomFromString,
254
+ createBomStripStream,
255
+ readFileWithBomHandling,
256
+ fileHasBomSync,
257
+ normalizeCsvInput,
258
+ BOM_SIGNATURES
259
+ };
@@ -0,0 +1,373 @@
1
+ /**
2
+ * BOM (Byte Order Mark) Utilities for jtcsv
3
+ *
4
+ * Provides functions to detect and strip BOM characters from UTF-8, UTF-16 LE/BE,
5
+ * and UTF-32 encoded strings/buffers.
6
+ *
7
+ * @module bom-utils
8
+ */
9
+
10
+ import { Transform } from 'stream';
11
+ import * as fs from 'fs';
12
+ import * as fsPromises from 'fs/promises';
13
+
14
/**
 * BOM signatures for different encodings.
 *
 * Some signatures are prefixes of others (the UTF-32 LE mark starts with the
 * UTF-16 LE mark), so any matcher must try longer signatures first.
 */
export const BOM_SIGNATURES = {
  'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),
  'utf-16le': Buffer.from([0xFF, 0xFE]),
  'utf-16be': Buffer.from([0xFE, 0xFF]),
  'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
  'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF])
} as const;

/** Name of an encoding that has a known BOM signature. */
export type Encoding = keyof typeof BOM_SIGNATURES;

/** Outcome of a successful BOM detection. */
export interface BomDetectionResult {
  /** Encoding whose signature matched. */
  encoding: Encoding;
  /** Length of the matched signature in bytes. */
  bomLength: number;
  /** Always true on a returned result. */
  hasBom: boolean;
}

/** Result of reading a file with BOM handling. */
export interface ReadFileWithBomResult {
  data: Buffer | string;
  encoding: string;
  hadBom: boolean;
  bomInfo: BomDetectionResult | null;
}

/** Options accepted by the CSV input normalizers. */
export interface NormalizeCsvInputOptions {
  encoding?: string;
  normalizeLineEndings?: boolean;
}

/**
 * Detects if a buffer or string starts with a BOM.
 *
 * Signatures are compared longest-first so that a UTF-32 LE mark
 * (FF FE 00 00) is not misreported as the shorter UTF-16 LE mark (FF FE).
 * The byte sequence FF FE 00 00 is inherently ambiguous (it is also UTF-16 LE
 * text beginning with U+0000); it is reported as UTF-32 LE.
 *
 * @param input - Input to check for BOM. Strings are inspected through the
 *   UTF-8 encoding of their leading characters.
 * @returns Detection result, or null if no BOM is found (also for falsy input
 *   and for values that are neither a string nor a Buffer)
 */
export function detectBom(input: Buffer | string | null | undefined): BomDetectionResult | null {
  if (!input) {
    return null;
  }

  let head: Buffer;
  if (typeof input === 'string') {
    // Only the first bytes are ever compared, so encode just the leading
    // code units instead of the whole (possibly very large) string.
    head = Buffer.from(input.slice(0, 4), 'utf8');
  } else if (Buffer.isBuffer(input)) {
    head = input;
  } else {
    return null;
  }

  // Longest signature first; the sort is stable, so equal-length signatures
  // keep their declaration order.
  const candidates = Object.entries(BOM_SIGNATURES)
    .sort(([, a], [, b]) => b.length - a.length);

  for (const [encoding, signature] of candidates) {
    if (head.length >= signature.length &&
        head.subarray(0, signature.length).equals(signature)) {
      return {
        encoding: encoding as Encoding,
        bomLength: signature.length,
        hasBom: true
      };
    }
  }

  return null;
}
80
+
81
/**
 * Strips BOM from a buffer or string.
 *
 * Buffers are inspected with detectBom and returned without their leading
 * BOM bytes. Strings are already decoded text, so their BOM (if any) is the
 * single leading code unit U+FEFF; it is removed directly, without a
 * Buffer round-trip that would replace lone surrogates with U+FFFD.
 *
 * @param input - Input to strip BOM from
 * @returns Input without BOM; falsy input is returned unchanged
 */
export function stripBom(input: Buffer | string | null | undefined): Buffer | string {
  if (!input) {
    // Falsy values ('' / null / undefined) are handed back as received.
    return input as string;
  }

  if (typeof input === 'string') {
    return input.charCodeAt(0) === 0xFEFF ? input.slice(1) : input;
  }

  if (Buffer.isBuffer(input)) {
    const bomInfo = detectBom(input);
    return bomInfo ? input.slice(bomInfo.bomLength) : input;
  }

  return input;
}
119
+
120
/**
 * Removes a single leading BOM from decoded text.
 *
 * Two spellings are recognised: the BOM character U+FEFF, and the three
 * characters U+00EF U+00BB U+00BF (the UTF-8 BOM bytes seen as individual
 * characters). At most one of them is removed.
 *
 * @param str - String to strip BOM from
 * @returns String without BOM; a non-string value passed at runtime is
 *   returned as-is
 */
export function stripBomFromString(str: string): string {
  if (typeof str !== 'string') {
    return str;
  }

  let bomChars = 0;
  if (str.startsWith('\uFEFF')) {
    bomChars = 1;
  } else if (str.startsWith('\u00EF\u00BB\u00BF')) {
    bomChars = 3;
  }

  return bomChars === 0 ? str : str.slice(bomChars);
}
146
+
147
/**
 * Creates a transform stream that strips a BOM from the start of the data.
 *
 * The BOM may arrive split across several chunks, so leading bytes are held
 * back while they are still a proper prefix of some BOM signature (at most
 * 4 bytes). As soon as the start of the stream can no longer be a BOM, or
 * enough bytes have arrived to decide, the held bytes are released (minus the
 * BOM) and every later chunk is passed through untouched.
 *
 * @returns Transform stream
 */
export function createBomStripStream(): Transform {
  const MAX_BOM_LENGTH = 4;

  let inspected = false;
  let head: Buffer = Buffer.alloc(0);

  // True while `bytes` is a strict prefix of at least one BOM signature.
  const couldStillBeBom = (bytes: Buffer): boolean =>
    Object.values(BOM_SIGNATURES).some(
      (signature) =>
        bytes.length < signature.length &&
        signature.subarray(0, bytes.length).equals(bytes)
    );

  // Decides on the buffered start of the stream and emits what follows the BOM.
  const releaseHead = (stream: Transform): void => {
    inspected = true;
    const bomInfo = detectBom(head);
    const body = bomInfo ? head.subarray(bomInfo.bomLength) : head;
    head = Buffer.alloc(0);
    if (body.length > 0) {
      stream.push(body);
    }
  };

  return new Transform({
    transform(chunk: Buffer, _encoding: string, callback: (error?: Error | null, data?: Buffer) => void) {
      if (inspected) {
        callback(null, chunk);
        return;
      }

      head = Buffer.concat([head, chunk]);
      if (head.length >= MAX_BOM_LENGTH || !couldStillBeBom(head)) {
        releaseHead(this);
      }
      callback();
    },

    flush(callback: (error?: Error | null) => void) {
      // The stream ended while bytes were still being held back.
      if (!inspected) {
        releaseHead(this);
      }
      callback();
    }
  });
}
173
+
174
/**
 * Reads a file and automatically handles BOM.
 *
 * The file is read as bytes and a leading BOM, if present, is removed.
 * - When `options.encoding` is given, the remaining bytes are decoded with it
 *   and it is reported as the result's `encoding`.
 * - Otherwise, when a BOM was found, the bytes are decoded according to the
 *   detected encoding. UTF-16 BE and UTF-32 have no native Node.js decoder
 *   and are decoded explicitly. The reported `encoding` is 'utf8' / 'utf16le'
 *   for those two, and the detected name ('utf-16be', 'utf-32le',
 *   'utf-32be') for the others.
 * - Otherwise the raw Buffer is returned and `encoding` is reported as 'utf8'.
 *
 * @param filePath - Path to file
 * @param options - Read options; `encoding` overrides BOM detection
 * @returns Promise with file data and BOM info
 */
export async function readFileWithBomHandling(
  filePath: string,
  options: { encoding?: BufferEncoding } = {}
): Promise<ReadFileWithBomResult> {
  // Decodes BOM-less bytes using the encoding name reported by detectBom.
  // Trailing bytes that do not form a complete code unit are ignored.
  const decodeDetected = (bytes: Buffer, detected: string): string => {
    switch (detected) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        const evenLength = bytes.length - (bytes.length % 2);
        // Swap a copy so the bytes read from disk are never modified.
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = detected === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian
            ? bytes.readUInt32LE(offset)
            : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF &&
            (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += String.fromCodePoint(isScalar ? codePoint : 0xFFFD);
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  const buffer = await fsPromises.readFile(filePath);
  const bomInfo = detectBom(buffer);
  const payload = bomInfo ? buffer.slice(bomInfo.bomLength) : buffer;

  let data: Buffer | string = payload;
  let encoding: string = options.encoding || 'utf8';

  if (options.encoding) {
    // An explicit encoding always wins over the detected one.
    data = payload.toString(options.encoding);
  } else if (bomInfo) {
    data = decodeDetected(payload, bomInfo.encoding);
    if (bomInfo.encoding === 'utf-8') {
      encoding = 'utf8';
    } else if (bomInfo.encoding === 'utf-16le') {
      encoding = 'utf16le';
    } else {
      // No Node.js encoding name exists for these; report what was detected.
      encoding = bomInfo.encoding;
    }
  }

  return {
    data,
    encoding,
    hadBom: !!bomInfo,
    bomInfo: bomInfo || null
  };
}
226
+
227
/**
 * Checks if a file has BOM (synchronous).
 *
 * Only the first four bytes of the file are read. The file descriptor is
 * closed even when the read fails.
 *
 * @param filePath - Path to file
 * @returns BOM info, or null when none is found or the file holds fewer than
 *   two bytes
 */
export function fileHasBomSync(filePath: string): BomDetectionResult | null {
  const head = Buffer.alloc(4);
  const fd = fs.openSync(filePath, 'r');

  try {
    const bytesRead = fs.readSync(fd, head, 0, head.length, 0);
    if (bytesRead < 2) {
      return null;
    }
    return detectBom(head.slice(0, bytesRead));
  } finally {
    fs.closeSync(fd);
  }
}
245
+
246
/**
 * Normalizes CSV input by stripping BOM and ensuring proper encoding.
 *
 * Buffers with a BOM are decoded according to the detected encoding
 * (UTF-16 BE and UTF-32 are decoded explicitly because Node.js has no native
 * decoder for them); buffers without a BOM are decoded with
 * `options.encoding` (default 'utf8'). Strings only have their BOM removed.
 * Unless `options.normalizeLineEndings` is false, CRLF and lone CR are
 * converted to LF.
 *
 * @param csvInput - CSV input
 * @param options - Processing options
 * @returns Normalized CSV string ('' for falsy input)
 * @throws Error if the input is neither a string nor a Buffer
 */
export function normalizeCsvInput(
  csvInput: string | Buffer,
  options: NormalizeCsvInputOptions = {}
): string {
  if (!csvInput) {
    return '';
  }

  // Decodes BOM-less bytes using the encoding name reported by detectBom.
  // Trailing bytes that do not form a complete code unit are ignored.
  const decodeDetected = (bytes: Buffer, detected: string): string => {
    switch (detected) {
      case 'utf-16le':
        return bytes.toString('utf16le');
      case 'utf-16be': {
        const evenLength = bytes.length - (bytes.length % 2);
        // Swap a copy so the caller's buffer is never modified.
        return Buffer.from(bytes.subarray(0, evenLength)).swap16().toString('utf16le');
      }
      case 'utf-32le':
      case 'utf-32be': {
        const littleEndian = detected === 'utf-32le';
        let text = '';
        for (let offset = 0; offset + 4 <= bytes.length; offset += 4) {
          const codePoint = littleEndian
            ? bytes.readUInt32LE(offset)
            : bytes.readUInt32BE(offset);
          const isScalar = codePoint <= 0x10FFFF &&
            (codePoint < 0xD800 || codePoint > 0xDFFF);
          text += String.fromCodePoint(isScalar ? codePoint : 0xFFFD);
        }
        return text;
      }
      default:
        return bytes.toString('utf8');
    }
  };

  let normalized: string;

  if (Buffer.isBuffer(csvInput)) {
    const bomInfo = detectBom(csvInput);
    normalized = bomInfo
      ? decodeDetected(csvInput.slice(bomInfo.bomLength), bomInfo.encoding)
      : csvInput.toString((options.encoding as BufferEncoding) || 'utf8');
  } else if (typeof csvInput === 'string') {
    normalized = stripBomFromString(csvInput);
  } else {
    throw new Error('CSV input must be a string or Buffer');
  }

  // Ensure proper line endings
  if (options.normalizeLineEndings !== false) {
    normalized = normalized.replace(/\r\n|\r/g, '\n');
  }

  return normalized;
}
283
+
284
/**
 * Async version of normalizeCsvInput that can also read its input from a file.
 *
 * For `{ filePath }` input the file is read with readFileWithBomHandling.
 * An encoding is forwarded only when the caller supplied one, so that a BOM
 * found in the file can select the decoding; previously a default of 'utf8'
 * was always forwarded, which overrode BOM detection. String and Buffer
 * input (and null/undefined passed at runtime) is delegated to
 * normalizeCsvInput.
 *
 * @param csvInput - CSV input as string, Buffer, or file path
 * @param options - Processing options
 * @returns Promise with normalized CSV string
 */
export async function normalizeCsvInputAsync(
  csvInput: string | Buffer | { filePath: string },
  options: NormalizeCsvInputOptions = {}
): Promise<string> {
  if (typeof csvInput === 'object' && csvInput !== null && 'filePath' in csvInput) {
    // Read file asynchronously
    const result = await readFileWithBomHandling(
      csvInput.filePath,
      options.encoding ? { encoding: options.encoding as BufferEncoding } : {}
    );
    // Without a BOM and without an explicit encoding the reader returns raw
    // bytes; those are decoded as UTF-8 here.
    let normalized = typeof result.data === 'string' ? result.data : result.data.toString();

    // Ensure proper line endings
    if (options.normalizeLineEndings !== false) {
      normalized = normalized.replace(/\r\n|\r/g, '\n');
    }

    return normalized;
  }

  // Handle string or Buffer input
  return normalizeCsvInput(csvInput as string | Buffer, options);
}
313
+
314
/**
 * Creates an async iterator that strips a BOM from the start of a stream.
 *
 * Buffer chunks: the BOM may be split across chunks, so leading bytes are
 * held back while they are still a proper prefix of some BOM signature (at
 * most 4 bytes); once decided, the held bytes are yielded without the BOM and
 * all later chunks are yielded untouched. Empty results are not yielded.
 *
 * String chunks (a stream that already decodes to text): a BOM is the single
 * leading character U+FEFF of the first chunk, so exactly one character is
 * removed. Such chunks are yielded as strings, as before, even though the
 * declared element type is Buffer.
 *
 * @param stream - Readable stream
 * @returns Async iterator yielding chunks without BOM
 */
export async function* createBomStrippingIterator(
  stream: NodeJS.ReadableStream
): AsyncIterableIterator<Buffer> {
  const MAX_BOM_LENGTH = 4;

  let inspected = false;
  let head: Buffer = Buffer.alloc(0);

  // True while `bytes` is a strict prefix of at least one BOM signature.
  const couldStillBeBom = (bytes: Buffer): boolean =>
    Object.values(BOM_SIGNATURES).some(
      (signature) =>
        bytes.length < signature.length &&
        signature.subarray(0, bytes.length).equals(bytes)
    );

  // Returns the buffered start of the stream without its BOM, if any.
  const headWithoutBom = (): Buffer => {
    const bomInfo = detectBom(head);
    return bomInfo ? head.subarray(bomInfo.bomLength) : head;
  };

  // Text chunks are passed along in their original kind (see doc comment).
  const passThrough = (value: string | Buffer): Buffer => value as Buffer;

  for await (const chunk of stream) {
    if (inspected) {
      yield chunk as Buffer;
      continue;
    }

    if (typeof chunk === 'string') {
      inspected = true;
      yield passThrough(chunk.charCodeAt(0) === 0xFEFF ? chunk.slice(1) : chunk);
      continue;
    }

    head = Buffer.concat([head, chunk]);
    if (head.length < MAX_BOM_LENGTH && couldStillBeBom(head)) {
      continue; // not enough bytes yet to rule a BOM in or out
    }

    inspected = true;
    const body = headWithoutBom();
    if (body.length > 0) {
      yield body;
    }
  }

  // The stream ended while bytes were still being held back.
  if (!inspected) {
    const body = headWithoutBom();
    if (body.length > 0) {
      yield body;
    }
  }
}
341
+
342
/**
 * Detects BOM asynchronously without reading the whole file.
 *
 * Only the first four bytes of the file are read. The file handle is closed
 * even when the read fails.
 *
 * @param filePath - Path to file
 * @returns Promise with BOM info, or null when none is found or the file
 *   holds fewer than two bytes
 */
export async function detectBomAsync(filePath: string): Promise<BomDetectionResult | null> {
  const head = Buffer.alloc(4);
  const handle = await fsPromises.open(filePath, 'r');

  try {
    const { bytesRead } = await handle.read(head, 0, head.length, 0);
    if (bytesRead < 2) {
      return null;
    }
    return detectBom(head.slice(0, bytesRead));
  } finally {
    await handle.close();
  }
}
360
+
361
// Aggregate of the module's public API, exposed as the default export.
const bomUtils = {
  detectBom: detectBom,
  stripBom: stripBom,
  stripBomFromString: stripBomFromString,
  createBomStripStream: createBomStripStream,
  readFileWithBomHandling: readFileWithBomHandling,
  fileHasBomSync: fileHasBomSync,
  normalizeCsvInput: normalizeCsvInput,
  normalizeCsvInputAsync: normalizeCsvInputAsync,
  createBomStrippingIterator: createBomStrippingIterator,
  detectBomAsync: detectBomAsync,
  BOM_SIGNATURES: BOM_SIGNATURES
};

export default bomUtils;