jtcsv 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +205 -146
- package/bin/jtcsv.ts +280 -202
- package/browser.d.ts +142 -0
- package/dist/benchmark.js +446 -0
- package/dist/benchmark.js.map +1 -0
- package/dist/bin/jtcsv.js +1940 -0
- package/dist/bin/jtcsv.js.map +1 -0
- package/dist/csv-to-json.js +1262 -0
- package/dist/csv-to-json.js.map +1 -0
- package/dist/errors.js +291 -0
- package/dist/errors.js.map +1 -0
- package/dist/eslint.config.js +147 -0
- package/dist/eslint.config.js.map +1 -0
- package/dist/index-core.js +95 -0
- package/dist/index-core.js.map +1 -0
- package/dist/index.js +93 -0
- package/dist/index.js.map +1 -0
- package/dist/json-save.js +229 -0
- package/dist/json-save.js.map +1 -0
- package/dist/json-to-csv.js +576 -0
- package/dist/json-to-csv.js.map +1 -0
- package/dist/jtcsv-core.cjs.js +336 -7
- package/dist/jtcsv-core.cjs.js.map +1 -1
- package/dist/jtcsv-core.esm.js +336 -7
- package/dist/jtcsv-core.esm.js.map +1 -1
- package/dist/jtcsv-core.umd.js +336 -7
- package/dist/jtcsv-core.umd.js.map +1 -1
- package/dist/jtcsv-full.cjs.js +336 -7
- package/dist/jtcsv-full.cjs.js.map +1 -1
- package/dist/jtcsv-full.esm.js +336 -7
- package/dist/jtcsv-full.esm.js.map +1 -1
- package/dist/jtcsv-full.umd.js +336 -7
- package/dist/jtcsv-full.umd.js.map +1 -1
- package/dist/jtcsv-workers.esm.js +9 -0
- package/dist/jtcsv-workers.esm.js.map +1 -1
- package/dist/jtcsv-workers.umd.js +9 -0
- package/dist/jtcsv-workers.umd.js.map +1 -1
- package/dist/jtcsv.cjs.js +1998 -2092
- package/dist/jtcsv.cjs.js.map +1 -1
- package/dist/jtcsv.esm.js +1994 -2092
- package/dist/jtcsv.esm.js.map +1 -1
- package/dist/jtcsv.umd.js +2157 -2251
- package/dist/jtcsv.umd.js.map +1 -1
- package/dist/plugins/express-middleware/index.js +350 -0
- package/dist/plugins/express-middleware/index.js.map +1 -0
- package/dist/plugins/fastify-plugin/index.js +315 -0
- package/dist/plugins/fastify-plugin/index.js.map +1 -0
- package/dist/plugins/hono/index.js +111 -0
- package/dist/plugins/hono/index.js.map +1 -0
- package/dist/plugins/nestjs/index.js +112 -0
- package/dist/plugins/nestjs/index.js.map +1 -0
- package/dist/plugins/nuxt/index.js +53 -0
- package/dist/plugins/nuxt/index.js.map +1 -0
- package/dist/plugins/remix/index.js +133 -0
- package/dist/plugins/remix/index.js.map +1 -0
- package/dist/plugins/sveltekit/index.js +155 -0
- package/dist/plugins/sveltekit/index.js.map +1 -0
- package/dist/plugins/trpc/index.js +136 -0
- package/dist/plugins/trpc/index.js.map +1 -0
- package/dist/run-demo.js +49 -0
- package/dist/run-demo.js.map +1 -0
- package/dist/src/browser/browser-functions.js +193 -0
- package/dist/src/browser/browser-functions.js.map +1 -0
- package/dist/src/browser/core.js +123 -0
- package/dist/src/browser/core.js.map +1 -0
- package/dist/src/browser/csv-to-json-browser.js +353 -0
- package/dist/src/browser/csv-to-json-browser.js.map +1 -0
- package/dist/src/browser/errors-browser.js +219 -0
- package/dist/src/browser/errors-browser.js.map +1 -0
- package/dist/src/browser/extensions/plugins.js +106 -0
- package/dist/src/browser/extensions/plugins.js.map +1 -0
- package/dist/src/browser/extensions/workers.js +66 -0
- package/dist/src/browser/extensions/workers.js.map +1 -0
- package/dist/src/browser/index.js +140 -0
- package/dist/src/browser/index.js.map +1 -0
- package/dist/src/browser/json-to-csv-browser.js +225 -0
- package/dist/src/browser/json-to-csv-browser.js.map +1 -0
- package/dist/src/browser/streams.js +340 -0
- package/dist/src/browser/streams.js.map +1 -0
- package/dist/src/browser/workers/csv-parser.worker.js +264 -0
- package/dist/src/browser/workers/csv-parser.worker.js.map +1 -0
- package/dist/src/browser/workers/worker-pool.js +338 -0
- package/dist/src/browser/workers/worker-pool.js.map +1 -0
- package/dist/src/core/delimiter-cache.js +196 -0
- package/dist/src/core/delimiter-cache.js.map +1 -0
- package/dist/src/core/node-optimizations.js +279 -0
- package/dist/src/core/node-optimizations.js.map +1 -0
- package/dist/src/core/plugin-system.js +399 -0
- package/dist/src/core/plugin-system.js.map +1 -0
- package/dist/src/core/transform-hooks.js +348 -0
- package/dist/src/core/transform-hooks.js.map +1 -0
- package/dist/src/engines/fast-path-engine-new.js +262 -0
- package/dist/src/engines/fast-path-engine-new.js.map +1 -0
- package/dist/src/engines/fast-path-engine.js +671 -0
- package/dist/src/engines/fast-path-engine.js.map +1 -0
- package/dist/src/errors.js +18 -0
- package/dist/src/errors.js.map +1 -0
- package/dist/src/formats/ndjson-parser.js +332 -0
- package/dist/src/formats/ndjson-parser.js.map +1 -0
- package/dist/src/formats/tsv-parser.js +230 -0
- package/dist/src/formats/tsv-parser.js.map +1 -0
- package/dist/src/index-with-plugins.js +259 -0
- package/dist/src/index-with-plugins.js.map +1 -0
- package/dist/src/types/index.js +3 -0
- package/dist/src/types/index.js.map +1 -0
- package/dist/src/utils/bom-utils.js +267 -0
- package/dist/src/utils/bom-utils.js.map +1 -0
- package/dist/src/utils/encoding-support.js +77 -0
- package/dist/src/utils/encoding-support.js.map +1 -0
- package/dist/src/utils/schema-validator.js +609 -0
- package/dist/src/utils/schema-validator.js.map +1 -0
- package/dist/src/utils/transform-loader.js +281 -0
- package/dist/src/utils/transform-loader.js.map +1 -0
- package/dist/src/utils/validators.js +40 -0
- package/dist/src/utils/validators.js.map +1 -0
- package/dist/src/utils/zod-adapter.js +144 -0
- package/dist/src/utils/zod-adapter.js.map +1 -0
- package/{src → dist/src}/web-server/index.js +251 -286
- package/dist/src/web-server/index.js.map +1 -0
- package/dist/src/workers/csv-multithreaded.js +211 -0
- package/dist/src/workers/csv-multithreaded.js.map +1 -0
- package/dist/src/workers/csv-parser.worker.js +179 -0
- package/dist/src/workers/csv-parser.worker.js.map +1 -0
- package/dist/src/workers/worker-pool.js +228 -0
- package/dist/src/workers/worker-pool.js.map +1 -0
- package/dist/stream-csv-to-json.js +665 -0
- package/dist/stream-csv-to-json.js.map +1 -0
- package/dist/stream-json-to-csv.js +389 -0
- package/dist/stream-json-to-csv.js.map +1 -0
- package/examples/advanced/conditional-transformations.ts +2 -2
- package/examples/advanced/performance-optimization.ts +2 -2
- package/examples/cli-advanced-usage.md +2 -0
- package/examples/cli-tool.ts +1 -1
- package/examples/large-dataset-example.ts +2 -2
- package/examples/simple-usage.ts +2 -2
- package/examples/streaming-example.ts +1 -1
- package/index.d.ts +186 -15
- package/package.json +43 -108
- package/plugins.d.ts +37 -0
- package/schema.d.ts +103 -0
- package/src/browser/csv-to-json-browser.ts +233 -3
- package/src/browser/errors-browser.ts +45 -28
- package/src/browser/json-to-csv-browser.ts +81 -5
- package/src/browser/streams.ts +73 -6
- package/src/core/delimiter-cache.ts +21 -11
- package/src/core/plugin-system.ts +343 -155
- package/src/core/transform-hooks.ts +20 -12
- package/src/engines/fast-path-engine.ts +48 -32
- package/src/errors.ts +1 -72
- package/src/formats/ndjson-parser.ts +6 -0
- package/src/formats/tsv-parser.ts +6 -0
- package/src/types/index.ts +21 -1
- package/src/utils/validators.ts +35 -0
- package/src/web-server/index.ts +1 -1
- package/bin/jtcsv.js +0 -2532
- package/csv-to-json.js +0 -711
- package/errors.js +0 -394
- package/examples/advanced/conditional-transformations.js +0 -446
- package/examples/advanced/csv-parser.worker.js +0 -89
- package/examples/advanced/nested-objects-example.js +0 -306
- package/examples/advanced/performance-optimization.js +0 -504
- package/examples/advanced/run-demo-server.js +0 -116
- package/examples/cli-batch-processing.js +0 -38
- package/examples/cli-tool.js +0 -183
- package/examples/error-handling.js +0 -338
- package/examples/express-api.js +0 -164
- package/examples/large-dataset-example.js +0 -182
- package/examples/ndjson-processing.js +0 -434
- package/examples/plugin-excel-exporter.js +0 -406
- package/examples/schema-validation.js +0 -640
- package/examples/simple-usage.js +0 -282
- package/examples/streaming-example.js +0 -418
- package/examples/web-workers-advanced.js +0 -28
- package/index.js +0 -82
- package/json-save.js +0 -255
- package/json-to-csv.js +0 -668
- package/plugins/README.md +0 -91
- package/plugins/express-middleware/README.md +0 -83
- package/plugins/express-middleware/example.js +0 -135
- package/plugins/express-middleware/example.ts +0 -135
- package/plugins/express-middleware/index.d.ts +0 -114
- package/plugins/express-middleware/index.js +0 -512
- package/plugins/express-middleware/index.ts +0 -557
- package/plugins/express-middleware/package.json +0 -52
- package/plugins/fastify-plugin/index.js +0 -404
- package/plugins/fastify-plugin/index.ts +0 -443
- package/plugins/fastify-plugin/package.json +0 -55
- package/plugins/hono/README.md +0 -28
- package/plugins/hono/index.d.ts +0 -12
- package/plugins/hono/index.js +0 -36
- package/plugins/hono/index.ts +0 -226
- package/plugins/hono/package.json +0 -35
- package/plugins/nestjs/README.md +0 -35
- package/plugins/nestjs/index.d.ts +0 -25
- package/plugins/nestjs/index.js +0 -77
- package/plugins/nestjs/index.ts +0 -201
- package/plugins/nestjs/package.json +0 -37
- package/plugins/nextjs-api/README.md +0 -57
- package/plugins/nextjs-api/examples/ConverterComponent.jsx +0 -386
- package/plugins/nextjs-api/examples/ConverterComponent.tsx +0 -386
- package/plugins/nextjs-api/examples/api-convert.js +0 -67
- package/plugins/nextjs-api/examples/api-convert.ts +0 -67
- package/plugins/nextjs-api/index.js +0 -387
- package/plugins/nextjs-api/index.tsx +0 -339
- package/plugins/nextjs-api/package.json +0 -63
- package/plugins/nextjs-api/route.js +0 -370
- package/plugins/nextjs-api/route.ts +0 -370
- package/plugins/nuxt/README.md +0 -24
- package/plugins/nuxt/index.js +0 -21
- package/plugins/nuxt/index.ts +0 -94
- package/plugins/nuxt/package.json +0 -35
- package/plugins/nuxt/runtime/composables/useJtcsv.js +0 -6
- package/plugins/nuxt/runtime/composables/useJtcsv.ts +0 -100
- package/plugins/nuxt/runtime/plugin.js +0 -6
- package/plugins/nuxt/runtime/plugin.ts +0 -71
- package/plugins/remix/README.md +0 -26
- package/plugins/remix/index.d.ts +0 -16
- package/plugins/remix/index.js +0 -62
- package/plugins/remix/index.ts +0 -260
- package/plugins/remix/package.json +0 -35
- package/plugins/sveltekit/README.md +0 -28
- package/plugins/sveltekit/index.d.ts +0 -17
- package/plugins/sveltekit/index.js +0 -54
- package/plugins/sveltekit/index.ts +0 -301
- package/plugins/sveltekit/package.json +0 -33
- package/plugins/trpc/README.md +0 -25
- package/plugins/trpc/index.d.ts +0 -7
- package/plugins/trpc/index.js +0 -32
- package/plugins/trpc/index.ts +0 -267
- package/plugins/trpc/package.json +0 -34
- package/src/browser/browser-functions.js +0 -219
- package/src/browser/core.js +0 -92
- package/src/browser/csv-to-json-browser.js +0 -722
- package/src/browser/errors-browser.js +0 -212
- package/src/browser/extensions/plugins.js +0 -92
- package/src/browser/extensions/workers.js +0 -39
- package/src/browser/index.js +0 -113
- package/src/browser/json-to-csv-browser.js +0 -319
- package/src/browser/streams.js +0 -403
- package/src/browser/workers/csv-parser.worker.js +0 -377
- package/src/browser/workers/worker-pool.js +0 -527
- package/src/core/delimiter-cache.js +0 -200
- package/src/core/node-optimizations.js +0 -408
- package/src/core/plugin-system.js +0 -494
- package/src/core/transform-hooks.js +0 -350
- package/src/engines/fast-path-engine-new.js +0 -338
- package/src/engines/fast-path-engine.js +0 -844
- package/src/errors.js +0 -26
- package/src/formats/ndjson-parser.js +0 -467
- package/src/formats/tsv-parser.js +0 -339
- package/src/index-with-plugins.js +0 -378
- package/src/utils/bom-utils.js +0 -259
- package/src/utils/encoding-support.js +0 -124
- package/src/utils/schema-validator.js +0 -594
- package/src/utils/transform-loader.js +0 -205
- package/src/utils/zod-adapter.js +0 -170
- package/stream-csv-to-json.js +0 -560
- package/stream-json-to-csv.js +0 -465
package/src/utils/bom-utils.js
DELETED
|
@@ -1,259 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* BOM (Byte Order Mark) Utilities for jtcsv
|
|
3
|
-
*
|
|
4
|
-
* Provides functions to detect and strip BOM characters from UTF-8, UTF-16 LE/BE,
|
|
5
|
-
* and UTF-32 encoded strings/buffers.
|
|
6
|
-
*
|
|
7
|
-
* @module bom-utils
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
/**
 * BOM signatures for different encodings, keyed by encoding name.
 *
 * The UTF-16LE signature (FF FE) is a prefix of the UTF-32LE signature
 * (FF FE 00 00); detectBom resolves that overlap by testing the longest
 * signatures first.
 */
const BOM_SIGNATURES = {
  'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),
  'utf-16le': Buffer.from([0xFF, 0xFE]),
  'utf-16be': Buffer.from([0xFE, 0xFF]),
  'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
  'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF])
};

/**
 * Detects if a buffer or string starts with a BOM.
 *
 * Signatures are tested longest-first so that a UTF-32LE BOM is not
 * misreported as UTF-16LE. The trade-off is the usual one: UTF-16LE data
 * whose first character after the BOM is U+0000 is reported as UTF-32LE.
 *
 * @param {Buffer|string} input - Input to check for BOM. Strings are
 *   examined through their UTF-8 encoding, so only a leading U+FEFF
 *   (reported as 'utf-8') can match.
 * @returns {Object|null} Detection result, or null if no BOM is found or
 *   the input is falsy or neither a string nor a Buffer
 * @property {string} encoding - Detected encoding ('utf-8', 'utf-16le', etc.)
 * @property {number} bomLength - Length of BOM in bytes
 * @property {boolean} hasBom - Always true on a detection result
 */
function detectBom(input) {
  if (!input) {
    return null;
  }

  let buffer;
  if (typeof input === 'string') {
    // Only the leading bytes matter, so encode a short prefix instead of
    // copying the whole string. No signature other than UTF-8's can match
    // UTF-8 output (0xFE and 0xFF never occur in it), so truncating the
    // string cannot change the result.
    buffer = Buffer.from(input.slice(0, 4), 'utf8');
  } else if (Buffer.isBuffer(input)) {
    buffer = input;
  } else {
    return null;
  }

  // Longest signature first; Array.prototype.sort is stable, so signatures
  // of equal length keep their declaration order.
  const candidates = Object.entries(BOM_SIGNATURES)
    .sort(([, a], [, b]) => b.length - a.length);

  for (const [encoding, signature] of candidates) {
    if (buffer.length >= signature.length &&
        buffer.slice(0, signature.length).equals(signature)) {
      return {
        encoding,
        bomLength: signature.length,
        hasBom: true
      };
    }
  }

  return null;
}
|
|
58
|
-
|
|
59
|
-
/**
 * Strips BOM from a buffer or string.
 *
 * Strings are handled without any re-encoding: a decoded BOM is always the
 * single character U+FEFF, so it is removed directly. This keeps the rest
 * of the string untouched (a UTF-8 round-trip would rewrite lone surrogates)
 * and avoids copying the whole input.
 *
 * @param {Buffer|string} input - Input to strip BOM from
 * @returns {Buffer|string} Input without BOM; falsy values and values that
 *   are neither a string nor a Buffer are returned unchanged
 */
function stripBom(input) {
  if (!input) {
    return input;
  }

  if (typeof input === 'string') {
    return input.charCodeAt(0) === 0xFEFF ? input.slice(1) : input;
  }

  if (Buffer.isBuffer(input)) {
    const bomInfo = detectBom(input);
    return bomInfo ? input.slice(bomInfo.bomLength) : input;
  }

  return input;
}
|
|
97
|
-
|
|
98
|
-
/**
 * Removes a leading BOM from a string without any Buffer conversion.
 *
 * Two forms are recognized: the decoded BOM character U+FEFF, and the three
 * UTF-8 BOM bytes appearing as the individual characters U+00EF U+00BB
 * U+00BF. At most one BOM is removed.
 *
 * @param {string} str - String to strip BOM from
 * @returns {string} String without BOM; non-string values are returned as-is
 */
function stripBomFromString(str) {
  if (typeof str !== 'string') {
    return str;
  }

  const decodedBom = '\uFEFF';
  const utf8BomAsChars = '\u00EF\u00BB\u00BF';

  for (const marker of [decodedBom, utf8BomAsChars]) {
    if (str.startsWith(marker)) {
      return str.slice(marker.length);
    }
  }

  return str;
}
|
|
124
|
-
|
|
125
|
-
/**
 * Creates a transform stream that strips a leading BOM from incoming data.
 *
 * The BOM check needs up to four bytes, so the stream holds data back until
 * that many bytes have arrived (or the input ends) before deciding. This
 * catches a BOM that is split across chunks or preceded by an empty chunk;
 * the cost is that fewer than four leading bytes are not forwarded until
 * more data arrives or the stream is ended. Everything after the decision
 * passes through untouched.
 *
 * Chunks are expected to be Buffers, which is what a Transform created with
 * default options delivers to transform().
 *
 * @returns {Transform} Transform stream
 */
function createBomStripStream() {
  const { Transform } = require('stream');

  // Length of the longest BOM (the UTF-32 ones).
  const LONGEST_BOM = 4;

  let heldChunks = [];
  let heldLength = 0;
  let bomHandled = false;

  // Joins everything held so far, removes a BOM if one is present, and
  // switches the stream to pass-through mode.
  function releaseHeld() {
    const head = Buffer.concat(heldChunks, heldLength);
    heldChunks = [];
    heldLength = 0;
    bomHandled = true;

    const bomInfo = detectBom(head);
    return bomInfo ? head.slice(bomInfo.bomLength) : head;
  }

  return new Transform({
    transform(chunk, encoding, callback) {
      if (bomHandled) {
        callback(null, chunk);
        return;
      }

      heldChunks.push(chunk);
      heldLength += chunk.length;

      if (heldLength < LONGEST_BOM) {
        // Not enough bytes to rule a BOM in or out yet.
        callback();
        return;
      }

      callback(null, releaseHeld());
    },

    flush(callback) {
      // Input ended before four bytes arrived: decide with what we have.
      if (bomHandled || heldLength === 0) {
        callback();
        return;
      }

      callback(null, releaseHeld());
    }
  });
}
|
|
152
|
-
|
|
153
|
-
/**
 * Decodes bytes to a string, covering encodings Buffer#toString rejects.
 *
 * Node's Buffer has no decoder for UTF-16BE or UTF-32, so those are handled
 * here; every other encoding name is passed to Buffer#toString unchanged.
 *
 * @param {Buffer} bytes - Bytes to decode (BOM already removed)
 * @param {string} encoding - Encoding name, e.g. 'utf8', 'utf-16be'
 * @returns {string} Decoded text
 */
function decodeFileBytes(bytes, encoding) {
  const family = String(encoding).toLowerCase().replace(/[-_]/g, '');

  if (family === 'utf16be') {
    // swap16() needs an even length and works in place, so swap a copy of
    // the even-length prefix and decode it as little-endian.
    const evenLength = bytes.length - (bytes.length % 2);
    return Buffer.from(bytes.slice(0, evenLength)).swap16().toString('utf16le');
  }

  if (family === 'utf32le' || family === 'utf32be') {
    const littleEndian = family === 'utf32le';
    const usableLength = bytes.length - (bytes.length % 4);
    let text = '';
    for (let offset = 0; offset < usableLength; offset += 4) {
      const codePoint = littleEndian
        ? bytes.readUInt32LE(offset)
        : bytes.readUInt32BE(offset);
      // Out-of-range values and surrogates become U+FFFD instead of
      // throwing or producing ill-formed text.
      const isScalar = codePoint <= 0x10FFFF &&
        (codePoint < 0xD800 || codePoint > 0xDFFF);
      text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
    }
    return text;
  }

  return bytes.toString(encoding);
}

/**
 * Reads a file and automatically handles BOM.
 *
 * The whole file is read into memory. If a BOM is found it is removed, and
 * the BOM's encoding is used for decoding unless options.encoding is given.
 * Without a BOM and without options.encoding, the raw Buffer is returned.
 *
 * @param {string} filePath - Path to file
 * @param {Object} options - Read options
 * @param {string} [options.encoding] - Encoding used to decode the file;
 *   takes precedence over the encoding implied by a BOM
 * @returns {Promise<{data: Buffer|string, encoding: string, hadBom: boolean, bomInfo: Object|null}>}
 *   data is a string when a BOM was found or options.encoding was given
 */
async function readFileWithBomHandling(filePath, options = {}) {
  const fs = require('fs').promises;
  const buffer = await fs.readFile(filePath);

  const bomInfo = detectBom(buffer);
  const hadBom = !!bomInfo;

  let data = buffer;
  let encoding = options.encoding || 'utf8';

  if (bomInfo) {
    // Strip BOM
    data = buffer.slice(bomInfo.bomLength);

    // Use detected encoding if not specified
    if (!options.encoding) {
      encoding = bomInfo.encoding;
    }
  }

  // Convert to string if an encoding is known (explicit or from the BOM)
  if (options.encoding || bomInfo) {
    data = decodeFileBytes(data, encoding);
  }

  return {
    data,
    encoding,
    hadBom,
    bomInfo: bomInfo || null
  };
}
|
|
194
|
-
|
|
195
|
-
/**
 * Checks if a file has BOM (synchronous).
 *
 * Only the first four bytes of the file are read. The file descriptor is
 * closed even if the read throws.
 *
 * @param {string} filePath - Path to file
 * @returns {Object|null} BOM info or null (also null when the file holds
 *   fewer than two bytes)
 */
function fileHasBomSync(filePath) {
  const fs = require('fs');
  const buffer = Buffer.alloc(4);
  const fd = fs.openSync(filePath, 'r');

  let bytesRead;
  try {
    bytesRead = fs.readSync(fd, buffer, 0, 4, 0);
  } finally {
    // Release the descriptor on both the success and the failure path.
    fs.closeSync(fd);
  }

  if (bytesRead < 2) {
    return null;
  }

  return detectBom(buffer.slice(0, bytesRead));
}
|
|
214
|
-
|
|
215
|
-
/**
 * Decodes CSV bytes using the encoding named by a detected BOM.
 *
 * Node's Buffer has no decoder for UTF-16BE or UTF-32, so those are handled
 * here; every other encoding name is passed to Buffer#toString unchanged.
 *
 * @param {Buffer} bytes - Bytes to decode (BOM already removed)
 * @param {string} encoding - Encoding name, e.g. 'utf-8', 'utf-16be'
 * @returns {string} Decoded text
 */
function decodeCsvBytes(bytes, encoding) {
  const family = String(encoding).toLowerCase().replace(/[-_]/g, '');

  if (family === 'utf16be') {
    // swap16() needs an even length and works in place, so swap a copy of
    // the even-length prefix and decode it as little-endian.
    const evenLength = bytes.length - (bytes.length % 2);
    return Buffer.from(bytes.slice(0, evenLength)).swap16().toString('utf16le');
  }

  if (family === 'utf32le' || family === 'utf32be') {
    const littleEndian = family === 'utf32le';
    const usableLength = bytes.length - (bytes.length % 4);
    let text = '';
    for (let offset = 0; offset < usableLength; offset += 4) {
      const codePoint = littleEndian
        ? bytes.readUInt32LE(offset)
        : bytes.readUInt32BE(offset);
      // Out-of-range values and surrogates become U+FFFD instead of
      // throwing or producing ill-formed text.
      const isScalar = codePoint <= 0x10FFFF &&
        (codePoint < 0xD800 || codePoint > 0xDFFF);
      text += isScalar ? String.fromCodePoint(codePoint) : '\uFFFD';
    }
    return text;
  }

  return bytes.toString(encoding);
}

/**
 * Normalizes CSV input by stripping BOM and ensuring proper encoding.
 *
 * Buffers are decoded with the encoding named by their BOM when one is
 * present, otherwise with options.encoding (default 'utf8'). Strings only
 * have a leading BOM removed. Unless disabled, CRLF and lone CR line
 * endings are rewritten to LF.
 *
 * @param {string|Buffer} csvInput - CSV input
 * @param {Object} options - Processing options
 * @param {string} [options.encoding] - Encoding for Buffers without a BOM
 * @param {boolean} [options.normalizeLineEndings] - Pass false to keep the
 *   original line endings
 * @returns {string} Normalized CSV string ('' for falsy input)
 * @throws {Error} If csvInput is neither a string nor a Buffer
 */
function normalizeCsvInput(csvInput, options = {}) {
  if (!csvInput) {
    return '';
  }

  let normalized;

  if (Buffer.isBuffer(csvInput)) {
    const bomInfo = detectBom(csvInput);
    normalized = bomInfo
      ? decodeCsvBytes(csvInput.slice(bomInfo.bomLength), bomInfo.encoding)
      : csvInput.toString(options.encoding || 'utf8');
  } else if (typeof csvInput === 'string') {
    normalized = stripBomFromString(csvInput);
  } else {
    throw new Error('CSV input must be a string or Buffer');
  }

  // Ensure proper line endings
  if (options.normalizeLineEndings !== false) {
    normalized = normalized.replace(/\r\n|\r/g, '\n');
  }

  return normalized;
}
|
|
249
|
-
|
|
250
|
-
module.exports = {
|
|
251
|
-
detectBom,
|
|
252
|
-
stripBom,
|
|
253
|
-
stripBomFromString,
|
|
254
|
-
createBomStripStream,
|
|
255
|
-
readFileWithBomHandling,
|
|
256
|
-
fileHasBomSync,
|
|
257
|
-
normalizeCsvInput,
|
|
258
|
-
BOM_SIGNATURES
|
|
259
|
-
};
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Encoding detection and conversion utilities.
|
|
3
|
-
*
|
|
4
|
-
* Provides basic auto‑detection of UTF‑8, UTF‑16LE, UTF‑16BE with BOM,
|
|
5
|
-
* and fallback to a default encoding.
|
|
6
|
-
*
|
|
7
|
-
* @example
|
|
8
|
-
* const { detectEncoding, convertToUtf8 } = require('./encoding-support');
|
|
9
|
-
*
|
|
10
|
-
* const buffer = fs.readFileSync('data.csv');
|
|
11
|
-
* const encoding = detectEncoding(buffer, { fallback: 'utf8' });
|
|
12
|
-
* const utf8Text = convertToUtf8(buffer, encoding);
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
const { ValidationError } = require('../errors');
|
|
16
|
-
|
|
17
|
-
/**
 * Identifies a buffer's encoding from its byte order mark.
 *
 * Checks, in order, for the UTF-8, UTF-16BE and UTF-16LE BOMs. When none is
 * present the fallback is returned as given.
 *
 * @param {Buffer} buffer - Input buffer
 * @param {Object} options - Detection options
 * @param {string} options.fallback - Encoding returned when no BOM matches (default: 'utf8')
 * @returns {string} 'utf8', 'utf16be', 'utf16le', or the fallback
 * @throws {ValidationError} If buffer is not a Buffer
 */
function detectEncoding(buffer, options = {}) {
  const { fallback: fallbackEncoding = 'utf8' } = options;

  if (!Buffer.isBuffer(buffer)) {
    throw new ValidationError('Input must be a Buffer');
  }

  // True when the buffer begins with exactly the given byte sequence.
  const beginsWith = (...signature) =>
    buffer.length >= signature.length &&
    signature.every((byte, index) => buffer[index] === byte);

  if (beginsWith(0xEF, 0xBB, 0xBF)) {
    return 'utf8';
  }
  if (beginsWith(0xFE, 0xFF)) {
    return 'utf16be';
  }
  if (beginsWith(0xFF, 0xFE)) {
    return 'utf16le';
  }

  return fallbackEncoding;
}
|
|
46
|
-
|
|
47
|
-
/**
 * Converts buffer to a string using the given source encoding.
 * Strips BOM if present.
 *
 * Encoding names are matched case-insensitively and with or without a
 * hyphen/underscore ('utf8', 'utf-8', 'UTF-16BE', ...) when deciding which
 * BOM to skip. UTF-16BE is decoded here by byte-swapping a copy of the data,
 * because Buffer#toString has no big-endian decoder; all other encodings are
 * passed to Buffer#toString as given. The input buffer is never modified.
 *
 * @param {Buffer} buffer - Input buffer
 * @param {string} encoding - Source encoding ('utf8', 'utf16le', 'utf16be')
 * @returns {string} Decoded string without BOM
 * @throws {ValidationError} If buffer is not a Buffer
 */
function convertToUtf8(buffer, encoding = 'utf8') {
  if (!Buffer.isBuffer(buffer)) {
    throw new ValidationError('Input must be a Buffer');
  }

  const family = String(encoding).toLowerCase().replace(/[-_]/g, '');

  let offset = 0;

  // Skip BOM
  if (family === 'utf8' && buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) {
    offset = 3;
  } else if (family === 'utf16be' && buffer.length >= 2 && buffer[0] === 0xFE && buffer[1] === 0xFF) {
    offset = 2;
  } else if (family === 'utf16le' && buffer.length >= 2 && buffer[0] === 0xFF && buffer[1] === 0xFE) {
    offset = 2;
  }

  const payload = buffer.slice(offset);

  if (family === 'utf16be') {
    // swap16() needs an even length and works in place: swap a copy of the
    // even-length prefix (a trailing odd byte is dropped, as the
    // little-endian decoder would do) and decode it as UTF-16LE.
    const evenLength = payload.length - (payload.length % 2);
    return Buffer.from(payload.slice(0, evenLength)).swap16().toString('utf16le');
  }

  return payload.toString(encoding);
}
|
|
74
|
-
|
|
75
|
-
/**
 * Detects a buffer's encoding from its BOM and decodes it in one step.
 *
 * Delegates to detectEncoding for the detection and to convertToUtf8 for
 * the decoding.
 *
 * @param {Buffer} buffer - Input buffer
 * @param {Object} options - Options forwarded to detectEncoding
 * @param {string} options.fallback - Fallback encoding (default: 'utf8')
 * @returns {{ encoding: string, text: string }} Detected encoding and converted text
 */
function autoDetectAndConvert(buffer, options = {}) {
  const detected = detectEncoding(buffer, options);

  return {
    encoding: detected,
    text: convertToUtf8(buffer, detected)
  };
}
|
|
88
|
-
|
|
89
|
-
/**
 * Parses CSV supplied as a Buffer or a string, decoding Buffers first.
 *
 * Buffers are decoded either by BOM auto-detection (encoding 'auto') or
 * with the explicitly requested encoding; strings are passed through
 * unchanged. The encoding-related options are removed before the remaining
 * options are handed to csvToJson.
 *
 * @param {Buffer|string} input - CSV as Buffer or string
 * @param {Object} parseOptions - Options for csvToJson
 * @param {string} parseOptions.encoding - Explicit encoding (default: 'auto')
 * @param {string} parseOptions.fallbackEncoding - Fallback if auto detection fails (default: 'utf8')
 * @returns {Promise<Array>} Parsed JSON data
 * @throws {ValidationError} If input is neither a Buffer nor a string
 */
async function csvToJsonWithEncoding(input, parseOptions = {}) {
  // Loaded at call time rather than at module load.
  const { csvToJson } = require('../index');
  const { encoding = 'auto', fallbackEncoding = 'utf8', ...restOptions } = parseOptions;

  const inputIsBuffer = Buffer.isBuffer(input);
  if (!inputIsBuffer && typeof input !== 'string') {
    throw new ValidationError('Input must be a Buffer or string');
  }

  let csvText = input;
  if (inputIsBuffer) {
    csvText = encoding === 'auto'
      ? autoDetectAndConvert(input, { fallback: fallbackEncoding }).text
      : convertToUtf8(input, encoding);
  }

  return csvToJson(csvText, restOptions);
}
|
|
118
|
-
|
|
119
|
-
module.exports = {
|
|
120
|
-
detectEncoding,
|
|
121
|
-
convertToUtf8,
|
|
122
|
-
autoDetectAndConvert,
|
|
123
|
-
csvToJsonWithEncoding
|
|
124
|
-
};
|