jtcsv 2.1.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -17
- package/bin/jtcsv.js +1013 -117
- package/csv-to-json.js +385 -311
- package/examples/simple-usage.js +2 -3
- package/index.d.ts +288 -5
- package/index.js +23 -0
- package/json-to-csv.js +130 -89
- package/package.json +47 -19
- package/plugins/README.md +146 -2
- package/plugins/hono/README.md +25 -0
- package/plugins/hono/index.d.ts +12 -0
- package/plugins/hono/index.js +36 -0
- package/plugins/hono/package.json +35 -0
- package/plugins/nestjs/README.md +33 -0
- package/plugins/nestjs/index.d.ts +25 -0
- package/plugins/nestjs/index.js +77 -0
- package/plugins/nestjs/package.json +37 -0
- package/plugins/nuxt/README.md +25 -0
- package/plugins/nuxt/index.js +21 -0
- package/plugins/nuxt/package.json +35 -0
- package/plugins/nuxt/runtime/composables/useJtcsv.js +6 -0
- package/plugins/nuxt/runtime/plugin.js +6 -0
- package/plugins/remix/README.md +26 -0
- package/plugins/remix/index.d.ts +16 -0
- package/plugins/remix/index.js +62 -0
- package/plugins/remix/package.json +35 -0
- package/plugins/sveltekit/README.md +28 -0
- package/plugins/sveltekit/index.d.ts +17 -0
- package/plugins/sveltekit/index.js +54 -0
- package/plugins/sveltekit/package.json +33 -0
- package/plugins/trpc/README.md +22 -0
- package/plugins/trpc/index.d.ts +7 -0
- package/plugins/trpc/index.js +32 -0
- package/plugins/trpc/package.json +34 -0
- package/src/core/delimiter-cache.js +186 -0
- package/src/core/transform-hooks.js +350 -0
- package/src/engines/fast-path-engine.js +829 -340
- package/src/formats/tsv-parser.js +336 -0
- package/src/index-with-plugins.js +36 -14
- package/cli-tui.js +0 -5
package/csv-to-json.js
CHANGED
|
@@ -17,6 +17,14 @@ const {
|
|
|
17
17
|
safeExecute
|
|
18
18
|
} = require('./errors');
|
|
19
19
|
|
|
20
|
+
const { TransformHooks, predefinedHooks } = require('./src/core/transform-hooks');
|
|
21
|
+
const DelimiterCache = require('./src/core/delimiter-cache');
|
|
22
|
+
const FastPathEngine = require('./src/engines/fast-path-engine');
|
|
23
|
+
|
|
24
|
+
// Global cache instance used for delimiter auto-detection
|
|
25
|
+
const globalDelimiterCache = new DelimiterCache(100);
|
|
26
|
+
const globalFastPathEngine = new FastPathEngine();
|
|
27
|
+
|
|
20
28
|
/**
|
|
21
29
|
* Validates CSV input and options
|
|
22
30
|
* @private
|
|
@@ -56,133 +64,50 @@ function validateCsvInput(csv, options) {
|
|
|
56
64
|
throw new ConfigurationError('maxRows must be a positive number');
|
|
57
65
|
}
|
|
58
66
|
|
|
59
|
-
|
|
60
|
-
|
|
67
|
+
// Validate cache options
|
|
68
|
+
if (options?.useCache !== undefined && typeof options.useCache !== 'boolean') {
|
|
69
|
+
throw new ConfigurationError('useCache must be a boolean');
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if (options?.cache && !(options.cache instanceof DelimiterCache)) {
|
|
73
|
+
throw new ConfigurationError('cache must be an instance of DelimiterCache');
|
|
74
|
+
}
|
|
61
75
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
76
|
+
if (options?.useFastPath !== undefined && typeof options.useFastPath !== 'boolean') {
|
|
77
|
+
throw new ConfigurationError('useFastPath must be a boolean');
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (options?.fastPathMode !== undefined
|
|
81
|
+
&& options.fastPathMode !== 'objects'
|
|
82
|
+
&& options.fastPathMode !== 'compact'
|
|
83
|
+
&& options.fastPathMode !== 'stream') {
|
|
84
|
+
throw new ConfigurationError('fastPathMode must be "objects", "compact", or "stream"');
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Validate hooks
|
|
88
|
+
if (options?.hooks) {
|
|
89
|
+
if (typeof options.hooks !== 'object') {
|
|
90
|
+
throw new ConfigurationError('hooks must be an object');
|
|
91
|
+
}
|
|
74
92
|
|
|
75
|
-
if (
|
|
76
|
-
|
|
77
|
-
escapeNext = false;
|
|
78
|
-
continue;
|
|
93
|
+
if (options.hooks.beforeConvert && typeof options.hooks.beforeConvert !== 'function') {
|
|
94
|
+
throw new ConfigurationError('hooks.beforeConvert must be a function');
|
|
79
95
|
}
|
|
80
|
-
|
|
81
|
-
if (
|
|
82
|
-
|
|
83
|
-
// Backslash at end of line - treat as literal
|
|
84
|
-
currentField += char;
|
|
85
|
-
} else if (line[i + 1] === '\\') {
|
|
86
|
-
// Double backslash - add one backslash to field and skip next
|
|
87
|
-
currentField += char;
|
|
88
|
-
i++; // Skip next backslash
|
|
89
|
-
} else {
|
|
90
|
-
// Escape next character
|
|
91
|
-
escapeNext = true;
|
|
92
|
-
}
|
|
93
|
-
continue;
|
|
96
|
+
|
|
97
|
+
if (options.hooks.afterConvert && typeof options.hooks.afterConvert !== 'function') {
|
|
98
|
+
throw new ConfigurationError('hooks.afterConvert must be a function');
|
|
94
99
|
}
|
|
95
|
-
|
|
96
|
-
if (
|
|
97
|
-
|
|
98
|
-
if (i + 1 < line.length && line[i + 1] === '"') {
|
|
99
|
-
// Could be escaped quote ("") or double quote at end ("")
|
|
100
|
-
if (i + 2 === line.length) {
|
|
101
|
-
// This is the pattern "" at the end of the line
|
|
102
|
-
// First quote is part of field, second is closing quote
|
|
103
|
-
currentField += '"';
|
|
104
|
-
i++; // Skip the closing quote
|
|
105
|
-
insideQuotes = false;
|
|
106
|
-
} else {
|
|
107
|
-
// Escaped quote inside quotes ("" -> ")
|
|
108
|
-
currentField += '"';
|
|
109
|
-
i++; // Skip next quote
|
|
110
|
-
// Check if this is the end of the quoted field
|
|
111
|
-
// Look ahead to see if next char is delimiter or end of line
|
|
112
|
-
let isEndOfField = false;
|
|
113
|
-
let j = i + 1;
|
|
114
|
-
// Skip whitespace
|
|
115
|
-
while (j < line.length && (line[j] === ' ' || line[j] === '\t')) {
|
|
116
|
-
j++;
|
|
117
|
-
}
|
|
118
|
-
if (j === line.length || line[j] === delimiter) {
|
|
119
|
-
isEndOfField = true;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
if (isEndOfField) {
|
|
123
|
-
// This is the closing quote
|
|
124
|
-
insideQuotes = false;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
} else {
|
|
128
|
-
// Check if this is really the end of the quoted field
|
|
129
|
-
// Look ahead to see if next char is delimiter or end of line
|
|
130
|
-
let isEndOfField = false;
|
|
131
|
-
let j = i + 1;
|
|
132
|
-
// Skip whitespace
|
|
133
|
-
while (j < line.length && (line[j] === ' ' || line[j] === '\t')) {
|
|
134
|
-
j++;
|
|
135
|
-
}
|
|
136
|
-
if (j === line.length || line[j] === delimiter) {
|
|
137
|
-
isEndOfField = true;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
if (isEndOfField) {
|
|
141
|
-
// This is the closing quote
|
|
142
|
-
insideQuotes = false;
|
|
143
|
-
} else {
|
|
144
|
-
// This quote is part of the field content
|
|
145
|
-
currentField += '"';
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
} else {
|
|
149
|
-
// Start of quoted field
|
|
150
|
-
insideQuotes = true;
|
|
151
|
-
}
|
|
152
|
-
continue;
|
|
100
|
+
|
|
101
|
+
if (options.hooks.perRow && typeof options.hooks.perRow !== 'function') {
|
|
102
|
+
throw new ConfigurationError('hooks.perRow must be a function');
|
|
153
103
|
}
|
|
154
|
-
|
|
155
|
-
if (
|
|
156
|
-
|
|
157
|
-
fields.push(currentField);
|
|
158
|
-
currentField = '';
|
|
159
|
-
continue;
|
|
104
|
+
|
|
105
|
+
if (options.hooks.transformHooks && !(options.hooks.transformHooks instanceof TransformHooks)) {
|
|
106
|
+
throw new ConfigurationError('hooks.transformHooks must be an instance of TransformHooks');
|
|
160
107
|
}
|
|
161
|
-
|
|
162
|
-
currentField += char;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// Handle case where escapeNext is still true at end of line
|
|
166
|
-
if (escapeNext) {
|
|
167
|
-
// This happens when line ends with backslash
|
|
168
|
-
// Add the backslash as literal character
|
|
169
|
-
currentField += '\\';
|
|
170
108
|
}
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
fields.push(currentField);
|
|
174
|
-
|
|
175
|
-
// Check for unclosed quotes
|
|
176
|
-
if (insideQuotes) {
|
|
177
|
-
throw new ParsingError('Unclosed quotes in CSV', lineNumber);
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
// Validate field count consistency
|
|
181
|
-
if (fields.length === 0) {
|
|
182
|
-
throw new ParsingError('No fields found', lineNumber);
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
return fields;
|
|
109
|
+
|
|
110
|
+
return true;
|
|
186
111
|
}
|
|
187
112
|
|
|
188
113
|
/**
|
|
@@ -223,63 +148,26 @@ function parseCsvValue(value, options) {
|
|
|
223
148
|
}
|
|
224
149
|
|
|
225
150
|
// Parse empty strings as null
|
|
226
|
-
if (result === '') {
|
|
227
|
-
return null;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
return result;
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
/**
|
|
234
|
-
* Auto-detect CSV delimiter from content
|
|
235
|
-
* @private
|
|
236
|
-
*/
|
|
237
|
-
function autoDetectDelimiter(csv, candidates = [';', ',', '\t', '|']) {
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
const lines = csv.split('\n').filter(line => line.trim().length > 0);
|
|
243
|
-
|
|
244
|
-
if (lines.length === 0) {
|
|
245
|
-
return ';'; // default
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
// Use first non-empty line for detection
|
|
249
|
-
const firstLine = lines[0];
|
|
250
|
-
|
|
251
|
-
const counts = {};
|
|
252
|
-
candidates.forEach(delim => {
|
|
253
|
-
// Escape special regex characters
|
|
254
|
-
const escapedDelim = delim.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
255
|
-
const regex = new RegExp(escapedDelim, 'g');
|
|
256
|
-
const matches = firstLine.match(regex);
|
|
257
|
-
counts[delim] = matches ? matches.length : 0;
|
|
258
|
-
});
|
|
259
|
-
|
|
260
|
-
// Find delimiter with maximum count
|
|
261
|
-
let maxCount = -1;
|
|
262
|
-
let detectedDelimiter = ';'; // default
|
|
263
|
-
|
|
264
|
-
for (const [delim, count] of Object.entries(counts)) {
|
|
265
|
-
if (count > maxCount) {
|
|
266
|
-
maxCount = count;
|
|
267
|
-
detectedDelimiter = delim;
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
// If no delimiter found or tie, return default
|
|
272
|
-
if (maxCount === 0) {
|
|
273
|
-
return ';'; // default
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
return detectedDelimiter;
|
|
151
|
+
if (result === '') {
|
|
152
|
+
return null;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
return result;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/**
|
|
159
|
+
* Auto-detect CSV delimiter from content with caching support
|
|
160
|
+
* @private
|
|
161
|
+
*/
|
|
162
|
+
function autoDetectDelimiter(csv, candidates = [';', ',', '\t', '|'], cache = null) {
|
|
163
|
+
// Delegate to the static DelimiterCache method, which supports caching
|
|
164
|
+
return DelimiterCache.autoDetectDelimiter(csv, candidates, cache);
|
|
277
165
|
}
|
|
278
166
|
|
|
279
167
|
/**
|
|
280
|
-
* Converts CSV string to JSON array
|
|
168
|
+
* Converts CSV string to JSON array with hooks and caching support
|
|
281
169
|
*
|
|
282
|
-
|
|
170
|
+
* @param {string} csv - CSV string to convert
|
|
283
171
|
* @param {Object} [options] - Configuration options
|
|
284
172
|
* @param {string} [options.delimiter] - CSV delimiter character (default: auto-detected)
|
|
285
173
|
* @param {boolean} [options.autoDetect=true] - Auto-detect delimiter if not specified
|
|
@@ -290,6 +178,13 @@ function autoDetectDelimiter(csv, candidates = [';', ',', '\t', '|']) {
|
|
|
290
178
|
* @param {boolean} [options.parseNumbers=false] - Parse numeric values
|
|
291
179
|
* @param {boolean} [options.parseBooleans=false] - Parse boolean values
|
|
292
180
|
* @param {number} [options.maxRows] - Maximum number of rows to process (optional, no limit by default)
|
|
181
|
+
* @param {boolean} [options.useCache=true] - Use caching for delimiter detection
|
|
182
|
+
* @param {DelimiterCache} [options.cache] - Custom cache instance (optional)
|
|
183
|
+
* @param {Object} [options.hooks] - Transform hooks
|
|
184
|
+
* @param {Function} [options.hooks.beforeConvert] - Hook called before conversion
|
|
185
|
+
* @param {Function} [options.hooks.afterConvert] - Hook called after conversion
|
|
186
|
+
* @param {Function} [options.hooks.perRow] - Hook called for each row
|
|
187
|
+
* @param {TransformHooks} [options.hooks.transformHooks] - TransformHooks instance
|
|
293
188
|
* @returns {Array<Object>} JSON array
|
|
294
189
|
*
|
|
295
190
|
* @example
|
|
@@ -298,7 +193,20 @@ function autoDetectDelimiter(csv, candidates = [';', ',', '\t', '|']) {
|
|
|
298
193
|
* const csv = `id;name;email\n1;John;john@example.com\n2;Jane;jane@example.com`;
|
|
299
194
|
* const json = csvToJson(csv, {
|
|
300
195
|
* delimiter: ';',
|
|
301
|
-
* parseNumbers: true
|
|
196
|
+
* parseNumbers: true,
|
|
197
|
+
* useCache: true, // Enable caching
|
|
198
|
+
* hooks: {
|
|
199
|
+
* beforeConvert: (data) => {
|
|
200
|
+
* console.log('Starting conversion...');
|
|
201
|
+
* return data;
|
|
202
|
+
* },
|
|
203
|
+
* perRow: (row, index) => {
|
|
204
|
+
* return { ...row, processed: true, index };
|
|
205
|
+
* },
|
|
206
|
+
* afterConvert: (data) => {
|
|
207
|
+
* return data.filter(item => item.id > 0);
|
|
208
|
+
* }
|
|
209
|
+
* }
|
|
302
210
|
* });
|
|
303
211
|
*/
|
|
304
212
|
function csvToJson(csv, options = {}) {
|
|
@@ -315,164 +223,286 @@ function csvToJson(csv, options = {}) {
|
|
|
315
223
|
hasHeaders = true,
|
|
316
224
|
renameMap = {},
|
|
317
225
|
trim = true,
|
|
318
|
-
parseNumbers = false,
|
|
319
|
-
parseBooleans = false,
|
|
320
|
-
maxRows
|
|
321
|
-
|
|
226
|
+
parseNumbers = false,
|
|
227
|
+
parseBooleans = false,
|
|
228
|
+
maxRows,
|
|
229
|
+
useCache = true,
|
|
230
|
+
cache: customCache,
|
|
231
|
+
useFastPath = true,
|
|
232
|
+
fastPathMode = 'objects',
|
|
233
|
+
hooks = {}
|
|
234
|
+
} = opts;
|
|
235
|
+
|
|
236
|
+
if (fastPathMode === 'stream') {
|
|
237
|
+
return csvToJsonIterator(csv, { ...opts, useFastPath, fastPathMode: 'objects' });
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Select which cache to use
|
|
241
|
+
const cacheToUse = useCache ? (customCache || globalDelimiterCache) : null;
|
|
242
|
+
|
|
243
|
+
// Create transform hooks system
|
|
244
|
+
const transformHooks = new TransformHooks();
|
|
245
|
+
|
|
246
|
+
// Add individual hooks if provided
|
|
247
|
+
if (hooks.beforeConvert) {
|
|
248
|
+
transformHooks.beforeConvert(hooks.beforeConvert);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
if (hooks.afterConvert) {
|
|
252
|
+
transformHooks.afterConvert(hooks.afterConvert);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
if (hooks.perRow) {
|
|
256
|
+
transformHooks.perRow(hooks.perRow);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Use provided TransformHooks instance if available
|
|
260
|
+
const finalHooks = hooks.transformHooks || transformHooks;
|
|
261
|
+
|
|
262
|
+
// Apply beforeConvert hooks to CSV string
|
|
263
|
+
const processedCsv = finalHooks.applyBeforeConvert(csv, {
|
|
264
|
+
operation: 'csvToJson',
|
|
265
|
+
options: opts
|
|
266
|
+
});
|
|
322
267
|
|
|
323
|
-
// Determine delimiter
|
|
268
|
+
// Determine delimiter with caching support
|
|
324
269
|
let finalDelimiter = delimiter;
|
|
325
270
|
if (!finalDelimiter && autoDetect) {
|
|
326
|
-
finalDelimiter = autoDetectDelimiter(
|
|
271
|
+
finalDelimiter = autoDetectDelimiter(processedCsv, candidates, cacheToUse);
|
|
327
272
|
}
|
|
328
273
|
finalDelimiter = finalDelimiter || ';'; // fallback
|
|
329
274
|
|
|
330
275
|
// Handle empty CSV
|
|
331
|
-
if (
|
|
276
|
+
if (processedCsv.trim() === '') {
|
|
332
277
|
return [];
|
|
333
278
|
}
|
|
334
279
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
let
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
const
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
280
|
+
let headers = null;
|
|
281
|
+
let totalRows = 0;
|
|
282
|
+
let dataRowIndex = 0;
|
|
283
|
+
const result = [];
|
|
284
|
+
|
|
285
|
+
try {
|
|
286
|
+
const parseOptions = { delimiter: finalDelimiter };
|
|
287
|
+
if (useFastPath === false) {
|
|
288
|
+
parseOptions.forceEngine = 'STANDARD';
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
globalFastPathEngine.parseRows(processedCsv, parseOptions, (fields) => {
|
|
292
|
+
totalRows++;
|
|
293
|
+
|
|
294
|
+
if (!headers) {
|
|
295
|
+
if (hasHeaders) {
|
|
296
|
+
headers = fields.map(header => {
|
|
297
|
+
const trimmed = trim ? header.trim() : header;
|
|
298
|
+
return renameMap[trimmed] || trimmed;
|
|
299
|
+
});
|
|
300
|
+
return;
|
|
301
|
+
}
|
|
302
|
+
headers = fields.map((_, index) => `column${index + 1}`);
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
if (!fields || fields.length === 0) {
|
|
306
|
+
return;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
if (maxRows && totalRows > maxRows) {
|
|
310
|
+
throw new LimitError(
|
|
311
|
+
`CSV size exceeds maximum limit of ${maxRows} rows`,
|
|
312
|
+
maxRows,
|
|
313
|
+
totalRows
|
|
314
|
+
);
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
const fieldCount = Math.min(fields.length, headers.length);
|
|
318
|
+
let row;
|
|
319
|
+
|
|
320
|
+
if (fastPathMode === 'compact') {
|
|
321
|
+
row = new Array(fieldCount);
|
|
322
|
+
for (let j = 0; j < fieldCount; j++) {
|
|
323
|
+
row[j] = parseCsvValue(fields[j], { trim, parseNumbers, parseBooleans });
|
|
324
|
+
}
|
|
348
325
|
} else {
|
|
349
|
-
|
|
350
|
-
|
|
326
|
+
row = {};
|
|
327
|
+
for (let j = 0; j < fieldCount; j++) {
|
|
328
|
+
row[headers[j]] = parseCsvValue(fields[j], { trim, parseNumbers, parseBooleans });
|
|
329
|
+
}
|
|
351
330
|
}
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
331
|
+
|
|
332
|
+
const processedRow = finalHooks.applyPerRow(row, dataRowIndex, {
|
|
333
|
+
lineNumber: totalRows,
|
|
334
|
+
headers,
|
|
335
|
+
options: opts
|
|
336
|
+
});
|
|
337
|
+
|
|
338
|
+
dataRowIndex++;
|
|
339
|
+
result.push(processedRow);
|
|
340
|
+
|
|
341
|
+
if (fields.length > headers.length && process.env.NODE_ENV === 'development') {
|
|
342
|
+
console.warn(`[jtcsv] Line ${totalRows}: ${fields.length - headers.length} extra fields ignored`);
|
|
343
|
+
}
|
|
344
|
+
});
|
|
345
|
+
} catch (error) {
|
|
346
|
+
if (error && error.code === 'FAST_PATH_UNCLOSED_QUOTES') {
|
|
347
|
+
throw new ParsingError(error.message, error.lineNumber);
|
|
366
348
|
}
|
|
367
|
-
|
|
368
|
-
currentLine += char;
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
// Add the last line
|
|
372
|
-
if (currentLine !== '' || insideQuotes) {
|
|
373
|
-
lines.push(currentLine);
|
|
349
|
+
throw error;
|
|
374
350
|
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
// Note: This check is moved to parseCsvLine which has better context
|
|
378
|
-
// for handling escaped quotes like ""
|
|
379
|
-
// if (insideQuotes) {
|
|
380
|
-
// throw new ParsingError('Unclosed quotes in CSV', lines.length);
|
|
381
|
-
// }
|
|
382
|
-
|
|
383
|
-
if (lines.length === 0) {
|
|
351
|
+
|
|
352
|
+
if (!headers) {
|
|
384
353
|
return [];
|
|
385
354
|
}
|
|
386
355
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
'
|
|
391
|
-
'
|
|
392
|
-
'
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
356
|
+
if (totalRows > 1000000 && !maxRows && process.env.NODE_ENV !== 'test') {
|
|
357
|
+
console.warn(
|
|
358
|
+
'Warning: Processing >1M records in memory may be slow.\n' +
|
|
359
|
+
'Consider using createCsvToJsonStream() for better performance with large files.\n' +
|
|
360
|
+
'Current size: ' + totalRows.toLocaleString() + ' rows\n' +
|
|
361
|
+
'Tip: Use { maxRows: N } option to set a custom limit if needed.'
|
|
362
|
+
);
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
return finalHooks.applyAfterConvert(result, {
|
|
366
|
+
operation: 'csvToJson',
|
|
367
|
+
totalRows: result.length,
|
|
368
|
+
options: opts
|
|
369
|
+
});
|
|
396
370
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
throw new LimitError(
|
|
400
|
-
`CSV size exceeds maximum limit of ${maxRows} rows`,
|
|
401
|
-
maxRows,
|
|
402
|
-
lines.length
|
|
403
|
-
);
|
|
404
|
-
}
|
|
371
|
+
}, 'PARSE_FAILED', { function: 'csvToJson' });
|
|
372
|
+
}
|
|
405
373
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
374
|
+
async function* csvToJsonIterator(csv, options = {}) {
|
|
375
|
+
validateCsvInput(csv, options);
|
|
376
|
+
|
|
377
|
+
const opts = options && typeof options === 'object' ? options : {};
|
|
378
|
+
|
|
379
|
+
const {
|
|
380
|
+
delimiter,
|
|
381
|
+
autoDetect = true,
|
|
382
|
+
candidates = [';', ',', '\t', '|'],
|
|
383
|
+
hasHeaders = true,
|
|
384
|
+
renameMap = {},
|
|
385
|
+
trim = true,
|
|
386
|
+
parseNumbers = false,
|
|
387
|
+
parseBooleans = false,
|
|
388
|
+
maxRows,
|
|
389
|
+
useCache = true,
|
|
390
|
+
cache: customCache,
|
|
391
|
+
useFastPath = true,
|
|
392
|
+
fastPathMode = 'objects',
|
|
393
|
+
hooks = {}
|
|
394
|
+
} = opts;
|
|
395
|
+
|
|
396
|
+
const cacheToUse = useCache ? (customCache || globalDelimiterCache) : null;
|
|
397
|
+
|
|
398
|
+
const transformHooks = new TransformHooks();
|
|
399
|
+
|
|
400
|
+
if (hooks.beforeConvert) {
|
|
401
|
+
transformHooks.beforeConvert(hooks.beforeConvert);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
if (hooks.afterConvert) {
|
|
405
|
+
transformHooks.afterConvert(hooks.afterConvert);
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
if (hooks.perRow) {
|
|
409
|
+
transformHooks.perRow(hooks.perRow);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
const finalHooks = hooks.transformHooks || transformHooks;
|
|
413
|
+
|
|
414
|
+
const processedCsv = finalHooks.applyBeforeConvert(csv, {
|
|
415
|
+
operation: 'csvToJson',
|
|
416
|
+
options: opts
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
let finalDelimiter = delimiter;
|
|
420
|
+
if (!finalDelimiter && autoDetect) {
|
|
421
|
+
finalDelimiter = autoDetectDelimiter(processedCsv, candidates, cacheToUse);
|
|
422
|
+
}
|
|
423
|
+
finalDelimiter = finalDelimiter || ';';
|
|
424
|
+
|
|
425
|
+
if (processedCsv.trim() === '') {
|
|
426
|
+
return;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
let headers = null;
|
|
430
|
+
let totalRows = 0;
|
|
431
|
+
let dataRowIndex = 0;
|
|
432
|
+
|
|
433
|
+
const handleFields = (fields, lineNumber) => {
|
|
434
|
+
if (!headers) {
|
|
435
|
+
if (hasHeaders) {
|
|
436
|
+
headers = fields.map(header => {
|
|
413
437
|
const trimmed = trim ? header.trim() : header;
|
|
414
|
-
// Apply rename map
|
|
415
438
|
return renameMap[trimmed] || trimmed;
|
|
416
439
|
});
|
|
417
|
-
|
|
418
|
-
} catch (error) {
|
|
419
|
-
if (error instanceof ParsingError) {
|
|
420
|
-
throw new ParsingError(`Failed to parse headers: ${error.message}`, 1);
|
|
421
|
-
}
|
|
422
|
-
throw error;
|
|
423
|
-
}
|
|
424
|
-
} else {
|
|
425
|
-
// Generate numeric headers from first line
|
|
426
|
-
try {
|
|
427
|
-
const firstLineFields = parseCsvLine(lines[0], 1, finalDelimiter);
|
|
428
|
-
headers = firstLineFields.map((_, index) => `column${index + 1}`);
|
|
429
|
-
} catch (error) {
|
|
430
|
-
if (error instanceof ParsingError) {
|
|
431
|
-
throw new ParsingError(`Failed to parse first line: ${error.message}`, 1);
|
|
432
|
-
}
|
|
433
|
-
throw error;
|
|
440
|
+
return null;
|
|
434
441
|
}
|
|
442
|
+
headers = fields.map((_, index) => `column${index + 1}`);
|
|
435
443
|
}
|
|
436
444
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
for (let j = 0; j < fieldCount; j++) {
|
|
456
|
-
row[headers[j]] = parseCsvValue(fields[j], { trim, parseNumbers, parseBooleans });
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// Warn about extra fields
|
|
460
|
-
if (fields.length > headers.length && process.env.NODE_ENV === 'development') {
|
|
461
|
-
console.warn(`[jtcsv] Line ${i + 1}: ${fields.length - headers.length} extra fields ignored`);
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
result.push(row);
|
|
465
|
-
} catch (error) {
|
|
466
|
-
if (error instanceof ParsingError) {
|
|
467
|
-
throw new ParsingError(`Line ${i + 1}: ${error.message}`, i + 1);
|
|
468
|
-
}
|
|
469
|
-
throw error;
|
|
445
|
+
if (!fields || fields.length === 0) {
|
|
446
|
+
return null;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const fieldCount = Math.min(fields.length, headers.length);
|
|
450
|
+
let row;
|
|
451
|
+
|
|
452
|
+
const resolvedFastPathMode = fastPathMode === 'stream' ? 'objects' : fastPathMode;
|
|
453
|
+
|
|
454
|
+
if (resolvedFastPathMode === 'compact') {
|
|
455
|
+
row = new Array(fieldCount);
|
|
456
|
+
for (let j = 0; j < fieldCount; j++) {
|
|
457
|
+
row[j] = parseCsvValue(fields[j], { trim, parseNumbers, parseBooleans });
|
|
458
|
+
}
|
|
459
|
+
} else {
|
|
460
|
+
row = {};
|
|
461
|
+
for (let j = 0; j < fieldCount; j++) {
|
|
462
|
+
row[headers[j]] = parseCsvValue(fields[j], { trim, parseNumbers, parseBooleans });
|
|
470
463
|
}
|
|
471
464
|
}
|
|
472
465
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
466
|
+
const processedRow = finalHooks.applyPerRow(row, dataRowIndex, {
|
|
467
|
+
lineNumber,
|
|
468
|
+
headers,
|
|
469
|
+
options: opts
|
|
470
|
+
});
|
|
471
|
+
|
|
472
|
+
dataRowIndex++;
|
|
473
|
+
return processedRow;
|
|
474
|
+
};
|
|
475
|
+
|
|
476
|
+
try {
|
|
477
|
+
const parseOptions = { delimiter: finalDelimiter };
|
|
478
|
+
if (useFastPath === false) {
|
|
479
|
+
parseOptions.forceEngine = 'STANDARD';
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
for (const fields of globalFastPathEngine.iterateRows(processedCsv, parseOptions)) {
|
|
483
|
+
totalRows++;
|
|
484
|
+
if (maxRows && totalRows > maxRows) {
|
|
485
|
+
throw new LimitError(
|
|
486
|
+
`CSV size exceeds maximum limit of ${maxRows} rows`,
|
|
487
|
+
maxRows,
|
|
488
|
+
totalRows
|
|
489
|
+
);
|
|
490
|
+
}
|
|
491
|
+
const processedRow = handleFields(fields, totalRows);
|
|
492
|
+
if (processedRow !== undefined && processedRow !== null) {
|
|
493
|
+
if (fields.length > headers.length && process.env.NODE_ENV === 'development') {
|
|
494
|
+
console.warn(`[jtcsv] Line ${totalRows}: ${fields.length - headers.length} extra fields ignored`);
|
|
495
|
+
}
|
|
496
|
+
yield processedRow;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
} catch (error) {
|
|
500
|
+
if (error && error.code === 'FAST_PATH_UNCLOSED_QUOTES') {
|
|
501
|
+
throw new ParsingError(error.message, error.lineNumber);
|
|
502
|
+
}
|
|
503
|
+
throw error;
|
|
504
|
+
}
|
|
505
|
+
}
|
|
476
506
|
|
|
477
507
|
/**
|
|
478
508
|
* Validates file path for CSV reading
|
|
@@ -504,18 +534,22 @@ function validateCsvFilePath(filePath) {
|
|
|
504
534
|
}
|
|
505
535
|
|
|
506
536
|
/**
|
|
507
|
-
* Reads CSV file and converts it to JSON array
|
|
537
|
+
* Reads CSV file and converts it to JSON array with hooks and caching support
|
|
508
538
|
*
|
|
509
539
|
* @param {string} filePath - Path to CSV file
|
|
510
540
|
* @param {Object} [options] - Configuration options (same as csvToJson)
|
|
511
541
|
* @returns {Promise<Array<Object>>} Promise that resolves to JSON array
|
|
512
542
|
*
|
|
513
543
|
* @example
|
|
514
|
-
*
|
|
544
|
+
* const { readCsvAsJson } = require('./csv-to-json');
|
|
515
545
|
*
|
|
516
546
|
* const json = await readCsvAsJson('./data.csv', {
|
|
517
547
|
* delimiter: ',',
|
|
518
|
-
* parseNumbers: true
|
|
548
|
+
* parseNumbers: true,
|
|
549
|
+
* useCache: true,
|
|
550
|
+
* hooks: {
|
|
551
|
+
* perRow: (row) => ({ ...row, processed: true })
|
|
552
|
+
* }
|
|
519
553
|
* });
|
|
520
554
|
*/
|
|
521
555
|
async function readCsvAsJson(filePath, options = {}) {
|
|
@@ -528,7 +562,7 @@ async function readCsvAsJson(filePath, options = {}) {
|
|
|
528
562
|
// Read file
|
|
529
563
|
const csvContent = await fs.readFile(safePath, 'utf8');
|
|
530
564
|
|
|
531
|
-
// Parse CSV
|
|
565
|
+
// Parse CSV with hooks and caching
|
|
532
566
|
return csvToJson(csvContent, options);
|
|
533
567
|
} catch (error) {
|
|
534
568
|
// Re-throw parsing errors as-is
|
|
@@ -552,7 +586,7 @@ async function readCsvAsJson(filePath, options = {}) {
|
|
|
552
586
|
}
|
|
553
587
|
|
|
554
588
|
/**
|
|
555
|
-
* Synchronously reads CSV file and converts it to JSON array
|
|
589
|
+
* Synchronously reads CSV file and converts it to JSON array with hooks and caching support
|
|
556
590
|
*
|
|
557
591
|
* @param {string} filePath - Path to CSV file
|
|
558
592
|
* @param {Object} [options] - Configuration options (same as csvToJson)
|
|
@@ -568,7 +602,7 @@ function readCsvAsJsonSync(filePath, options = {}) {
|
|
|
568
602
|
// Read file
|
|
569
603
|
const csvContent = fs.readFileSync(safePath, 'utf8');
|
|
570
604
|
|
|
571
|
-
// Parse CSV
|
|
605
|
+
// Parse CSV with hooks and caching
|
|
572
606
|
return csvToJson(csvContent, options);
|
|
573
607
|
} catch (error) {
|
|
574
608
|
// Re-throw parsing errors as-is
|
|
@@ -591,12 +625,52 @@ function readCsvAsJsonSync(filePath, options = {}) {
|
|
|
591
625
|
}
|
|
592
626
|
}
|
|
593
627
|
|
|
628
|
+
/**
|
|
629
|
+
* Creates a new TransformHooks instance
|
|
630
|
+
* @returns {TransformHooks} New TransformHooks instance
|
|
631
|
+
*/
|
|
632
|
+
function createTransformHooks() {
|
|
633
|
+
return new TransformHooks();
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
/**
|
|
637
|
+
* Creates a new DelimiterCache instance
|
|
638
|
+
* @param {number} [maxSize=100] - Maximum cache size
|
|
639
|
+
* @returns {DelimiterCache} New DelimiterCache instance
|
|
640
|
+
*/
|
|
641
|
+
function createDelimiterCache(maxSize = 100) {
|
|
642
|
+
return new DelimiterCache(maxSize);
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
/**
|
|
646
|
+
* Gets statistics from the global delimiter cache
|
|
647
|
+
* @returns {Object} Cache statistics
|
|
648
|
+
*/
|
|
649
|
+
function getDelimiterCacheStats() {
|
|
650
|
+
return globalDelimiterCache.getStats();
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
/**
|
|
654
|
+
* Clears the global delimiter cache
|
|
655
|
+
*/
|
|
656
|
+
function clearDelimiterCache() {
|
|
657
|
+
globalDelimiterCache.clear();
|
|
658
|
+
}
|
|
659
|
+
|
|
594
660
|
// Export the functions
|
|
595
661
|
module.exports = {
|
|
596
662
|
csvToJson,
|
|
663
|
+
csvToJsonIterator,
|
|
597
664
|
readCsvAsJson,
|
|
598
665
|
readCsvAsJsonSync,
|
|
599
|
-
autoDetectDelimiter
|
|
666
|
+
autoDetectDelimiter,
|
|
667
|
+
createTransformHooks,
|
|
668
|
+
createDelimiterCache,
|
|
669
|
+
getDelimiterCacheStats,
|
|
670
|
+
clearDelimiterCache,
|
|
671
|
+
TransformHooks,
|
|
672
|
+
DelimiterCache,
|
|
673
|
+
predefinedHooks
|
|
600
674
|
};
|
|
601
675
|
|
|
602
676
|
// For ES6 module compatibility
|