jtcsv 2.1.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +63 -17
  2. package/bin/jtcsv.js +1013 -117
  3. package/csv-to-json.js +385 -311
  4. package/examples/simple-usage.js +2 -3
  5. package/index.d.ts +288 -5
  6. package/index.js +23 -0
  7. package/json-to-csv.js +130 -89
  8. package/package.json +47 -19
  9. package/plugins/README.md +146 -2
  10. package/plugins/hono/README.md +25 -0
  11. package/plugins/hono/index.d.ts +12 -0
  12. package/plugins/hono/index.js +36 -0
  13. package/plugins/hono/package.json +35 -0
  14. package/plugins/nestjs/README.md +33 -0
  15. package/plugins/nestjs/index.d.ts +25 -0
  16. package/plugins/nestjs/index.js +77 -0
  17. package/plugins/nestjs/package.json +37 -0
  18. package/plugins/nuxt/README.md +25 -0
  19. package/plugins/nuxt/index.js +21 -0
  20. package/plugins/nuxt/package.json +35 -0
  21. package/plugins/nuxt/runtime/composables/useJtcsv.js +6 -0
  22. package/plugins/nuxt/runtime/plugin.js +6 -0
  23. package/plugins/remix/README.md +26 -0
  24. package/plugins/remix/index.d.ts +16 -0
  25. package/plugins/remix/index.js +62 -0
  26. package/plugins/remix/package.json +35 -0
  27. package/plugins/sveltekit/README.md +28 -0
  28. package/plugins/sveltekit/index.d.ts +17 -0
  29. package/plugins/sveltekit/index.js +54 -0
  30. package/plugins/sveltekit/package.json +33 -0
  31. package/plugins/trpc/README.md +22 -0
  32. package/plugins/trpc/index.d.ts +7 -0
  33. package/plugins/trpc/index.js +32 -0
  34. package/plugins/trpc/package.json +34 -0
  35. package/src/core/delimiter-cache.js +186 -0
  36. package/src/core/transform-hooks.js +350 -0
  37. package/src/engines/fast-path-engine.js +829 -340
  38. package/src/formats/tsv-parser.js +336 -0
  39. package/src/index-with-plugins.js +36 -14
  40. package/cli-tui.js +0 -5
@@ -1,347 +1,836 @@
1
- /**
2
- * Fast-Path Engine для оптимизации CSV парсинга
3
- * Автоматически выбирает оптимальный парсер на основе структуры CSV
4
- *
5
- * @version 1.0.0
6
- * @date 2026-01-22
7
- */
8
-
9
- class FastPathEngine {
10
- constructor() {
11
- this.compilers = new Map();
12
- this.stats = {
13
- simpleParserCount: 0,
14
- quoteAwareParserCount: 0,
15
- standardParserCount: 0,
16
- cacheHits: 0,
17
- cacheMisses: 0
18
- };
19
- }
20
-
21
- /**
22
- * Анализирует структуру CSV и определяет оптимальный парсер
23
- */
24
- analyzeStructure(sample, options = {}) {
25
- const delimiter = options.delimiter || this._detectDelimiter(sample);
26
- const lines = sample.split('\n').slice(0, 10);
27
-
28
- let hasQuotes = false;
29
- let hasNewlinesInFields = false;
30
- let hasEscapedQuotes = false;
31
- let maxFields = 0;
32
- let totalFields = 0;
33
-
34
- for (const line of lines) {
35
- if (line.includes('"')) {
36
- hasQuotes = true;
37
- if (line.includes('""')) {
38
- hasEscapedQuotes = true;
39
- }
40
- }
41
-
42
- const quoteCount = (line.match(/"/g) || []).length;
43
- if (quoteCount % 2 !== 0) {
44
- hasNewlinesInFields = true;
45
- }
46
-
47
- const fieldCount = line.split(delimiter).length;
48
- totalFields += fieldCount;
49
- if (fieldCount > maxFields) {
50
- maxFields = fieldCount;
51
- }
52
- }
53
-
54
- const avgFieldsPerLine = totalFields / lines.length;
55
- const fieldConsistency = maxFields === avgFieldsPerLine;
56
-
57
- return {
58
- delimiter,
59
- hasQuotes,
60
- hasEscapedQuotes,
61
- hasNewlinesInFields,
62
- fieldConsistency,
63
- avgFieldsPerLine,
64
- maxFields,
65
- recommendedEngine: this._selectEngine(hasQuotes, hasNewlinesInFields, fieldConsistency)
66
- };
67
- }
68
-
69
- /**
70
- * Автоматически определяет разделитель
71
- */
72
- _detectDelimiter(sample) {
73
- const candidates = [',', ';', '\t', '|'];
74
- const firstLine = sample.split('\n')[0];
75
-
76
- let bestDelimiter = ',';
77
- let bestScore = 0;
78
-
79
- for (const delimiter of candidates) {
80
- const fields = firstLine.split(delimiter);
81
- const score = fields.length;
82
-
83
- // Если разделитель не найден в строке, пропускаем его
84
- if (score === 1 && !firstLine.includes(delimiter)) {
85
- continue;
86
- }
87
-
88
- const avgLength = fields.reduce((sum, field) => sum + field.length, 0) / fields.length;
89
- const variance = fields.reduce((sum, field) => sum + Math.pow(field.length - avgLength, 2), 0) / fields.length;
90
-
91
- const finalScore = score / (variance + 1);
92
-
93
- if (finalScore > bestScore) {
94
- bestScore = finalScore;
95
- bestDelimiter = delimiter;
96
- }
97
- }
98
-
99
- return bestDelimiter;
100
- }
101
-
102
- /**
103
- * Выбирает оптимальный движок парсинга
104
- */
105
- _selectEngine(hasQuotes, hasNewlinesInFields, _fieldConsistency) {
106
- if (!hasQuotes && !hasNewlinesInFields) {
107
- return 'SIMPLE';
108
- }
109
-
110
- if (hasQuotes && !hasNewlinesInFields) {
111
- return 'QUOTE_AWARE';
112
- }
113
-
114
- return 'STANDARD';
115
- }
116
-
117
- /**
118
- * Создает простой парсер (разделитель без кавычек)
119
- */
120
- _createSimpleParser(structure) {
121
- const { delimiter } = structure;
122
-
123
- return (csv) => {
124
- const rows = [];
125
- const lines = csv.split('\n');
126
-
127
- for (const line of lines) {
128
- if (line.trim() === '') {
129
- continue;
130
- }
131
- const fields = line.split(delimiter);
132
- rows.push(fields);
133
- }
134
-
135
- return rows;
136
- };
137
- }
138
-
139
- /**
140
- * State machine парсер для CSV с кавычками (RFC 4180)
141
- */
142
- _createQuoteAwareParser(structure) {
143
- const { delimiter, hasEscapedQuotes } = structure;
144
-
145
- return (csv) => {
146
- const rows = [];
147
- let currentRow = [];
148
- let currentField = '';
149
- let insideQuotes = false;
150
- let i = 0;
151
-
152
- while (i < csv.length) {
153
- const char = csv[i];
154
- const nextChar = csv[i + 1];
155
-
156
- if (char === '"') {
157
- if (insideQuotes) {
158
- if (hasEscapedQuotes && nextChar === '"') {
159
- currentField += '"';
160
- i += 2;
161
- } else {
162
- insideQuotes = false;
163
- i++;
164
- }
165
- } else {
166
- insideQuotes = true;
167
- i++;
168
- }
169
- } else if (char === delimiter && !insideQuotes) {
170
- currentRow.push(currentField);
171
- currentField = '';
172
- i++;
173
- } else if ((char === '\n' || (char === '\r' && nextChar === '\n')) && !insideQuotes) {
174
- currentRow.push(currentField);
175
- if (currentRow.length > 0 && currentRow.some(field => field !== '')) {
176
- rows.push(currentRow);
177
- }
178
- currentRow = [];
179
- currentField = '';
180
- i += (char === '\r' && nextChar === '\n') ? 2 : 1;
181
- } else {
182
- currentField += char;
183
- i++;
184
- }
185
- }
186
-
187
- if (currentField !== '' || currentRow.length > 0) {
188
- currentRow.push(currentField);
189
- if (currentRow.length > 0 && currentRow.some(field => field !== '')) {
190
- rows.push(currentRow);
191
- }
192
- }
193
-
194
- return rows;
195
- };
196
- }
197
-
198
- /**
199
- * Стандартный парсер (fallback)
200
- */
201
- _createStandardParser(structure) {
202
- const { delimiter } = structure;
203
-
204
- return (csv) => {
205
- const rows = [];
206
- const lines = csv.split('\n');
207
- let insideQuotes = false;
208
- let currentLine = '';
209
-
210
- for (const line of lines) {
211
- const quoteCount = (line.match(/"/g) || []).length;
212
-
213
- if (insideQuotes) {
214
- currentLine += '\n' + line;
215
- if (quoteCount % 2 !== 0) {
216
- insideQuotes = false;
217
- rows.push(this._parseLineWithQuotes(currentLine, delimiter));
218
- currentLine = '';
219
- }
220
- } else {
221
- if (quoteCount % 2 !== 0) {
222
- insideQuotes = true;
223
- currentLine = line;
224
- } else {
225
- rows.push(this._parseLineWithQuotes(line, delimiter));
226
- }
227
- }
228
- }
229
-
230
- return rows;
231
- };
232
- }
233
-
234
- /**
235
- * Парсит строку с учетом кавычек
236
- */
237
- _parseLineWithQuotes(line, delimiter) {
238
- const fields = [];
239
- let currentField = '';
240
- let insideQuotes = false;
241
- let i = 0;
242
-
243
- while (i < line.length) {
244
- const char = line[i];
245
- const nextChar = line[i + 1];
246
-
247
- if (char === '"') {
248
- if (insideQuotes && nextChar === '"') {
249
- currentField += '"';
250
- i += 2;
251
- } else {
252
- insideQuotes = !insideQuotes;
253
- i++;
254
- }
255
- } else if (char === delimiter && !insideQuotes) {
256
- fields.push(currentField);
257
- currentField = '';
258
- i++;
259
- } else {
260
- currentField += char;
261
- i++;
262
- }
263
- }
264
-
265
- fields.push(currentField);
266
- return fields;
267
- }
268
-
269
- /**
270
- * Компилирует парсер на основе структуры CSV
271
- */
272
- compileParser(structure) {
273
- const cacheKey = JSON.stringify(structure);
274
-
275
- // Проверяем кеш
276
- if (this.compilers.has(cacheKey)) {
277
- this.stats.cacheHits++;
278
- return this.compilers.get(cacheKey);
279
- }
280
-
281
- this.stats.cacheMisses++;
282
-
283
- let parser;
284
- switch (structure.recommendedEngine) {
285
- case 'SIMPLE':
286
- parser = this._createSimpleParser(structure);
287
- this.stats.simpleParserCount++;
288
- break;
289
- case 'QUOTE_AWARE':
290
- parser = this._createQuoteAwareParser(structure);
291
- this.stats.quoteAwareParserCount++;
292
- break;
293
- case 'STANDARD':
294
- parser = this._createStandardParser(structure);
295
- this.stats.standardParserCount++;
296
- break;
297
- default:
298
- parser = this._createStandardParser(structure);
299
- this.stats.standardParserCount++;
300
- }
301
-
302
- // Кешируем парсер
303
- this.compilers.set(cacheKey, parser);
304
-
305
- return parser;
306
- }
307
-
308
- /**
309
- * Парсит CSV с использованием оптимального парсера
310
- */
311
- parse(csv, options = {}) {
1
+ /**
2
+ * Fast-Path Engine для оптимизации CSV парсинга
3
+ * Автоматически выбирает оптимальный парсер на основе структуры CSV
4
+ *
5
+ * @version 1.0.0
6
+ * @date 2026-01-22
7
+ */
8
+
9
+ class FastPathEngine {
10
+ constructor() {
11
+ this.compilers = new Map();
12
+ this.rowCompilers = new Map();
13
+ this.stats = {
14
+ simpleParserCount: 0,
15
+ quoteAwareParserCount: 0,
16
+ standardParserCount: 0,
17
+ cacheHits: 0,
18
+ cacheMisses: 0
19
+ };
20
+ }
21
+
22
+ _hasQuotes(csv) {
23
+ return csv.indexOf('"') !== -1;
24
+ }
25
+
26
+ _hasEscapedQuotes(csv) {
27
+ return csv.indexOf('""') !== -1;
28
+ }
29
+
30
+ _hasBackslashes(csv) {
31
+ return csv.indexOf('\\') !== -1;
32
+ }
33
+
34
+ _getStructureForParse(csv, options) {
312
35
  const sampleSize = Math.min(1000, csv.length);
313
36
  const sample = csv.substring(0, sampleSize);
314
-
315
37
  const structure = this.analyzeStructure(sample, options);
316
- const parser = this.compileParser(structure);
317
-
318
- return parser(csv);
319
- }
38
+ const hasBackslashes = this._hasBackslashes(csv);
39
+ const hasQuotes = structure.hasQuotes ? true : this._hasQuotes(csv);
40
+ const hasEscapedQuotes = structure.hasEscapedQuotes
41
+ ? true
42
+ : (hasQuotes ? this._hasEscapedQuotes(csv) : false);
43
+
44
+ let normalized = {
45
+ ...structure,
46
+ hasQuotes,
47
+ hasEscapedQuotes,
48
+ hasBackslashes
49
+ };
50
+
51
+ if (structure.recommendedEngine === 'SIMPLE' && hasQuotes) {
52
+ normalized = {
53
+ ...normalized,
54
+ hasNewlinesInFields: true,
55
+ recommendedEngine: 'QUOTE_AWARE'
56
+ };
57
+ }
320
58
 
321
- /**
322
- * Возвращает статистику использования парсеров
323
- */
324
- getStats() {
325
- return {
326
- ...this.stats,
327
- totalParsers: this.compilers.size,
328
- hitRate: this.stats.cacheHits / (this.stats.cacheHits + this.stats.cacheMisses) || 0
329
- };
330
- }
59
+ if (options && options.forceEngine) {
60
+ normalized = {
61
+ ...normalized,
62
+ recommendedEngine: options.forceEngine
63
+ };
64
+ }
331
65
 
332
- /**
333
- * Сбрасывает статистику и кеш
334
- */
335
- reset() {
336
- this.compilers.clear();
337
- this.stats = {
338
- simpleParserCount: 0,
339
- quoteAwareParserCount: 0,
340
- standardParserCount: 0,
341
- cacheHits: 0,
342
- cacheMisses: 0
343
- };
66
+ return normalized;
344
67
  }
345
- }
346
-
68
+
69
+ /**
70
+ * Анализирует структуру CSV и определяет оптимальный парсер
71
+ */
72
+ analyzeStructure(sample, options = {}) {
73
+ const delimiter = options.delimiter || this._detectDelimiter(sample);
74
+ const lines = sample.split('\n').slice(0, 10);
75
+
76
+ let hasQuotes = false;
77
+ let hasNewlinesInFields = false;
78
+ let hasEscapedQuotes = false;
79
+ let maxFields = 0;
80
+ let totalFields = 0;
81
+
82
+ for (const line of lines) {
83
+ if (line.includes('"')) {
84
+ hasQuotes = true;
85
+ if (line.includes('""')) {
86
+ hasEscapedQuotes = true;
87
+ }
88
+ }
89
+
90
+ const quoteCount = (line.match(/"/g) || []).length;
91
+ if (quoteCount % 2 !== 0) {
92
+ hasNewlinesInFields = true;
93
+ }
94
+
95
+ const fieldCount = line.split(delimiter).length;
96
+ totalFields += fieldCount;
97
+ if (fieldCount > maxFields) {
98
+ maxFields = fieldCount;
99
+ }
100
+ }
101
+
102
+ const avgFieldsPerLine = totalFields / lines.length;
103
+ const fieldConsistency = maxFields === avgFieldsPerLine;
104
+
105
+ return {
106
+ delimiter,
107
+ hasQuotes,
108
+ hasEscapedQuotes,
109
+ hasNewlinesInFields,
110
+ fieldConsistency,
111
+ avgFieldsPerLine,
112
+ maxFields,
113
+ recommendedEngine: this._selectEngine(hasQuotes, hasNewlinesInFields, fieldConsistency)
114
+ };
115
+ }
116
+
117
+ /**
118
+ * Автоматически определяет разделитель
119
+ */
120
+ _detectDelimiter(sample) {
121
+ const candidates = [',', ';', '\t', '|'];
122
+ const firstLine = sample.split('\n')[0];
123
+
124
+ let bestDelimiter = ',';
125
+ let bestScore = 0;
126
+
127
+ for (const delimiter of candidates) {
128
+ const fields = firstLine.split(delimiter);
129
+ const score = fields.length;
130
+
131
+ // Если разделитель не найден в строке, пропускаем его
132
+ if (score === 1 && !firstLine.includes(delimiter)) {
133
+ continue;
134
+ }
135
+
136
+ const avgLength = fields.reduce((sum, field) => sum + field.length, 0) / fields.length;
137
+ const variance = fields.reduce((sum, field) => sum + Math.pow(field.length - avgLength, 2), 0) / fields.length;
138
+
139
+ const finalScore = score / (variance + 1);
140
+
141
+ if (finalScore > bestScore) {
142
+ bestScore = finalScore;
143
+ bestDelimiter = delimiter;
144
+ }
145
+ }
146
+
147
+ return bestDelimiter;
148
+ }
149
+
150
+ /**
151
+ * Выбирает оптимальный движок парсинга
152
+ */
153
+ _selectEngine(hasQuotes, hasNewlinesInFields, _fieldConsistency) {
154
+ if (!hasQuotes && !hasNewlinesInFields) {
155
+ return 'SIMPLE';
156
+ }
157
+
158
+ if (hasQuotes && !hasNewlinesInFields) {
159
+ return 'QUOTE_AWARE';
160
+ }
161
+
162
+ return 'STANDARD';
163
+ }
164
+
165
+ /**
166
+ * Создает простой парсер (разделитель без кавычек)
167
+ */
168
+ _createSimpleParser(structure) {
169
+ const { delimiter, hasBackslashes } = structure;
170
+
171
+ return (csv) => {
172
+ const rows = [];
173
+ if (hasBackslashes) {
174
+ this._emitSimpleRowsEscaped(csv, delimiter, (row) => rows.push(row));
175
+ } else {
176
+ this._emitSimpleRows(csv, delimiter, (row) => rows.push(row));
177
+ }
178
+
179
+ return rows;
180
+ };
181
+ }
182
+
183
+ _emitSimpleRows(csv, delimiter, onRow) {
184
+ let currentRow = [];
185
+ let rowHasData = false;
186
+ let fieldStart = 0;
187
+ let i = 0;
188
+
189
+ while (i <= csv.length) {
190
+ const char = i < csv.length ? csv[i] : '\n';
191
+
192
+ if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
193
+ rowHasData = true;
194
+ }
195
+
196
+ if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
197
+ const field = csv.slice(fieldStart, i);
198
+ currentRow.push(field);
199
+
200
+ if (char === '\n' || char === '\r' || i === csv.length) {
201
+ if (rowHasData) {
202
+ onRow(currentRow);
203
+ }
204
+ currentRow = [];
205
+ rowHasData = false;
206
+ }
207
+
208
+ if (char === '\r' && csv[i + 1] === '\n') {
209
+ i++;
210
+ }
211
+
212
+ fieldStart = i + 1;
213
+ }
214
+
215
+ i++;
216
+ }
217
+ }
218
+
219
+ _emitSimpleRowsEscaped(csv, delimiter, onRow) {
220
+ let currentRow = [];
221
+ let currentField = '';
222
+ let rowHasData = false;
223
+ let escapeNext = false;
224
+ let i = 0;
225
+
226
+ while (i <= csv.length) {
227
+ const char = i < csv.length ? csv[i] : '\n';
228
+ const nextChar = i + 1 < csv.length ? csv[i + 1] : '';
229
+
230
+ if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
231
+ rowHasData = true;
232
+ }
233
+
234
+ if (escapeNext) {
235
+ currentField += char;
236
+ escapeNext = false;
237
+ i++;
238
+ continue;
239
+ }
240
+
241
+ if (char === '\\') {
242
+ if (i + 1 >= csv.length) {
243
+ currentField += '\\';
244
+ i++;
245
+ continue;
246
+ }
247
+
248
+ if (nextChar === '\\') {
249
+ currentField += '\\';
250
+ i += 2;
251
+ continue;
252
+ }
253
+
254
+ if (nextChar === '\n' || nextChar === '\r') {
255
+ currentField += '\\';
256
+ i++;
257
+ continue;
258
+ }
259
+
260
+ escapeNext = true;
261
+ i++;
262
+ continue;
263
+ }
264
+
265
+ if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
266
+ currentRow.push(currentField);
267
+ currentField = '';
268
+
269
+ if (char === '\n' || char === '\r' || i === csv.length) {
270
+ if (rowHasData) {
271
+ onRow(currentRow);
272
+ }
273
+ currentRow = [];
274
+ rowHasData = false;
275
+ }
276
+
277
+ if (char === '\r' && csv[i + 1] === '\n') {
278
+ i++;
279
+ }
280
+
281
+ i++;
282
+ continue;
283
+ }
284
+
285
+ currentField += char;
286
+ i++;
287
+ }
288
+ }
289
+
290
+ *_simpleRowsGenerator(csv, delimiter) {
291
+ let currentRow = [];
292
+ let rowHasData = false;
293
+ let fieldStart = 0;
294
+ let i = 0;
295
+
296
+ while (i <= csv.length) {
297
+ const char = i < csv.length ? csv[i] : '\n';
298
+
299
+ if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
300
+ rowHasData = true;
301
+ }
302
+
303
+ if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
304
+ const field = csv.slice(fieldStart, i);
305
+ currentRow.push(field);
306
+
307
+ if (char === '\n' || char === '\r' || i === csv.length) {
308
+ if (rowHasData) {
309
+ yield currentRow;
310
+ }
311
+ currentRow = [];
312
+ rowHasData = false;
313
+ }
314
+
315
+ if (char === '\r' && csv[i + 1] === '\n') {
316
+ i++;
317
+ }
318
+
319
+ fieldStart = i + 1;
320
+ }
321
+
322
+ i++;
323
+ }
324
+ }
325
+
326
+ *_simpleEscapedRowsGenerator(csv, delimiter) {
327
+ let currentRow = [];
328
+ let currentField = '';
329
+ let rowHasData = false;
330
+ let escapeNext = false;
331
+ let i = 0;
332
+
333
+ while (i <= csv.length) {
334
+ const char = i < csv.length ? csv[i] : '\n';
335
+ const nextChar = i + 1 < csv.length ? csv[i + 1] : '';
336
+
337
+ if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
338
+ rowHasData = true;
339
+ }
340
+
341
+ if (escapeNext) {
342
+ currentField += char;
343
+ escapeNext = false;
344
+ i++;
345
+ continue;
346
+ }
347
+
348
+ if (char === '\\') {
349
+ if (i + 1 >= csv.length) {
350
+ currentField += '\\';
351
+ i++;
352
+ continue;
353
+ }
354
+
355
+ if (nextChar === '\\') {
356
+ currentField += '\\';
357
+ i += 2;
358
+ continue;
359
+ }
360
+
361
+ if (nextChar === '\n' || nextChar === '\r') {
362
+ currentField += '\\';
363
+ i++;
364
+ continue;
365
+ }
366
+
367
+ escapeNext = true;
368
+ i++;
369
+ continue;
370
+ }
371
+
372
+ if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
373
+ currentRow.push(currentField);
374
+ currentField = '';
375
+
376
+ if (char === '\n' || char === '\r' || i === csv.length) {
377
+ if (rowHasData) {
378
+ yield currentRow;
379
+ }
380
+ currentRow = [];
381
+ rowHasData = false;
382
+ }
383
+
384
+ if (char === '\r' && csv[i + 1] === '\n') {
385
+ i++;
386
+ }
387
+
388
+ i++;
389
+ continue;
390
+ }
391
+
392
+ currentField += char;
393
+ i++;
394
+ }
395
+ }
396
+
397
+ /**
398
+ * Simple row emitter that avoids storing all rows in memory.
399
+ */
400
+ _createSimpleRowEmitter(structure) {
401
+ const { delimiter, hasBackslashes } = structure;
402
+
403
+ return (csv, onRow) => {
404
+ if (hasBackslashes) {
405
+ this._emitSimpleRowsEscaped(csv, delimiter, onRow);
406
+ } else {
407
+ this._emitSimpleRows(csv, delimiter, onRow);
408
+ }
409
+ };
410
+ }
411
+
412
+ /**
413
+ * State machine парсер для CSV с кавычками (RFC 4180)
414
+ */
415
+ _createQuoteAwareParser(structure) {
416
+ const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;
417
+
418
+ return (csv) => {
419
+ const rows = [];
420
+ const iterator = hasBackslashes
421
+ ? this._quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes)
422
+ : this._quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes);
423
+
424
+ for (const row of iterator) {
425
+ rows.push(row);
426
+ }
427
+
428
+ return rows;
429
+ };
430
+ }
431
+
432
+ /**
433
+ * Quote-aware row emitter that avoids storing all rows in memory.
434
+ */
435
+ _createQuoteAwareRowEmitter(structure) {
436
+ const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;
437
+
438
+ return (csv, onRow) => {
439
+ const iterator = hasBackslashes
440
+ ? this._quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes)
441
+ : this._quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes);
442
+
443
+ for (const row of iterator) {
444
+ onRow(row);
445
+ }
446
+ };
447
+ }
448
+
449
+ *_quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes) {
450
+ let currentRow = [];
451
+ let currentField = '';
452
+ let rowHasData = false;
453
+ let insideQuotes = false;
454
+ let lineNumber = 1;
455
+ let i = 0;
456
+
457
+ while (i < csv.length) {
458
+ const char = csv[i];
459
+ const nextChar = csv[i + 1];
460
+
461
+ if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
462
+ rowHasData = true;
463
+ }
464
+
465
+ if (char === '"') {
466
+ if (insideQuotes) {
467
+ if (hasEscapedQuotes && nextChar === '"') {
468
+ const afterNext = csv[i + 2];
469
+ const isLineEnd = i + 2 >= csv.length || afterNext === '\n' || afterNext === '\r';
470
+
471
+ currentField += '"';
472
+ if (isLineEnd) {
473
+ insideQuotes = false;
474
+ i += 2;
475
+ continue;
476
+ }
477
+
478
+ i += 2;
479
+
480
+ let j = i;
481
+ while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
482
+ j++;
483
+ }
484
+ if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
485
+ insideQuotes = false;
486
+ }
487
+ continue;
488
+ }
489
+
490
+ let j = i + 1;
491
+ while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
492
+ j++;
493
+ }
494
+ if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
495
+ insideQuotes = false;
496
+ i++;
497
+ continue;
498
+ }
499
+
500
+ currentField += '"';
501
+ i++;
502
+ continue;
503
+ }
504
+
505
+ insideQuotes = true;
506
+ i++;
507
+ continue;
508
+ }
509
+
510
+ if (!insideQuotes && (char === delimiter || char === '\n' || char === '\r')) {
511
+ currentRow.push(currentField);
512
+ currentField = '';
513
+
514
+ if (char === '\n' || char === '\r') {
515
+ if (rowHasData) {
516
+ yield currentRow;
517
+ }
518
+ currentRow = [];
519
+ rowHasData = false;
520
+ lineNumber++;
521
+
522
+ if (char === '\r' && nextChar === '\n') {
523
+ i++;
524
+ }
525
+ }
526
+
527
+ i++;
528
+ continue;
529
+ }
530
+
531
+ currentField += char;
532
+ i++;
533
+ }
534
+
535
+ if (insideQuotes) {
536
+ const error = new Error('Unclosed quotes in CSV');
537
+ error.code = 'FAST_PATH_UNCLOSED_QUOTES';
538
+ error.lineNumber = lineNumber;
539
+ throw error;
540
+ }
541
+
542
+ if (currentField !== '' || currentRow.length > 0) {
543
+ currentRow.push(currentField);
544
+ if (rowHasData) {
545
+ yield currentRow;
546
+ }
547
+ }
548
+ }
549
+
550
+ *_quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes) {
551
+ let currentRow = [];
552
+ let currentField = '';
553
+ let rowHasData = false;
554
+ let insideQuotes = false;
555
+ let escapeNext = false;
556
+ let lineNumber = 1;
557
+ let i = 0;
558
+
559
+ while (i < csv.length) {
560
+ const char = csv[i];
561
+ const nextChar = csv[i + 1];
562
+
563
+ if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
564
+ rowHasData = true;
565
+ }
566
+
567
+ if (escapeNext) {
568
+ currentField += char;
569
+ escapeNext = false;
570
+ i++;
571
+ continue;
572
+ }
573
+
574
+ if (char === '\\') {
575
+ if (i + 1 >= csv.length) {
576
+ currentField += '\\';
577
+ i++;
578
+ continue;
579
+ }
580
+
581
+ if (!insideQuotes && (nextChar === '\n' || nextChar === '\r')) {
582
+ currentField += '\\';
583
+ i++;
584
+ continue;
585
+ }
586
+
587
+ if (nextChar === '\\') {
588
+ currentField += '\\';
589
+ i += 2;
590
+ continue;
591
+ }
592
+
593
+ escapeNext = true;
594
+ i++;
595
+ continue;
596
+ }
597
+
598
+ if (char === '"') {
599
+ if (insideQuotes) {
600
+ if (hasEscapedQuotes && nextChar === '"') {
601
+ const afterNext = csv[i + 2];
602
+ const isLineEnd = i + 2 >= csv.length || afterNext === '\n' || afterNext === '\r';
603
+
604
+ currentField += '"';
605
+ if (isLineEnd) {
606
+ insideQuotes = false;
607
+ i += 2;
608
+ continue;
609
+ }
610
+
611
+ i += 2;
612
+
613
+ let j = i;
614
+ while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
615
+ j++;
616
+ }
617
+ if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
618
+ insideQuotes = false;
619
+ }
620
+ continue;
621
+ }
622
+
623
+ let j = i + 1;
624
+ while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
625
+ j++;
626
+ }
627
+ if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
628
+ insideQuotes = false;
629
+ i++;
630
+ continue;
631
+ }
632
+
633
+ currentField += '"';
634
+ i++;
635
+ continue;
636
+ }
637
+
638
+ insideQuotes = true;
639
+ i++;
640
+ continue;
641
+ }
642
+
643
+ if (!insideQuotes && (char === delimiter || char === '\n' || char === '\r')) {
644
+ currentRow.push(currentField);
645
+ currentField = '';
646
+
647
+ if (char === '\n' || char === '\r') {
648
+ if (rowHasData) {
649
+ yield currentRow;
650
+ }
651
+ currentRow = [];
652
+ rowHasData = false;
653
+ lineNumber++;
654
+
655
+ if (char === '\r' && nextChar === '\n') {
656
+ i++;
657
+ }
658
+ }
659
+
660
+ i++;
661
+ continue;
662
+ }
663
+
664
+ currentField += char;
665
+ i++;
666
+ }
667
+
668
+ if (escapeNext) {
669
+ currentField += '\\';
670
+ }
671
+
672
+ if (insideQuotes) {
673
+ const error = new Error('Unclosed quotes in CSV');
674
+ error.code = 'FAST_PATH_UNCLOSED_QUOTES';
675
+ error.lineNumber = lineNumber;
676
+ throw error;
677
+ }
678
+
679
+ if (currentField !== '' || currentRow.length > 0) {
680
+ currentRow.push(currentField);
681
+ if (rowHasData) {
682
+ yield currentRow;
683
+ }
684
+ }
685
+ }
686
+
687
+ compileParser(structure) {
688
+ const cacheKey = JSON.stringify(structure);
689
+
690
+ // Проверяем кеш
691
+ if (this.compilers.has(cacheKey)) {
692
+ this.stats.cacheHits++;
693
+ return this.compilers.get(cacheKey);
694
+ }
695
+
696
+ this.stats.cacheMisses++;
697
+
698
+ let parser;
699
+ switch (structure.recommendedEngine) {
700
+ case 'SIMPLE':
701
+ parser = this._createSimpleParser(structure);
702
+ this.stats.simpleParserCount++;
703
+ break;
704
+ case 'QUOTE_AWARE':
705
+ parser = this._createQuoteAwareParser(structure);
706
+ this.stats.quoteAwareParserCount++;
707
+ break;
708
+ case 'STANDARD':
709
+ parser = this._createQuoteAwareParser(structure);
710
+ this.stats.standardParserCount++;
711
+ break;
712
+ default:
713
+ parser = this._createQuoteAwareParser(structure);
714
+ this.stats.standardParserCount++;
715
+ }
716
+
717
+ // Кешируем парсер
718
+ this.compilers.set(cacheKey, parser);
719
+
720
+ return parser;
721
+ }
722
+
723
+ /**
724
+ * Compiles a row-emitter parser for streaming conversion.
725
+ */
726
+ compileRowEmitter(structure) {
727
+ const cacheKey = JSON.stringify(structure);
728
+
729
+ if (this.rowCompilers.has(cacheKey)) {
730
+ return this.rowCompilers.get(cacheKey);
731
+ }
732
+
733
+ let emitter;
734
+ switch (structure.recommendedEngine) {
735
+ case 'SIMPLE':
736
+ emitter = this._createSimpleRowEmitter(structure);
737
+ break;
738
+ case 'QUOTE_AWARE':
739
+ emitter = this._createQuoteAwareRowEmitter(structure);
740
+ break;
741
+ case 'STANDARD':
742
+ emitter = this._createQuoteAwareRowEmitter(structure);
743
+ break;
744
+ default:
745
+ emitter = this._createQuoteAwareRowEmitter(structure);
746
+ }
747
+
748
+ this.rowCompilers.set(cacheKey, emitter);
749
+ return emitter;
750
+ }
751
+
752
+ /**
753
+ * Iterates rows without allocating the full result set.
754
+ */
755
+ *iterateRows(csv, options = {}) {
756
+ const structure = this._getStructureForParse(csv, options);
757
+ const useEscapes = structure.hasBackslashes;
758
+
759
+ switch (structure.recommendedEngine) {
760
+ case 'SIMPLE':
761
+ if (useEscapes) {
762
+ yield* this._simpleEscapedRowsGenerator(csv, structure.delimiter);
763
+ } else {
764
+ yield* this._simpleRowsGenerator(csv, structure.delimiter);
765
+ }
766
+ break;
767
+ case 'QUOTE_AWARE':
768
+ if (useEscapes) {
769
+ yield* this._quoteAwareEscapedRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
770
+ } else {
771
+ yield* this._quoteAwareRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
772
+ }
773
+ break;
774
+ case 'STANDARD':
775
+ if (useEscapes) {
776
+ yield* this._quoteAwareEscapedRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
777
+ } else {
778
+ yield* this._quoteAwareRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
779
+ }
780
+ break;
781
+ default:
782
+ if (useEscapes) {
783
+ yield* this._quoteAwareEscapedRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
784
+ } else {
785
+ yield* this._quoteAwareRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
786
+ }
787
+ }
788
+ }
789
+
790
+ /**
791
+ * Парсит CSV с использованием оптимального парсера
792
+ */
793
+ parse(csv, options = {}) {
794
+ const structure = this._getStructureForParse(csv, options);
795
+ const parser = this.compileParser(structure);
796
+
797
+ return parser(csv);
798
+ }
799
+
800
+ /**
801
+ * Parses CSV and emits rows via a callback to reduce memory usage.
802
+ */
803
+ parseRows(csv, options = {}, onRow) {
804
+ for (const row of this.iterateRows(csv, options)) {
805
+ onRow(row);
806
+ }
807
+ }
808
+
809
+ /**
810
+ * Возвращает статистику использования парсеров
811
+ */
812
+ getStats() {
813
+ return {
814
+ ...this.stats,
815
+ totalParsers: this.compilers.size,
816
+ hitRate: this.stats.cacheHits / (this.stats.cacheHits + this.stats.cacheMisses) || 0
817
+ };
818
+ }
819
+
820
+ /**
821
+ * Сбрасывает статистику и кеш
822
+ */
823
+ reset() {
824
+ this.compilers.clear();
825
+ this.rowCompilers.clear();
826
+ this.stats = {
827
+ simpleParserCount: 0,
828
+ quoteAwareParserCount: 0,
829
+ standardParserCount: 0,
830
+ cacheHits: 0,
831
+ cacheMisses: 0
832
+ };
833
+ }
834
+ }
835
+
347
836
  module.exports = FastPathEngine;