jtcsv 2.1.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -17
- package/bin/jtcsv.js +1013 -117
- package/csv-to-json.js +385 -311
- package/examples/simple-usage.js +2 -3
- package/index.d.ts +288 -5
- package/index.js +23 -0
- package/json-to-csv.js +130 -89
- package/package.json +47 -19
- package/plugins/README.md +146 -2
- package/plugins/hono/README.md +25 -0
- package/plugins/hono/index.d.ts +12 -0
- package/plugins/hono/index.js +36 -0
- package/plugins/hono/package.json +35 -0
- package/plugins/nestjs/README.md +33 -0
- package/plugins/nestjs/index.d.ts +25 -0
- package/plugins/nestjs/index.js +77 -0
- package/plugins/nestjs/package.json +37 -0
- package/plugins/nuxt/README.md +25 -0
- package/plugins/nuxt/index.js +21 -0
- package/plugins/nuxt/package.json +35 -0
- package/plugins/nuxt/runtime/composables/useJtcsv.js +6 -0
- package/plugins/nuxt/runtime/plugin.js +6 -0
- package/plugins/remix/README.md +26 -0
- package/plugins/remix/index.d.ts +16 -0
- package/plugins/remix/index.js +62 -0
- package/plugins/remix/package.json +35 -0
- package/plugins/sveltekit/README.md +28 -0
- package/plugins/sveltekit/index.d.ts +17 -0
- package/plugins/sveltekit/index.js +54 -0
- package/plugins/sveltekit/package.json +33 -0
- package/plugins/trpc/README.md +22 -0
- package/plugins/trpc/index.d.ts +7 -0
- package/plugins/trpc/index.js +32 -0
- package/plugins/trpc/package.json +34 -0
- package/src/core/delimiter-cache.js +186 -0
- package/src/core/transform-hooks.js +350 -0
- package/src/engines/fast-path-engine.js +829 -340
- package/src/formats/tsv-parser.js +336 -0
- package/src/index-with-plugins.js +36 -14
- package/cli-tui.js +0 -5
|
@@ -1,347 +1,836 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fast-Path Engine для оптимизации CSV парсинга
|
|
3
|
-
* Автоматически выбирает оптимальный парсер на основе структуры CSV
|
|
4
|
-
*
|
|
5
|
-
* @version 1.0.0
|
|
6
|
-
* @date 2026-01-22
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
class FastPathEngine {
|
|
10
|
-
constructor() {
|
|
11
|
-
this.compilers = new Map();
|
|
12
|
-
this.
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
if (line.includes('"')) {
|
|
36
|
-
hasQuotes = true;
|
|
37
|
-
if (line.includes('""')) {
|
|
38
|
-
hasEscapedQuotes = true;
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
const quoteCount = (line.match(/"/g) || []).length;
|
|
43
|
-
if (quoteCount % 2 !== 0) {
|
|
44
|
-
hasNewlinesInFields = true;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
const fieldCount = line.split(delimiter).length;
|
|
48
|
-
totalFields += fieldCount;
|
|
49
|
-
if (fieldCount > maxFields) {
|
|
50
|
-
maxFields = fieldCount;
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
const avgFieldsPerLine = totalFields / lines.length;
|
|
55
|
-
const fieldConsistency = maxFields === avgFieldsPerLine;
|
|
56
|
-
|
|
57
|
-
return {
|
|
58
|
-
delimiter,
|
|
59
|
-
hasQuotes,
|
|
60
|
-
hasEscapedQuotes,
|
|
61
|
-
hasNewlinesInFields,
|
|
62
|
-
fieldConsistency,
|
|
63
|
-
avgFieldsPerLine,
|
|
64
|
-
maxFields,
|
|
65
|
-
recommendedEngine: this._selectEngine(hasQuotes, hasNewlinesInFields, fieldConsistency)
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
/**
|
|
70
|
-
* Автоматически определяет разделитель
|
|
71
|
-
*/
|
|
72
|
-
_detectDelimiter(sample) {
|
|
73
|
-
const candidates = [',', ';', '\t', '|'];
|
|
74
|
-
const firstLine = sample.split('\n')[0];
|
|
75
|
-
|
|
76
|
-
let bestDelimiter = ',';
|
|
77
|
-
let bestScore = 0;
|
|
78
|
-
|
|
79
|
-
for (const delimiter of candidates) {
|
|
80
|
-
const fields = firstLine.split(delimiter);
|
|
81
|
-
const score = fields.length;
|
|
82
|
-
|
|
83
|
-
// Если разделитель не найден в строке, пропускаем его
|
|
84
|
-
if (score === 1 && !firstLine.includes(delimiter)) {
|
|
85
|
-
continue;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
const avgLength = fields.reduce((sum, field) => sum + field.length, 0) / fields.length;
|
|
89
|
-
const variance = fields.reduce((sum, field) => sum + Math.pow(field.length - avgLength, 2), 0) / fields.length;
|
|
90
|
-
|
|
91
|
-
const finalScore = score / (variance + 1);
|
|
92
|
-
|
|
93
|
-
if (finalScore > bestScore) {
|
|
94
|
-
bestScore = finalScore;
|
|
95
|
-
bestDelimiter = delimiter;
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
return bestDelimiter;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* Выбирает оптимальный движок парсинга
|
|
104
|
-
*/
|
|
105
|
-
_selectEngine(hasQuotes, hasNewlinesInFields, _fieldConsistency) {
|
|
106
|
-
if (!hasQuotes && !hasNewlinesInFields) {
|
|
107
|
-
return 'SIMPLE';
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
if (hasQuotes && !hasNewlinesInFields) {
|
|
111
|
-
return 'QUOTE_AWARE';
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
return 'STANDARD';
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
/**
|
|
118
|
-
* Создает простой парсер (разделитель без кавычек)
|
|
119
|
-
*/
|
|
120
|
-
_createSimpleParser(structure) {
|
|
121
|
-
const { delimiter } = structure;
|
|
122
|
-
|
|
123
|
-
return (csv) => {
|
|
124
|
-
const rows = [];
|
|
125
|
-
const lines = csv.split('\n');
|
|
126
|
-
|
|
127
|
-
for (const line of lines) {
|
|
128
|
-
if (line.trim() === '') {
|
|
129
|
-
continue;
|
|
130
|
-
}
|
|
131
|
-
const fields = line.split(delimiter);
|
|
132
|
-
rows.push(fields);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
return rows;
|
|
136
|
-
};
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
/**
|
|
140
|
-
* State machine парсер для CSV с кавычками (RFC 4180)
|
|
141
|
-
*/
|
|
142
|
-
_createQuoteAwareParser(structure) {
|
|
143
|
-
const { delimiter, hasEscapedQuotes } = structure;
|
|
144
|
-
|
|
145
|
-
return (csv) => {
|
|
146
|
-
const rows = [];
|
|
147
|
-
let currentRow = [];
|
|
148
|
-
let currentField = '';
|
|
149
|
-
let insideQuotes = false;
|
|
150
|
-
let i = 0;
|
|
151
|
-
|
|
152
|
-
while (i < csv.length) {
|
|
153
|
-
const char = csv[i];
|
|
154
|
-
const nextChar = csv[i + 1];
|
|
155
|
-
|
|
156
|
-
if (char === '"') {
|
|
157
|
-
if (insideQuotes) {
|
|
158
|
-
if (hasEscapedQuotes && nextChar === '"') {
|
|
159
|
-
currentField += '"';
|
|
160
|
-
i += 2;
|
|
161
|
-
} else {
|
|
162
|
-
insideQuotes = false;
|
|
163
|
-
i++;
|
|
164
|
-
}
|
|
165
|
-
} else {
|
|
166
|
-
insideQuotes = true;
|
|
167
|
-
i++;
|
|
168
|
-
}
|
|
169
|
-
} else if (char === delimiter && !insideQuotes) {
|
|
170
|
-
currentRow.push(currentField);
|
|
171
|
-
currentField = '';
|
|
172
|
-
i++;
|
|
173
|
-
} else if ((char === '\n' || (char === '\r' && nextChar === '\n')) && !insideQuotes) {
|
|
174
|
-
currentRow.push(currentField);
|
|
175
|
-
if (currentRow.length > 0 && currentRow.some(field => field !== '')) {
|
|
176
|
-
rows.push(currentRow);
|
|
177
|
-
}
|
|
178
|
-
currentRow = [];
|
|
179
|
-
currentField = '';
|
|
180
|
-
i += (char === '\r' && nextChar === '\n') ? 2 : 1;
|
|
181
|
-
} else {
|
|
182
|
-
currentField += char;
|
|
183
|
-
i++;
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
if (currentField !== '' || currentRow.length > 0) {
|
|
188
|
-
currentRow.push(currentField);
|
|
189
|
-
if (currentRow.length > 0 && currentRow.some(field => field !== '')) {
|
|
190
|
-
rows.push(currentRow);
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
return rows;
|
|
195
|
-
};
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
/**
|
|
199
|
-
* Стандартный парсер (fallback)
|
|
200
|
-
*/
|
|
201
|
-
_createStandardParser(structure) {
|
|
202
|
-
const { delimiter } = structure;
|
|
203
|
-
|
|
204
|
-
return (csv) => {
|
|
205
|
-
const rows = [];
|
|
206
|
-
const lines = csv.split('\n');
|
|
207
|
-
let insideQuotes = false;
|
|
208
|
-
let currentLine = '';
|
|
209
|
-
|
|
210
|
-
for (const line of lines) {
|
|
211
|
-
const quoteCount = (line.match(/"/g) || []).length;
|
|
212
|
-
|
|
213
|
-
if (insideQuotes) {
|
|
214
|
-
currentLine += '\n' + line;
|
|
215
|
-
if (quoteCount % 2 !== 0) {
|
|
216
|
-
insideQuotes = false;
|
|
217
|
-
rows.push(this._parseLineWithQuotes(currentLine, delimiter));
|
|
218
|
-
currentLine = '';
|
|
219
|
-
}
|
|
220
|
-
} else {
|
|
221
|
-
if (quoteCount % 2 !== 0) {
|
|
222
|
-
insideQuotes = true;
|
|
223
|
-
currentLine = line;
|
|
224
|
-
} else {
|
|
225
|
-
rows.push(this._parseLineWithQuotes(line, delimiter));
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
return rows;
|
|
231
|
-
};
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
/**
|
|
235
|
-
* Парсит строку с учетом кавычек
|
|
236
|
-
*/
|
|
237
|
-
_parseLineWithQuotes(line, delimiter) {
|
|
238
|
-
const fields = [];
|
|
239
|
-
let currentField = '';
|
|
240
|
-
let insideQuotes = false;
|
|
241
|
-
let i = 0;
|
|
242
|
-
|
|
243
|
-
while (i < line.length) {
|
|
244
|
-
const char = line[i];
|
|
245
|
-
const nextChar = line[i + 1];
|
|
246
|
-
|
|
247
|
-
if (char === '"') {
|
|
248
|
-
if (insideQuotes && nextChar === '"') {
|
|
249
|
-
currentField += '"';
|
|
250
|
-
i += 2;
|
|
251
|
-
} else {
|
|
252
|
-
insideQuotes = !insideQuotes;
|
|
253
|
-
i++;
|
|
254
|
-
}
|
|
255
|
-
} else if (char === delimiter && !insideQuotes) {
|
|
256
|
-
fields.push(currentField);
|
|
257
|
-
currentField = '';
|
|
258
|
-
i++;
|
|
259
|
-
} else {
|
|
260
|
-
currentField += char;
|
|
261
|
-
i++;
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
fields.push(currentField);
|
|
266
|
-
return fields;
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
/**
|
|
270
|
-
* Компилирует парсер на основе структуры CSV
|
|
271
|
-
*/
|
|
272
|
-
compileParser(structure) {
|
|
273
|
-
const cacheKey = JSON.stringify(structure);
|
|
274
|
-
|
|
275
|
-
// Проверяем кеш
|
|
276
|
-
if (this.compilers.has(cacheKey)) {
|
|
277
|
-
this.stats.cacheHits++;
|
|
278
|
-
return this.compilers.get(cacheKey);
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
this.stats.cacheMisses++;
|
|
282
|
-
|
|
283
|
-
let parser;
|
|
284
|
-
switch (structure.recommendedEngine) {
|
|
285
|
-
case 'SIMPLE':
|
|
286
|
-
parser = this._createSimpleParser(structure);
|
|
287
|
-
this.stats.simpleParserCount++;
|
|
288
|
-
break;
|
|
289
|
-
case 'QUOTE_AWARE':
|
|
290
|
-
parser = this._createQuoteAwareParser(structure);
|
|
291
|
-
this.stats.quoteAwareParserCount++;
|
|
292
|
-
break;
|
|
293
|
-
case 'STANDARD':
|
|
294
|
-
parser = this._createStandardParser(structure);
|
|
295
|
-
this.stats.standardParserCount++;
|
|
296
|
-
break;
|
|
297
|
-
default:
|
|
298
|
-
parser = this._createStandardParser(structure);
|
|
299
|
-
this.stats.standardParserCount++;
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
// Кешируем парсер
|
|
303
|
-
this.compilers.set(cacheKey, parser);
|
|
304
|
-
|
|
305
|
-
return parser;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
/**
|
|
309
|
-
* Парсит CSV с использованием оптимального парсера
|
|
310
|
-
*/
|
|
311
|
-
parse(csv, options = {}) {
|
|
1
|
+
/**
|
|
2
|
+
* Fast-Path Engine для оптимизации CSV парсинга
|
|
3
|
+
* Автоматически выбирает оптимальный парсер на основе структуры CSV
|
|
4
|
+
*
|
|
5
|
+
* @version 1.0.0
|
|
6
|
+
* @date 2026-01-22
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
class FastPathEngine {
|
|
10
|
+
constructor() {
|
|
11
|
+
this.compilers = new Map();
|
|
12
|
+
this.rowCompilers = new Map();
|
|
13
|
+
this.stats = {
|
|
14
|
+
simpleParserCount: 0,
|
|
15
|
+
quoteAwareParserCount: 0,
|
|
16
|
+
standardParserCount: 0,
|
|
17
|
+
cacheHits: 0,
|
|
18
|
+
cacheMisses: 0
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
_hasQuotes(csv) {
|
|
23
|
+
return csv.indexOf('"') !== -1;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
_hasEscapedQuotes(csv) {
|
|
27
|
+
return csv.indexOf('""') !== -1;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
_hasBackslashes(csv) {
|
|
31
|
+
return csv.indexOf('\\') !== -1;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
_getStructureForParse(csv, options) {
|
|
312
35
|
const sampleSize = Math.min(1000, csv.length);
|
|
313
36
|
const sample = csv.substring(0, sampleSize);
|
|
314
|
-
|
|
315
37
|
const structure = this.analyzeStructure(sample, options);
|
|
316
|
-
const
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
38
|
+
const hasBackslashes = this._hasBackslashes(csv);
|
|
39
|
+
const hasQuotes = structure.hasQuotes ? true : this._hasQuotes(csv);
|
|
40
|
+
const hasEscapedQuotes = structure.hasEscapedQuotes
|
|
41
|
+
? true
|
|
42
|
+
: (hasQuotes ? this._hasEscapedQuotes(csv) : false);
|
|
43
|
+
|
|
44
|
+
let normalized = {
|
|
45
|
+
...structure,
|
|
46
|
+
hasQuotes,
|
|
47
|
+
hasEscapedQuotes,
|
|
48
|
+
hasBackslashes
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
if (structure.recommendedEngine === 'SIMPLE' && hasQuotes) {
|
|
52
|
+
normalized = {
|
|
53
|
+
...normalized,
|
|
54
|
+
hasNewlinesInFields: true,
|
|
55
|
+
recommendedEngine: 'QUOTE_AWARE'
|
|
56
|
+
};
|
|
57
|
+
}
|
|
320
58
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
totalParsers: this.compilers.size,
|
|
328
|
-
hitRate: this.stats.cacheHits / (this.stats.cacheHits + this.stats.cacheMisses) || 0
|
|
329
|
-
};
|
|
330
|
-
}
|
|
59
|
+
if (options && options.forceEngine) {
|
|
60
|
+
normalized = {
|
|
61
|
+
...normalized,
|
|
62
|
+
recommendedEngine: options.forceEngine
|
|
63
|
+
};
|
|
64
|
+
}
|
|
331
65
|
|
|
332
|
-
|
|
333
|
-
* Сбрасывает статистику и кеш
|
|
334
|
-
*/
|
|
335
|
-
reset() {
|
|
336
|
-
this.compilers.clear();
|
|
337
|
-
this.stats = {
|
|
338
|
-
simpleParserCount: 0,
|
|
339
|
-
quoteAwareParserCount: 0,
|
|
340
|
-
standardParserCount: 0,
|
|
341
|
-
cacheHits: 0,
|
|
342
|
-
cacheMisses: 0
|
|
343
|
-
};
|
|
66
|
+
return normalized;
|
|
344
67
|
}
|
|
345
|
-
|
|
346
|
-
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Анализирует структуру CSV и определяет оптимальный парсер
|
|
71
|
+
*/
|
|
72
|
+
analyzeStructure(sample, options = {}) {
|
|
73
|
+
const delimiter = options.delimiter || this._detectDelimiter(sample);
|
|
74
|
+
const lines = sample.split('\n').slice(0, 10);
|
|
75
|
+
|
|
76
|
+
let hasQuotes = false;
|
|
77
|
+
let hasNewlinesInFields = false;
|
|
78
|
+
let hasEscapedQuotes = false;
|
|
79
|
+
let maxFields = 0;
|
|
80
|
+
let totalFields = 0;
|
|
81
|
+
|
|
82
|
+
for (const line of lines) {
|
|
83
|
+
if (line.includes('"')) {
|
|
84
|
+
hasQuotes = true;
|
|
85
|
+
if (line.includes('""')) {
|
|
86
|
+
hasEscapedQuotes = true;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const quoteCount = (line.match(/"/g) || []).length;
|
|
91
|
+
if (quoteCount % 2 !== 0) {
|
|
92
|
+
hasNewlinesInFields = true;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const fieldCount = line.split(delimiter).length;
|
|
96
|
+
totalFields += fieldCount;
|
|
97
|
+
if (fieldCount > maxFields) {
|
|
98
|
+
maxFields = fieldCount;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const avgFieldsPerLine = totalFields / lines.length;
|
|
103
|
+
const fieldConsistency = maxFields === avgFieldsPerLine;
|
|
104
|
+
|
|
105
|
+
return {
|
|
106
|
+
delimiter,
|
|
107
|
+
hasQuotes,
|
|
108
|
+
hasEscapedQuotes,
|
|
109
|
+
hasNewlinesInFields,
|
|
110
|
+
fieldConsistency,
|
|
111
|
+
avgFieldsPerLine,
|
|
112
|
+
maxFields,
|
|
113
|
+
recommendedEngine: this._selectEngine(hasQuotes, hasNewlinesInFields, fieldConsistency)
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Автоматически определяет разделитель
|
|
119
|
+
*/
|
|
120
|
+
_detectDelimiter(sample) {
|
|
121
|
+
const candidates = [',', ';', '\t', '|'];
|
|
122
|
+
const firstLine = sample.split('\n')[0];
|
|
123
|
+
|
|
124
|
+
let bestDelimiter = ',';
|
|
125
|
+
let bestScore = 0;
|
|
126
|
+
|
|
127
|
+
for (const delimiter of candidates) {
|
|
128
|
+
const fields = firstLine.split(delimiter);
|
|
129
|
+
const score = fields.length;
|
|
130
|
+
|
|
131
|
+
// Если разделитель не найден в строке, пропускаем его
|
|
132
|
+
if (score === 1 && !firstLine.includes(delimiter)) {
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
const avgLength = fields.reduce((sum, field) => sum + field.length, 0) / fields.length;
|
|
137
|
+
const variance = fields.reduce((sum, field) => sum + Math.pow(field.length - avgLength, 2), 0) / fields.length;
|
|
138
|
+
|
|
139
|
+
const finalScore = score / (variance + 1);
|
|
140
|
+
|
|
141
|
+
if (finalScore > bestScore) {
|
|
142
|
+
bestScore = finalScore;
|
|
143
|
+
bestDelimiter = delimiter;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
return bestDelimiter;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Выбирает оптимальный движок парсинга
|
|
152
|
+
*/
|
|
153
|
+
_selectEngine(hasQuotes, hasNewlinesInFields, _fieldConsistency) {
|
|
154
|
+
if (!hasQuotes && !hasNewlinesInFields) {
|
|
155
|
+
return 'SIMPLE';
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
if (hasQuotes && !hasNewlinesInFields) {
|
|
159
|
+
return 'QUOTE_AWARE';
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
return 'STANDARD';
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Создает простой парсер (разделитель без кавычек)
|
|
167
|
+
*/
|
|
168
|
+
_createSimpleParser(structure) {
|
|
169
|
+
const { delimiter, hasBackslashes } = structure;
|
|
170
|
+
|
|
171
|
+
return (csv) => {
|
|
172
|
+
const rows = [];
|
|
173
|
+
if (hasBackslashes) {
|
|
174
|
+
this._emitSimpleRowsEscaped(csv, delimiter, (row) => rows.push(row));
|
|
175
|
+
} else {
|
|
176
|
+
this._emitSimpleRows(csv, delimiter, (row) => rows.push(row));
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
return rows;
|
|
180
|
+
};
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
_emitSimpleRows(csv, delimiter, onRow) {
|
|
184
|
+
let currentRow = [];
|
|
185
|
+
let rowHasData = false;
|
|
186
|
+
let fieldStart = 0;
|
|
187
|
+
let i = 0;
|
|
188
|
+
|
|
189
|
+
while (i <= csv.length) {
|
|
190
|
+
const char = i < csv.length ? csv[i] : '\n';
|
|
191
|
+
|
|
192
|
+
if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
|
|
193
|
+
rowHasData = true;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
|
|
197
|
+
const field = csv.slice(fieldStart, i);
|
|
198
|
+
currentRow.push(field);
|
|
199
|
+
|
|
200
|
+
if (char === '\n' || char === '\r' || i === csv.length) {
|
|
201
|
+
if (rowHasData) {
|
|
202
|
+
onRow(currentRow);
|
|
203
|
+
}
|
|
204
|
+
currentRow = [];
|
|
205
|
+
rowHasData = false;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
if (char === '\r' && csv[i + 1] === '\n') {
|
|
209
|
+
i++;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
fieldStart = i + 1;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
i++;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
_emitSimpleRowsEscaped(csv, delimiter, onRow) {
|
|
220
|
+
let currentRow = [];
|
|
221
|
+
let currentField = '';
|
|
222
|
+
let rowHasData = false;
|
|
223
|
+
let escapeNext = false;
|
|
224
|
+
let i = 0;
|
|
225
|
+
|
|
226
|
+
while (i <= csv.length) {
|
|
227
|
+
const char = i < csv.length ? csv[i] : '\n';
|
|
228
|
+
const nextChar = i + 1 < csv.length ? csv[i + 1] : '';
|
|
229
|
+
|
|
230
|
+
if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
|
|
231
|
+
rowHasData = true;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
if (escapeNext) {
|
|
235
|
+
currentField += char;
|
|
236
|
+
escapeNext = false;
|
|
237
|
+
i++;
|
|
238
|
+
continue;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
if (char === '\\') {
|
|
242
|
+
if (i + 1 >= csv.length) {
|
|
243
|
+
currentField += '\\';
|
|
244
|
+
i++;
|
|
245
|
+
continue;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
if (nextChar === '\\') {
|
|
249
|
+
currentField += '\\';
|
|
250
|
+
i += 2;
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if (nextChar === '\n' || nextChar === '\r') {
|
|
255
|
+
currentField += '\\';
|
|
256
|
+
i++;
|
|
257
|
+
continue;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
escapeNext = true;
|
|
261
|
+
i++;
|
|
262
|
+
continue;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
|
|
266
|
+
currentRow.push(currentField);
|
|
267
|
+
currentField = '';
|
|
268
|
+
|
|
269
|
+
if (char === '\n' || char === '\r' || i === csv.length) {
|
|
270
|
+
if (rowHasData) {
|
|
271
|
+
onRow(currentRow);
|
|
272
|
+
}
|
|
273
|
+
currentRow = [];
|
|
274
|
+
rowHasData = false;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
if (char === '\r' && csv[i + 1] === '\n') {
|
|
278
|
+
i++;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
i++;
|
|
282
|
+
continue;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
currentField += char;
|
|
286
|
+
i++;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
*_simpleRowsGenerator(csv, delimiter) {
|
|
291
|
+
let currentRow = [];
|
|
292
|
+
let rowHasData = false;
|
|
293
|
+
let fieldStart = 0;
|
|
294
|
+
let i = 0;
|
|
295
|
+
|
|
296
|
+
while (i <= csv.length) {
|
|
297
|
+
const char = i < csv.length ? csv[i] : '\n';
|
|
298
|
+
|
|
299
|
+
if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
|
|
300
|
+
rowHasData = true;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
|
|
304
|
+
const field = csv.slice(fieldStart, i);
|
|
305
|
+
currentRow.push(field);
|
|
306
|
+
|
|
307
|
+
if (char === '\n' || char === '\r' || i === csv.length) {
|
|
308
|
+
if (rowHasData) {
|
|
309
|
+
yield currentRow;
|
|
310
|
+
}
|
|
311
|
+
currentRow = [];
|
|
312
|
+
rowHasData = false;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
if (char === '\r' && csv[i + 1] === '\n') {
|
|
316
|
+
i++;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
fieldStart = i + 1;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
i++;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
*_simpleEscapedRowsGenerator(csv, delimiter) {
|
|
327
|
+
let currentRow = [];
|
|
328
|
+
let currentField = '';
|
|
329
|
+
let rowHasData = false;
|
|
330
|
+
let escapeNext = false;
|
|
331
|
+
let i = 0;
|
|
332
|
+
|
|
333
|
+
while (i <= csv.length) {
|
|
334
|
+
const char = i < csv.length ? csv[i] : '\n';
|
|
335
|
+
const nextChar = i + 1 < csv.length ? csv[i + 1] : '';
|
|
336
|
+
|
|
337
|
+
if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
|
|
338
|
+
rowHasData = true;
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
if (escapeNext) {
|
|
342
|
+
currentField += char;
|
|
343
|
+
escapeNext = false;
|
|
344
|
+
i++;
|
|
345
|
+
continue;
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
if (char === '\\') {
|
|
349
|
+
if (i + 1 >= csv.length) {
|
|
350
|
+
currentField += '\\';
|
|
351
|
+
i++;
|
|
352
|
+
continue;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
if (nextChar === '\\') {
|
|
356
|
+
currentField += '\\';
|
|
357
|
+
i += 2;
|
|
358
|
+
continue;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
if (nextChar === '\n' || nextChar === '\r') {
|
|
362
|
+
currentField += '\\';
|
|
363
|
+
i++;
|
|
364
|
+
continue;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
escapeNext = true;
|
|
368
|
+
i++;
|
|
369
|
+
continue;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
if (char === delimiter || char === '\n' || char === '\r' || i === csv.length) {
|
|
373
|
+
currentRow.push(currentField);
|
|
374
|
+
currentField = '';
|
|
375
|
+
|
|
376
|
+
if (char === '\n' || char === '\r' || i === csv.length) {
|
|
377
|
+
if (rowHasData) {
|
|
378
|
+
yield currentRow;
|
|
379
|
+
}
|
|
380
|
+
currentRow = [];
|
|
381
|
+
rowHasData = false;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
if (char === '\r' && csv[i + 1] === '\n') {
|
|
385
|
+
i++;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
i++;
|
|
389
|
+
continue;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
currentField += char;
|
|
393
|
+
i++;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
/**
|
|
398
|
+
* Simple row emitter that avoids storing all rows in memory.
|
|
399
|
+
*/
|
|
400
|
+
_createSimpleRowEmitter(structure) {
|
|
401
|
+
const { delimiter, hasBackslashes } = structure;
|
|
402
|
+
|
|
403
|
+
return (csv, onRow) => {
|
|
404
|
+
if (hasBackslashes) {
|
|
405
|
+
this._emitSimpleRowsEscaped(csv, delimiter, onRow);
|
|
406
|
+
} else {
|
|
407
|
+
this._emitSimpleRows(csv, delimiter, onRow);
|
|
408
|
+
}
|
|
409
|
+
};
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
/**
|
|
413
|
+
* State machine парсер для CSV с кавычками (RFC 4180)
|
|
414
|
+
*/
|
|
415
|
+
_createQuoteAwareParser(structure) {
|
|
416
|
+
const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;
|
|
417
|
+
|
|
418
|
+
return (csv) => {
|
|
419
|
+
const rows = [];
|
|
420
|
+
const iterator = hasBackslashes
|
|
421
|
+
? this._quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes)
|
|
422
|
+
: this._quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes);
|
|
423
|
+
|
|
424
|
+
for (const row of iterator) {
|
|
425
|
+
rows.push(row);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
return rows;
|
|
429
|
+
};
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
/**
|
|
433
|
+
* Quote-aware row emitter that avoids storing all rows in memory.
|
|
434
|
+
*/
|
|
435
|
+
_createQuoteAwareRowEmitter(structure) {
|
|
436
|
+
const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;
|
|
437
|
+
|
|
438
|
+
return (csv, onRow) => {
|
|
439
|
+
const iterator = hasBackslashes
|
|
440
|
+
? this._quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes)
|
|
441
|
+
: this._quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes);
|
|
442
|
+
|
|
443
|
+
for (const row of iterator) {
|
|
444
|
+
onRow(row);
|
|
445
|
+
}
|
|
446
|
+
};
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
*_quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes) {
|
|
450
|
+
let currentRow = [];
|
|
451
|
+
let currentField = '';
|
|
452
|
+
let rowHasData = false;
|
|
453
|
+
let insideQuotes = false;
|
|
454
|
+
let lineNumber = 1;
|
|
455
|
+
let i = 0;
|
|
456
|
+
|
|
457
|
+
while (i < csv.length) {
|
|
458
|
+
const char = csv[i];
|
|
459
|
+
const nextChar = csv[i + 1];
|
|
460
|
+
|
|
461
|
+
if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
|
|
462
|
+
rowHasData = true;
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
if (char === '"') {
|
|
466
|
+
if (insideQuotes) {
|
|
467
|
+
if (hasEscapedQuotes && nextChar === '"') {
|
|
468
|
+
const afterNext = csv[i + 2];
|
|
469
|
+
const isLineEnd = i + 2 >= csv.length || afterNext === '\n' || afterNext === '\r';
|
|
470
|
+
|
|
471
|
+
currentField += '"';
|
|
472
|
+
if (isLineEnd) {
|
|
473
|
+
insideQuotes = false;
|
|
474
|
+
i += 2;
|
|
475
|
+
continue;
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
i += 2;
|
|
479
|
+
|
|
480
|
+
let j = i;
|
|
481
|
+
while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
|
|
482
|
+
j++;
|
|
483
|
+
}
|
|
484
|
+
if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
|
|
485
|
+
insideQuotes = false;
|
|
486
|
+
}
|
|
487
|
+
continue;
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
let j = i + 1;
|
|
491
|
+
while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
|
|
492
|
+
j++;
|
|
493
|
+
}
|
|
494
|
+
if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
|
|
495
|
+
insideQuotes = false;
|
|
496
|
+
i++;
|
|
497
|
+
continue;
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
currentField += '"';
|
|
501
|
+
i++;
|
|
502
|
+
continue;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
insideQuotes = true;
|
|
506
|
+
i++;
|
|
507
|
+
continue;
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
if (!insideQuotes && (char === delimiter || char === '\n' || char === '\r')) {
|
|
511
|
+
currentRow.push(currentField);
|
|
512
|
+
currentField = '';
|
|
513
|
+
|
|
514
|
+
if (char === '\n' || char === '\r') {
|
|
515
|
+
if (rowHasData) {
|
|
516
|
+
yield currentRow;
|
|
517
|
+
}
|
|
518
|
+
currentRow = [];
|
|
519
|
+
rowHasData = false;
|
|
520
|
+
lineNumber++;
|
|
521
|
+
|
|
522
|
+
if (char === '\r' && nextChar === '\n') {
|
|
523
|
+
i++;
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
i++;
|
|
528
|
+
continue;
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
currentField += char;
|
|
532
|
+
i++;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
if (insideQuotes) {
|
|
536
|
+
const error = new Error('Unclosed quotes in CSV');
|
|
537
|
+
error.code = 'FAST_PATH_UNCLOSED_QUOTES';
|
|
538
|
+
error.lineNumber = lineNumber;
|
|
539
|
+
throw error;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
if (currentField !== '' || currentRow.length > 0) {
|
|
543
|
+
currentRow.push(currentField);
|
|
544
|
+
if (rowHasData) {
|
|
545
|
+
yield currentRow;
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
*_quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes) {
|
|
551
|
+
let currentRow = [];
|
|
552
|
+
let currentField = '';
|
|
553
|
+
let rowHasData = false;
|
|
554
|
+
let insideQuotes = false;
|
|
555
|
+
let escapeNext = false;
|
|
556
|
+
let lineNumber = 1;
|
|
557
|
+
let i = 0;
|
|
558
|
+
|
|
559
|
+
while (i < csv.length) {
|
|
560
|
+
const char = csv[i];
|
|
561
|
+
const nextChar = csv[i + 1];
|
|
562
|
+
|
|
563
|
+
if (char !== '\r' && char !== '\n' && char !== ' ' && char !== '\t') {
|
|
564
|
+
rowHasData = true;
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
if (escapeNext) {
|
|
568
|
+
currentField += char;
|
|
569
|
+
escapeNext = false;
|
|
570
|
+
i++;
|
|
571
|
+
continue;
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
if (char === '\\') {
|
|
575
|
+
if (i + 1 >= csv.length) {
|
|
576
|
+
currentField += '\\';
|
|
577
|
+
i++;
|
|
578
|
+
continue;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
if (!insideQuotes && (nextChar === '\n' || nextChar === '\r')) {
|
|
582
|
+
currentField += '\\';
|
|
583
|
+
i++;
|
|
584
|
+
continue;
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
if (nextChar === '\\') {
|
|
588
|
+
currentField += '\\';
|
|
589
|
+
i += 2;
|
|
590
|
+
continue;
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
escapeNext = true;
|
|
594
|
+
i++;
|
|
595
|
+
continue;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
if (char === '"') {
|
|
599
|
+
if (insideQuotes) {
|
|
600
|
+
if (hasEscapedQuotes && nextChar === '"') {
|
|
601
|
+
const afterNext = csv[i + 2];
|
|
602
|
+
const isLineEnd = i + 2 >= csv.length || afterNext === '\n' || afterNext === '\r';
|
|
603
|
+
|
|
604
|
+
currentField += '"';
|
|
605
|
+
if (isLineEnd) {
|
|
606
|
+
insideQuotes = false;
|
|
607
|
+
i += 2;
|
|
608
|
+
continue;
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
i += 2;
|
|
612
|
+
|
|
613
|
+
let j = i;
|
|
614
|
+
while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
|
|
615
|
+
j++;
|
|
616
|
+
}
|
|
617
|
+
if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
|
|
618
|
+
insideQuotes = false;
|
|
619
|
+
}
|
|
620
|
+
continue;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
let j = i + 1;
|
|
624
|
+
while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
|
|
625
|
+
j++;
|
|
626
|
+
}
|
|
627
|
+
if (j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r') {
|
|
628
|
+
insideQuotes = false;
|
|
629
|
+
i++;
|
|
630
|
+
continue;
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
currentField += '"';
|
|
634
|
+
i++;
|
|
635
|
+
continue;
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
insideQuotes = true;
|
|
639
|
+
i++;
|
|
640
|
+
continue;
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
if (!insideQuotes && (char === delimiter || char === '\n' || char === '\r')) {
|
|
644
|
+
currentRow.push(currentField);
|
|
645
|
+
currentField = '';
|
|
646
|
+
|
|
647
|
+
if (char === '\n' || char === '\r') {
|
|
648
|
+
if (rowHasData) {
|
|
649
|
+
yield currentRow;
|
|
650
|
+
}
|
|
651
|
+
currentRow = [];
|
|
652
|
+
rowHasData = false;
|
|
653
|
+
lineNumber++;
|
|
654
|
+
|
|
655
|
+
if (char === '\r' && nextChar === '\n') {
|
|
656
|
+
i++;
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
i++;
|
|
661
|
+
continue;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
currentField += char;
|
|
665
|
+
i++;
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
if (escapeNext) {
|
|
669
|
+
currentField += '\\';
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
if (insideQuotes) {
|
|
673
|
+
const error = new Error('Unclosed quotes in CSV');
|
|
674
|
+
error.code = 'FAST_PATH_UNCLOSED_QUOTES';
|
|
675
|
+
error.lineNumber = lineNumber;
|
|
676
|
+
throw error;
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
if (currentField !== '' || currentRow.length > 0) {
|
|
680
|
+
currentRow.push(currentField);
|
|
681
|
+
if (rowHasData) {
|
|
682
|
+
yield currentRow;
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
compileParser(structure) {
|
|
688
|
+
const cacheKey = JSON.stringify(structure);
|
|
689
|
+
|
|
690
|
+
// Проверяем кеш
|
|
691
|
+
if (this.compilers.has(cacheKey)) {
|
|
692
|
+
this.stats.cacheHits++;
|
|
693
|
+
return this.compilers.get(cacheKey);
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
this.stats.cacheMisses++;
|
|
697
|
+
|
|
698
|
+
let parser;
|
|
699
|
+
switch (structure.recommendedEngine) {
|
|
700
|
+
case 'SIMPLE':
|
|
701
|
+
parser = this._createSimpleParser(structure);
|
|
702
|
+
this.stats.simpleParserCount++;
|
|
703
|
+
break;
|
|
704
|
+
case 'QUOTE_AWARE':
|
|
705
|
+
parser = this._createQuoteAwareParser(structure);
|
|
706
|
+
this.stats.quoteAwareParserCount++;
|
|
707
|
+
break;
|
|
708
|
+
case 'STANDARD':
|
|
709
|
+
parser = this._createQuoteAwareParser(structure);
|
|
710
|
+
this.stats.standardParserCount++;
|
|
711
|
+
break;
|
|
712
|
+
default:
|
|
713
|
+
parser = this._createQuoteAwareParser(structure);
|
|
714
|
+
this.stats.standardParserCount++;
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
// Кешируем парсер
|
|
718
|
+
this.compilers.set(cacheKey, parser);
|
|
719
|
+
|
|
720
|
+
return parser;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
/**
|
|
724
|
+
* Compiles a row-emitter parser for streaming conversion.
|
|
725
|
+
*/
|
|
726
|
+
compileRowEmitter(structure) {
|
|
727
|
+
const cacheKey = JSON.stringify(structure);
|
|
728
|
+
|
|
729
|
+
if (this.rowCompilers.has(cacheKey)) {
|
|
730
|
+
return this.rowCompilers.get(cacheKey);
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
let emitter;
|
|
734
|
+
switch (structure.recommendedEngine) {
|
|
735
|
+
case 'SIMPLE':
|
|
736
|
+
emitter = this._createSimpleRowEmitter(structure);
|
|
737
|
+
break;
|
|
738
|
+
case 'QUOTE_AWARE':
|
|
739
|
+
emitter = this._createQuoteAwareRowEmitter(structure);
|
|
740
|
+
break;
|
|
741
|
+
case 'STANDARD':
|
|
742
|
+
emitter = this._createQuoteAwareRowEmitter(structure);
|
|
743
|
+
break;
|
|
744
|
+
default:
|
|
745
|
+
emitter = this._createQuoteAwareRowEmitter(structure);
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
this.rowCompilers.set(cacheKey, emitter);
|
|
749
|
+
return emitter;
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
/**
|
|
753
|
+
* Iterates rows without allocating the full result set.
|
|
754
|
+
*/
|
|
755
|
+
*iterateRows(csv, options = {}) {
|
|
756
|
+
const structure = this._getStructureForParse(csv, options);
|
|
757
|
+
const useEscapes = structure.hasBackslashes;
|
|
758
|
+
|
|
759
|
+
switch (structure.recommendedEngine) {
|
|
760
|
+
case 'SIMPLE':
|
|
761
|
+
if (useEscapes) {
|
|
762
|
+
yield* this._simpleEscapedRowsGenerator(csv, structure.delimiter);
|
|
763
|
+
} else {
|
|
764
|
+
yield* this._simpleRowsGenerator(csv, structure.delimiter);
|
|
765
|
+
}
|
|
766
|
+
break;
|
|
767
|
+
case 'QUOTE_AWARE':
|
|
768
|
+
if (useEscapes) {
|
|
769
|
+
yield* this._quoteAwareEscapedRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
|
|
770
|
+
} else {
|
|
771
|
+
yield* this._quoteAwareRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
|
|
772
|
+
}
|
|
773
|
+
break;
|
|
774
|
+
case 'STANDARD':
|
|
775
|
+
if (useEscapes) {
|
|
776
|
+
yield* this._quoteAwareEscapedRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
|
|
777
|
+
} else {
|
|
778
|
+
yield* this._quoteAwareRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
|
|
779
|
+
}
|
|
780
|
+
break;
|
|
781
|
+
default:
|
|
782
|
+
if (useEscapes) {
|
|
783
|
+
yield* this._quoteAwareEscapedRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
|
|
784
|
+
} else {
|
|
785
|
+
yield* this._quoteAwareRowsGenerator(csv, structure.delimiter, structure.hasEscapedQuotes);
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
/**
|
|
791
|
+
* Парсит CSV с использованием оптимального парсера
|
|
792
|
+
*/
|
|
793
|
+
parse(csv, options = {}) {
|
|
794
|
+
const structure = this._getStructureForParse(csv, options);
|
|
795
|
+
const parser = this.compileParser(structure);
|
|
796
|
+
|
|
797
|
+
return parser(csv);
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
/**
|
|
801
|
+
* Parses CSV and emits rows via a callback to reduce memory usage.
|
|
802
|
+
*/
|
|
803
|
+
parseRows(csv, options = {}, onRow) {
|
|
804
|
+
for (const row of this.iterateRows(csv, options)) {
|
|
805
|
+
onRow(row);
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
/**
|
|
810
|
+
* Возвращает статистику использования парсеров
|
|
811
|
+
*/
|
|
812
|
+
getStats() {
|
|
813
|
+
return {
|
|
814
|
+
...this.stats,
|
|
815
|
+
totalParsers: this.compilers.size,
|
|
816
|
+
hitRate: this.stats.cacheHits / (this.stats.cacheHits + this.stats.cacheMisses) || 0
|
|
817
|
+
};
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
/**
|
|
821
|
+
* Сбрасывает статистику и кеш
|
|
822
|
+
*/
|
|
823
|
+
reset() {
|
|
824
|
+
this.compilers.clear();
|
|
825
|
+
this.rowCompilers.clear();
|
|
826
|
+
this.stats = {
|
|
827
|
+
simpleParserCount: 0,
|
|
828
|
+
quoteAwareParserCount: 0,
|
|
829
|
+
standardParserCount: 0,
|
|
830
|
+
cacheHits: 0,
|
|
831
|
+
cacheMisses: 0
|
|
832
|
+
};
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
|
|
347
836
|
// Expose FastPathEngine as this module's CommonJS export value.
module.exports = FastPathEngine;
|