jtcsv 2.2.7 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -1
- package/bin/jtcsv.js +891 -821
- package/bin/jtcsv.ts +2534 -0
- package/csv-to-json.js +168 -145
- package/dist/jtcsv-core.cjs.js +1407 -0
- package/dist/jtcsv-core.cjs.js.map +1 -0
- package/dist/jtcsv-core.esm.js +1379 -0
- package/dist/jtcsv-core.esm.js.map +1 -0
- package/dist/jtcsv-core.umd.js +1413 -0
- package/dist/jtcsv-core.umd.js.map +1 -0
- package/dist/jtcsv-full.cjs.js +1912 -0
- package/dist/jtcsv-full.cjs.js.map +1 -0
- package/dist/jtcsv-full.esm.js +1880 -0
- package/dist/jtcsv-full.esm.js.map +1 -0
- package/dist/jtcsv-full.umd.js +1918 -0
- package/dist/jtcsv-full.umd.js.map +1 -0
- package/dist/jtcsv-workers.esm.js +759 -0
- package/dist/jtcsv-workers.esm.js.map +1 -0
- package/dist/jtcsv-workers.umd.js +773 -0
- package/dist/jtcsv-workers.umd.js.map +1 -0
- package/dist/jtcsv.cjs.js +61 -19
- package/dist/jtcsv.cjs.js.map +1 -1
- package/dist/jtcsv.esm.js +61 -19
- package/dist/jtcsv.esm.js.map +1 -1
- package/dist/jtcsv.umd.js +61 -19
- package/dist/jtcsv.umd.js.map +1 -1
- package/errors.js +188 -2
- package/examples/advanced/conditional-transformations.js +446 -0
- package/examples/advanced/conditional-transformations.ts +446 -0
- package/examples/advanced/csv-parser.worker.js +89 -0
- package/examples/advanced/csv-parser.worker.ts +89 -0
- package/examples/advanced/nested-objects-example.js +306 -0
- package/examples/advanced/nested-objects-example.ts +306 -0
- package/examples/advanced/performance-optimization.js +504 -0
- package/examples/advanced/performance-optimization.ts +504 -0
- package/examples/advanced/run-demo-server.js +116 -0
- package/examples/advanced/run-demo-server.ts +116 -0
- package/examples/advanced/web-worker-usage.html +874 -0
- package/examples/async-multithreaded-example.ts +335 -0
- package/examples/cli-advanced-usage.md +288 -0
- package/examples/cli-batch-processing.ts +38 -0
- package/examples/cli-tool.js +0 -3
- package/examples/cli-tool.ts +183 -0
- package/examples/error-handling.js +21 -7
- package/examples/error-handling.ts +356 -0
- package/examples/express-api.js +0 -3
- package/examples/express-api.ts +164 -0
- package/examples/large-dataset-example.js +0 -3
- package/examples/large-dataset-example.ts +204 -0
- package/examples/ndjson-processing.js +1 -1
- package/examples/ndjson-processing.ts +456 -0
- package/examples/plugin-excel-exporter.js +3 -4
- package/examples/plugin-excel-exporter.ts +406 -0
- package/examples/react-integration.tsx +637 -0
- package/examples/schema-validation.ts +640 -0
- package/examples/simple-usage.js +254 -254
- package/examples/simple-usage.ts +194 -0
- package/examples/streaming-example.js +4 -5
- package/examples/streaming-example.ts +419 -0
- package/examples/web-workers-advanced.ts +28 -0
- package/index.d.ts +1 -3
- package/index.js +15 -1
- package/json-save.js +9 -3
- package/json-to-csv.js +168 -21
- package/package.json +69 -10
- package/plugins/express-middleware/README.md +21 -2
- package/plugins/express-middleware/example.js +3 -4
- package/plugins/express-middleware/example.ts +135 -0
- package/plugins/express-middleware/index.d.ts +1 -1
- package/plugins/express-middleware/index.js +270 -118
- package/plugins/express-middleware/index.ts +557 -0
- package/plugins/fastify-plugin/index.js +2 -4
- package/plugins/fastify-plugin/index.ts +443 -0
- package/plugins/hono/index.ts +226 -0
- package/plugins/nestjs/index.ts +201 -0
- package/plugins/nextjs-api/examples/ConverterComponent.tsx +386 -0
- package/plugins/nextjs-api/examples/api-convert.js +0 -2
- package/plugins/nextjs-api/examples/api-convert.ts +67 -0
- package/plugins/nextjs-api/index.tsx +339 -0
- package/plugins/nextjs-api/route.js +2 -3
- package/plugins/nextjs-api/route.ts +370 -0
- package/plugins/nuxt/index.ts +94 -0
- package/plugins/nuxt/runtime/composables/useJtcsv.ts +100 -0
- package/plugins/nuxt/runtime/plugin.ts +71 -0
- package/plugins/remix/index.js +1 -1
- package/plugins/remix/index.ts +260 -0
- package/plugins/sveltekit/index.js +1 -1
- package/plugins/sveltekit/index.ts +301 -0
- package/plugins/trpc/index.ts +267 -0
- package/src/browser/browser-functions.ts +402 -0
- package/src/browser/core.js +92 -0
- package/src/browser/core.ts +152 -0
- package/src/browser/csv-to-json-browser.d.ts +3 -0
- package/src/browser/csv-to-json-browser.js +36 -14
- package/src/browser/csv-to-json-browser.ts +264 -0
- package/src/browser/errors-browser.ts +303 -0
- package/src/browser/extensions/plugins.js +92 -0
- package/src/browser/extensions/plugins.ts +93 -0
- package/src/browser/extensions/workers.js +39 -0
- package/src/browser/extensions/workers.ts +39 -0
- package/src/browser/globals.d.ts +5 -0
- package/src/browser/index.ts +192 -0
- package/src/browser/json-to-csv-browser.d.ts +3 -0
- package/src/browser/json-to-csv-browser.js +13 -3
- package/src/browser/json-to-csv-browser.ts +262 -0
- package/src/browser/streams.js +12 -2
- package/src/browser/streams.ts +336 -0
- package/src/browser/workers/csv-parser.worker.ts +377 -0
- package/src/browser/workers/worker-pool.ts +548 -0
- package/src/core/delimiter-cache.js +22 -8
- package/src/core/delimiter-cache.ts +310 -0
- package/src/core/node-optimizations.ts +449 -0
- package/src/core/plugin-system.js +29 -11
- package/src/core/plugin-system.ts +400 -0
- package/src/core/transform-hooks.ts +558 -0
- package/src/engines/fast-path-engine-new.ts +347 -0
- package/src/engines/fast-path-engine.ts +854 -0
- package/src/errors.ts +72 -0
- package/src/formats/ndjson-parser.ts +469 -0
- package/src/formats/tsv-parser.ts +334 -0
- package/src/index-with-plugins.js +16 -9
- package/src/index-with-plugins.ts +395 -0
- package/src/types/index.ts +255 -0
- package/src/utils/bom-utils.js +259 -0
- package/src/utils/bom-utils.ts +373 -0
- package/src/utils/encoding-support.js +124 -0
- package/src/utils/encoding-support.ts +155 -0
- package/src/utils/schema-validator.js +19 -19
- package/src/utils/schema-validator.ts +819 -0
- package/src/utils/transform-loader.js +1 -1
- package/src/utils/transform-loader.ts +389 -0
- package/src/utils/zod-adapter.js +170 -0
- package/src/utils/zod-adapter.ts +280 -0
- package/src/web-server/index.js +10 -10
- package/src/web-server/index.ts +683 -0
- package/src/workers/csv-multithreaded.ts +310 -0
- package/src/workers/csv-parser.worker.ts +227 -0
- package/src/workers/worker-pool.ts +409 -0
- package/stream-csv-to-json.js +26 -8
- package/stream-json-to-csv.js +1 -0
package/src/errors.ts
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
 * Error subclass whose `name` is set to 'ValidationError'.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 */
export class ValidationError extends Error {
  constructor(message: string) {
    super(message);

    // Override the default 'Error' name so the type shows up in logs and
    // can be matched via `err.name`.
    this.name = 'ValidationError';
  }
}
|
|
7
|
+
|
|
8
|
+
/**
 * Error subclass whose `name` is set to 'SecurityError'.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 */
export class SecurityError extends Error {
  constructor(message: string) {
    super(message);

    // Override the default 'Error' name so the type can be matched via `err.name`.
    this.name = 'SecurityError';
  }
}
|
|
14
|
+
|
|
15
|
+
/**
 * Error subclass whose `name` is set to 'ConfigurationError'.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 */
export class ConfigurationError extends Error {
  constructor(message: string) {
    super(message);

    // Override the default 'Error' name so the type can be matched via `err.name`.
    this.name = 'ConfigurationError';
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Error subclass whose `name` is set to 'FileSystemError' and which can
 * carry the underlying error that triggered it.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 * @param cause - Optional underlying error; stored on `cause` only when truthy.
 */
export class FileSystemError extends Error {
  /** Underlying error, when one was supplied to the constructor. */
  cause?: Error;

  constructor(message: string, cause?: Error) {
    super(message);
    this.name = 'FileSystemError';

    // Nothing to attach: leave `cause` unassigned.
    if (!cause) {
      return;
    }
    this.cause = cause;
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Error subclass whose `name` is set to 'LimitError'.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 */
export class LimitError extends Error {
  constructor(message: string) {
    super(message);

    // Override the default 'Error' name so the type can be matched via `err.name`.
    this.name = 'LimitError';
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Error subclass whose `name` is set to 'ParsingError' and which can carry
 * the number of the line it relates to.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 * @param lineNumber - Optional line number; stored whenever it is not
 *   `undefined` (so `0` is kept).
 */
export class ParsingError extends Error {
  /** Line number supplied to the constructor, if any. */
  lineNumber?: number;

  constructor(message: string, lineNumber?: number) {
    super(message);
    this.name = 'ParsingError';

    // Only an omitted argument is skipped; falsy numbers are still recorded.
    if (lineNumber === undefined) {
      return;
    }
    this.lineNumber = lineNumber;
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Error subclass whose `name` is set to 'JtcsvError' and which can carry a
 * string error code.
 *
 * @param message - Human-readable description, forwarded to `Error`.
 * @param code - Optional code; stored on `code` only when truthy, so an
 *   empty string is ignored.
 */
export class JtcsvError extends Error {
  /** Error code supplied to the constructor, if any. */
  code?: string;

  constructor(message: string, code?: string) {
    super(message);
    this.name = 'JtcsvError';

    // Missing or empty codes are not recorded.
    if (!code) {
      return;
    }
    this.code = code;
  }
}
|
|
64
|
+
|
|
65
|
+
/** String literal union of the error codes declared by this module. */
export type ErrorCode =
  'VALIDATION_ERROR' |
  'SECURITY_ERROR' |
  'CONFIGURATION_ERROR' |
  'FILE_SYSTEM_ERROR' |
  'LIMIT_ERROR' |
  'PARSING_ERROR' |
  'UNKNOWN_ERROR';
|
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
/**
 * NDJSON (Newline Delimited JSON) parser.
 * Supports streaming processing of large JSON files.
 *
 * @version 1.0.0
 * @date 2026-01-22
 */
|
|
8
|
+
|
|
9
|
+
/**
 * Creates a UTF-8 text decoder for the current runtime.
 *
 * Uses the global `TextDecoder` when it exists; otherwise tries the one
 * exported by the `util` module.
 *
 * @returns {TextDecoder|null} A UTF-8 decoder, or `null` when neither source
 *   is usable.
 */
function createTextDecoder() {
  const hasGlobalDecoder = typeof TextDecoder !== 'undefined';
  if (hasGlobalDecoder) {
    return new TextDecoder('utf-8');
  }

  try {
    // Any failure here (no `require`, no `util`, no TextDecoder export)
    // ends up in the catch below.
    const utilModule = require("util");
    const FallbackDecoder = utilModule.TextDecoder;
    return new FallbackDecoder('utf-8');
  } catch (_error) {
    return null;
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Resolves the `TransformStream` constructor for the current runtime.
 *
 * Uses the global `TransformStream` when it exists; otherwise tries the one
 * exported by the `stream/web` module.
 *
 * @returns {Function|null} The constructor, or `null` when loading
 *   `stream/web` fails.
 */
function getTransformStream() {
  const hasGlobalCtor = typeof TransformStream !== 'undefined';
  if (hasGlobalCtor) {
    return TransformStream;
  }

  try {
    const webStreams = require("stream/web");
    return webStreams.TransformStream;
  } catch (_error) {
    return null;
  }
}
|
|
32
|
+
|
|
33
|
+
class NdjsonParser {
  /**
   * Lazily parses NDJSON and yields one parsed value per non-blank line.
   *
   * Accepted inputs:
   *  - a string (split on "\n");
   *  - an object exposing `getReader()` (for example a WHATWG ReadableStream);
   *  - an async iterable without `getReader()` (for example a Node.js
   *    Readable stream);
   *  - an already acquired reader whose `read()` resolves to `{ done, value }`.
   *
   * A line that fails to parse never aborts iteration: it is passed to
   * `options.onError(error, line, lineNumber)` when that callback is given,
   * otherwise it is reported through `console.error`. Line numbers are
   * 1-based and count blank lines.
   *
   * @param {ReadableStream|AsyncIterable|string} input - NDJSON source.
   * @param {Object} [options] - Parsing options.
   * @param {number} [options.maxLineLength=10485760] - Maximum length of a
   *   single line, measured on the decoded text.
   * @param {Function} [options.onError] - Called as
   *   `(error, line, lineNumber)` for every line that cannot be parsed.
   * @param {number} [options.bufferSize] - Accepted for backward
   *   compatibility; not used by the implementation.
   * @returns {AsyncGenerator} Async iterator over the parsed values.
   * @throws {Error} When a line exceeds `maxLineLength`, or when no
   *   TextDecoder is available for non-string input.
   *
   * @example
   * // Using a stream
   * const stream = fs.createReadStream('data.ndjson');
   * for await (const obj of NdjsonParser.parseStream(stream)) {
   *   console.log(obj);
   * }
   *
   * @example
   * // Using a string
   * const ndjson = '{"name":"John"}\n{"name":"Jane"}';
   * for await (const obj of NdjsonParser.parseStream(ndjson)) {
   *   console.log(obj);
   * }
   */
  static async *parseStream(input: any, options: any = {}) {
    const {
      maxLineLength = 10 * 1024 * 1024,
      onError = null
    } = options;

    let lineNumber = 0;

    // String input: no decoding or buffering needed.
    if (typeof input === 'string') {
      for (const line of input.split('\n')) {
        lineNumber++;
        const parsed = NdjsonParser._parseLine(line, lineNumber, onError);
        if (parsed.ok) {
          yield parsed.value;
        }
      }
      return;
    }

    // Check for a decoder before any reader is acquired, so a failure here
    // cannot leave the source stream locked.
    const decoder = createTextDecoder();
    if (!decoder) {
      throw new Error('TextDecoder is not available in this environment');
    }

    // Text received so far that is not yet terminated by "\n".
    let pending = '';

    for await (const chunk of NdjsonParser._readChunks(input)) {
      pending += typeof chunk === 'string'
        ? chunk
        : decoder.decode(chunk, { stream: true });

      const lines = pending.split('\n');
      // The last element is the (possibly incomplete) trailing line.
      pending = lines.pop() || '';

      // Enforce the limit per line rather than on the whole buffer, so a
      // large chunk made of many short lines is not rejected.
      const tooLong = pending.length > maxLineLength ||
        lines.some((line: string) => line.length > maxLineLength);
      if (tooLong) {
        throw new Error(`Строка превышает максимальную длину ${maxLineLength} байт`);
      }

      for (const line of lines) {
        lineNumber++;
        const parsed = NdjsonParser._parseLine(line, lineNumber, onError);
        if (parsed.ok) {
          yield parsed.value;
        }
      }
    }

    // Flush bytes still held by the streaming decoder, then handle a final
    // line that was not terminated by "\n".
    pending += decoder.decode();
    if (pending.trim()) {
      lineNumber++;
      const parsed = NdjsonParser._parseLine(pending, lineNumber, onError);
      if (parsed.ok) {
        yield parsed.value;
      }
    }
  }

  /**
   * Parses a single NDJSON line and reports failures.
   *
   * @param {string} line - Raw line text.
   * @param {number} lineNumber - 1-based line number used in error reports.
   * @param {Function|null} onError - Optional `(error, line, lineNumber)` callback.
   * @returns {{ok: boolean, value: *}} `ok` is false for blank or invalid
   *   lines; otherwise `value` holds the parsed JSON (which may be `null`).
   */
  static _parseLine(line: string, lineNumber: number, onError: any) {
    if (!line.trim()) {
      return { ok: false, value: undefined };
    }

    try {
      return { ok: true, value: JSON.parse(line) };
    } catch (error) {
      if (onError) {
        onError(error, line, lineNumber);
      } else {
        console.error(`Ошибка парсинга NDJSON строки ${lineNumber}:`, (error as Error).message);
      }
      return { ok: false, value: undefined };
    }
  }

  /**
   * Yields the raw chunks of a stream-like input.
   *
   * Inputs with `getReader()` are read through that reader. Inputs without
   * it that are async iterable are iterated directly. Anything else is
   * treated as an already acquired reader. A reader's `releaseLock()` is
   * called (when present) once reading stops for any reason.
   *
   * @param {*} input - Stream, async iterable or reader.
   * @returns {AsyncGenerator} Async iterator over the chunks as delivered.
   */
  static async *_readChunks(input: any) {
    if (!input.getReader && typeof input[Symbol.asyncIterator] === 'function') {
      yield* input;
      return;
    }

    const reader = input.getReader ? input.getReader() : input;
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          return;
        }
        yield value;
      }
    } finally {
      if (reader.releaseLock) {
        reader.releaseLock();
      }
    }
  }

  /**
   * Returns the UTF-8 byte length of a string.
   *
   * Uses `Buffer.byteLength` when `Buffer` exists and `TextEncoder` otherwise.
   *
   * @param {string} text - Text to measure.
   * @returns {number} Number of bytes in the UTF-8 encoding of `text`.
   */
  static _utf8ByteLength(text: string) {
    if (typeof Buffer !== 'undefined') {
      return Buffer.byteLength(text, 'utf8');
    }
    return new TextEncoder().encode(text).length;
  }

  /**
   * Serializes an array into an NDJSON string (items joined with "\n").
   *
   * Note: `replacer` and `space` are passed straight to `JSON.stringify`; a
   * non-zero `space` makes each item span several lines.
   *
   * @param {Array} data - Items to serialize.
   * @param {Object} [options] - Formatting options.
   * @param {Function|Array|null} [options.replacer=null] - `JSON.stringify` replacer.
   * @param {number|string} [options.space=0] - `JSON.stringify` indentation.
   * @returns {string} NDJSON text.
   * @throws {Error} When `data` is not an array.
   *
   * @example
   * const data = [{ name: 'John' }, { name: 'Jane' }];
   * const ndjson = NdjsonParser.toNdjson(data);
   * // Result: '{"name":"John"}\n{"name":"Jane"}'
   */
  static toNdjson(data: any, options: any = {}) {
    if (!Array.isArray(data)) {
      throw new Error('Input must be an array');
    }

    const { replacer = null, space = 0 } = options;

    return data
      .map((item: any) => JSON.stringify(item, replacer, space))
      .join('\n');
  }

  /**
   * Parses an NDJSON string into an array.
   *
   * Blank lines are skipped. A line is dropped when parsing fails, when
   * `filter` returns a falsy value, or when the final value is `null`.
   * Errors thrown while parsing, filtering or transforming a line are passed
   * to `onError(error, line, lineNumber)` if provided and otherwise ignored.
   *
   * @param {string} ndjsonString - NDJSON text.
   * @param {Object} [options] - Parsing options.
   * @param {Function} [options.filter] - `(obj, index)` predicate; `index` is
   *   the 0-based line index.
   * @param {Function} [options.transform] - `(obj, index)` mapper applied
   *   after the filter.
   * @param {Function} [options.onError] - `(error, line, lineNumber)` with a
   *   1-based line number.
   * @returns {Array} Parsed (and optionally transformed) values.
   *
   * @example
   * const ndjson = '{"name":"John"}\n{"name":"Jane"}';
   * const data = NdjsonParser.fromNdjson(ndjson);
   * // Result: [{ name: 'John' }, { name: 'Jane' }]
   */
  static fromNdjson(ndjsonString: any, options: any = {}) {
    const {
      filter = null,
      transform = null,
      onError = null
    } = options;

    const results: any[] = [];

    ndjsonString.split('\n').forEach((line: any, index: any) => {
      if (!line.trim()) {
        return;
      }

      try {
        const obj = JSON.parse(line);

        if (filter && !filter(obj, index)) {
          return;
        }

        const result = transform ? transform(obj, index) : obj;
        if (result !== null) {
          results.push(result);
        }
      } catch (error) {
        if (onError) {
          onError(error, line, index + 1);
        }
      }
    });

    return results;
  }

  /**
   * Creates a TransformStream that turns NDJSON records into CSV rows.
   *
   * Every incoming chunk is parsed with `JSON.parse` as one record. With
   * `includeHeaders`, the keys of the first successfully processed record
   * become the column list and are emitted once as the header row; later
   * records are projected onto those columns. Without it, each record's own
   * values are emitted in order. Chunks that cannot be processed are logged
   * with `console.error` and skipped.
   *
   * @param {Object} [options] - Conversion options.
   * @param {string} [options.delimiter=','] - Field separator.
   * @param {boolean} [options.includeHeaders=true] - Emit a header row.
   * @returns {TransformStream} Transform stream emitting CSV lines ending in "\n".
   * @throws {Error} When TransformStream is not available.
   */
  static createNdjsonToCsvStream(options: any = {}) {
    const {
      delimiter = ',',
      includeHeaders = true
    } = options;

    const TransformStreamCtor = getTransformStream();
    if (!TransformStreamCtor) {
      throw new Error('TransformStream is not available in this environment');
    }

    // Column names; stays null until the first record when headers are on.
    let headers: string[] | null = null;

    return new TransformStreamCtor({
      transform(chunk: any, controller: any) {
        try {
          const obj = JSON.parse(chunk);

          if (includeHeaders && headers === null) {
            headers = Object.keys(obj);
            // Header names go through the same escaping as data fields.
            const headerRow = headers
              .map((name: string) => NdjsonParser._escapeCsvField(name, delimiter))
              .join(delimiter);
            controller.enqueue(headerRow + '\n');
          }

          const values = headers
            ? headers.map((name: string) => obj[name])
            : Object.values(obj);
          const row = values.map((value: any) => NdjsonParser._escapeCsvField(value, delimiter));

          controller.enqueue(row.join(delimiter) + '\n');
        } catch (error) {
          console.error('Ошибка преобразования NDJSON в CSV:', error);
        }
      }
    });
  }

  /**
   * Formats one value as a CSV field.
   *
   * `null` and `undefined` become an empty field. Other values are converted
   * with `String()` and wrapped in double quotes (inner quotes doubled) when
   * they contain the delimiter, a double quote, "\n" or "\r".
   *
   * @param {*} value - Value to format.
   * @param {string} delimiter - Field separator in use.
   * @returns {string} CSV-safe field text.
   */
  static _escapeCsvField(value: any, delimiter: any) {
    if (value === null || value === undefined) {
      return '';
    }

    const str = String(value);
    const needsQuoting =
      str.includes(delimiter) ||
      str.includes('"') ||
      str.includes('\n') ||
      str.includes('\r');

    return needsQuoting ? '"' + str.replace(/"/g, '""') + '"' : str;
  }

  /**
   * Creates a TransformStream that turns CSV text into NDJSON records.
   *
   * Each chunk is converted with `toString()` and split on "\n"; a trailing
   * "\r" (CRLF input) is removed from every line and blank lines are skipped.
   * With `hasHeaders`, the first non-blank line supplies the property names;
   * otherwise properties are named `field_0`, `field_1`, ...
   *
   * NOTE(review): lines are not buffered across chunks, so each chunk is
   * assumed to contain whole lines - confirm against callers.
   *
   * @param {Object} [options] - Conversion options.
   * @param {string} [options.delimiter=','] - Field separator.
   * @param {boolean} [options.hasHeaders=true] - Treat the first line as headers.
   * @returns {TransformStream} Transform stream emitting JSON lines ending in "\n".
   * @throws {Error} When TransformStream is not available.
   */
  static createCsvToNdjsonStream(options: any = {}) {
    const {
      delimiter = ',',
      hasHeaders = true
    } = options;

    const TransformStreamCtor = getTransformStream();
    if (!TransformStreamCtor) {
      throw new Error('TransformStream is not available in this environment');
    }

    // Column names; stays null until the header line has been seen.
    let headers: string[] | null = null;

    return new TransformStreamCtor({
      transform(chunk: any, controller: any) {
        for (const rawLine of chunk.toString().split('\n')) {
          // Drop the "\r" of CRLF line endings so it does not leak into the
          // last field or header name.
          const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
          if (!line.trim()) {
            continue;
          }

          const fields = NdjsonParser._parseCsvLine(line, delimiter);

          if (hasHeaders && headers === null) {
            headers = fields;
            continue;
          }

          const obj: any = {};
          if (headers) {
            headers.forEach((header: string, index: number) => {
              obj[header] = fields[index] || '';
            });
          } else {
            fields.forEach((field: string, index: number) => {
              obj[`field_${index}`] = field;
            });
          }

          controller.enqueue(JSON.stringify(obj) + '\n');
        }
      }
    });
  }

  /**
   * Splits one CSV line into fields.
   *
   * A double quote toggles quoted mode; inside quotes a doubled quote stands
   * for one literal quote and the delimiter is treated as data.
   *
   * @param {string} line - One CSV line without its line terminator.
   * @param {string} delimiter - Field separator (compared one character at a time).
   * @returns {string[]} Field values with surrounding quotes removed.
   */
  static _parseCsvLine(line: any, delimiter: any) {
    const fields: string[] = [];
    let currentField = '';
    let insideQuotes = false;

    for (let i = 0; i < line.length; i++) {
      const char = line[i];

      if (char === '"') {
        if (insideQuotes && line[i + 1] === '"') {
          // Escaped quote: emit one quote and skip its partner.
          currentField += '"';
          i++;
        } else {
          insideQuotes = !insideQuotes;
        }
      } else if (char === delimiter && !insideQuotes) {
        fields.push(currentField);
        currentField = '';
      } else {
        currentField += char;
      }
    }

    fields.push(currentField);
    return fields;
  }

  /**
   * Collects statistics about NDJSON input.
   *
   * For string input `totalLines` is the number of "\n"-separated segments
   * (blank ones included). For stream input every terminated line is
   * counted, plus a final unterminated line when it is not blank. Each
   * entry in `errors` holds the 1-based `line` number, the parser's `error`
   * message and the first 100 characters of the line as `content`.
   *
   * @param {string|ReadableStream|AsyncIterable} input - NDJSON source.
   * @returns {Promise<Object>} `{ totalLines, validLines, errorLines,
   *   totalBytes, errors, successRate }`, with `successRate` a percentage
   *   of `totalLines`.
   * @throws {Error} When no TextDecoder is available for non-string input.
   */
  static async getStats(input: any) {
    const stats = {
      totalLines: 0,
      validLines: 0,
      errorLines: 0,
      totalBytes: 0,
      errors: [] as any[],
      successRate: 0
    };

    // Classifies one line as valid or invalid; blank lines are neither.
    const inspectLine = (line: string, lineNumber: number) => {
      if (!line.trim()) {
        return;
      }
      try {
        JSON.parse(line);
        stats.validLines++;
      } catch (error) {
        stats.errorLines++;
        stats.errors.push({
          line: lineNumber,
          error: (error as Error).message,
          content: line.substring(0, 100)
        });
      }
    };

    if (typeof input === 'string') {
      stats.totalBytes = NdjsonParser._utf8ByteLength(input);
      const lines = input.split('\n');
      stats.totalLines = lines.length;
      // Report each failure against its own line number.
      lines.forEach((line: string, index: number) => inspectLine(line, index + 1));
    } else {
      const decoder = createTextDecoder();
      if (!decoder) {
        throw new Error('TextDecoder is not available in this environment');
      }

      // Text received so far that is not yet terminated by "\n".
      let pending = '';

      for await (const chunk of NdjsonParser._readChunks(input)) {
        if (typeof chunk === 'string') {
          stats.totalBytes += NdjsonParser._utf8ByteLength(chunk);
          pending += chunk;
        } else {
          stats.totalBytes += chunk.length;
          pending += decoder.decode(chunk, { stream: true });
        }

        const lines = pending.split('\n');
        pending = lines.pop() || '';

        for (const line of lines) {
          stats.totalLines++;
          inspectLine(line, stats.totalLines);
        }
      }

      // Flush the decoder and account for a final unterminated line.
      pending += decoder.decode();
      if (pending.trim()) {
        stats.totalLines++;
        inspectLine(pending, stats.totalLines);
      }
    }

    stats.successRate = stats.totalLines > 0 ? (stats.validLines / stats.totalLines) * 100 : 0;
    return stats;
  }
}
|
|
468
|
+
|
|
469
|
+
export default NdjsonParser;
|