jtcsv 2.2.7 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -1
- package/bin/jtcsv.js +891 -821
- package/bin/jtcsv.ts +2534 -0
- package/csv-to-json.js +168 -145
- package/dist/jtcsv-core.cjs.js +1407 -0
- package/dist/jtcsv-core.cjs.js.map +1 -0
- package/dist/jtcsv-core.esm.js +1379 -0
- package/dist/jtcsv-core.esm.js.map +1 -0
- package/dist/jtcsv-core.umd.js +1413 -0
- package/dist/jtcsv-core.umd.js.map +1 -0
- package/dist/jtcsv-full.cjs.js +1912 -0
- package/dist/jtcsv-full.cjs.js.map +1 -0
- package/dist/jtcsv-full.esm.js +1880 -0
- package/dist/jtcsv-full.esm.js.map +1 -0
- package/dist/jtcsv-full.umd.js +1918 -0
- package/dist/jtcsv-full.umd.js.map +1 -0
- package/dist/jtcsv-workers.esm.js +759 -0
- package/dist/jtcsv-workers.esm.js.map +1 -0
- package/dist/jtcsv-workers.umd.js +773 -0
- package/dist/jtcsv-workers.umd.js.map +1 -0
- package/dist/jtcsv.cjs.js +61 -19
- package/dist/jtcsv.cjs.js.map +1 -1
- package/dist/jtcsv.esm.js +61 -19
- package/dist/jtcsv.esm.js.map +1 -1
- package/dist/jtcsv.umd.js +61 -19
- package/dist/jtcsv.umd.js.map +1 -1
- package/errors.js +188 -2
- package/examples/advanced/conditional-transformations.js +446 -0
- package/examples/advanced/conditional-transformations.ts +446 -0
- package/examples/advanced/csv-parser.worker.js +89 -0
- package/examples/advanced/csv-parser.worker.ts +89 -0
- package/examples/advanced/nested-objects-example.js +306 -0
- package/examples/advanced/nested-objects-example.ts +306 -0
- package/examples/advanced/performance-optimization.js +504 -0
- package/examples/advanced/performance-optimization.ts +504 -0
- package/examples/advanced/run-demo-server.js +116 -0
- package/examples/advanced/run-demo-server.ts +116 -0
- package/examples/advanced/web-worker-usage.html +874 -0
- package/examples/async-multithreaded-example.ts +335 -0
- package/examples/cli-advanced-usage.md +288 -0
- package/examples/cli-batch-processing.ts +38 -0
- package/examples/cli-tool.js +0 -3
- package/examples/cli-tool.ts +183 -0
- package/examples/error-handling.js +21 -7
- package/examples/error-handling.ts +356 -0
- package/examples/express-api.js +0 -3
- package/examples/express-api.ts +164 -0
- package/examples/large-dataset-example.js +0 -3
- package/examples/large-dataset-example.ts +204 -0
- package/examples/ndjson-processing.js +1 -1
- package/examples/ndjson-processing.ts +456 -0
- package/examples/plugin-excel-exporter.js +3 -4
- package/examples/plugin-excel-exporter.ts +406 -0
- package/examples/react-integration.tsx +637 -0
- package/examples/schema-validation.ts +640 -0
- package/examples/simple-usage.js +254 -254
- package/examples/simple-usage.ts +194 -0
- package/examples/streaming-example.js +4 -5
- package/examples/streaming-example.ts +419 -0
- package/examples/web-workers-advanced.ts +28 -0
- package/index.d.ts +1 -3
- package/index.js +15 -1
- package/json-save.js +9 -3
- package/json-to-csv.js +168 -21
- package/package.json +69 -10
- package/plugins/express-middleware/README.md +21 -2
- package/plugins/express-middleware/example.js +3 -4
- package/plugins/express-middleware/example.ts +135 -0
- package/plugins/express-middleware/index.d.ts +1 -1
- package/plugins/express-middleware/index.js +270 -118
- package/plugins/express-middleware/index.ts +557 -0
- package/plugins/fastify-plugin/index.js +2 -4
- package/plugins/fastify-plugin/index.ts +443 -0
- package/plugins/hono/index.ts +226 -0
- package/plugins/nestjs/index.ts +201 -0
- package/plugins/nextjs-api/examples/ConverterComponent.tsx +386 -0
- package/plugins/nextjs-api/examples/api-convert.js +0 -2
- package/plugins/nextjs-api/examples/api-convert.ts +67 -0
- package/plugins/nextjs-api/index.tsx +339 -0
- package/plugins/nextjs-api/route.js +2 -3
- package/plugins/nextjs-api/route.ts +370 -0
- package/plugins/nuxt/index.ts +94 -0
- package/plugins/nuxt/runtime/composables/useJtcsv.ts +100 -0
- package/plugins/nuxt/runtime/plugin.ts +71 -0
- package/plugins/remix/index.js +1 -1
- package/plugins/remix/index.ts +260 -0
- package/plugins/sveltekit/index.js +1 -1
- package/plugins/sveltekit/index.ts +301 -0
- package/plugins/trpc/index.ts +267 -0
- package/src/browser/browser-functions.ts +402 -0
- package/src/browser/core.js +92 -0
- package/src/browser/core.ts +152 -0
- package/src/browser/csv-to-json-browser.d.ts +3 -0
- package/src/browser/csv-to-json-browser.js +36 -14
- package/src/browser/csv-to-json-browser.ts +264 -0
- package/src/browser/errors-browser.ts +303 -0
- package/src/browser/extensions/plugins.js +92 -0
- package/src/browser/extensions/plugins.ts +93 -0
- package/src/browser/extensions/workers.js +39 -0
- package/src/browser/extensions/workers.ts +39 -0
- package/src/browser/globals.d.ts +5 -0
- package/src/browser/index.ts +192 -0
- package/src/browser/json-to-csv-browser.d.ts +3 -0
- package/src/browser/json-to-csv-browser.js +13 -3
- package/src/browser/json-to-csv-browser.ts +262 -0
- package/src/browser/streams.js +12 -2
- package/src/browser/streams.ts +336 -0
- package/src/browser/workers/csv-parser.worker.ts +377 -0
- package/src/browser/workers/worker-pool.ts +548 -0
- package/src/core/delimiter-cache.js +22 -8
- package/src/core/delimiter-cache.ts +310 -0
- package/src/core/node-optimizations.ts +449 -0
- package/src/core/plugin-system.js +29 -11
- package/src/core/plugin-system.ts +400 -0
- package/src/core/transform-hooks.ts +558 -0
- package/src/engines/fast-path-engine-new.ts +347 -0
- package/src/engines/fast-path-engine.ts +854 -0
- package/src/errors.ts +72 -0
- package/src/formats/ndjson-parser.ts +469 -0
- package/src/formats/tsv-parser.ts +334 -0
- package/src/index-with-plugins.js +16 -9
- package/src/index-with-plugins.ts +395 -0
- package/src/types/index.ts +255 -0
- package/src/utils/bom-utils.js +259 -0
- package/src/utils/bom-utils.ts +373 -0
- package/src/utils/encoding-support.js +124 -0
- package/src/utils/encoding-support.ts +155 -0
- package/src/utils/schema-validator.js +19 -19
- package/src/utils/schema-validator.ts +819 -0
- package/src/utils/transform-loader.js +1 -1
- package/src/utils/transform-loader.ts +389 -0
- package/src/utils/zod-adapter.js +170 -0
- package/src/utils/zod-adapter.ts +280 -0
- package/src/web-server/index.js +10 -10
- package/src/web-server/index.ts +683 -0
- package/src/workers/csv-multithreaded.ts +310 -0
- package/src/workers/csv-parser.worker.ts +227 -0
- package/src/workers/worker-pool.ts +409 -0
- package/stream-csv-to-json.js +26 -8
- package/stream-json-to-csv.js +1 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Многопоточная обработка CSV данных
|
|
3
|
+
*
|
|
4
|
+
* Интеграция Worker Pool с существующими асинхронными функциями
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as os from 'os';

import { getWorkerPool, createWorkerTask } from './worker-pool';
import { chunkData, createChunkTasks, mergeChunkResults } from './csv-parser.worker';
import { CsvToJsonOptions, AsyncCsvToJsonOptions, AnyArray, AnyObject } from '../types';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Многопоточная версия csvToJson
|
|
13
|
+
*
|
|
14
|
+
* @param csv - CSV строка для парсинга
|
|
15
|
+
* @param options - Опции с поддержкой многопоточности
|
|
16
|
+
* @returns Promise с результатом парсинга
|
|
17
|
+
*/
|
|
18
|
+
export async function csvToJsonMultithreaded(
|
|
19
|
+
csv: string,
|
|
20
|
+
options: AsyncCsvToJsonOptions = {}
|
|
21
|
+
): Promise<AnyArray> {
|
|
22
|
+
const {
|
|
23
|
+
useWorkers = true,
|
|
24
|
+
workerCount,
|
|
25
|
+
chunkSize = 1000,
|
|
26
|
+
onProgress,
|
|
27
|
+
...csvOptions
|
|
28
|
+
} = options;
|
|
29
|
+
|
|
30
|
+
// Если многопоточность отключена или данные маленькие, используем обычную версию
|
|
31
|
+
if (!useWorkers || csv.length < 10000) {
|
|
32
|
+
const { csvToJson } = await import('../../csv-to-json');
|
|
33
|
+
return csvToJson(csv, csvOptions);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Разделяем CSV на строки
|
|
37
|
+
const lines = csv.split('\n').filter(line => line.trim() !== '');
|
|
38
|
+
|
|
39
|
+
if (lines.length === 0) {
|
|
40
|
+
return [];
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Определяем есть ли заголовки
|
|
44
|
+
const hasHeaders = csvOptions.hasHeaders !== false;
|
|
45
|
+
|
|
46
|
+
// Разделяем данные на чанки
|
|
47
|
+
let dataChunks: string[][];
|
|
48
|
+
let headers: string[] = [];
|
|
49
|
+
|
|
50
|
+
if (hasHeaders) {
|
|
51
|
+
headers = lines[0].split(csvOptions.delimiter || ';');
|
|
52
|
+
const dataLines = lines.slice(1);
|
|
53
|
+
dataChunks = chunkData(dataLines, chunkSize);
|
|
54
|
+
} else {
|
|
55
|
+
dataChunks = chunkData(lines, chunkSize);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Создаем задачи для воркеров
|
|
59
|
+
const tasks = dataChunks.map((chunk, index) => {
|
|
60
|
+
// Восстанавливаем CSV чанк с заголовками если нужно
|
|
61
|
+
let chunkCsv = chunk.join('\n');
|
|
62
|
+
if (hasHeaders && headers.length > 0) {
|
|
63
|
+
chunkCsv = headers.join(csvOptions.delimiter || ';') + '\n' + chunkCsv;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return createWorkerTask('csv_parse', chunkCsv, {
|
|
67
|
+
...csvOptions,
|
|
68
|
+
chunkIndex: index,
|
|
69
|
+
totalChunks: dataChunks.length,
|
|
70
|
+
hasHeaders: index === 0 ? hasHeaders : false // Только первый чанк получает заголовки
|
|
71
|
+
});
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
// Получаем пул воркеров
|
|
75
|
+
const workerPool = getWorkerPool(workerCount);
|
|
76
|
+
|
|
77
|
+
// Отправляем прогресс если есть callback
|
|
78
|
+
if (onProgress) {
|
|
79
|
+
workerPool.on('taskCompleted', ({ task }) => {
|
|
80
|
+
const chunkIndex = task.options?.chunkIndex || 0;
|
|
81
|
+
const totalChunks = task.options?.totalChunks || 1;
|
|
82
|
+
|
|
83
|
+
onProgress({
|
|
84
|
+
processed: chunkIndex + 1,
|
|
85
|
+
total: totalChunks,
|
|
86
|
+
percentage: Math.round(((chunkIndex + 1) / totalChunks) * 100)
|
|
87
|
+
});
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Выполняем задачи параллельно
|
|
92
|
+
const results = await workerPool.executeTasks(tasks);
|
|
93
|
+
|
|
94
|
+
// Объединяем результаты
|
|
95
|
+
const mergedResults = mergeChunkResults(results);
|
|
96
|
+
|
|
97
|
+
return mergedResults;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Многопоточная версия jsonToCsv
|
|
102
|
+
*
|
|
103
|
+
* @param data - JSON данные для конвертации
|
|
104
|
+
* @param options - Опции с поддержкой многопоточности
|
|
105
|
+
* @returns Promise с CSV строкой
|
|
106
|
+
*/
|
|
107
|
+
export async function jsonToCsvMultithreaded(
|
|
108
|
+
data: AnyArray | AnyObject,
|
|
109
|
+
options: any = {} // TODO: Добавить тип AsyncJsonToCsvOptions
|
|
110
|
+
): Promise<string> {
|
|
111
|
+
const {
|
|
112
|
+
useWorkers = true,
|
|
113
|
+
workerCount,
|
|
114
|
+
chunkSize = 1000,
|
|
115
|
+
onProgress,
|
|
116
|
+
...jsonOptions
|
|
117
|
+
} = options;
|
|
118
|
+
|
|
119
|
+
// Подготавливаем данные
|
|
120
|
+
const dataArray = Array.isArray(data) ? data : [data];
|
|
121
|
+
|
|
122
|
+
// Если многопоточность отключена или данные маленькие, используем обычную версию
|
|
123
|
+
if (!useWorkers || dataArray.length < 1000) {
|
|
124
|
+
const { jsonToCsv } = await import('../../json-to-csv');
|
|
125
|
+
return jsonToCsv(dataArray, jsonOptions);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Разделяем данные на чанки
|
|
129
|
+
const dataChunks = chunkData(dataArray, chunkSize);
|
|
130
|
+
|
|
131
|
+
// Создаем задачи для воркеров
|
|
132
|
+
const tasks = dataChunks.map((chunk, index) => {
|
|
133
|
+
return createWorkerTask('json_to_csv', chunk, {
|
|
134
|
+
...jsonOptions,
|
|
135
|
+
chunkIndex: index,
|
|
136
|
+
totalChunks: dataChunks.length,
|
|
137
|
+
includeHeaders: index === 0 // Только первый чанк включает заголовки
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
// Получаем пул воркеров
|
|
142
|
+
const workerPool = getWorkerPool(workerCount);
|
|
143
|
+
|
|
144
|
+
// Отправляем прогресс если есть callback
|
|
145
|
+
if (onProgress) {
|
|
146
|
+
workerPool.on('taskCompleted', ({ task }) => {
|
|
147
|
+
const chunkIndex = task.options?.chunkIndex || 0;
|
|
148
|
+
const totalChunks = task.options?.totalChunks || 1;
|
|
149
|
+
|
|
150
|
+
onProgress({
|
|
151
|
+
processed: chunkIndex + 1,
|
|
152
|
+
total: totalChunks,
|
|
153
|
+
percentage: Math.round(((chunkIndex + 1) / totalChunks) * 100)
|
|
154
|
+
});
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Выполняем задачи параллельно
|
|
159
|
+
const results = await workerPool.executeTasks(tasks);
|
|
160
|
+
|
|
161
|
+
// Объединяем CSV чанки
|
|
162
|
+
let finalCsv = '';
|
|
163
|
+
|
|
164
|
+
for (let i = 0; i < results.length; i++) {
|
|
165
|
+
const chunkCsv = results[i];
|
|
166
|
+
|
|
167
|
+
if (i === 0) {
|
|
168
|
+
// Первый чанк включает заголовки
|
|
169
|
+
finalCsv = chunkCsv;
|
|
170
|
+
} else {
|
|
171
|
+
// Последующие чанки - только данные (убираем первую строку если это заголовки)
|
|
172
|
+
const lines = chunkCsv.split('\n');
|
|
173
|
+
if (lines.length > 1 && jsonOptions.includeHeaders !== false) {
|
|
174
|
+
finalCsv += '\n' + lines.slice(1).join('\n');
|
|
175
|
+
} else {
|
|
176
|
+
finalCsv += '\n' + chunkCsv;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
return finalCsv;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Бенчмарк многопоточной обработки
|
|
186
|
+
*
|
|
187
|
+
* @param data - Данные для тестирования
|
|
188
|
+
* @param iterations - Количество итераций
|
|
189
|
+
* @returns Результаты бенчмарка
|
|
190
|
+
*/
|
|
191
|
+
export async function benchmarkMultithreaded(
|
|
192
|
+
data: AnyArray | string,
|
|
193
|
+
iterations: number = 10
|
|
194
|
+
): Promise<{
|
|
195
|
+
singleThread: number;
|
|
196
|
+
multiThread: number;
|
|
197
|
+
speedup: number;
|
|
198
|
+
efficiency: number;
|
|
199
|
+
}> {
|
|
200
|
+
const workerPool = getWorkerPool();
|
|
201
|
+
const cpuCount = require('os').cpus().length;
|
|
202
|
+
|
|
203
|
+
// Тестируем однопоточную обработку
|
|
204
|
+
const singleThreadStart = Date.now();
|
|
205
|
+
|
|
206
|
+
if (typeof data === 'string') {
|
|
207
|
+
// CSV парсинг
|
|
208
|
+
const { csvToJson } = await import('../../csv-to-json');
|
|
209
|
+
for (let i = 0; i < iterations; i++) {
|
|
210
|
+
await csvToJson(data);
|
|
211
|
+
}
|
|
212
|
+
} else {
|
|
213
|
+
// JSON to CSV
|
|
214
|
+
const { jsonToCsv } = await import('../../json-to-csv');
|
|
215
|
+
for (let i = 0; i < iterations; i++) {
|
|
216
|
+
await jsonToCsv(data);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
const singleThreadTime = Date.now() - singleThreadStart;
|
|
221
|
+
|
|
222
|
+
// Тестируем многопоточную обработку
|
|
223
|
+
const multiThreadStart = Date.now();
|
|
224
|
+
|
|
225
|
+
if (typeof data === 'string') {
|
|
226
|
+
for (let i = 0; i < iterations; i++) {
|
|
227
|
+
await csvToJsonMultithreaded(data, {
|
|
228
|
+
useWorkers: true,
|
|
229
|
+
workerCount: cpuCount - 1
|
|
230
|
+
});
|
|
231
|
+
}
|
|
232
|
+
} else {
|
|
233
|
+
for (let i = 0; i < iterations; i++) {
|
|
234
|
+
await jsonToCsvMultithreaded(data, {
|
|
235
|
+
useWorkers: true,
|
|
236
|
+
workerCount: cpuCount - 1
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
const multiThreadTime = Date.now() - multiThreadStart;
|
|
242
|
+
|
|
243
|
+
// Вычисляем ускорение и эффективность
|
|
244
|
+
const speedup = singleThreadTime / multiThreadTime;
|
|
245
|
+
const efficiency = (speedup / (cpuCount - 1)) * 100;
|
|
246
|
+
|
|
247
|
+
return {
|
|
248
|
+
singleThread: singleThreadTime,
|
|
249
|
+
multiThread: multiThreadTime,
|
|
250
|
+
speedup,
|
|
251
|
+
efficiency
|
|
252
|
+
};
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
/**
|
|
256
|
+
* Оптимизирует размер чанка на основе размера данных
|
|
257
|
+
*
|
|
258
|
+
* @param dataSize - Размер данных (количество строк или байт)
|
|
259
|
+
* @param workerCount - Количество воркеров
|
|
260
|
+
* @returns Оптимальный размер чанка
|
|
261
|
+
*/
|
|
262
|
+
export function optimizeChunkSize(
|
|
263
|
+
dataSize: number,
|
|
264
|
+
workerCount: number = Math.max(1, require('os').cpus().length - 1)
|
|
265
|
+
): number {
|
|
266
|
+
// Базовый размер чанка
|
|
267
|
+
let chunkSize = 1000;
|
|
268
|
+
|
|
269
|
+
if (dataSize > 1000000) {
|
|
270
|
+
// Очень большие данные - увеличиваем размер чанка
|
|
271
|
+
chunkSize = 10000;
|
|
272
|
+
} else if (dataSize > 100000) {
|
|
273
|
+
// Большие данные
|
|
274
|
+
chunkSize = 5000;
|
|
275
|
+
} else if (dataSize > 10000) {
|
|
276
|
+
// Средние данные
|
|
277
|
+
chunkSize = 2000;
|
|
278
|
+
} else if (dataSize < 1000) {
|
|
279
|
+
// Маленькие данные - уменьшаем размер чанка
|
|
280
|
+
chunkSize = Math.max(100, Math.ceil(dataSize / workerCount));
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// Учитываем количество воркеров
|
|
284
|
+
chunkSize = Math.max(chunkSize, Math.ceil(dataSize / (workerCount * 10)));
|
|
285
|
+
|
|
286
|
+
return chunkSize;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
/**
|
|
290
|
+
* Мониторинг использования ресурсов
|
|
291
|
+
*
|
|
292
|
+
* @returns Статистика использования ресурсов
|
|
293
|
+
*/
|
|
294
|
+
export function getResourceUsage(): {
|
|
295
|
+
cpuUsage: NodeJS.CpuUsage;
|
|
296
|
+
memoryUsage: NodeJS.MemoryUsage;
|
|
297
|
+
workerStats: any;
|
|
298
|
+
} {
|
|
299
|
+
const cpuUsage = process.cpuUsage();
|
|
300
|
+
const memoryUsage = process.memoryUsage();
|
|
301
|
+
|
|
302
|
+
const workerPool = getWorkerPool();
|
|
303
|
+
const workerStats = workerPool.getStats();
|
|
304
|
+
|
|
305
|
+
return {
|
|
306
|
+
cpuUsage,
|
|
307
|
+
memoryUsage,
|
|
308
|
+
workerStats
|
|
309
|
+
};
|
|
310
|
+
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker для парсинга CSV данных
|
|
3
|
+
*
|
|
4
|
+
* Выполняется в отдельном потоке для многопоточной обработки
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { parentPort, workerData, isMainThread } from 'worker_threads';
|
|
8
|
+
import { WorkerTask, WorkerResult } from '../types';
|
|
9
|
+
|
|
10
|
+
// Импортируем функции парсинга из основных модулей
|
|
11
|
+
// Используем динамический импорт чтобы избежать циклических зависимостей
|
|
12
|
+
let csvToJson: any = null;
|
|
13
|
+
let jsonToCsv: any = null;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Инициализирует функции парсинга
|
|
17
|
+
*/
|
|
18
|
+
async function initializeParserFunctions(): Promise<void> {
|
|
19
|
+
if (!csvToJson) {
|
|
20
|
+
// Динамический импорт чтобы избежать проблем с циклическими зависимостями
|
|
21
|
+
const csvModule = await import('../../csv-to-json');
|
|
22
|
+
csvToJson = csvModule.csvToJson;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
if (!jsonToCsv) {
|
|
26
|
+
const jsonModule = await import('../../json-to-csv');
|
|
27
|
+
jsonToCsv = jsonModule.jsonToCsv;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Обрабатывает задачу парсинга CSV
|
|
33
|
+
*/
|
|
34
|
+
async function processCsvParsing(task: WorkerTask): Promise<any> {
|
|
35
|
+
await initializeParserFunctions();
|
|
36
|
+
|
|
37
|
+
const { data, options } = task;
|
|
38
|
+
|
|
39
|
+
if (typeof data !== 'string') {
|
|
40
|
+
throw new Error('CSV data must be a string');
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
return csvToJson(data, options);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Обрабатывает задачу конвертации JSON в CSV
|
|
48
|
+
*/
|
|
49
|
+
async function processJsonToCsv(task: WorkerTask): Promise<any> {
|
|
50
|
+
await initializeParserFunctions();
|
|
51
|
+
|
|
52
|
+
const { data, options } = task;
|
|
53
|
+
|
|
54
|
+
if (!Array.isArray(data) && (typeof data !== 'object' || data === null)) {
|
|
55
|
+
throw new Error('JSON data must be an array or object');
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return jsonToCsv(data, options);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Обрабатывает задачу обработки чанка данных
|
|
63
|
+
*/
|
|
64
|
+
async function processDataChunk(task: WorkerTask): Promise<any> {
|
|
65
|
+
const { type, data, options } = task;
|
|
66
|
+
|
|
67
|
+
switch (type) {
|
|
68
|
+
case 'csv_parse':
|
|
69
|
+
return processCsvParsing(task);
|
|
70
|
+
|
|
71
|
+
case 'json_to_csv':
|
|
72
|
+
return processJsonToCsv(task);
|
|
73
|
+
|
|
74
|
+
case 'transform_data':
|
|
75
|
+
// Простая трансформация данных
|
|
76
|
+
if (Array.isArray(data)) {
|
|
77
|
+
return data.map((item, index) => ({
|
|
78
|
+
...item,
|
|
79
|
+
_workerId: workerData?.workerId || 0,
|
|
80
|
+
_chunkIndex: index
|
|
81
|
+
}));
|
|
82
|
+
}
|
|
83
|
+
return data;
|
|
84
|
+
|
|
85
|
+
case 'validate_data':
|
|
86
|
+
// Валидация данных
|
|
87
|
+
if (Array.isArray(data)) {
|
|
88
|
+
const invalidItems = data.filter(item =>
|
|
89
|
+
item === null || item === undefined ||
|
|
90
|
+
(typeof item === 'object' && Object.keys(item).length === 0)
|
|
91
|
+
);
|
|
92
|
+
|
|
93
|
+
return {
|
|
94
|
+
valid: invalidItems.length === 0,
|
|
95
|
+
totalItems: data.length,
|
|
96
|
+
invalidItems: invalidItems.length,
|
|
97
|
+
invalidIndexes: data
|
|
98
|
+
.map((item, index) => ({ item, index }))
|
|
99
|
+
.filter(({ item }) =>
|
|
100
|
+
item === null || item === undefined ||
|
|
101
|
+
(typeof item === 'object' && Object.keys(item).length === 0)
|
|
102
|
+
)
|
|
103
|
+
.map(({ index }) => index)
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
return { valid: true, totalItems: 1 };
|
|
107
|
+
|
|
108
|
+
default:
|
|
109
|
+
throw new Error(`Unknown task type: ${type}`);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Основная функция воркера
|
|
115
|
+
*/
|
|
116
|
+
async function main() {
|
|
117
|
+
if (isMainThread) {
|
|
118
|
+
console.error('Worker script should not be run in main thread');
|
|
119
|
+
process.exit(1);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Регистрируем обработчик сообщений
|
|
123
|
+
parentPort?.on('message', async (task: WorkerTask) => {
|
|
124
|
+
const startTime = Date.now();
|
|
125
|
+
|
|
126
|
+
try {
|
|
127
|
+
const result = await processDataChunk(task);
|
|
128
|
+
const duration = Date.now() - startTime;
|
|
129
|
+
|
|
130
|
+
const workerResult: WorkerResult = {
|
|
131
|
+
id: task.id,
|
|
132
|
+
result,
|
|
133
|
+
duration
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
parentPort?.postMessage(workerResult);
|
|
137
|
+
} catch (error: any) {
|
|
138
|
+
const duration = Date.now() - startTime;
|
|
139
|
+
|
|
140
|
+
const workerResult: WorkerResult = {
|
|
141
|
+
id: task.id,
|
|
142
|
+
result: null,
|
|
143
|
+
error: error instanceof Error ? error : new Error(String(error)),
|
|
144
|
+
duration
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
parentPort?.postMessage(workerResult);
|
|
148
|
+
}
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
// Отправляем сообщение о готовности
|
|
152
|
+
parentPort?.postMessage({
|
|
153
|
+
type: 'worker_ready',
|
|
154
|
+
workerId: workerData?.workerId || 0,
|
|
155
|
+
pid: process.pid
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
// Обрабатываем сигналы завершения
|
|
159
|
+
process.on('SIGTERM', () => {
|
|
160
|
+
parentPort?.postMessage({
|
|
161
|
+
type: 'worker_shutdown',
|
|
162
|
+
workerId: workerData?.workerId || 0
|
|
163
|
+
});
|
|
164
|
+
process.exit(0);
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
process.on('SIGINT', () => {
|
|
168
|
+
parentPort?.postMessage({
|
|
169
|
+
type: 'worker_shutdown',
|
|
170
|
+
workerId: workerData?.workerId || 0
|
|
171
|
+
});
|
|
172
|
+
process.exit(0);
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// Запускаем воркер
|
|
177
|
+
main().catch(error => {
|
|
178
|
+
console.error('Worker initialization failed:', error);
|
|
179
|
+
process.exit(1);
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Утилитарные функции для воркера
|
|
184
|
+
*/
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Разделяет данные на чанки для параллельной обработки
|
|
188
|
+
*/
|
|
189
|
+
export function chunkData<T>(data: T[], chunkSize: number): T[][] {
|
|
190
|
+
const chunks: T[][] = [];
|
|
191
|
+
|
|
192
|
+
for (let i = 0; i < data.length; i += chunkSize) {
|
|
193
|
+
chunks.push(data.slice(i, i + chunkSize));
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
return chunks;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Объединяет результаты обработки чанков
|
|
201
|
+
*/
|
|
202
|
+
export function mergeChunkResults<T>(chunkResults: T[][]): T[] {
|
|
203
|
+
return chunkResults.flat();
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Создает задачи для обработки чанков
|
|
208
|
+
*/
|
|
209
|
+
export function createChunkTasks<T, R>(
|
|
210
|
+
data: T[],
|
|
211
|
+
chunkSize: number,
|
|
212
|
+
taskType: string,
|
|
213
|
+
options?: Record<string, any>
|
|
214
|
+
): WorkerTask<T[], R>[] {
|
|
215
|
+
const chunks = chunkData(data, chunkSize);
|
|
216
|
+
|
|
217
|
+
return chunks.map((chunk, index) => ({
|
|
218
|
+
id: `chunk_${index}_${Date.now()}`,
|
|
219
|
+
type: taskType,
|
|
220
|
+
data: chunk,
|
|
221
|
+
options: {
|
|
222
|
+
...options,
|
|
223
|
+
chunkIndex: index,
|
|
224
|
+
totalChunks: chunks.length
|
|
225
|
+
}
|
|
226
|
+
}));
|
|
227
|
+
}
|