jtcsv 2.2.7 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -1
- package/bin/jtcsv.js +891 -821
- package/bin/jtcsv.ts +2534 -0
- package/csv-to-json.js +168 -145
- package/dist/jtcsv-core.cjs.js +1407 -0
- package/dist/jtcsv-core.cjs.js.map +1 -0
- package/dist/jtcsv-core.esm.js +1379 -0
- package/dist/jtcsv-core.esm.js.map +1 -0
- package/dist/jtcsv-core.umd.js +1413 -0
- package/dist/jtcsv-core.umd.js.map +1 -0
- package/dist/jtcsv-full.cjs.js +1912 -0
- package/dist/jtcsv-full.cjs.js.map +1 -0
- package/dist/jtcsv-full.esm.js +1880 -0
- package/dist/jtcsv-full.esm.js.map +1 -0
- package/dist/jtcsv-full.umd.js +1918 -0
- package/dist/jtcsv-full.umd.js.map +1 -0
- package/dist/jtcsv-workers.esm.js +759 -0
- package/dist/jtcsv-workers.esm.js.map +1 -0
- package/dist/jtcsv-workers.umd.js +773 -0
- package/dist/jtcsv-workers.umd.js.map +1 -0
- package/dist/jtcsv.cjs.js +61 -19
- package/dist/jtcsv.cjs.js.map +1 -1
- package/dist/jtcsv.esm.js +61 -19
- package/dist/jtcsv.esm.js.map +1 -1
- package/dist/jtcsv.umd.js +61 -19
- package/dist/jtcsv.umd.js.map +1 -1
- package/errors.js +188 -2
- package/examples/advanced/conditional-transformations.js +446 -0
- package/examples/advanced/conditional-transformations.ts +446 -0
- package/examples/advanced/csv-parser.worker.js +89 -0
- package/examples/advanced/csv-parser.worker.ts +89 -0
- package/examples/advanced/nested-objects-example.js +306 -0
- package/examples/advanced/nested-objects-example.ts +306 -0
- package/examples/advanced/performance-optimization.js +504 -0
- package/examples/advanced/performance-optimization.ts +504 -0
- package/examples/advanced/run-demo-server.js +116 -0
- package/examples/advanced/run-demo-server.ts +116 -0
- package/examples/advanced/web-worker-usage.html +874 -0
- package/examples/async-multithreaded-example.ts +335 -0
- package/examples/cli-advanced-usage.md +288 -0
- package/examples/cli-batch-processing.ts +38 -0
- package/examples/cli-tool.js +0 -3
- package/examples/cli-tool.ts +183 -0
- package/examples/error-handling.js +21 -7
- package/examples/error-handling.ts +356 -0
- package/examples/express-api.js +0 -3
- package/examples/express-api.ts +164 -0
- package/examples/large-dataset-example.js +0 -3
- package/examples/large-dataset-example.ts +204 -0
- package/examples/ndjson-processing.js +1 -1
- package/examples/ndjson-processing.ts +456 -0
- package/examples/plugin-excel-exporter.js +3 -4
- package/examples/plugin-excel-exporter.ts +406 -0
- package/examples/react-integration.tsx +637 -0
- package/examples/schema-validation.ts +640 -0
- package/examples/simple-usage.js +254 -254
- package/examples/simple-usage.ts +194 -0
- package/examples/streaming-example.js +4 -5
- package/examples/streaming-example.ts +419 -0
- package/examples/web-workers-advanced.ts +28 -0
- package/index.d.ts +1 -3
- package/index.js +15 -1
- package/json-save.js +9 -3
- package/json-to-csv.js +168 -21
- package/package.json +69 -10
- package/plugins/express-middleware/README.md +21 -2
- package/plugins/express-middleware/example.js +3 -4
- package/plugins/express-middleware/example.ts +135 -0
- package/plugins/express-middleware/index.d.ts +1 -1
- package/plugins/express-middleware/index.js +270 -118
- package/plugins/express-middleware/index.ts +557 -0
- package/plugins/fastify-plugin/index.js +2 -4
- package/plugins/fastify-plugin/index.ts +443 -0
- package/plugins/hono/index.ts +226 -0
- package/plugins/nestjs/index.ts +201 -0
- package/plugins/nextjs-api/examples/ConverterComponent.tsx +386 -0
- package/plugins/nextjs-api/examples/api-convert.js +0 -2
- package/plugins/nextjs-api/examples/api-convert.ts +67 -0
- package/plugins/nextjs-api/index.tsx +339 -0
- package/plugins/nextjs-api/route.js +2 -3
- package/plugins/nextjs-api/route.ts +370 -0
- package/plugins/nuxt/index.ts +94 -0
- package/plugins/nuxt/runtime/composables/useJtcsv.ts +100 -0
- package/plugins/nuxt/runtime/plugin.ts +71 -0
- package/plugins/remix/index.js +1 -1
- package/plugins/remix/index.ts +260 -0
- package/plugins/sveltekit/index.js +1 -1
- package/plugins/sveltekit/index.ts +301 -0
- package/plugins/trpc/index.ts +267 -0
- package/src/browser/browser-functions.ts +402 -0
- package/src/browser/core.js +92 -0
- package/src/browser/core.ts +152 -0
- package/src/browser/csv-to-json-browser.d.ts +3 -0
- package/src/browser/csv-to-json-browser.js +36 -14
- package/src/browser/csv-to-json-browser.ts +264 -0
- package/src/browser/errors-browser.ts +303 -0
- package/src/browser/extensions/plugins.js +92 -0
- package/src/browser/extensions/plugins.ts +93 -0
- package/src/browser/extensions/workers.js +39 -0
- package/src/browser/extensions/workers.ts +39 -0
- package/src/browser/globals.d.ts +5 -0
- package/src/browser/index.ts +192 -0
- package/src/browser/json-to-csv-browser.d.ts +3 -0
- package/src/browser/json-to-csv-browser.js +13 -3
- package/src/browser/json-to-csv-browser.ts +262 -0
- package/src/browser/streams.js +12 -2
- package/src/browser/streams.ts +336 -0
- package/src/browser/workers/csv-parser.worker.ts +377 -0
- package/src/browser/workers/worker-pool.ts +548 -0
- package/src/core/delimiter-cache.js +22 -8
- package/src/core/delimiter-cache.ts +310 -0
- package/src/core/node-optimizations.ts +449 -0
- package/src/core/plugin-system.js +29 -11
- package/src/core/plugin-system.ts +400 -0
- package/src/core/transform-hooks.ts +558 -0
- package/src/engines/fast-path-engine-new.ts +347 -0
- package/src/engines/fast-path-engine.ts +854 -0
- package/src/errors.ts +72 -0
- package/src/formats/ndjson-parser.ts +469 -0
- package/src/formats/tsv-parser.ts +334 -0
- package/src/index-with-plugins.js +16 -9
- package/src/index-with-plugins.ts +395 -0
- package/src/types/index.ts +255 -0
- package/src/utils/bom-utils.js +259 -0
- package/src/utils/bom-utils.ts +373 -0
- package/src/utils/encoding-support.js +124 -0
- package/src/utils/encoding-support.ts +155 -0
- package/src/utils/schema-validator.js +19 -19
- package/src/utils/schema-validator.ts +819 -0
- package/src/utils/transform-loader.js +1 -1
- package/src/utils/transform-loader.ts +389 -0
- package/src/utils/zod-adapter.js +170 -0
- package/src/utils/zod-adapter.ts +280 -0
- package/src/web-server/index.js +10 -10
- package/src/web-server/index.ts +683 -0
- package/src/workers/csv-multithreaded.ts +310 -0
- package/src/workers/csv-parser.worker.ts +227 -0
- package/src/workers/worker-pool.ts +409 -0
- package/stream-csv-to-json.js +26 -8
- package/stream-json-to-csv.js +1 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ValidationError,
|
|
3
|
+
ConfigurationError,
|
|
4
|
+
LimitError
|
|
5
|
+
} from './errors-browser';
|
|
6
|
+
import { csvToJsonIterator } from './csv-to-json-browser';
|
|
7
|
+
|
|
8
|
+
import type { CsvToJsonOptions, JsonToCsvOptions } from '../types';
|
|
9
|
+
|
|
10
|
+
// Default chunk-size limit (64 * 1024 = 65536; the unit is not evident from this module).
// NOTE(review): no code visible in this file reads this constant — confirm whether it is still needed.
const DEFAULT_MAX_CHUNK_SIZE = 64 * 1024;
|
|
11
|
+
|
|
12
|
+
/**
 * Duck-typed check for a readable stream: any truthy value exposing a
 * `getReader()` method qualifies.
 *
 * @param value - Candidate value of any type.
 * @returns `true` when `value` is truthy and has a callable `getReader`, otherwise `false`.
 */
function isReadableStream(value: any): value is ReadableStream {
  // Boolean() keeps the result a real boolean; `value && ...` used to leak the
  // falsy input itself (null, 0, '') out of a type predicate.
  return Boolean(value) && typeof value.getReader === 'function';
}
|
|
15
|
+
|
|
16
|
+
/**
 * Checks whether a value implements the async iteration protocol
 * (has a callable `Symbol.asyncIterator` member).
 *
 * @param value - Candidate value of any type.
 * @returns `true` when `value` is truthy and async-iterable, otherwise `false`.
 */
function isAsyncIterable(value: any): value is AsyncIterable<any> {
  // Boolean() keeps the result a real boolean; `value && ...` used to leak the
  // falsy input itself (null, 0, '') out of a type predicate.
  return Boolean(value) && typeof value[Symbol.asyncIterator] === 'function';
}
|
|
19
|
+
|
|
20
|
+
/**
 * Checks whether a value implements the sync iteration protocol
 * (has a callable `Symbol.iterator` member).
 *
 * Note that non-empty strings are iterable (character by character), so
 * callers that treat text specially must test `typeof x === 'string'` first.
 *
 * @param value - Candidate value of any type.
 * @returns `true` when `value` is truthy and iterable, otherwise `false`.
 */
function isIterable(value: any): value is Iterable<any> {
  // Boolean() keeps the result a real boolean; `value && ...` used to leak the
  // falsy input itself (null, 0, '') out of a type predicate.
  return Boolean(value) && typeof value[Symbol.iterator] === 'function';
}
|
|
23
|
+
|
|
24
|
+
/**
 * Adapts an async iterator to a pull-based ReadableStream.
 *
 * Each `pull` advances the iterator once: a produced value is enqueued, a
 * finished iterator closes the stream, and a rejected `next()` errors the
 * stream with the same reason.
 *
 * @param iterator - Async iterator supplying the stream's chunks.
 * @returns A ReadableStream that yields the iterator's values in order.
 */
function createReadableStreamFromIterator<T>(iterator: AsyncIterator<T>): ReadableStream<T> {
  return new ReadableStream({
    async pull(controller) {
      try {
        const { value, done } = await iterator.next();
        if (done) {
          controller.close();
          return;
        }
        controller.enqueue(value);
      } catch (error) {
        controller.error(error);
      }
    },
    async cancel() {
      // Await the iterator's cleanup so that cancel() settles only after the
      // producer has finished its `finally` work, and so a failure during
      // cleanup rejects the cancel promise instead of going unhandled.
      if (typeof iterator.return === 'function') {
        await iterator.return();
      }
    }
  });
}
|
|
45
|
+
|
|
46
|
+
/**
 * Guesses the format of `input`.
 *
 * An explicit `options.inputFormat` is returned as-is. Otherwise only strings
 * are inspected, in this order:
 *   1. multi-line text whose first line parses as JSON -> 'ndjson'
 *   2. text that parses as a JSON array or object     -> 'json'
 *   3. text containing ',', ';' or a tab              -> 'csv'
 * Everything else (including blank strings and non-strings) is 'unknown'.
 *
 * @param input - Value to classify.
 * @param options - Optional settings; only `inputFormat` is read.
 * @returns The detected (or caller-forced) format.
 */
function detectInputFormat(input: any, options: any): 'json' | 'ndjson' | 'csv' | 'unknown' {
  if (options && options.inputFormat) {
    return options.inputFormat;
  }

  if (typeof input !== 'string') {
    return 'unknown';
  }

  const text = input.trim();
  if (text === '') {
    return 'unknown';
  }

  // Parse helper that reports failure instead of throwing.
  const tryParse = (candidate: string): { ok: boolean; value?: any } => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false };
    }
  };

  // NDJSON probe. The text is already trimmed, so its first line is never
  // blank and is exactly the first non-empty line.
  const newlineAt = text.indexOf('\n');
  if (newlineAt !== -1 && tryParse(text.slice(0, newlineAt)).ok) {
    return 'ndjson';
  }

  // Whole-document JSON probe: only arrays and objects count.
  const whole = tryParse(text);
  if (whole.ok && (Array.isArray(whole.value) || (whole.value && typeof whole.value === 'object'))) {
    return 'json';
  }

  // CSV probe: presence of any common delimiter.
  const looksDelimited = [',', ';', '\t'].some(mark => text.includes(mark));
  return looksDelimited ? 'csv' : 'unknown';
}
|
|
88
|
+
|
|
89
|
+
/**
 * Converts JSON records to CSV text, one line per yielded chunk.
 *
 * Accepted inputs:
 *   - a JSON string (array or single value) or an NDJSON string,
 *   - any sync or async iterable of records,
 *   - a single non-iterable value (treated as one record).
 *
 * Column names are the keys of the first record; later records are projected
 * onto those columns and missing/null values become empty fields.
 *
 * Options read here: `delimiter` (default ';'), `includeHeaders` (default
 * true), `preventCsvInjection` (default true). `inputFormat` is read by
 * detectInputFormat.
 *
 * @param input - JSON text, iterable of records, or a single record.
 * @param options - Conversion options.
 * @returns Async generator yielding the header line (optional) and then one line per record.
 * @throws ValidationError when the input is detected as CSV.
 * @throws SyntaxError when string input is not valid JSON / NDJSON.
 */
async function* jsonToCsvChunkIterator(input: any, options: JsonToCsvOptions = {}): AsyncGenerator<string> {
  const format = detectInputFormat(input, options);

  if (format === 'csv') {
    throw new ValidationError('Input appears to be CSV, not JSON');
  }

  // Strings must be handled before the iterable checks: a string is itself
  // iterable, so testing iterability first would walk the JSON text one
  // character at a time instead of parsing it.
  let source: Iterable<any> | AsyncIterable<any>;
  if (typeof input === 'string') {
    if (format === 'ndjson') {
      source = input
        .split('\n')
        .filter(line => line.trim() !== '')
        .map(line => JSON.parse(line));
    } else {
      const parsed = JSON.parse(input);
      source = Array.isArray(parsed) ? parsed : [parsed];
    }
  } else if (isAsyncIterable(input) || isIterable(input)) {
    source = input;
  } else {
    source = [input];
  }

  const delimiter = options.delimiter || ';';
  const includeHeaders = options.includeHeaders !== false;
  const preventInjection = options.preventCsvInjection !== false;

  // Single escaping routine shared by headers and values. The formula guard
  // is applied first so that it ends up inside the quotes; quoting is then
  // applied whenever the text contains a quote, a line break or the delimiter.
  const formatField = (text: string): string => {
    const guarded = preventInjection && /^[=+\-@]/.test(text) ? `'${text}` : text;
    const needsQuoting =
      guarded.includes('"') ||
      guarded.includes('\n') ||
      guarded.includes('\r') ||
      guarded.includes(delimiter);
    return needsQuoting ? `"${guarded.replace(/"/g, '""')}"` : guarded;
  };

  let isFirstItem = true;
  let headers: string[] = [];

  // `for await` accepts sync and async iterables alike and calls the
  // iterator's return() if the consumer stops early.
  for await (const item of source) {
    if (isFirstItem) {
      // Columns are fixed by the first record.
      headers = Object.keys(item);

      if (includeHeaders) {
        yield headers.map(formatField).join(delimiter) + '\n';
      }

      isFirstItem = false;
    }

    const row = headers
      .map(header => {
        const cell = item[header];
        return formatField(cell === null || cell === undefined ? '' : String(cell));
      })
      .join(delimiter);

    yield row + '\n';
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Converts JSON records to NDJSON, one line per yielded chunk.
 *
 * Accepted inputs:
 *   - an NDJSON string (non-blank lines are forwarded one by one),
 *   - a JSON string (array elements, or the single parsed value),
 *   - any sync or async iterable of records,
 *   - a single non-iterable value (treated as one record).
 *
 * String items that already parse as JSON are emitted unchanged; every other
 * item is serialized with JSON.stringify.
 *
 * @param input - JSON/NDJSON text, iterable of records, or a single record.
 * @param options - Only `inputFormat` is read (by detectInputFormat).
 * @returns Async generator yielding one newline-terminated JSON document per record.
 * @throws SyntaxError when non-NDJSON string input is not valid JSON.
 */
async function* jsonToNdjsonChunkIterator(input: any, options: any = {}): AsyncGenerator<string> {
  const format = detectInputFormat(input, options);

  // Strings must be handled before the iterable checks: a string is itself
  // iterable, so testing iterability first would emit the text one character
  // per line instead of parsing it.
  let source: Iterable<any> | AsyncIterable<any>;
  if (typeof input === 'string') {
    if (format === 'ndjson') {
      source = input.split('\n').filter(line => line.trim() !== '');
    } else {
      const parsed = JSON.parse(input);
      source = Array.isArray(parsed) ? parsed : [parsed];
    }
  } else if (isAsyncIterable(input) || isIterable(input)) {
    source = input;
  } else {
    source = [input];
  }

  // `for await` accepts sync and async iterables alike and calls the
  // iterator's return() if the consumer stops early.
  for await (const value of source) {
    let jsonStr: string;

    if (typeof value === 'string') {
      try {
        // Forward the text untouched when it is already a JSON document.
        JSON.parse(value);
        jsonStr = value;
      } catch {
        // Otherwise encode it as a JSON string literal.
        jsonStr = JSON.stringify(value);
      }
    } else {
      jsonStr = JSON.stringify(value);
    }

    yield jsonStr + '\n';
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Produces parsed rows from CSV supplied as text, a Blob/File, or a
 * ReadableStream.
 *
 *   - string: delegated to csvToJsonIterator.
 *   - Blob / File: fully read with `.text()`, then delegated to csvToJsonIterator.
 *   - ReadableStream: NOT parsed yet. The stream is decoded and split on '\n',
 *     and each non-blank line is yielded as `{ raw: line }`.
 *     TODO: parse CSV incrementally from chunks instead of yielding raw lines.
 *
 * @param input - CSV text, Blob/File, or ReadableStream of bytes or strings.
 * @param options - Options forwarded to csvToJsonIterator.
 * @returns Async generator of parsed rows (or `{ raw }` objects for streams).
 * @throws ValidationError for any other input type.
 */
async function* csvToJsonChunkIterator(input: any, options: CsvToJsonOptions = {}): AsyncGenerator<any> {
  if (typeof input === 'string') {
    yield* csvToJsonIterator(input, options);
    return;
  }

  // `typeof` guards avoid a ReferenceError in runtimes that do not define
  // File or Blob as globals.
  const isBlobLike =
    (typeof File !== 'undefined' && input instanceof File) ||
    (typeof Blob !== 'undefined' && input instanceof Blob);
  if (isBlobLike) {
    const text = await input.text();
    yield* csvToJsonIterator(text, options);
    return;
  }

  if (!isReadableStream(input)) {
    throw new ValidationError('Unsupported input type for CSV streaming');
  }

  const reader = input.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;

      // Text streams deliver strings; only binary chunks go through the decoder.
      buffer += typeof value === 'string' ? value : decoder.decode(value, { stream: true });

      // Keep the trailing partial line in the buffer for the next chunk.
      const lines = buffer.split('\n');
      buffer = lines.pop() || '';

      for (const line of lines) {
        if (line.trim()) {
          yield { raw: line };
        }
      }
    }

    // Flush the decoder: bytes of a multi-byte character held back by
    // `stream: true` are only released by a final decode() call.
    buffer += decoder.decode();

    if (buffer.trim()) {
      yield { raw: buffer };
    }
  } finally {
    reader.releaseLock();
  }
}
|
|
288
|
+
|
|
289
|
+
/**
 * Exposes jsonToCsvChunkIterator as a ReadableStream of CSV text chunks.
 *
 * @param input - Value passed through to jsonToCsvChunkIterator.
 * @param options - Options passed through to jsonToCsvChunkIterator.
 * @returns ReadableStream built by createReadableStreamFromIterator.
 */
export function jsonToCsvStream(input: any, options: JsonToCsvOptions = {}): ReadableStream<string> {
  return createReadableStreamFromIterator(jsonToCsvChunkIterator(input, options));
}
|
|
293
|
+
|
|
294
|
+
/**
 * Exposes jsonToNdjsonChunkIterator as a ReadableStream of NDJSON text chunks.
 *
 * @param input - Value passed through to jsonToNdjsonChunkIterator.
 * @param options - Options passed through to jsonToNdjsonChunkIterator.
 * @returns ReadableStream built by createReadableStreamFromIterator.
 */
export function jsonToNdjsonStream(input: any, options: any = {}): ReadableStream<string> {
  return createReadableStreamFromIterator(jsonToNdjsonChunkIterator(input, options));
}
|
|
298
|
+
|
|
299
|
+
/**
 * Exposes csvToJsonChunkIterator as a ReadableStream of its yielded items.
 *
 * @param input - Value passed through to csvToJsonChunkIterator.
 * @param options - Options passed through to csvToJsonChunkIterator.
 * @returns ReadableStream built by createReadableStreamFromIterator.
 */
export function csvToJsonStream(input: any, options: CsvToJsonOptions = {}): ReadableStream<any> {
  return createReadableStreamFromIterator(csvToJsonChunkIterator(input, options));
}
|
|
303
|
+
|
|
304
|
+
/**
 * Promise-returning variant of jsonToCsvStream.
 *
 * @param input - Value passed through to jsonToCsvStream.
 * @param options - Options passed through to jsonToCsvStream.
 * @returns Promise resolving to the stream created by jsonToCsvStream.
 */
export async function jsonToCsvStreamAsync(input: any, options: JsonToCsvOptions = {}): Promise<ReadableStream<string>> {
  const csvStream = jsonToCsvStream(input, options);
  return csvStream;
}
|
|
310
|
+
|
|
311
|
+
/**
 * Promise-returning variant of jsonToNdjsonStream.
 *
 * @param input - Value passed through to jsonToNdjsonStream.
 * @param options - Options passed through to jsonToNdjsonStream.
 * @returns Promise resolving to the stream created by jsonToNdjsonStream.
 */
export async function jsonToNdjsonStreamAsync(input: any, options: any = {}): Promise<ReadableStream<string>> {
  const ndjsonStream = jsonToNdjsonStream(input, options);
  return ndjsonStream;
}
|
|
317
|
+
|
|
318
|
+
/**
 * Promise-returning variant of csvToJsonStream.
 *
 * @param input - Value passed through to csvToJsonStream.
 * @param options - Options passed through to csvToJsonStream.
 * @returns Promise resolving to the stream created by csvToJsonStream.
 */
export async function csvToJsonStreamAsync(input: any, options: CsvToJsonOptions = {}): Promise<ReadableStream<any>> {
  const rowStream = csvToJsonStream(input, options);
  return rowStream;
}
|
|
324
|
+
|
|
325
|
+
// CommonJS interop: when a `module.exports` object exists, replace it with
// the public stream helpers (plus createReadableStreamFromIterator).
if (typeof module !== 'undefined' && module.exports) {
  const commonJsApi = {
    jsonToCsvStream,
    jsonToCsvStreamAsync,
    jsonToNdjsonStream,
    jsonToNdjsonStreamAsync,
    csvToJsonStream,
    csvToJsonStreamAsync,
    createReadableStreamFromIterator
  };
  module.exports = commonJsApi;
}
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
// Web Worker для обработки CSV
|
|
2
|
+
// Работает в отдельном потоке, не блокируя основной
|
|
3
|
+
|
|
4
|
+
// Импорт функций парсинга (они будут bundled вместе с worker)
|
|
5
|
+
import { csvToJson } from '../csv-to-json-browser';
|
|
6
|
+
import { jsonToCsv } from '../json-to-csv-browser';
|
|
7
|
+
|
|
8
|
+
// Single UTF-8 decoder reused for every binary CSV payload.
const textDecoder = new TextDecoder('utf-8');

// Parsed-result cache, keyed by generateCacheKey().
const cache = new Map();
// Entry count above which cleanupCache() evicts the oldest entry.
const CACHE_MAX_SIZE = 50;
// Entry lifetime: 5 minutes, in milliseconds (compared against Date.now()).
const CACHE_TTL = 5 * 60 * 1000;

// Running counters for this worker instance.
const stats = {
  tasksProcessed: 0,
  cacheHits: 0,
  cacheMisses: 0,
  totalProcessingTime: 0,
  averageProcessingTime: 0
};
|
|
23
|
+
|
|
24
|
+
/**
 * Builds a cache key from the CSV text and the parse options.
 *
 * The key is a 32-bit rolling hash (multiplier 31) over the CSV characters
 * followed by the JSON-serialized options, plus the serialized options length.
 * The hash is not collision-free: two different inputs can map to the same key.
 *
 * @param {string} csv - CSV text.
 * @param {Object} [options] - Parse options; serialized with JSON.stringify.
 * @returns {string} Key of the form `csv-<base36 hash>-<options length>`.
 */
function generateCacheKey(csv, options) {
  // JSON.stringify yields undefined for undefined options; fall back to ''
  // so that a missing options argument no longer throws on `.length`.
  const optionsStr = JSON.stringify(options) || '';

  // One hashing routine for both inputs; `| 0` truncates to a signed 32-bit
  // integer after every step, exactly like the former `hash & hash`.
  const mix = (seed, text) => {
    let hash = seed;
    for (let i = 0; i < text.length; i++) {
      hash = ((hash << 5) - hash + text.charCodeAt(i)) | 0;
    }
    return hash;
  };

  const hash = mix(mix(0, csv), optionsStr);
  return `csv-${hash.toString(36)}-${optionsStr.length}`;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Prunes the result cache: drops every entry older than CACHE_TTL, then, if
 * the cache still holds more than CACHE_MAX_SIZE entries, removes the single
 * entry with the smallest timestamp.
 */
function cleanupCache() {
  const now = Date.now();

  // Pass 1: expire by age.
  cache.forEach((entry, key) => {
    if (now - entry.timestamp > CACHE_TTL) {
      cache.delete(key);
    }
  });

  if (cache.size <= CACHE_MAX_SIZE) {
    return;
  }

  // Pass 2: evict the oldest survivor. The strict `<` keeps the earliest
  // inserted entry when timestamps tie.
  const [oldestKey] = [...cache.entries()].reduce((oldest, candidate) =>
    candidate[1].timestamp < oldest[1].timestamp ? candidate : oldest
  );
  cache.delete(oldestKey);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Parses CSV text with result caching and a completion progress report.
 *
 * The whole document is parsed by a single csvToJson call. Slicing the text
 * into fixed groups of physical lines and parsing each slice on its own is
 * not safe: a quoted field containing a line break can straddle a slice
 * boundary, and every slice after the first is handed to the parser as a
 * standalone document (presumably re-reading its first row as a header —
 * confirm against csvToJson).
 *
 * @param {string} csv - CSV text.
 * @param {Object} options - Parse options forwarded to csvToJson.
 * @param {Function} [sendProgress] - Callback receiving
 *   `{ processed, total, percentage, ... }`; called once per invocation.
 * @returns {Array<Object>} Parsed rows (the cached array on a cache hit).
 */
function parseCSVWithProgress(csv, options, sendProgress) {
  const startTime = performance.now();

  // Cache lookup; entries past their TTL are discarded instead of served.
  const cacheKey = generateCacheKey(csv, options);
  const cached = cache.get(cacheKey);
  if (cached) {
    if (Date.now() - cached.timestamp <= CACHE_TTL) {
      stats.cacheHits++;

      // Cached results are reported as instantly complete.
      if (sendProgress) {
        sendProgress({
          processed: cached.data.length,
          total: cached.data.length,
          percentage: 100,
          fromCache: true
        });
      }

      return cached.data;
    }
    cache.delete(cacheKey);
  }

  stats.cacheMisses++;

  // No Atomics.wait "pause" here: it blocks the thread rather than yielding,
  // and constructing a SharedArrayBuffer throws where that global is not
  // available, which made every parse fail on the very first iteration.
  const result = csvToJson(csv, options);

  if (sendProgress) {
    const totalLines = csv.split('\n').length;
    const elapsed = (performance.now() - startTime) / 1000;

    sendProgress({
      processed: totalLines,
      total: totalLines,
      percentage: 100,
      // Guard against a zero elapsed time on very small inputs.
      speed: elapsed > 0 ? totalLines / elapsed : 0,
      elapsed: elapsed
    });
  }

  cache.set(cacheKey, {
    data: result,
    timestamp: Date.now(),
    size: csv.length
  });

  cleanupCache();

  const processingTime = performance.now() - startTime;
  stats.tasksProcessed++;
  stats.totalProcessingTime += processingTime;
  stats.averageProcessingTime = stats.totalProcessingTime / stats.tasksProcessed;

  return result;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Thin wrapper that forwards to jsonToCsv.
 *
 * @param {Array<Object>} jsonData - Records to convert.
 * @param {Object} options - Conversion options forwarded unchanged.
 * @returns {string} Whatever jsonToCsv returns for these arguments.
 */
function convertJSONToCSV(jsonData, options) {
  const csvText = jsonToCsv(jsonData, options);
  return csvText;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Validates CSV text by parsing a sample of at most the first 1000 lines.
 *
 * @param {string} csv - CSV text.
 * @param {Object} options - Parse options forwarded to csvToJson.
 * @returns {Object} On success `{ valid: true, sampleSize, estimatedTotalRows,
 *   processingTime, estimatedFullProcessingTime }`; on failure
 *   `{ valid: false, error, processingTime }`. Never throws.
 */
function validateCSV(csv, options) {
  const startTime = performance.now();

  try {
    // Split once and reuse; the input may be large.
    const lines = csv.split('\n');
    const totalLines = lines.length;
    const sampleSize = Math.min(1000, totalLines);
    // When the sample covers every line, the original text is the sample.
    const sample = sampleSize === totalLines ? csv : lines.slice(0, sampleSize).join('\n');

    const result = csvToJson(sample, options);

    const processingTime = performance.now() - startTime;

    return {
      valid: true,
      sampleSize: result.length,
      estimatedTotalRows: totalLines,
      processingTime: processingTime,
      // Linear extrapolation from the sampled lines to the full input.
      estimatedFullProcessingTime: (processingTime / sampleSize) * totalLines
    };
  } catch (error) {
    return {
      valid: false,
      // Non-Error throwables have no `.message`; fall back to their string form.
      error: error instanceof Error ? error.message : String(error),
      processingTime: performance.now() - startTime
    };
  }
}
|
|
200
|
+
|
|
201
|
+
/**
 * Returns a snapshot of the worker counters together with the current cache
 * size and the list of cache keys.
 *
 * @returns {Object} Copy of `stats` extended with `cacheSize` and `cacheKeys`.
 */
function getStats() {
  const snapshot = Object.assign({}, stats);
  snapshot.cacheSize = cache.size;
  snapshot.cacheKeys = [...cache.keys()];
  return snapshot;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Empties the result cache and resets the cache hit/miss counters.
 * The other counters in `stats` are left untouched.
 */
function clearCache() {
  cache.clear();
  Object.assign(stats, { cacheHits: 0, cacheMisses: 0 });
}
|
|
221
|
+
|
|
222
|
+
/**
 * Normalizes a CSV payload to a string.
 *
 * @param {string|ArrayBuffer|ArrayBufferView} input - Text, or bytes to decode
 *   with the shared UTF-8 decoder.
 * @returns {string} The input itself when already a string, otherwise the decoded text.
 * @throws {Error} When the input is none of the supported types.
 */
function decodeCsvInput(input) {
  if (typeof input === 'string') {
    return input;
  }

  let bytes = null;
  if (input instanceof ArrayBuffer) {
    bytes = new Uint8Array(input);
  } else if (ArrayBuffer.isView(input)) {
    bytes = input;
  }

  if (bytes === null) {
    throw new Error('Invalid CSV input type');
  }
  return textDecoder.decode(bytes);
}
|
|
234
|
+
|
|
235
|
+
// Entry point for messages from the main thread. Routes by `type`:
//   EXECUTE     -> handleExecute(message)
//   GET_STATS   -> replies with a STATS message carrying getStats()
//   CLEAR_CACHE -> clears the cache and replies with CACHE_CLEARED
//   other       -> replies with an ERROR message naming the unknown command
self.onmessage = function (event) {
  const message = event.data;
  const { type, taskId } = message;

  if (type === 'EXECUTE') {
    handleExecute(message);
    return;
  }

  if (type === 'GET_STATS') {
    self.postMessage({
      type: 'STATS',
      taskId,
      data: getStats()
    });
    return;
  }

  if (type === 'CLEAR_CACHE') {
    clearCache();
    self.postMessage({
      type: 'CACHE_CLEARED',
      taskId
    });
    return;
  }

  self.postMessage({
    type: 'ERROR',
    taskId,
    message: `Unknown command: ${type}`
  });
};
|
|
268
|
+
|
|
269
|
+
/**
 * Handles an EXECUTE command: runs the requested method and posts either a
 * RESULT message with its return value or an ERROR message describing the
 * failure. PROGRESS messages may be posted while `parseCSV` runs.
 *
 * Supported methods: `parseCSV`, `jsonToCSV`, `validateCSV`,
 * `autoDetectDelimiter`. Per-call options (second element of `args`) override
 * the command-level `options`.
 *
 * @param {Object} commandData - Command payload.
 * @param {*} commandData.taskId - Correlation id echoed in every reply.
 * @param {string} commandData.method - Name of the method to run.
 * @param {Array} [commandData.args] - Positional arguments for the method.
 * @param {Object} [commandData.options] - Base options merged under per-call options.
 */
function handleExecute(commandData) {
  const { taskId, method, args = [], options = {} } = commandData;

  try {
    let result;

    switch (method) {
      case 'parseCSV': {
        const [csvInput, parseOptions] = args;
        const csv = decodeCsvInput(csvInput);

        // Relay parser progress to the main thread, tagged with this task.
        const sendProgress = (progress) => {
          self.postMessage({
            type: 'PROGRESS',
            taskId,
            ...progress
          });
        };

        result = parseCSVWithProgress(csv, { ...options, ...parseOptions }, sendProgress);
        break;
      }

      case 'jsonToCSV': {
        const [jsonData, csvOptions] = args;
        result = convertJSONToCSV(jsonData, { ...options, ...csvOptions });
        break;
      }

      case 'validateCSV': {
        const [csvInput, validateOptions] = args;
        // Accept binary payloads like parseCSV does. Other non-string inputs
        // are passed through so validateCSV still reports them as
        // `{ valid: false }` rather than as an ERROR message.
        const isBinary = csvInput instanceof ArrayBuffer || ArrayBuffer.isView(csvInput);
        const csv = isBinary ? decodeCsvInput(csvInput) : csvInput;
        result = validateCSV(csv, { ...options, ...validateOptions });
        break;
      }

      case 'autoDetectDelimiter': {
        const [csvInput] = args;
        const csv = decodeCsvInput(csvInput);

        // Simple heuristic: the candidate occurring most often in the first
        // line wins; ';' is the fallback and wins ties by coming first.
        const delimiters = [';', ',', '\t', '|'];
        const firstLine = csv.split('\n')[0] || '';

        let bestDelimiter = ';';
        let maxCount = 0;
        for (const delim of delimiters) {
          const count = firstLine.split(delim).length - 1;
          if (count > maxCount) {
            maxCount = count;
            bestDelimiter = delim;
          }
        }

        result = bestDelimiter;
        break;
      }

      default:
        throw new Error(`Unknown method: ${method}`);
    }

    self.postMessage({
      type: 'RESULT',
      taskId,
      data: result
    });
  } catch (error) {
    // Anything can be thrown; normalize so the reply itself cannot fail on a
    // missing `.message`.
    const failure = error instanceof Error ? error : new Error(String(error));
    self.postMessage({
      type: 'ERROR',
      taskId,
      message: failure.message,
      stack: failure.stack,
      code: failure.code,
      details: failure.details
    });
  }
}
|
|
364
|
+
|
|
365
|
+
// Announce to the main thread that the worker script has loaded.
self.postMessage({ type: 'READY' });

// Expose the internals through CommonJS when available (used for testing).
if (typeof module !== 'undefined' && module.exports) {
  const testingApi = {
    parseCSVWithProgress,
    convertJSONToCSV,
    validateCSV,
    getStats,
    clearCache
  };
  module.exports = testingApi;
}
|