jtcsv 2.2.8 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +204 -115
- package/bin/jtcsv.ts +2612 -0
- package/browser.d.ts +142 -0
- package/dist/benchmark.js +446 -0
- package/dist/benchmark.js.map +1 -0
- package/dist/bin/jtcsv.js +1940 -0
- package/dist/bin/jtcsv.js.map +1 -0
- package/dist/csv-to-json.js +1262 -0
- package/dist/csv-to-json.js.map +1 -0
- package/dist/errors.js +291 -0
- package/dist/errors.js.map +1 -0
- package/dist/eslint.config.js +147 -0
- package/dist/eslint.config.js.map +1 -0
- package/dist/index-core.js +95 -0
- package/dist/index-core.js.map +1 -0
- package/dist/index.js +93 -0
- package/dist/index.js.map +1 -0
- package/dist/json-save.js +229 -0
- package/dist/json-save.js.map +1 -0
- package/dist/json-to-csv.js +576 -0
- package/dist/json-to-csv.js.map +1 -0
- package/dist/jtcsv-core.cjs.js +1736 -0
- package/dist/jtcsv-core.cjs.js.map +1 -0
- package/dist/jtcsv-core.esm.js +1708 -0
- package/dist/jtcsv-core.esm.js.map +1 -0
- package/dist/jtcsv-core.umd.js +1742 -0
- package/dist/jtcsv-core.umd.js.map +1 -0
- package/dist/jtcsv-full.cjs.js +2241 -0
- package/dist/jtcsv-full.cjs.js.map +1 -0
- package/dist/jtcsv-full.esm.js +2209 -0
- package/dist/jtcsv-full.esm.js.map +1 -0
- package/dist/jtcsv-full.umd.js +2247 -0
- package/dist/jtcsv-full.umd.js.map +1 -0
- package/dist/jtcsv-workers.esm.js +768 -0
- package/dist/jtcsv-workers.esm.js.map +1 -0
- package/dist/jtcsv-workers.umd.js +782 -0
- package/dist/jtcsv-workers.umd.js.map +1 -0
- package/dist/jtcsv.cjs.js +1996 -2048
- package/dist/jtcsv.cjs.js.map +1 -1
- package/dist/jtcsv.esm.js +1992 -2048
- package/dist/jtcsv.esm.js.map +1 -1
- package/dist/jtcsv.umd.js +2157 -2209
- package/dist/jtcsv.umd.js.map +1 -1
- package/dist/plugins/express-middleware/index.js +350 -0
- package/dist/plugins/express-middleware/index.js.map +1 -0
- package/dist/plugins/fastify-plugin/index.js +315 -0
- package/dist/plugins/fastify-plugin/index.js.map +1 -0
- package/dist/plugins/hono/index.js +111 -0
- package/dist/plugins/hono/index.js.map +1 -0
- package/dist/plugins/nestjs/index.js +112 -0
- package/dist/plugins/nestjs/index.js.map +1 -0
- package/dist/plugins/nuxt/index.js +53 -0
- package/dist/plugins/nuxt/index.js.map +1 -0
- package/dist/plugins/remix/index.js +133 -0
- package/dist/plugins/remix/index.js.map +1 -0
- package/dist/plugins/sveltekit/index.js +155 -0
- package/dist/plugins/sveltekit/index.js.map +1 -0
- package/dist/plugins/trpc/index.js +136 -0
- package/dist/plugins/trpc/index.js.map +1 -0
- package/dist/run-demo.js +49 -0
- package/dist/run-demo.js.map +1 -0
- package/dist/src/browser/browser-functions.js +193 -0
- package/dist/src/browser/browser-functions.js.map +1 -0
- package/dist/src/browser/core.js +123 -0
- package/dist/src/browser/core.js.map +1 -0
- package/dist/src/browser/csv-to-json-browser.js +353 -0
- package/dist/src/browser/csv-to-json-browser.js.map +1 -0
- package/dist/src/browser/errors-browser.js +219 -0
- package/dist/src/browser/errors-browser.js.map +1 -0
- package/dist/src/browser/extensions/plugins.js +106 -0
- package/dist/src/browser/extensions/plugins.js.map +1 -0
- package/dist/src/browser/extensions/workers.js +66 -0
- package/dist/src/browser/extensions/workers.js.map +1 -0
- package/dist/src/browser/index.js +140 -0
- package/dist/src/browser/index.js.map +1 -0
- package/dist/src/browser/json-to-csv-browser.js +225 -0
- package/dist/src/browser/json-to-csv-browser.js.map +1 -0
- package/dist/src/browser/streams.js +340 -0
- package/dist/src/browser/streams.js.map +1 -0
- package/dist/src/browser/workers/csv-parser.worker.js +264 -0
- package/dist/src/browser/workers/csv-parser.worker.js.map +1 -0
- package/dist/src/browser/workers/worker-pool.js +338 -0
- package/dist/src/browser/workers/worker-pool.js.map +1 -0
- package/dist/src/core/delimiter-cache.js +196 -0
- package/dist/src/core/delimiter-cache.js.map +1 -0
- package/dist/src/core/node-optimizations.js +279 -0
- package/dist/src/core/node-optimizations.js.map +1 -0
- package/dist/src/core/plugin-system.js +399 -0
- package/dist/src/core/plugin-system.js.map +1 -0
- package/dist/src/core/transform-hooks.js +348 -0
- package/dist/src/core/transform-hooks.js.map +1 -0
- package/dist/src/engines/fast-path-engine-new.js +262 -0
- package/dist/src/engines/fast-path-engine-new.js.map +1 -0
- package/dist/src/engines/fast-path-engine.js +671 -0
- package/dist/src/engines/fast-path-engine.js.map +1 -0
- package/dist/src/errors.js +18 -0
- package/dist/src/errors.js.map +1 -0
- package/dist/src/formats/ndjson-parser.js +332 -0
- package/dist/src/formats/ndjson-parser.js.map +1 -0
- package/dist/src/formats/tsv-parser.js +230 -0
- package/dist/src/formats/tsv-parser.js.map +1 -0
- package/dist/src/index-with-plugins.js +259 -0
- package/dist/src/index-with-plugins.js.map +1 -0
- package/dist/src/types/index.js +3 -0
- package/dist/src/types/index.js.map +1 -0
- package/dist/src/utils/bom-utils.js +267 -0
- package/dist/src/utils/bom-utils.js.map +1 -0
- package/dist/src/utils/encoding-support.js +77 -0
- package/dist/src/utils/encoding-support.js.map +1 -0
- package/dist/src/utils/schema-validator.js +609 -0
- package/dist/src/utils/schema-validator.js.map +1 -0
- package/dist/src/utils/transform-loader.js +281 -0
- package/dist/src/utils/transform-loader.js.map +1 -0
- package/dist/src/utils/validators.js +40 -0
- package/dist/src/utils/validators.js.map +1 -0
- package/dist/src/utils/zod-adapter.js +144 -0
- package/dist/src/utils/zod-adapter.js.map +1 -0
- package/dist/src/web-server/index.js +648 -0
- package/dist/src/web-server/index.js.map +1 -0
- package/dist/src/workers/csv-multithreaded.js +211 -0
- package/dist/src/workers/csv-multithreaded.js.map +1 -0
- package/dist/src/workers/csv-parser.worker.js +179 -0
- package/dist/src/workers/csv-parser.worker.js.map +1 -0
- package/dist/src/workers/worker-pool.js +228 -0
- package/dist/src/workers/worker-pool.js.map +1 -0
- package/dist/stream-csv-to-json.js +665 -0
- package/dist/stream-csv-to-json.js.map +1 -0
- package/dist/stream-json-to-csv.js +389 -0
- package/dist/stream-json-to-csv.js.map +1 -0
- package/examples/advanced/conditional-transformations.ts +446 -0
- package/examples/advanced/csv-parser.worker.ts +89 -0
- package/examples/advanced/nested-objects-example.ts +306 -0
- package/examples/advanced/performance-optimization.ts +504 -0
- package/examples/advanced/run-demo-server.ts +116 -0
- package/examples/advanced/web-worker-usage.html +874 -0
- package/examples/async-multithreaded-example.ts +335 -0
- package/examples/cli-advanced-usage.md +290 -0
- package/examples/{cli-batch-processing.js → cli-batch-processing.ts} +38 -38
- package/examples/{cli-tool.js → cli-tool.ts} +5 -8
- package/examples/{error-handling.js → error-handling.ts} +356 -324
- package/examples/{express-api.js → express-api.ts} +161 -164
- package/examples/{large-dataset-example.js → large-dataset-example.ts} +201 -182
- package/examples/{ndjson-processing.js → ndjson-processing.ts} +456 -434
- package/examples/{plugin-excel-exporter.js → plugin-excel-exporter.ts} +6 -7
- package/examples/react-integration.tsx +637 -0
- package/examples/{schema-validation.js → schema-validation.ts} +2 -2
- package/examples/simple-usage.ts +194 -0
- package/examples/{streaming-example.js → streaming-example.ts} +12 -12
- package/index.d.ts +187 -18
- package/package.json +75 -81
- package/plugins.d.ts +37 -0
- package/schema.d.ts +103 -0
- package/src/browser/browser-functions.ts +402 -0
- package/src/browser/core.ts +152 -0
- package/src/browser/csv-to-json-browser.d.ts +3 -0
- package/src/browser/csv-to-json-browser.ts +494 -0
- package/src/browser/{errors-browser.js → errors-browser.ts} +305 -197
- package/src/browser/extensions/plugins.ts +93 -0
- package/src/browser/extensions/workers.ts +39 -0
- package/src/browser/globals.d.ts +5 -0
- package/src/browser/index.ts +192 -0
- package/src/browser/json-to-csv-browser.d.ts +3 -0
- package/src/browser/json-to-csv-browser.ts +338 -0
- package/src/browser/streams.ts +403 -0
- package/src/browser/workers/{csv-parser.worker.js → csv-parser.worker.ts} +3 -3
- package/src/browser/workers/{worker-pool.js → worker-pool.ts} +51 -30
- package/src/core/delimiter-cache.ts +320 -0
- package/src/core/{node-optimizations.js → node-optimizations.ts} +448 -407
- package/src/core/plugin-system.ts +588 -0
- package/src/core/transform-hooks.ts +566 -0
- package/src/engines/{fast-path-engine-new.js → fast-path-engine-new.ts} +11 -2
- package/src/engines/{fast-path-engine.js → fast-path-engine.ts} +79 -53
- package/src/errors.ts +1 -0
- package/src/formats/{ndjson-parser.js → ndjson-parser.ts} +24 -16
- package/src/formats/{tsv-parser.js → tsv-parser.ts} +18 -17
- package/src/{index-with-plugins.js → index-with-plugins.ts} +381 -357
- package/src/types/index.ts +275 -0
- package/src/utils/bom-utils.ts +373 -0
- package/src/utils/encoding-support.ts +155 -0
- package/src/utils/{schema-validator.js → schema-validator.ts} +814 -589
- package/src/utils/transform-loader.ts +389 -0
- package/src/utils/validators.ts +35 -0
- package/src/utils/zod-adapter.ts +280 -0
- package/src/web-server/{index.js → index.ts} +19 -19
- package/src/workers/csv-multithreaded.ts +310 -0
- package/src/workers/csv-parser.worker.ts +227 -0
- package/src/workers/worker-pool.ts +409 -0
- package/bin/jtcsv.js +0 -2462
- package/csv-to-json.js +0 -688
- package/errors.js +0 -208
- package/examples/simple-usage.js +0 -282
- package/index.js +0 -68
- package/json-save.js +0 -254
- package/json-to-csv.js +0 -526
- package/plugins/README.md +0 -91
- package/plugins/express-middleware/README.md +0 -64
- package/plugins/express-middleware/example.js +0 -136
- package/plugins/express-middleware/index.d.ts +0 -114
- package/plugins/express-middleware/index.js +0 -360
- package/plugins/express-middleware/package.json +0 -52
- package/plugins/fastify-plugin/index.js +0 -406
- package/plugins/fastify-plugin/package.json +0 -55
- package/plugins/hono/README.md +0 -28
- package/plugins/hono/index.d.ts +0 -12
- package/plugins/hono/index.js +0 -36
- package/plugins/hono/package.json +0 -35
- package/plugins/nestjs/README.md +0 -35
- package/plugins/nestjs/index.d.ts +0 -25
- package/plugins/nestjs/index.js +0 -77
- package/plugins/nestjs/package.json +0 -37
- package/plugins/nextjs-api/README.md +0 -57
- package/plugins/nextjs-api/examples/ConverterComponent.jsx +0 -386
- package/plugins/nextjs-api/examples/api-convert.js +0 -69
- package/plugins/nextjs-api/index.js +0 -387
- package/plugins/nextjs-api/package.json +0 -63
- package/plugins/nextjs-api/route.js +0 -371
- package/plugins/nuxt/README.md +0 -24
- package/plugins/nuxt/index.js +0 -21
- package/plugins/nuxt/package.json +0 -35
- package/plugins/nuxt/runtime/composables/useJtcsv.js +0 -6
- package/plugins/nuxt/runtime/plugin.js +0 -6
- package/plugins/remix/README.md +0 -26
- package/plugins/remix/index.d.ts +0 -16
- package/plugins/remix/index.js +0 -62
- package/plugins/remix/package.json +0 -35
- package/plugins/sveltekit/README.md +0 -28
- package/plugins/sveltekit/index.d.ts +0 -17
- package/plugins/sveltekit/index.js +0 -54
- package/plugins/sveltekit/package.json +0 -33
- package/plugins/trpc/README.md +0 -25
- package/plugins/trpc/index.d.ts +0 -7
- package/plugins/trpc/index.js +0 -32
- package/plugins/trpc/package.json +0 -34
- package/src/browser/browser-functions.js +0 -219
- package/src/browser/csv-to-json-browser.js +0 -700
- package/src/browser/index.js +0 -113
- package/src/browser/json-to-csv-browser.js +0 -309
- package/src/browser/streams.js +0 -393
- package/src/core/delimiter-cache.js +0 -186
- package/src/core/plugin-system.js +0 -476
- package/src/core/transform-hooks.js +0 -350
- package/src/errors.js +0 -26
- package/src/utils/transform-loader.js +0 -205
- package/stream-csv-to-json.js +0 -542
- package/stream-json-to-csv.js +0 -464
- /package/examples/{web-workers-advanced.js → web-workers-advanced.ts} +0 -0
|
@@ -0,0 +1,1262 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.predefinedHooks = exports.TransformHooks = void 0;
|
|
7
|
+
exports.autoDetectDelimiter = autoDetectDelimiter;
|
|
8
|
+
exports.csvToJson = csvToJson;
|
|
9
|
+
exports.csvToJsonAsync = csvToJsonAsync;
|
|
10
|
+
exports.csvToJsonIterator = csvToJsonIterator;
|
|
11
|
+
exports.readCsvAsJson = readCsvAsJson;
|
|
12
|
+
exports.readCsvAsJsonSync = readCsvAsJsonSync;
|
|
13
|
+
exports.createTransformHooks = createTransformHooks;
|
|
14
|
+
exports.createDelimiterCache = createDelimiterCache;
|
|
15
|
+
exports.getDelimiterCacheStats = getDelimiterCacheStats;
|
|
16
|
+
exports.clearDelimiterCache = clearDelimiterCache;
|
|
17
|
+
const errors_1 = require("./errors");
|
|
18
|
+
const transform_hooks_1 = require("./src/core/transform-hooks");
|
|
19
|
+
Object.defineProperty(exports, "TransformHooks", { enumerable: true, get: function () { return transform_hooks_1.TransformHooks; } });
|
|
20
|
+
Object.defineProperty(exports, "predefinedHooks", { enumerable: true, get: function () { return transform_hooks_1.predefinedHooks; } });
|
|
21
|
+
const delimiter_cache_1 = require("./src/core/delimiter-cache");
|
|
22
|
+
const fast_path_engine_1 = __importDefault(require("./src/engines/fast-path-engine"));
|
|
23
|
+
const bom_utils_1 = require("./src/utils/bom-utils");
|
|
24
|
+
const globalDelimiterCache = new delimiter_cache_1.DelimiterCache(100);
|
|
25
|
+
const globalFastPathEngine = new fast_path_engine_1.default();
|
|
26
|
+
/**
 * Validates the CSV input and the conversion options before parsing.
 *
 * Checks run in a fixed order and the first violation throws, so the error
 * reported always belongs to the earliest invalid option.
 *
 * @param {*} csv - Value to convert; must be a string.
 * @param {*} [options] - Conversion options. Falsy values skip all option checks.
 * @returns {boolean} Always `true` when every check passes.
 * @throws {ValidationError} When `csv` is not a string.
 * @throws {ConfigurationError} When a supplied option has an invalid type or value.
 */
function validateCsvInput(csv, options) {
    if (typeof csv !== 'string') {
        throw new errors_1.ValidationError('Input must be a CSV string');
    }
    if (!options) {
        return true;
    }
    if (typeof options !== 'object') {
        throw new errors_1.ConfigurationError('Options must be an object');
    }
    const fail = (message) => {
        throw new errors_1.ConfigurationError(message);
    };
    const requireBoolean = (name) => {
        if (options[name] !== undefined && typeof options[name] !== 'boolean') {
            fail(`${name} must be a boolean`);
        }
    };
    const requireFunction = (owner, name, label) => {
        if (owner[name] && typeof owner[name] !== 'function') {
            fail(`${label} must be a function`);
        }
    };
    // `value > 0` is false for NaN, so NaN is rejected here. A `<= 0` test would
    // let NaN through, and a NaN memoryLimit would later disable the limit.
    const isPositiveNumber = (value) => typeof value === 'number' && value > 0;

    const { delimiter } = options;
    if (delimiter) {
        if (typeof delimiter !== 'string') {
            fail('Delimiter must be a string');
        }
        if (delimiter.length !== 1) {
            fail('Delimiter must be a single character');
        }
    }
    requireBoolean('autoDetect');
    if (options.candidates && !Array.isArray(options.candidates)) {
        fail('candidates must be an array');
    }
    if (options.maxRows !== undefined && !isPositiveNumber(options.maxRows)) {
        fail('maxRows must be a positive number');
    }
    requireBoolean('useCache');
    if (options.cache && !(options.cache instanceof delimiter_cache_1.DelimiterCache)) {
        fail('cache must be an instance of DelimiterCache');
    }
    requireBoolean('useFastPath');
    if (options.fastPathMode !== undefined
        && !['objects', 'compact', 'stream'].includes(options.fastPathMode)) {
        fail('fastPathMode must be "objects", "compact", or "stream"');
    }
    if (options.memoryWarningThreshold !== undefined
        && !isPositiveNumber(options.memoryWarningThreshold)) {
        fail('memoryWarningThreshold must be a positive number');
    }
    if (options.memoryLimit !== undefined) {
        if (typeof options.memoryLimit !== 'number') {
            fail('memoryLimit must be a number');
        }
        // Infinity passes (`Infinity > 0`); zero, negatives and NaN do not.
        if (!(options.memoryLimit > 0)) {
            fail('memoryLimit must be a positive number or Infinity');
        }
    }
    if (options.onError !== undefined
        && !['skip', 'warn', 'throw'].includes(options.onError)) {
        fail('onError must be "skip", "warn", or "throw"');
    }
    if (options.errorHandler !== undefined && typeof options.errorHandler !== 'function') {
        fail('errorHandler must be a function');
    }
    const { hooks } = options;
    if (hooks) {
        if (typeof hooks !== 'object') {
            fail('hooks must be an object');
        }
        for (const hookName of ['beforeConvert', 'afterConvert', 'perRow']) {
            requireFunction(hooks, hookName, `hooks.${hookName}`);
        }
        if (hooks.transformHooks
            && !(hooks.transformHooks instanceof transform_hooks_1.TransformHooks)) {
            fail('hooks.transformHooks must be an instance of TransformHooks');
        }
        requireFunction(hooks, 'onError', 'hooks.onError');
    }
    return true;
}
|
|
106
|
+
/**
 * Picks the most likely delimiter for a CSV string.
 *
 * Each candidate is scored by how often it occurs in the first ten lines.
 * A candidate that occurs at least once gets a bonus of 100 when the first
 * non-blank line and up to four following non-blank lines all split into the
 * same number of columns. The highest score wins; ties keep the earlier
 * candidate. If no candidate occurs at all, `';'` is returned.
 *
 * @param {string} csv - CSV text to inspect. Blank or non-string input yields `';'`.
 * @param {Object|string[]} [options] - Options object, or an array used as `candidates`.
 * @param {string[]} [options.candidates] - Single-character delimiters to try.
 * @param {boolean} [options.useCache=true] - Whether to consult and update the cache.
 * @param {Object} [options.cache] - Cache exposing `get(csv, candidates)` and
 *   `set(csv, candidates, delimiter)`; defaults to the module-level cache.
 * @returns {string} The detected delimiter, or the cached value when one is found.
 * @throws {ConfigurationError} When `candidates` is not an array of single-character strings.
 */
function autoDetectDelimiter(csv, options = {}) {
    const isBlank = !csv || typeof csv !== 'string' || csv.trim().length === 0;
    if (isBlank) {
        return ';';
    }
    const settings = Array.isArray(options) ? { candidates: options } : (options || {});
    const { candidates = [';', ',', '\t', '|'], useCache = true, cache = globalDelimiterCache } = settings;
    if (!Array.isArray(candidates)) {
        throw new errors_1.ConfigurationError('candidates must be an array');
    }
    for (let idx = 0; idx < candidates.length; idx += 1) {
        const entry = candidates[idx];
        if (typeof entry !== 'string' || entry.length !== 1) {
            throw new errors_1.ConfigurationError('Each candidate must be a single character string');
        }
    }
    const cacheActive = Boolean(useCache && cache);
    if (cacheActive) {
        const hit = cache.get(csv, candidates);
        if (hit !== null) {
            return hit;
        }
    }
    // Only the first ten physical lines take part in the scoring.
    const sample = csv.split('\n').slice(0, 10).join('\n');
    const rows = sample.split('\n').filter((row) => row.trim().length > 0);
    const scoreOf = (separator) => {
        // Splitting on a single character yields one more piece than occurrences.
        const hits = sample.split(separator).length - 1;
        if (hits === 0 || rows.length <= 1) {
            return hits;
        }
        const width = rows[0].split(separator).length;
        const uniform = rows
            .slice(1, 5)
            .every((row) => row.split(separator).length === width);
        return uniform ? hits + 100 : hits;
    };
    let winner = candidates[0];
    let winnerScore = -1;
    for (const separator of candidates) {
        const score = scoreOf(separator);
        if (score > winnerScore) {
            winnerScore = score;
            winner = separator;
        }
    }
    const resolved = winnerScore > 0 ? winner : ';';
    if (cacheActive) {
        cache.set(csv, candidates, resolved);
    }
    return resolved;
}
|
|
165
|
+
/**
 * Returns the first line of `csv` that contains something other than whitespace.
 *
 * Lines are separated on `'\n'` only and the line is returned untrimmed.
 *
 * @param {*} csv - Text to scan.
 * @returns {string|null} The first non-blank line, or `null` when `csv` is
 *   empty, not a string, or contains only blank lines.
 */
function getFirstNonEmptyLine(csv) {
    const isUsable = Boolean(csv) && typeof csv === 'string';
    if (!isUsable) {
        return null;
    }
    const firstWithContent = csv
        .split('\n')
        .find((candidateLine) => candidateLine.trim().length > 0);
    return firstWithContent ?? null;
}
|
|
177
|
+
/**
 * Re-checks a chosen delimiter against the first non-blank line of the CSV.
 *
 * If the current delimiter occurs in that line it is kept. Otherwise the
 * candidate that occurs most often in the line is returned; ties keep the
 * earlier candidate. Candidates that are not single-character strings are
 * ignored.
 *
 * @param {string} csv - CSV text whose first non-blank line is inspected.
 * @param {string} currentDelimiter - Delimiter chosen so far; returned as-is when falsy.
 * @param {Iterable<*>} candidates - Alternative delimiters to consider.
 * @returns {string} The current delimiter, or a better-matching candidate.
 */
function refineDelimiterFromHeaderLine(csv, currentDelimiter, candidates) {
    const header = currentDelimiter ? getFirstNonEmptyLine(csv) : null;
    if (!header) {
        return currentDelimiter;
    }
    // Splitting yields one more piece than the number of separators found.
    const occurrences = (separator) => header.split(separator).length - 1;
    if (occurrences(currentDelimiter) > 0) {
        return currentDelimiter;
    }
    let winner = currentDelimiter;
    let winnerHits = 0;
    for (const separator of candidates) {
        const usable = typeof separator === 'string' && separator.length === 1;
        const hits = usable ? occurrences(separator) : 0;
        if (hits > winnerHits) {
            winnerHits = hits;
            winner = separator;
        }
    }
    return winner;
}
|
|
203
|
+
function csvToJson(csv, options = {}) {
|
|
204
|
+
return (0, errors_1.safeExecuteSync)(() => {
|
|
205
|
+
validateCsvInput(csv, options);
|
|
206
|
+
const opts = options && typeof options === 'object' ? options : {};
|
|
207
|
+
const { delimiter, autoDetect = true, candidates = [';', ',', '\t', '|'], hasHeaders = true, renameMap = {}, trim = true, parseNumbers = false, parseBooleans = false, maxRows, useFastPath = true, fastPathMode = 'objects', schema = null, transform, hooks, useCache = true, cache, onError = 'throw', errorHandler, repairRowShifts = true, normalizeQuotes = true, memoryWarningThreshold = 1000000, memoryLimit = 5000000 } = opts;
|
|
208
|
+
const delimiterProvided = delimiter !== undefined && delimiter !== null;
|
|
209
|
+
const transformHooks = hooks?.transformHooks instanceof transform_hooks_1.TransformHooks
|
|
210
|
+
? hooks.transformHooks
|
|
211
|
+
: null;
|
|
212
|
+
const hooksContext = { options: opts };
|
|
213
|
+
let resolvedUseFastPath = useFastPath;
|
|
214
|
+
if (onError !== 'throw' && resolvedUseFastPath) {
|
|
215
|
+
resolvedUseFastPath = false;
|
|
216
|
+
}
|
|
217
|
+
if (fastPathMode === 'stream') {
|
|
218
|
+
return csvToJsonIterator(csv, opts);
|
|
219
|
+
}
|
|
220
|
+
if (!csv.trim()) {
|
|
221
|
+
return [];
|
|
222
|
+
}
|
|
223
|
+
const normalizedCsv = (0, bom_utils_1.normalizeCsvInput)(csv);
|
|
224
|
+
const cacheToUse = cache instanceof delimiter_cache_1.DelimiterCache ? cache : globalDelimiterCache;
|
|
225
|
+
let finalDelimiter = delimiter;
|
|
226
|
+
if (!finalDelimiter && autoDetect) {
|
|
227
|
+
finalDelimiter = autoDetectDelimiter(normalizedCsv, { candidates, useCache, cache: cacheToUse });
|
|
228
|
+
}
|
|
229
|
+
if (!finalDelimiter) {
|
|
230
|
+
finalDelimiter = ';';
|
|
231
|
+
}
|
|
232
|
+
let processedCsv = normalizedCsv;
|
|
233
|
+
if (transformHooks) {
|
|
234
|
+
processedCsv = transformHooks.applyBeforeConvert(processedCsv, hooksContext);
|
|
235
|
+
}
|
|
236
|
+
if (hooks?.beforeConvert) {
|
|
237
|
+
processedCsv = hooks.beforeConvert(processedCsv, opts);
|
|
238
|
+
}
|
|
239
|
+
if (!delimiterProvided && autoDetect) {
|
|
240
|
+
const refined = refineDelimiterFromHeaderLine(processedCsv, finalDelimiter, candidates) || finalDelimiter;
|
|
241
|
+
if (refined !== finalDelimiter && useCache && cacheToUse) {
|
|
242
|
+
cacheToUse.set(processedCsv, candidates, refined);
|
|
243
|
+
}
|
|
244
|
+
finalDelimiter = refined;
|
|
245
|
+
}
|
|
246
|
+
const applyPerRowHooks = (row, index) => {
|
|
247
|
+
let result = row;
|
|
248
|
+
if (transformHooks) {
|
|
249
|
+
result = transformHooks.applyPerRow(result, index, hooksContext);
|
|
250
|
+
}
|
|
251
|
+
if (hooks?.perRow) {
|
|
252
|
+
result = hooks.perRow(result, index, hooksContext);
|
|
253
|
+
}
|
|
254
|
+
if (transform) {
|
|
255
|
+
result = transform(result);
|
|
256
|
+
}
|
|
257
|
+
return result;
|
|
258
|
+
};
|
|
259
|
+
const normalizeValue = (value) => {
|
|
260
|
+
let normalized = value;
|
|
261
|
+
if (trim && typeof normalized === 'string') {
|
|
262
|
+
normalized = normalized.trim();
|
|
263
|
+
}
|
|
264
|
+
if (typeof normalized === 'string') {
|
|
265
|
+
if (normalized === '') {
|
|
266
|
+
return null;
|
|
267
|
+
}
|
|
268
|
+
if (normalized[0] === "'" && normalized.length > 1) {
|
|
269
|
+
const candidate = normalized.slice(1);
|
|
270
|
+
const leading = trim ? candidate.trimStart() : candidate;
|
|
271
|
+
const firstChar = leading[0];
|
|
272
|
+
if (firstChar === '=' || firstChar === '+' || firstChar === '-' || firstChar === '@') {
|
|
273
|
+
normalized = candidate;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
if (parseNumbers && typeof normalized === 'string') {
|
|
278
|
+
const firstChar = normalized[0];
|
|
279
|
+
if ((firstChar >= '0' && firstChar <= '9') || firstChar === '-' || firstChar === '+' || firstChar === '.') {
|
|
280
|
+
const numValue = Number(normalized);
|
|
281
|
+
if (!Number.isNaN(numValue)) {
|
|
282
|
+
normalized = numValue;
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
if (parseBooleans && typeof normalized === 'string') {
|
|
287
|
+
const firstChar = normalized[0];
|
|
288
|
+
if (firstChar === 't' || firstChar === 'T' || firstChar === 'f' || firstChar === 'F') {
|
|
289
|
+
const lowerValue = normalized.toLowerCase();
|
|
290
|
+
if (lowerValue === 'true' || lowerValue === 'false') {
|
|
291
|
+
normalized = lowerValue === 'true';
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
return normalized;
|
|
296
|
+
};
|
|
297
|
+
const applyAfterConvertHooks = (rows) => {
|
|
298
|
+
let result = rows;
|
|
299
|
+
if (hooks?.afterConvert) {
|
|
300
|
+
result = hooks.afterConvert(result, opts);
|
|
301
|
+
}
|
|
302
|
+
if (transformHooks) {
|
|
303
|
+
result = transformHooks.applyAfterConvert(result, hooksContext);
|
|
304
|
+
}
|
|
305
|
+
return result;
|
|
306
|
+
};
|
|
307
|
+
if (resolvedUseFastPath && globalFastPathEngine) {
|
|
308
|
+
try {
|
|
309
|
+
let fastPathRows = null;
|
|
310
|
+
const fastPathOptions = {
|
|
311
|
+
delimiter: finalDelimiter,
|
|
312
|
+
hasHeaders,
|
|
313
|
+
trim,
|
|
314
|
+
parseNumbers,
|
|
315
|
+
parseBooleans,
|
|
316
|
+
maxRows,
|
|
317
|
+
mode: fastPathMode
|
|
318
|
+
};
|
|
319
|
+
if (typeof globalFastPathEngine.parse === 'function') {
|
|
320
|
+
fastPathRows = globalFastPathEngine.parse(processedCsv, fastPathOptions);
|
|
321
|
+
}
|
|
322
|
+
else if (typeof globalFastPathEngine.parseRows === 'function') {
|
|
323
|
+
const collected = [];
|
|
324
|
+
globalFastPathEngine.parseRows(processedCsv, fastPathOptions, (row) => {
|
|
325
|
+
if (!Array.isArray(row) || row.length === 0) {
|
|
326
|
+
return;
|
|
327
|
+
}
|
|
328
|
+
collected.push(row);
|
|
329
|
+
});
|
|
330
|
+
fastPathRows = collected;
|
|
331
|
+
}
|
|
332
|
+
if (!Array.isArray(fastPathRows)) {
|
|
333
|
+
throw new Error('Fast-path parser returned invalid result');
|
|
334
|
+
}
|
|
335
|
+
const normalizeValue = (value) => {
|
|
336
|
+
let normalized = value;
|
|
337
|
+
if (trim && typeof normalized === 'string') {
|
|
338
|
+
normalized = normalized.trim();
|
|
339
|
+
}
|
|
340
|
+
if (typeof normalized === 'string') {
|
|
341
|
+
if (normalized === '') {
|
|
342
|
+
return null;
|
|
343
|
+
}
|
|
344
|
+
if (normalized[0] === "'" && normalized.length > 1) {
|
|
345
|
+
const candidate = normalized.slice(1);
|
|
346
|
+
const leading = trim ? candidate.trimStart() : candidate;
|
|
347
|
+
const firstChar = leading[0];
|
|
348
|
+
if (firstChar === '=' || firstChar === '+' || firstChar === '-' || firstChar === '@') {
|
|
349
|
+
normalized = candidate;
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
if (parseNumbers && typeof normalized === 'string') {
|
|
354
|
+
const firstChar = normalized[0];
|
|
355
|
+
if ((firstChar >= '0' && firstChar <= '9') || firstChar === '-' || firstChar === '+' || firstChar === '.') {
|
|
356
|
+
const numValue = Number(normalized);
|
|
357
|
+
if (!Number.isNaN(numValue)) {
|
|
358
|
+
normalized = numValue;
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
if (parseBooleans && typeof normalized === 'string') {
|
|
363
|
+
const firstChar = normalized[0];
|
|
364
|
+
if (firstChar === 't' || firstChar === 'T' || firstChar === 'f' || firstChar === 'F') {
|
|
365
|
+
const lowerValue = normalized.toLowerCase();
|
|
366
|
+
if (lowerValue === 'true' || lowerValue === 'false') {
|
|
367
|
+
normalized = lowerValue === 'true';
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
return normalized;
|
|
372
|
+
};
|
|
373
|
+
const handleFastPathError = (error, rowIndex, row) => {
|
|
374
|
+
if (errorHandler) {
|
|
375
|
+
errorHandler(error, row.join(finalDelimiter), hasHeaders ? rowIndex + 2 : rowIndex + 1);
|
|
376
|
+
}
|
|
377
|
+
if (onError === 'warn') {
|
|
378
|
+
if (process.env['NODE_ENV'] !== 'test') {
|
|
379
|
+
console.warn(`[jtcsv] Row ${hasHeaders ? rowIndex + 2 : rowIndex + 1}: ${error.message}`);
|
|
380
|
+
}
|
|
381
|
+
return true;
|
|
382
|
+
}
|
|
383
|
+
if (onError === 'skip') {
|
|
384
|
+
return true;
|
|
385
|
+
}
|
|
386
|
+
throw error;
|
|
387
|
+
};
|
|
388
|
+
if (fastPathMode === 'compact') {
|
|
389
|
+
const dataRows = hasHeaders ? fastPathRows.slice(1) : fastPathRows;
|
|
390
|
+
if (maxRows && dataRows.length > maxRows) {
|
|
391
|
+
throw new errors_1.LimitError(`CSV size exceeds maximum limit of ${maxRows} rows`, maxRows, dataRows.length);
|
|
392
|
+
}
|
|
393
|
+
const limitedRows = dataRows;
|
|
394
|
+
const normalizedRows = [];
|
|
395
|
+
for (const row of limitedRows) {
|
|
396
|
+
if (!Array.isArray(row) || row.length === 0) {
|
|
397
|
+
continue;
|
|
398
|
+
}
|
|
399
|
+
normalizedRows.push(row.map((value) => normalizeValue(value)));
|
|
400
|
+
}
|
|
401
|
+
if (Number.isFinite(memoryLimit) && normalizedRows.length > memoryLimit) {
|
|
402
|
+
throw new errors_1.LimitError(`CSV size exceeds memory safety limit of ${memoryLimit} rows`, memoryLimit, normalizedRows.length);
|
|
403
|
+
}
|
|
404
|
+
if (memoryWarningThreshold
|
|
405
|
+
&& normalizedRows.length > memoryWarningThreshold
|
|
406
|
+
&& process.env['NODE_ENV'] !== 'test') {
|
|
407
|
+
console.warn('Warning: Large in-memory CSV parse detected.\n' +
|
|
408
|
+
'Consider using createCsvToJsonStream() for big files.\n' +
|
|
409
|
+
'Current size: ' + normalizedRows.length.toLocaleString() + ' rows\n' +
|
|
410
|
+
'Tip: Increase memoryLimit or set memoryLimit: Infinity to override.');
|
|
411
|
+
}
|
|
412
|
+
return applyAfterConvertHooks(normalizedRows);
|
|
413
|
+
}
|
|
414
|
+
if (fastPathRows.length === 0) {
|
|
415
|
+
return applyAfterConvertHooks([]);
|
|
416
|
+
}
|
|
417
|
+
if (!Array.isArray(fastPathRows[0])) {
|
|
418
|
+
const limitedRows = maxRows ? fastPathRows.slice(0, maxRows) : fastPathRows;
|
|
419
|
+
const normalizedRows = limitedRows.map((row, index) => {
|
|
420
|
+
let obj = row;
|
|
421
|
+
if (Object.keys(renameMap).length > 0) {
|
|
422
|
+
const renamed = {};
|
|
423
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
424
|
+
const newKey = renameMap[key] || key;
|
|
425
|
+
renamed[newKey] = value;
|
|
426
|
+
}
|
|
427
|
+
obj = renamed;
|
|
428
|
+
}
|
|
429
|
+
return applyPerRowHooks(obj, index);
|
|
430
|
+
});
|
|
431
|
+
if (Number.isFinite(memoryLimit) && normalizedRows.length > memoryLimit) {
|
|
432
|
+
throw new errors_1.LimitError(`CSV size exceeds memory safety limit of ${memoryLimit} rows`, memoryLimit, normalizedRows.length);
|
|
433
|
+
}
|
|
434
|
+
if (memoryWarningThreshold
|
|
435
|
+
&& normalizedRows.length > memoryWarningThreshold
|
|
436
|
+
&& process.env['NODE_ENV'] !== 'test') {
|
|
437
|
+
console.warn('Warning: Large in-memory CSV parse detected.\n' +
|
|
438
|
+
'Consider using createCsvToJsonStream() for big files.\n' +
|
|
439
|
+
'Current size: ' + normalizedRows.length.toLocaleString() + ' rows\n' +
|
|
440
|
+
'Tip: Increase memoryLimit or set memoryLimit: Infinity to override.');
|
|
441
|
+
}
|
|
442
|
+
return applyAfterConvertHooks(normalizedRows);
|
|
443
|
+
}
|
|
444
|
+
const headerRow = hasHeaders ? fastPathRows[0] : null;
|
|
445
|
+
if (hasHeaders && Array.isArray(headerRow) && headerRow.length === 1) {
|
|
446
|
+
const headerText = String(headerRow[0]);
|
|
447
|
+
const hasCandidateDelimiter = (candidates || [';', ',', '\t', '|'])
|
|
448
|
+
.some((candidate) => headerText.includes(candidate));
|
|
449
|
+
if (hasCandidateDelimiter) {
|
|
450
|
+
throw new errors_1.ParsingError('Fast-path parser failed to split headers');
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
const baseHeaders = hasHeaders
|
|
454
|
+
? headerRow.map((header) => (trim ? String(header).trim() : String(header)))
|
|
455
|
+
: fastPathRows[0].map((_, index) => `column${index + 1}`);
|
|
456
|
+
const finalHeaders = baseHeaders.map((header) => renameMap[header] || header);
|
|
457
|
+
const dataRows = hasHeaders ? fastPathRows.slice(1) : fastPathRows;
|
|
458
|
+
if (maxRows && dataRows.length > maxRows) {
|
|
459
|
+
throw new errors_1.LimitError(`CSV size exceeds maximum limit of ${maxRows} rows`, maxRows, dataRows.length);
|
|
460
|
+
}
|
|
461
|
+
const limitedRows = dataRows;
|
|
462
|
+
const rawRows = [];
|
|
463
|
+
for (let rowIndex = 0; rowIndex < limitedRows.length; rowIndex++) {
|
|
464
|
+
const row = limitedRows[rowIndex];
|
|
465
|
+
if (!Array.isArray(row) || row.length === 0) {
|
|
466
|
+
continue;
|
|
467
|
+
}
|
|
468
|
+
let rowValues = row;
|
|
469
|
+
if (rowValues.length !== baseHeaders.length) {
|
|
470
|
+
if (rowValues.length > baseHeaders.length) {
|
|
471
|
+
if (process.env['NODE_ENV'] === 'development') {
|
|
472
|
+
const lineNumber = hasHeaders ? rowIndex + 2 : rowIndex + 1;
|
|
473
|
+
const extraCount = rowValues.length - baseHeaders.length;
|
|
474
|
+
console.warn(`[jtcsv] Line ${lineNumber}: ${extraCount} extra fields ignored`);
|
|
475
|
+
}
|
|
476
|
+
rowValues = rowValues.slice(0, baseHeaders.length);
|
|
477
|
+
}
|
|
478
|
+
else {
|
|
479
|
+
while (rowValues.length < baseHeaders.length) {
|
|
480
|
+
rowValues.push(undefined);
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
const obj = {};
|
|
485
|
+
for (let j = 0; j < finalHeaders.length; j++) {
|
|
486
|
+
let value = rowValues[j];
|
|
487
|
+
value = normalizeValue(value);
|
|
488
|
+
obj[finalHeaders[j]] = value;
|
|
489
|
+
}
|
|
490
|
+
rawRows.push(obj);
|
|
491
|
+
}
|
|
492
|
+
const repairedRows = repairRowShifts
|
|
493
|
+
? repairShiftedRows(rawRows, finalHeaders, { normalizeQuotes })
|
|
494
|
+
: rawRows;
|
|
495
|
+
const normalizedRows = repairedRows.map((row, index) => applyPerRowHooks(row, index));
|
|
496
|
+
if (Number.isFinite(memoryLimit) && normalizedRows.length > memoryLimit) {
|
|
497
|
+
throw new errors_1.LimitError(`CSV size exceeds memory safety limit of ${memoryLimit} rows`, memoryLimit, normalizedRows.length);
|
|
498
|
+
}
|
|
499
|
+
if (memoryWarningThreshold
|
|
500
|
+
&& normalizedRows.length > memoryWarningThreshold
|
|
501
|
+
&& process.env['NODE_ENV'] !== 'test') {
|
|
502
|
+
console.warn('Warning: Large in-memory CSV parse detected.\n' +
|
|
503
|
+
'Consider using createCsvToJsonStream() for big files.\n' +
|
|
504
|
+
'Current size: ' + normalizedRows.length.toLocaleString() + ' rows\n' +
|
|
505
|
+
'Tip: Increase memoryLimit or set memoryLimit: Infinity to override.');
|
|
506
|
+
}
|
|
507
|
+
return applyAfterConvertHooks(normalizedRows);
|
|
508
|
+
}
|
|
509
|
+
catch (error) {
|
|
510
|
+
if (error instanceof errors_1.LimitError) {
|
|
511
|
+
throw error;
|
|
512
|
+
}
|
|
513
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
514
|
+
console.warn('Fast-path parsing failed, falling back to standard parser:', errorMessage);
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
const lines = processedCsv.split('\n').filter(line => line.trim().length > 0);
|
|
518
|
+
if (lines.length === 0) {
|
|
519
|
+
return [];
|
|
520
|
+
}
|
|
521
|
+
let rowsToProcess = lines;
|
|
522
|
+
if (maxRows && lines.length > maxRows) {
|
|
523
|
+
throw new errors_1.LimitError(`CSV size exceeds maximum limit of ${maxRows} rows`, maxRows, lines.length);
|
|
524
|
+
}
|
|
525
|
+
let headers = [];
|
|
526
|
+
let dataRows = rowsToProcess;
|
|
527
|
+
if (hasHeaders) {
|
|
528
|
+
const headerLine = rowsToProcess[0];
|
|
529
|
+
headers = parseCsvLine(headerLine, finalDelimiter, trim, 1);
|
|
530
|
+
dataRows = rowsToProcess.slice(1);
|
|
531
|
+
}
|
|
532
|
+
else {
|
|
533
|
+
const firstRow = parseCsvLine(rowsToProcess[0], finalDelimiter, trim, 1);
|
|
534
|
+
headers = firstRow.map((_, index) => `column${index + 1}`);
|
|
535
|
+
}
|
|
536
|
+
if (Number.isFinite(memoryLimit) && dataRows.length > memoryLimit) {
|
|
537
|
+
throw new errors_1.LimitError(`CSV size exceeds memory safety limit of ${memoryLimit} rows`, memoryLimit, dataRows.length);
|
|
538
|
+
}
|
|
539
|
+
if (memoryWarningThreshold
|
|
540
|
+
&& dataRows.length > memoryWarningThreshold
|
|
541
|
+
&& process.env['NODE_ENV'] !== 'test') {
|
|
542
|
+
console.warn('Warning: Large in-memory CSV parse detected.\n' +
|
|
543
|
+
'Consider using createCsvToJsonStream() for big files.\n' +
|
|
544
|
+
'Current size: ' + dataRows.length.toLocaleString() + ' rows\n' +
|
|
545
|
+
'Tip: Increase memoryLimit or set memoryLimit: Infinity to override.');
|
|
546
|
+
}
|
|
547
|
+
const finalHeaders = headers.map(header => renameMap[header] || header);
|
|
548
|
+
if (fastPathMode === 'compact') {
|
|
549
|
+
const compactResult = [];
|
|
550
|
+
for (let i = 0; i < dataRows.length; i++) {
|
|
551
|
+
if (maxRows && compactResult.length >= maxRows) {
|
|
552
|
+
break;
|
|
553
|
+
}
|
|
554
|
+
const line = dataRows[i];
|
|
555
|
+
const lineNumber = hasHeaders ? i + 2 : i + 1;
|
|
556
|
+
let values = parseCsvLine(line, finalDelimiter, trim, lineNumber);
|
|
557
|
+
if (values.length !== finalHeaders.length) {
|
|
558
|
+
if (values.length > finalHeaders.length) {
|
|
559
|
+
if (process.env['NODE_ENV'] === 'development') {
|
|
560
|
+
const extraCount = values.length - finalHeaders.length;
|
|
561
|
+
console.warn(`[jtcsv] Line ${lineNumber}: ${extraCount} extra fields ignored`);
|
|
562
|
+
}
|
|
563
|
+
values = values.slice(0, finalHeaders.length);
|
|
564
|
+
}
|
|
565
|
+
else {
|
|
566
|
+
while (values.length < finalHeaders.length) {
|
|
567
|
+
values.push(undefined);
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
compactResult.push(values.map((value) => normalizeValue(value)));
|
|
572
|
+
}
|
|
573
|
+
return applyAfterConvertHooks(compactResult);
|
|
574
|
+
}
|
|
575
|
+
const rawRows = [];
|
|
576
|
+
const handleRowError = (error, line, lineNumber) => {
|
|
577
|
+
if (error instanceof errors_1.LimitError) {
|
|
578
|
+
throw error;
|
|
579
|
+
}
|
|
580
|
+
if (errorHandler) {
|
|
581
|
+
errorHandler(error, line, lineNumber);
|
|
582
|
+
}
|
|
583
|
+
if (onError === 'warn') {
|
|
584
|
+
if (process.env['NODE_ENV'] !== 'test') {
|
|
585
|
+
console.warn(`[jtcsv] Line ${lineNumber}: ${error.message}`);
|
|
586
|
+
}
|
|
587
|
+
return true;
|
|
588
|
+
}
|
|
589
|
+
if (onError === 'skip') {
|
|
590
|
+
return true;
|
|
591
|
+
}
|
|
592
|
+
throw error;
|
|
593
|
+
};
|
|
594
|
+
for (let i = 0; i < dataRows.length; i++) {
|
|
595
|
+
const line = dataRows[i];
|
|
596
|
+
const lineNumber = hasHeaders ? i + 2 : i + 1;
|
|
597
|
+
try {
|
|
598
|
+
let values = parseCsvLine(line, finalDelimiter, trim, lineNumber);
|
|
599
|
+
if (values.length !== finalHeaders.length) {
|
|
600
|
+
if (values.length > finalHeaders.length) {
|
|
601
|
+
if (process.env['NODE_ENV'] === 'development') {
|
|
602
|
+
const extraCount = values.length - finalHeaders.length;
|
|
603
|
+
console.warn(`[jtcsv] Line ${lineNumber}: ${extraCount} extra fields ignored`);
|
|
604
|
+
}
|
|
605
|
+
values = values.slice(0, finalHeaders.length);
|
|
606
|
+
}
|
|
607
|
+
else {
|
|
608
|
+
while (values.length < finalHeaders.length) {
|
|
609
|
+
values.push(undefined);
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
const row = {};
|
|
614
|
+
for (let j = 0; j < finalHeaders.length; j++) {
|
|
615
|
+
let value = values[j];
|
|
616
|
+
if (parseNumbers && !isNaN(Number(value)) && value.trim() !== '') {
|
|
617
|
+
value = Number(value);
|
|
618
|
+
}
|
|
619
|
+
if (parseBooleans) {
|
|
620
|
+
const lowerValue = String(value).toLowerCase();
|
|
621
|
+
if (lowerValue === 'true' || lowerValue === 'false') {
|
|
622
|
+
value = lowerValue === 'true';
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
row[finalHeaders[j]] = value;
|
|
626
|
+
}
|
|
627
|
+
rawRows.push(row);
|
|
628
|
+
}
|
|
629
|
+
catch (error) {
|
|
630
|
+
if (handleRowError(error, line, lineNumber)) {
|
|
631
|
+
continue;
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
const repairedRows = repairRowShifts
|
|
636
|
+
? repairShiftedRows(rawRows, finalHeaders, { normalizeQuotes })
|
|
637
|
+
: rawRows;
|
|
638
|
+
const normalizedRows = repairedRows.map((row, index) => applyPerRowHooks(row, index));
|
|
639
|
+
return applyAfterConvertHooks(normalizedRows);
|
|
640
|
+
}, 'PARSING_ERROR', { function: 'csvToJson' });
|
|
641
|
+
}
|
|
642
|
+
/**
 * Splits a single CSV line into its field values.
 *
 * Rules implemented by the scanner:
 * - A backslash copies the following character verbatim (the backslash itself
 *   is dropped); a trailing lone backslash is kept as a literal backslash.
 * - Outside quotes, `delimiter` ends the current field and either `"` or `'`
 *   opens a quoted section; the quote characters are not part of the value.
 * - Inside quotes, a doubled quote character yields one literal quote, and a
 *   single matching quote closes the section.
 *
 * @param {string} line - One line of CSV text.
 * @param {string} delimiter - Field separator, compared character by character.
 * @param {boolean} trim - When truthy, each field is trimmed before being stored.
 * @param {number} [lineNumber] - Line number forwarded to the error factory.
 * @returns {string[]} The fields of the line, in order.
 * @throws The error built by `errors_1.ParsingError.unclosedQuotes` when a
 *   quoted section is still open at the end of the line.
 */
function parseCsvLine(line, delimiter, trim, lineNumber) {
    const fields = [];
    const storeField = (text) => {
        fields.push(trim ? text.trim() : text);
    };
    const lastIndex = line.length - 1;
    let buffer = '';
    // The quote character that opened the current quoted section, or null.
    let openQuote = null;
    let pendingEscape = false;
    for (let pos = 0; pos <= lastIndex; pos++) {
        const ch = line[pos];
        if (pendingEscape) {
            buffer += ch;
            pendingEscape = false;
            continue;
        }
        if (ch === '\\') {
            pendingEscape = true;
            continue;
        }
        if (openQuote === null) {
            if (ch === delimiter) {
                storeField(buffer);
                buffer = '';
            }
            else if (ch === '"' || ch === "'") {
                openQuote = ch;
            }
            else {
                buffer += ch;
            }
            continue;
        }
        if (ch !== openQuote) {
            buffer += ch;
            continue;
        }
        const isDoubled = pos < lastIndex && line[pos + 1] === openQuote;
        if (!isDoubled) {
            openQuote = null;
            continue;
        }
        buffer += ch;
        // A doubled quote that ends the line also closes the quoted section.
        if (pos + 2 === line.length) {
            openQuote = null;
        }
        pos++;
    }
    if (pendingEscape) {
        buffer += '\\';
    }
    storeField(buffer);
    if (openQuote !== null) {
        throw errors_1.ParsingError.unclosedQuotes(lineNumber ?? null, null, line.substring(0, 100));
    }
    return fields;
}
|
|
691
|
+
/**
 * Tells whether a cell counts as empty.
 *
 * @param {*} value - Cell value to inspect.
 * @returns {boolean} True for `undefined`, `null` and the empty string only.
 */
function isEmptyValue(value) {
    // `== null` matches exactly null and undefined.
    if (value == null) {
        return true;
    }
    return value === '';
}
|
|
694
|
+
/**
 * Tells whether a string contains an odd number of double-quote characters.
 *
 * @param {*} value - Value to inspect; non-strings never match.
 * @returns {boolean} True when `value` is a string with an odd count of `"`.
 */
function hasOddQuotes(value) {
    if (typeof value !== 'string') {
        return false;
    }
    // Splitting on the quote yields one more piece than there are quotes.
    const quoteCount = value.split('"').length - 1;
    return quoteCount % 2 === 1;
}
|
|
706
|
+
/**
 * Tells whether a string contains at least one double-quote character.
 *
 * @param {*} value - Value to inspect; non-strings never match.
 * @returns {boolean} True when `value` is a string containing `"`.
 */
function hasAnyQuotes(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return value.includes('"');
}
|
|
709
|
+
/**
 * Cleans up leftover double quotes in a single field value.
 *
 * Non-strings are returned untouched, and so are strings wrapped in `{…}` or
 * `[…]`. Otherwise runs of two or more `"` collapse to one, a `"` directly
 * before or after a newline is removed, and one pair of enclosing `"` is
 * stripped when the result is at least two characters long.
 *
 * @param {*} value - Field value to normalize.
 * @returns {*} The normalized string, or `value` itself when left untouched.
 */
function normalizeQuotesInField(value) {
    if (typeof value !== 'string') {
        return value;
    }
    const braceWrapped = value.startsWith('{') && value.endsWith('}');
    const bracketWrapped = value.startsWith('[') && value.endsWith(']');
    if (braceWrapped || bracketWrapped) {
        return value;
    }
    const collapsed = value
        .replace(/"{2,}/g, '"')
        .replace(/"\n/g, '\n')
        .replace(/\n"/g, '\n');
    const isEnclosed = collapsed.length >= 2
        && collapsed.startsWith('"')
        && collapsed.endsWith('"');
    return isEnclosed ? collapsed.slice(1, -1) : collapsed;
}
|
|
724
|
+
/**
 * Builds a copy of `row` restricted to `headers`, with every value passed
 * through `normalizeQuotesInField`. Values under a phone-like header (matched
 * case-insensitively against a fixed list) are additionally passed through
 * `normalizePhoneValue`.
 *
 * @param {Object} row - Source row keyed by header.
 * @param {Iterable<string>} headers - Keys to copy, in output order.
 * @returns {Object} A new object; `row` is not modified.
 */
function normalizeRowQuotes(row, headers) {
    const phoneHeaderNames = new Set(['phone', 'phonenumber', 'phone_number', 'tel', 'telephone']);
    const cleaned = {};
    for (const name of headers) {
        const unquoted = normalizeQuotesInField(row[name]);
        const isPhoneColumn = phoneHeaderNames.has(String(name).toLowerCase());
        cleaned[name] = isPhoneColumn ? normalizePhoneValue(unquoted) : unquoted;
    }
    return cleaned;
}
|
|
738
|
+
/**
 * Trims a phone value and removes every double quote, single quote and
 * backslash from it.
 *
 * @param {*} value - Phone cell value.
 * @returns {*} The cleaned string, or `value` itself when it is not a string.
 */
function normalizePhoneValue(value) {
    return typeof value === 'string'
        ? value.trim().replace(/["'\\]/g, '')
        : value;
}
|
|
748
|
+
/**
 * Heuristic check for text that resembles a browser user-agent string.
 *
 * @param {*} value - Value to inspect; non-strings never match.
 * @returns {boolean} True when `value` is a string containing one of the
 *   browser tokens listed in the pattern below.
 */
function looksLikeUserAgent(value) {
    const userAgentTokens = /Mozilla\/|Opera\/|MSIE|AppleWebKit|Gecko|Safari|Chrome\//;
    return typeof value === 'string' && userAgentTokens.test(value);
}
|
|
754
|
+
/**
 * Tells whether a value is a `#rgb` or `#rrggbb` hex colour literal.
 *
 * @param {*} value - Value to inspect; non-strings never match.
 * @returns {boolean} True for `#` followed by exactly 3 or 6 hex digits.
 */
function isHexColor(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return /^#([0-9a-fA-F]{3}|[0-9a-fA-F]{6})$/.test(value);
}
|
|
757
|
+
/**
 * Re-joins records that were split across two consecutive rows.
 *
 * Rows are scanned in order and each object row is compared with its
 * successor. Two repairs are attempted, in this order:
 *
 * 1. Trailing-gap merge: the current row ends with N empty columns, its last
 *    filled cell contains a double quote, the next row has at least one
 *    non-empty value in its first N columns and only empty values in its last
 *    N columns. The next row's leading values are appended (newline-joined) to
 *    the last filled cell, and the gap is filled from the next row's cells
 *    starting at index N.
 * 2. User-agent merge: columns 4 and 5 of the current row are empty, column 4
 *    of the next row is a hex colour, and column 2 or 3 of the next row looks
 *    like a user agent. Column 3 absorbs the next row's columns 0 and 1,
 *    column 4 becomes the re-joined user agent and column 5 the colour.
 *
 * A successful repair consumes both rows and emits one merged row. Rows that
 * are not objects are copied through unchanged.
 *
 * @param {Array} rows - Parsed rows (objects keyed by header).
 * @param {string[]} headers - Column keys in positional order.
 * @param {Object} [options]
 * @param {boolean} [options.normalizeQuotes] - When truthy, every output row is
 *   passed through `normalizeRowQuotes`.
 * @returns {Array} `rows` itself when it is not an array, is empty, or
 *   `headers` is empty; otherwise a new array of rows.
 */
function repairShiftedRows(rows, headers, options = {}) {
    const nothingToRepair = !Array.isArray(rows) || rows.length === 0 || headers.length === 0;
    if (nothingToRepair) {
        return rows;
    }
    const width = headers.length;
    const isRecord = (candidate) => Boolean(candidate) && typeof candidate === 'object';
    const cellsOf = (record) => headers.map((name) => record[name]);
    const hasContent = (cell) => !isEmptyValue(cell);
    // Rebuilds a row object from positional cells; cells past `width` are ignored.
    const toRecord = (cells) => {
        const record = {};
        for (let col = 0; col < width; col++) {
            record[headers[col]] = cells[col];
        }
        return record;
    };
    // Stringifies a cell (empty cells become '') and strips leading/trailing quotes.
    const stripEdgeQuotes = (cell) => (isEmptyValue(cell) ? '' : String(cell)).replace(/^"+|"+$/g, '');
    const output = [];
    let cursor = 0;
    while (cursor < rows.length) {
        const current = rows[cursor];
        if (!isRecord(current)) {
            output.push(current);
            cursor += 1;
            continue;
        }
        const cells = cellsOf(current);
        // Index of the right-most non-empty cell, or -1 when the row is blank.
        let lastFilled = width - 1;
        while (lastFilled >= 0 && isEmptyValue(cells[lastFilled])) {
            lastFilled -= 1;
        }
        const gap = width - 1 - lastFilled;
        // Reading past the end gives undefined, which is not a record.
        const following = rows[cursor + 1];
        const next = isRecord(following) ? following : null;
        if (next !== null && lastFilled >= 0 && gap > 0) {
            const nextCells = cellsOf(next);
            const tailIsEmpty = nextCells.slice(width - gap).every((cell) => isEmptyValue(cell));
            const spill = nextCells.slice(0, gap).filter(hasContent);
            const anchor = cells[lastFilled];
            const canMerge = tailIsEmpty
                && spill.length > 0
                && (hasOddQuotes(anchor) || hasAnyQuotes(anchor));
            if (canMerge) {
                const appended = spill.map((cell) => String(cell)).join('\n');
                const base = isEmptyValue(anchor) ? '' : String(anchor);
                cells[lastFilled] = base === '' ? appended : `${base}\n${appended}`;
                for (let offset = 0; offset < gap; offset++) {
                    cells[lastFilled + 1 + offset] = nextCells[gap + offset];
                }
                output.push(toRecord(cells));
                cursor += 2;
                continue;
            }
        }
        if (next !== null) {
            const hexCell = next[headers[4]];
            const uaHeadCell = next[headers[2]];
            const uaTailCell = next[headers[3]];
            const isSplitUserAgent = isEmptyValue(cells[4])
                && isEmptyValue(cells[5])
                && isHexColor(hexCell)
                && (looksLikeUserAgent(uaHeadCell) || looksLikeUserAgent(uaTailCell));
            if (isSplitUserAgent) {
                cells[3] = [cells[3], next[headers[0]], next[headers[1]]]
                    .filter(hasContent)
                    .map((cell) => String(cell))
                    .join('\n');
                const uaHead = stripEdgeQuotes(uaHeadCell);
                const uaTail = stripEdgeQuotes(uaTailCell);
                // Re-insert the comma that split the user agent, unless the
                // tail starts with a space.
                let joiner = '';
                if (uaHead && uaTail && !uaTail.startsWith(' ')) {
                    joiner = ',';
                }
                cells[4] = uaHead + joiner + uaTail;
                cells[5] = String(hexCell);
                output.push(toRecord(cells));
                cursor += 2;
                continue;
            }
        }
        output.push(current);
        cursor += 1;
    }
    return options.normalizeQuotes
        ? output.map((record) => normalizeRowQuotes(record, headers))
        : output;
}
|
|
852
|
+
/**
 * Promise-returning wrapper around `csvToJson`.
 *
 * The worker-related options (`useWorkers`, `workerCount`, `chunkSize`,
 * `onProgress`) are removed from `options` and are not used here; every other
 * option is forwarded to `csvToJson`. The call runs inside
 * `errors_1.safeExecuteAsync`, tagged with the `'PARSING_ERROR'` code.
 *
 * @param {string} csv - CSV text to convert.
 * @param {Object} [options] - Same options as `csvToJson`, plus the ignored
 *   worker options listed above.
 * @returns {Promise<*>} Whatever `safeExecuteAsync` resolves with for the
 *   `csvToJson` call.
 */
async function csvToJsonAsync(csv, options = {}) {
    const convert = async () => {
        // Strip the worker-only keys so they are not passed to the sync parser.
        const { useWorkers = false, workerCount, chunkSize, onProgress, ...syncOptions } = options;
        return csvToJson(csv, syncOptions);
    };
    const errorContext = { function: 'csvToJsonAsync' };
    return (0, errors_1.safeExecuteAsync)(convert, 'PARSING_ERROR', errorContext);
}
|
|
858
|
+
/**
 * Lazily converts CSV text, yielding one converted row at a time.
 *
 * Rows are produced either from `globalFastPathEngine.iterateRows` (when that
 * function exists) or from a line-by-line fallback built on `parseCsvLine`.
 * Both sources share the same per-row pipeline, implemented once in the inner
 * helpers below: limit checks, padding/truncating to the header count, value
 * normalization, optional shifted-row repair, then the per-row hooks.
 *
 * With `repairRowShifts` enabled, each object row is held back for one step so
 * it can be compared with its successor; the held row is flushed at the end.
 *
 * @param {string} csv - CSV text; checked by `validateCsvInput` first.
 * @param {Object} [options]
 * @param {string} [options.delimiter] - Field separator. When absent it is
 *   auto-detected (if `autoDetect`) and finally defaults to `';'`.
 * @param {boolean} [options.autoDetect=true]
 * @param {string[]} [options.candidates=[';', ',', '\t', '|']]
 * @param {boolean} [options.hasHeaders=true] - When false, headers are
 *   generated as `column1`, `column2`, ….
 * @param {Object} [options.renameMap={}] - Header → replacement header.
 * @param {boolean} [options.trim=true]
 * @param {boolean} [options.parseNumbers=false]
 * @param {boolean} [options.parseBooleans=false]
 * @param {number} [options.maxRows] - Row limit; exceeding it throws.
 * @param {boolean} [options.useFastPath=true] - When false the engine is asked
 *   for its `'STANDARD'` implementation.
 * @param {string} [options.fastPathMode='objects'] - `'compact'` yields arrays
 *   of values and skips row repair and the per-row hooks.
 * @param {Function} [options.transform] - Applied last to every object row.
 * @param {Object} [options.hooks] - `beforeConvert`, `perRow`, `transformHooks`.
 * @param {boolean} [options.useCache=true]
 * @param {Object} [options.cache] - Delimiter cache instance.
 * @param {string} [options.onError='throw'] - `'warn'` or `'skip'` drops the
 *   failing row in the line-by-line fallback; anything else rethrows.
 * @param {Function} [options.errorHandler] - Called with (error, line, lineNumber)
 *   for row errors in the line-by-line fallback.
 * @param {boolean} [options.repairRowShifts=true]
 * @param {boolean} [options.normalizeQuotes=true]
 * @param {number} [options.memoryWarningThreshold=1000000]
 * @param {number} [options.memoryLimit=5000000]
 * @yields {Object|Array} One converted row per step.
 * @throws {errors_1.LimitError} When `maxRows` or a finite `memoryLimit` is exceeded.
 * @throws The error built by `errors_1.ParsingError.unclosedQuotes` when the
 *   fast-path engine reports `FAST_PATH_UNCLOSED_QUOTES`.
 */
function* csvToJsonIterator(csv, options = {}) {
    validateCsvInput(csv, options);
    const opts = options && typeof options === 'object' ? options : {};
    const {
        delimiter,
        autoDetect = true,
        candidates = [';', ',', '\t', '|'],
        hasHeaders = true,
        renameMap = {},
        trim = true,
        parseNumbers = false,
        parseBooleans = false,
        maxRows,
        useFastPath = true,
        fastPathMode = 'objects',
        transform,
        hooks,
        useCache = true,
        cache,
        onError = 'throw',
        errorHandler,
        repairRowShifts = true,
        normalizeQuotes = true,
        memoryWarningThreshold = 1000000,
        memoryLimit = 5000000,
    } = opts;
    const delimiterProvided = delimiter !== undefined && delimiter !== null;
    const transformHooks = hooks?.transformHooks instanceof transform_hooks_1.TransformHooks
        ? hooks.transformHooks
        : null;
    const hooksContext = { options: opts };
    if (!csv.trim()) {
        return;
    }
    const normalizedCsv = (0, bom_utils_1.normalizeCsvInput)(csv);
    let processedCsv = normalizedCsv;
    if (transformHooks) {
        processedCsv = transformHooks.applyBeforeConvert(processedCsv, hooksContext);
    }
    if (hooks?.beforeConvert) {
        processedCsv = hooks.beforeConvert(processedCsv, opts);
    }

    // --- Delimiter resolution -------------------------------------------
    let finalDelimiter = delimiter;
    if (!finalDelimiter && autoDetect) {
        const cacheToUse = cache instanceof delimiter_cache_1.DelimiterCache ? cache : globalDelimiterCache;
        finalDelimiter = autoDetectDelimiter(processedCsv, { candidates, useCache, cache: cacheToUse });
    }
    if (!finalDelimiter) {
        finalDelimiter = ';';
    }
    if (!delimiterProvided && autoDetect) {
        const cacheToUse = cache instanceof delimiter_cache_1.DelimiterCache ? cache : globalDelimiterCache;
        const refined = refineDelimiterFromHeaderLine(processedCsv, finalDelimiter, candidates) || finalDelimiter;
        if (refined !== finalDelimiter && useCache && cacheToUse) {
            cacheToUse.set(processedCsv, candidates, refined);
        }
        finalDelimiter = refined;
    }

    // --- Per-row helpers --------------------------------------------------
    // Runs transformHooks, then hooks.perRow, then transform, in that order.
    const applyPerRowHooks = (row, index) => {
        let result = row;
        if (transformHooks) {
            result = transformHooks.applyPerRow(result, index, hooksContext);
        }
        if (hooks?.perRow) {
            result = hooks.perRow(result, index, hooksContext);
        }
        if (transform) {
            result = transform(result);
        }
        return result;
    };
    // Trims, maps '' to null, unwraps a leading apostrophe that precedes a
    // formula-style character (=, +, -, @), then applies the optional
    // number/boolean parsing.
    const normalizeValue = (value) => {
        let normalized = value;
        if (trim && typeof normalized === 'string') {
            normalized = normalized.trim();
        }
        if (typeof normalized === 'string') {
            if (normalized === '') {
                return null;
            }
            if (normalized[0] === "'" && normalized.length > 1) {
                const candidate = normalized.slice(1);
                const leading = trim ? candidate.trimStart() : candidate;
                const firstChar = leading[0];
                if (firstChar === '=' || firstChar === '+' || firstChar === '-' || firstChar === '@') {
                    normalized = candidate;
                }
            }
        }
        if (parseNumbers && typeof normalized === 'string') {
            const firstChar = normalized[0];
            if ((firstChar >= '0' && firstChar <= '9') || firstChar === '-' || firstChar === '+' || firstChar === '.') {
                const numValue = Number(normalized);
                if (!Number.isNaN(numValue)) {
                    normalized = numValue;
                }
            }
        }
        if (parseBooleans && typeof normalized === 'string') {
            const firstChar = normalized[0];
            if (firstChar === 't' || firstChar === 'T' || firstChar === 'f' || firstChar === 'F') {
                const lowerValue = normalized.toLowerCase();
                if (lowerValue === 'true' || lowerValue === 'false') {
                    normalized = lowerValue === 'true';
                }
            }
        }
        return normalized;
    };
    // Returns true when the failing row should be skipped; otherwise rethrows.
    // LimitError is never swallowed.
    const handleRowError = (error, line, lineNumber) => {
        if (error instanceof errors_1.LimitError) {
            throw error;
        }
        if (errorHandler) {
            errorHandler(error, line, lineNumber);
        }
        if (onError === 'warn') {
            if (process.env['NODE_ENV'] !== 'test') {
                console.warn(`[jtcsv] Line ${lineNumber}: ${error.message}`);
            }
            return true;
        }
        if (onError === 'skip') {
            return true;
        }
        throw error;
    };

    // --- State shared by both row sources ---------------------------------
    let rowCount = 0;
    let warnedLargeMemory = false;
    let finalHeaders = [];
    // Row held back for one step so it can be compared with its successor.
    let pendingRow = null;
    const shouldWarnLargeMemory = memoryWarningThreshold && process.env['NODE_ENV'] !== 'test';
    const applyRename = (header) => renameMap[header] || header;

    const assertRowLimit = () => {
        if (maxRows && rowCount >= maxRows) {
            throw new errors_1.LimitError(`CSV size exceeds maximum limit of ${maxRows} rows`, maxRows, rowCount + 1);
        }
    };
    // Checks run before every data row: maxRows, memoryLimit, then the
    // one-time large-input warning.
    const enforceRowBudget = () => {
        assertRowLimit();
        if (Number.isFinite(memoryLimit) && rowCount + 1 > memoryLimit) {
            throw new errors_1.LimitError(`CSV size exceeds memory safety limit of ${memoryLimit} rows`, memoryLimit, rowCount + 1);
        }
        if (!warnedLargeMemory && shouldWarnLargeMemory && rowCount + 1 > memoryWarningThreshold) {
            warnedLargeMemory = true;
            console.warn('Warning: Large in-memory CSV parse detected.\n' +
                'Consider using createCsvToJsonStream() for big files.\n' +
                'Current size: ' + (rowCount + 1).toLocaleString() + ' rows\n' +
                'Tip: Increase memoryLimit or set memoryLimit: Infinity to override.');
        }
    };
    // Truncates extra fields (returning a copy) or pads missing ones with
    // undefined (in place) so the row matches the header count.
    const fitToHeaderCount = (values, lineNumber) => {
        const expected = finalHeaders.length;
        if (values.length > expected) {
            if (process.env['NODE_ENV'] === 'development') {
                const extraCount = values.length - expected;
                console.warn(`[jtcsv] Line ${lineNumber}: ${extraCount} extra fields ignored`);
            }
            return values.slice(0, expected);
        }
        while (values.length < expected) {
            values.push(undefined);
        }
        return values;
    };
    // Builds the object form of a row from positional values.
    const toRowObject = (values) => {
        const rowObj = {};
        for (let j = 0; j < finalHeaders.length; j++) {
            rowObj[finalHeaders[j]] = normalizeValue(values[j]);
        }
        return rowObj;
    };
    // Emits an object row, going through the pending-row repair protocol when
    // repairRowShifts is enabled.
    const emitObjectRow = function* (rowObj) {
        if (!repairRowShifts) {
            const normalizedRow = normalizeQuotes ? normalizeRowQuotes(rowObj, finalHeaders) : rowObj;
            assertRowLimit();
            yield applyPerRowHooks(normalizedRow, rowCount);
            rowCount++;
            return;
        }
        if (!pendingRow) {
            pendingRow = rowObj;
            return;
        }
        const repairedRows = repairShiftedRows([pendingRow, rowObj], finalHeaders, { normalizeQuotes });
        assertRowLimit();
        yield applyPerRowHooks(repairedRows[0], rowCount);
        rowCount++;
        // One result means the two rows were merged; otherwise the second row
        // becomes the new pending row.
        pendingRow = repairedRows.length === 1 ? null : repairedRows[1];
    };
    // Emits the row still held back once the input is exhausted.
    const flushPendingRow = function* () {
        if (!pendingRow) {
            return;
        }
        const flushedRows = repairShiftedRows([pendingRow], finalHeaders, { normalizeQuotes });
        for (const row of flushedRows) {
            assertRowLimit();
            yield applyPerRowHooks(row, rowCount);
            rowCount++;
        }
        pendingRow = null;
    };

    // --- Source 1: fast-path engine --------------------------------------
    if (globalFastPathEngine && typeof globalFastPathEngine.iterateRows === 'function') {
        const iterateOptions = { delimiter: finalDelimiter };
        if (!useFastPath) {
            iterateOptions.forceEngine = 'STANDARD';
        }
        const rowIterator = globalFastPathEngine.iterateRows(processedCsv, iterateOptions);
        let headersProcessed = false;
        try {
            for (const row of rowIterator) {
                if (!Array.isArray(row) || row.length === 0) {
                    continue;
                }
                if (!headersProcessed) {
                    const headers = hasHeaders
                        ? row.map((header) => (trim ? String(header).trim() : String(header)))
                        : row.map((_, index) => `column${index + 1}`);
                    headersProcessed = true;
                    finalHeaders = headers.map(applyRename);
                    if (hasHeaders) {
                        // The header row itself is not data.
                        continue;
                    }
                }
                enforceRowBudget();
                const values = fitToHeaderCount(row, hasHeaders ? rowCount + 2 : rowCount + 1);
                if (fastPathMode === 'compact') {
                    yield values.map((value) => normalizeValue(value));
                    rowCount++;
                    continue;
                }
                yield* emitObjectRow(toRowObject(values));
            }
        }
        catch (error) {
            if (error && error.code === 'FAST_PATH_UNCLOSED_QUOTES') {
                throw errors_1.ParsingError.unclosedQuotes(error.lineNumber ?? null);
            }
            throw error;
        }
        yield* flushPendingRow();
        return;
    }

    // --- Source 2: line-by-line fallback ---------------------------------
    const lines = processedCsv.split('\n').filter(line => line.trim().length > 0);
    if (lines.length === 0) {
        return;
    }
    let headers = [];
    let dataRows = lines;
    if (hasHeaders) {
        headers = parseCsvLine(lines[0], finalDelimiter, trim, 1);
        dataRows = lines.slice(1);
    }
    else {
        const firstRow = parseCsvLine(lines[0], finalDelimiter, trim, 1);
        headers = firstRow.map((_, index) => `column${index + 1}`);
    }
    finalHeaders = headers.map(applyRename);
    for (let i = 0; i < dataRows.length; i++) {
        // Limit checks stay outside the try so they are never routed through
        // the onError policy.
        enforceRowBudget();
        const line = dataRows[i];
        const lineNumber = hasHeaders ? i + 2 : i + 1;
        try {
            const values = fitToHeaderCount(parseCsvLine(line, finalDelimiter, trim, lineNumber), lineNumber);
            if (fastPathMode === 'compact') {
                yield values.map((value) => normalizeValue(value));
                rowCount++;
                continue;
            }
            yield* emitObjectRow(toRowObject(values));
        }
        catch (error) {
            // Either rethrows or reports that the row is to be skipped.
            handleRowError(error, line, lineNumber);
        }
    }
    yield* flushPendingRow();
}
|
|
1180
|
+
/**
 * Validates a caller-supplied CSV file path and returns it with surrounding
 * whitespace removed.
 *
 * Checks run in this order, and the first failing check decides the error:
 *   1. the value is a string that is not empty or whitespace-only;
 *   2. the trimmed path ends with ".csv" (case-insensitive);
 *   3. the trimmed path contains neither ".." nor "~".
 *
 * @param {string} filePath - Path to validate.
 * @returns {string} The trimmed path.
 * @throws {errors_1.ValidationError} If the path is not a non-empty string or
 *   does not end with ".csv".
 * @throws {errors_1.SecurityError} If the path contains ".." or "~".
 */
function validateCsvFilePath(filePath) {
    if (typeof filePath !== 'string' || filePath.trim() === '') {
        throw new errors_1.ValidationError('File path must be a non-empty string');
    }
    // Trim first so that every later check looks at the exact string that is
    // returned. Checking the extension on the raw input rejected "data.csv "
    // while accepting " data.csv", even though both trim to the same path.
    const normalizedPath = filePath.trim();
    if (!normalizedPath.toLowerCase().endsWith('.csv')) {
        throw new errors_1.ValidationError('File must have .csv extension');
    }
    // Intentionally coarse: any ".." or "~" anywhere in the path is refused,
    // not only whole path segments.
    if (normalizedPath.includes('..') || normalizedPath.includes('~')) {
        throw new errors_1.SecurityError('Invalid file path: directory traversal detected');
    }
    // No separate executable-extension blocklist is needed: a path that ends
    // with ".csv" cannot also end with ".exe", ".sh", etc., so such a check
    // could never fire after the extension test above.
    return normalizedPath;
}
|
|
1200
|
+
/**
 * Reads a CSV file asynchronously as UTF-8 text and passes the content to
 * csvToJson.
 *
 * The whole operation runs inside errors_1.safeExecuteAsync with the code
 * 'FILE_SYSTEM_ERROR' and the context { function: 'readCsvAsJson' }.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @param {object} [options={}] - `validatePath` (default true) controls
 *   whether the path goes through validateCsvFilePath; every other property
 *   is forwarded to csvToJson.
 * @returns {Promise<*>} Whatever safeExecuteAsync resolves with for the
 *   csvToJson result.
 * @throws {errors_1.FileSystemError} From the inner handler for any failure
 *   in the read/parse step that is not a ParsingError, ValidationError or
 *   LimitError (those three are rethrown unchanged).
 */
async function readCsvAsJson(filePath, options = {}) {
    const loadAndParse = async () => {
        const { validatePath = true, ...csvOptions } = options;
        const fs = require('fs');
        // Validation happens outside the try block, so its errors are not
        // converted into FileSystemError here.
        let safePath = filePath;
        if (validatePath) {
            safePath = validateCsvFilePath(filePath);
        }
        try {
            const csvContent = await fs.promises.readFile(safePath, 'utf8');
            return csvToJson(csvContent, csvOptions);
        }
        catch (error) {
            const isParserError = error instanceof errors_1.ParsingError
                || error instanceof errors_1.ValidationError
                || error instanceof errors_1.LimitError;
            if (isParserError) {
                throw error;
            }
            // Error codes that get a dedicated, path-based message.
            const pathMessagePrefixes = new Map([
                ['ENOENT', 'File not found'],
                ['EACCES', 'Permission denied'],
                ['EISDIR', 'Path is a directory'],
            ]);
            const prefix = pathMessagePrefixes.get(error.code);
            const message = prefix === undefined
                ? `Failed to read CSV file: ${error.message}`
                : `${prefix}: ${safePath}`;
            throw new errors_1.FileSystemError(message, error);
        }
    };
    return (0, errors_1.safeExecuteAsync)(loadAndParse, 'FILE_SYSTEM_ERROR', { function: 'readCsvAsJson' });
}
|
|
1226
|
+
/**
 * Synchronous counterpart of readCsvAsJson: reads a CSV file as UTF-8 text
 * with fs.readFileSync and passes the content to csvToJson.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @param {object} [options={}] - `validatePath` (default true) controls
 *   whether the path goes through validateCsvFilePath; every other property
 *   is forwarded to csvToJson.
 * @returns {*} The value returned by csvToJson.
 * @throws {errors_1.FileSystemError} For any failure in the read/parse step
 *   that is not a ParsingError, ValidationError or LimitError (those three
 *   are rethrown unchanged). Errors thrown by validateCsvFilePath propagate
 *   as-is because validation runs before the try block.
 */
function readCsvAsJsonSync(filePath, options = {}) {
    const fs = require('fs');
    const { validatePath = true, ...csvOptions } = options;
    let safePath = filePath;
    if (validatePath) {
        safePath = validateCsvFilePath(filePath);
    }
    try {
        const fileText = fs.readFileSync(safePath, 'utf8');
        return csvToJson(fileText, csvOptions);
    }
    catch (error) {
        const passThroughTypes = [errors_1.ParsingError, errors_1.ValidationError, errors_1.LimitError];
        if (passThroughTypes.some((ErrorType) => error instanceof ErrorType)) {
            throw error;
        }
        // Pick the message from the error code; anything unrecognised falls
        // back to the generic text that embeds the original message.
        let message;
        switch (error.code) {
            case 'ENOENT':
                message = `File not found: ${safePath}`;
                break;
            case 'EACCES':
                message = `Permission denied: ${safePath}`;
                break;
            case 'EISDIR':
                message = `Path is a directory: ${safePath}`;
                break;
            default:
                message = `Failed to read CSV file: ${error.message}`;
        }
        throw new errors_1.FileSystemError(message, error);
    }
}
|
|
1250
|
+
/**
 * Factory for a fresh TransformHooks object.
 *
 * @returns {transform_hooks_1.TransformHooks} A newly constructed instance,
 *   created with no constructor arguments.
 */
function createTransformHooks() {
    const hooks = new transform_hooks_1.TransformHooks();
    return hooks;
}
|
|
1253
|
+
/**
 * Factory for a new DelimiterCache.
 *
 * @param {number} [maxSize=100] - Value handed to the DelimiterCache
 *   constructor as its only argument.
 * @returns {delimiter_cache_1.DelimiterCache} The newly constructed cache.
 */
function createDelimiterCache(maxSize = 100) {
    const cache = new delimiter_cache_1.DelimiterCache(maxSize);
    return cache;
}
|
|
1256
|
+
/**
 * Reports statistics for the module-wide delimiter cache.
 *
 * @returns {*} Whatever globalDelimiterCache.getStats() returns.
 */
function getDelimiterCacheStats() {
    const stats = globalDelimiterCache.getStats();
    return stats;
}
|
|
1259
|
+
/**
 * Clears the module-wide delimiter cache by calling its clear() method.
 *
 * @returns {void}
 */
function clearDelimiterCache() {
    const sharedCache = globalDelimiterCache;
    sharedCache.clear();
}
|
|
1262
|
+
//# sourceMappingURL=csv-to-json.js.map
|