jtcsv 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/README.md +205 -146
  2. package/bin/jtcsv.ts +280 -202
  3. package/browser.d.ts +142 -0
  4. package/dist/benchmark.js +446 -0
  5. package/dist/benchmark.js.map +1 -0
  6. package/dist/bin/jtcsv.js +1940 -0
  7. package/dist/bin/jtcsv.js.map +1 -0
  8. package/dist/csv-to-json.js +1262 -0
  9. package/dist/csv-to-json.js.map +1 -0
  10. package/dist/errors.js +291 -0
  11. package/dist/errors.js.map +1 -0
  12. package/dist/eslint.config.js +147 -0
  13. package/dist/eslint.config.js.map +1 -0
  14. package/dist/index-core.js +95 -0
  15. package/dist/index-core.js.map +1 -0
  16. package/dist/index.js +93 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/json-save.js +229 -0
  19. package/dist/json-save.js.map +1 -0
  20. package/dist/json-to-csv.js +576 -0
  21. package/dist/json-to-csv.js.map +1 -0
  22. package/dist/jtcsv-core.cjs.js +336 -7
  23. package/dist/jtcsv-core.cjs.js.map +1 -1
  24. package/dist/jtcsv-core.esm.js +336 -7
  25. package/dist/jtcsv-core.esm.js.map +1 -1
  26. package/dist/jtcsv-core.umd.js +336 -7
  27. package/dist/jtcsv-core.umd.js.map +1 -1
  28. package/dist/jtcsv-full.cjs.js +336 -7
  29. package/dist/jtcsv-full.cjs.js.map +1 -1
  30. package/dist/jtcsv-full.esm.js +336 -7
  31. package/dist/jtcsv-full.esm.js.map +1 -1
  32. package/dist/jtcsv-full.umd.js +336 -7
  33. package/dist/jtcsv-full.umd.js.map +1 -1
  34. package/dist/jtcsv-workers.esm.js +9 -0
  35. package/dist/jtcsv-workers.esm.js.map +1 -1
  36. package/dist/jtcsv-workers.umd.js +9 -0
  37. package/dist/jtcsv-workers.umd.js.map +1 -1
  38. package/dist/jtcsv.cjs.js +1998 -2092
  39. package/dist/jtcsv.cjs.js.map +1 -1
  40. package/dist/jtcsv.esm.js +1994 -2092
  41. package/dist/jtcsv.esm.js.map +1 -1
  42. package/dist/jtcsv.umd.js +2157 -2251
  43. package/dist/jtcsv.umd.js.map +1 -1
  44. package/dist/plugins/express-middleware/index.js +350 -0
  45. package/dist/plugins/express-middleware/index.js.map +1 -0
  46. package/dist/plugins/fastify-plugin/index.js +315 -0
  47. package/dist/plugins/fastify-plugin/index.js.map +1 -0
  48. package/dist/plugins/hono/index.js +111 -0
  49. package/dist/plugins/hono/index.js.map +1 -0
  50. package/dist/plugins/nestjs/index.js +112 -0
  51. package/dist/plugins/nestjs/index.js.map +1 -0
  52. package/dist/plugins/nuxt/index.js +53 -0
  53. package/dist/plugins/nuxt/index.js.map +1 -0
  54. package/dist/plugins/remix/index.js +133 -0
  55. package/dist/plugins/remix/index.js.map +1 -0
  56. package/dist/plugins/sveltekit/index.js +155 -0
  57. package/dist/plugins/sveltekit/index.js.map +1 -0
  58. package/dist/plugins/trpc/index.js +136 -0
  59. package/dist/plugins/trpc/index.js.map +1 -0
  60. package/dist/run-demo.js +49 -0
  61. package/dist/run-demo.js.map +1 -0
  62. package/dist/src/browser/browser-functions.js +193 -0
  63. package/dist/src/browser/browser-functions.js.map +1 -0
  64. package/dist/src/browser/core.js +123 -0
  65. package/dist/src/browser/core.js.map +1 -0
  66. package/dist/src/browser/csv-to-json-browser.js +353 -0
  67. package/dist/src/browser/csv-to-json-browser.js.map +1 -0
  68. package/dist/src/browser/errors-browser.js +219 -0
  69. package/dist/src/browser/errors-browser.js.map +1 -0
  70. package/dist/src/browser/extensions/plugins.js +106 -0
  71. package/dist/src/browser/extensions/plugins.js.map +1 -0
  72. package/dist/src/browser/extensions/workers.js +66 -0
  73. package/dist/src/browser/extensions/workers.js.map +1 -0
  74. package/dist/src/browser/index.js +140 -0
  75. package/dist/src/browser/index.js.map +1 -0
  76. package/dist/src/browser/json-to-csv-browser.js +225 -0
  77. package/dist/src/browser/json-to-csv-browser.js.map +1 -0
  78. package/dist/src/browser/streams.js +340 -0
  79. package/dist/src/browser/streams.js.map +1 -0
  80. package/dist/src/browser/workers/csv-parser.worker.js +264 -0
  81. package/dist/src/browser/workers/csv-parser.worker.js.map +1 -0
  82. package/dist/src/browser/workers/worker-pool.js +338 -0
  83. package/dist/src/browser/workers/worker-pool.js.map +1 -0
  84. package/dist/src/core/delimiter-cache.js +196 -0
  85. package/dist/src/core/delimiter-cache.js.map +1 -0
  86. package/dist/src/core/node-optimizations.js +279 -0
  87. package/dist/src/core/node-optimizations.js.map +1 -0
  88. package/dist/src/core/plugin-system.js +399 -0
  89. package/dist/src/core/plugin-system.js.map +1 -0
  90. package/dist/src/core/transform-hooks.js +348 -0
  91. package/dist/src/core/transform-hooks.js.map +1 -0
  92. package/dist/src/engines/fast-path-engine-new.js +262 -0
  93. package/dist/src/engines/fast-path-engine-new.js.map +1 -0
  94. package/dist/src/engines/fast-path-engine.js +671 -0
  95. package/dist/src/engines/fast-path-engine.js.map +1 -0
  96. package/dist/src/errors.js +18 -0
  97. package/dist/src/errors.js.map +1 -0
  98. package/dist/src/formats/ndjson-parser.js +332 -0
  99. package/dist/src/formats/ndjson-parser.js.map +1 -0
  100. package/dist/src/formats/tsv-parser.js +230 -0
  101. package/dist/src/formats/tsv-parser.js.map +1 -0
  102. package/dist/src/index-with-plugins.js +259 -0
  103. package/dist/src/index-with-plugins.js.map +1 -0
  104. package/dist/src/types/index.js +3 -0
  105. package/dist/src/types/index.js.map +1 -0
  106. package/dist/src/utils/bom-utils.js +267 -0
  107. package/dist/src/utils/bom-utils.js.map +1 -0
  108. package/dist/src/utils/encoding-support.js +77 -0
  109. package/dist/src/utils/encoding-support.js.map +1 -0
  110. package/dist/src/utils/schema-validator.js +609 -0
  111. package/dist/src/utils/schema-validator.js.map +1 -0
  112. package/dist/src/utils/transform-loader.js +281 -0
  113. package/dist/src/utils/transform-loader.js.map +1 -0
  114. package/dist/src/utils/validators.js +40 -0
  115. package/dist/src/utils/validators.js.map +1 -0
  116. package/dist/src/utils/zod-adapter.js +144 -0
  117. package/dist/src/utils/zod-adapter.js.map +1 -0
  118. package/{src → dist/src}/web-server/index.js +251 -286
  119. package/dist/src/web-server/index.js.map +1 -0
  120. package/dist/src/workers/csv-multithreaded.js +211 -0
  121. package/dist/src/workers/csv-multithreaded.js.map +1 -0
  122. package/dist/src/workers/csv-parser.worker.js +179 -0
  123. package/dist/src/workers/csv-parser.worker.js.map +1 -0
  124. package/dist/src/workers/worker-pool.js +228 -0
  125. package/dist/src/workers/worker-pool.js.map +1 -0
  126. package/dist/stream-csv-to-json.js +665 -0
  127. package/dist/stream-csv-to-json.js.map +1 -0
  128. package/dist/stream-json-to-csv.js +389 -0
  129. package/dist/stream-json-to-csv.js.map +1 -0
  130. package/examples/advanced/conditional-transformations.ts +2 -2
  131. package/examples/advanced/performance-optimization.ts +2 -2
  132. package/examples/cli-advanced-usage.md +2 -0
  133. package/examples/cli-tool.ts +1 -1
  134. package/examples/large-dataset-example.ts +2 -2
  135. package/examples/simple-usage.ts +2 -2
  136. package/examples/streaming-example.ts +1 -1
  137. package/index.d.ts +186 -15
  138. package/package.json +43 -108
  139. package/plugins.d.ts +37 -0
  140. package/schema.d.ts +103 -0
  141. package/src/browser/csv-to-json-browser.ts +233 -3
  142. package/src/browser/errors-browser.ts +45 -28
  143. package/src/browser/json-to-csv-browser.ts +81 -5
  144. package/src/browser/streams.ts +73 -6
  145. package/src/core/delimiter-cache.ts +21 -11
  146. package/src/core/plugin-system.ts +343 -155
  147. package/src/core/transform-hooks.ts +20 -12
  148. package/src/engines/fast-path-engine.ts +48 -32
  149. package/src/errors.ts +1 -72
  150. package/src/formats/ndjson-parser.ts +6 -0
  151. package/src/formats/tsv-parser.ts +6 -0
  152. package/src/types/index.ts +21 -1
  153. package/src/utils/validators.ts +35 -0
  154. package/src/web-server/index.ts +1 -1
  155. package/bin/jtcsv.js +0 -2532
  156. package/csv-to-json.js +0 -711
  157. package/errors.js +0 -394
  158. package/examples/advanced/conditional-transformations.js +0 -446
  159. package/examples/advanced/csv-parser.worker.js +0 -89
  160. package/examples/advanced/nested-objects-example.js +0 -306
  161. package/examples/advanced/performance-optimization.js +0 -504
  162. package/examples/advanced/run-demo-server.js +0 -116
  163. package/examples/cli-batch-processing.js +0 -38
  164. package/examples/cli-tool.js +0 -183
  165. package/examples/error-handling.js +0 -338
  166. package/examples/express-api.js +0 -164
  167. package/examples/large-dataset-example.js +0 -182
  168. package/examples/ndjson-processing.js +0 -434
  169. package/examples/plugin-excel-exporter.js +0 -406
  170. package/examples/schema-validation.js +0 -640
  171. package/examples/simple-usage.js +0 -282
  172. package/examples/streaming-example.js +0 -418
  173. package/examples/web-workers-advanced.js +0 -28
  174. package/index.js +0 -82
  175. package/json-save.js +0 -255
  176. package/json-to-csv.js +0 -668
  177. package/plugins/README.md +0 -91
  178. package/plugins/express-middleware/README.md +0 -83
  179. package/plugins/express-middleware/example.js +0 -135
  180. package/plugins/express-middleware/example.ts +0 -135
  181. package/plugins/express-middleware/index.d.ts +0 -114
  182. package/plugins/express-middleware/index.js +0 -512
  183. package/plugins/express-middleware/index.ts +0 -557
  184. package/plugins/express-middleware/package.json +0 -52
  185. package/plugins/fastify-plugin/index.js +0 -404
  186. package/plugins/fastify-plugin/index.ts +0 -443
  187. package/plugins/fastify-plugin/package.json +0 -55
  188. package/plugins/hono/README.md +0 -28
  189. package/plugins/hono/index.d.ts +0 -12
  190. package/plugins/hono/index.js +0 -36
  191. package/plugins/hono/index.ts +0 -226
  192. package/plugins/hono/package.json +0 -35
  193. package/plugins/nestjs/README.md +0 -35
  194. package/plugins/nestjs/index.d.ts +0 -25
  195. package/plugins/nestjs/index.js +0 -77
  196. package/plugins/nestjs/index.ts +0 -201
  197. package/plugins/nestjs/package.json +0 -37
  198. package/plugins/nextjs-api/README.md +0 -57
  199. package/plugins/nextjs-api/examples/ConverterComponent.jsx +0 -386
  200. package/plugins/nextjs-api/examples/ConverterComponent.tsx +0 -386
  201. package/plugins/nextjs-api/examples/api-convert.js +0 -67
  202. package/plugins/nextjs-api/examples/api-convert.ts +0 -67
  203. package/plugins/nextjs-api/index.js +0 -387
  204. package/plugins/nextjs-api/index.tsx +0 -339
  205. package/plugins/nextjs-api/package.json +0 -63
  206. package/plugins/nextjs-api/route.js +0 -370
  207. package/plugins/nextjs-api/route.ts +0 -370
  208. package/plugins/nuxt/README.md +0 -24
  209. package/plugins/nuxt/index.js +0 -21
  210. package/plugins/nuxt/index.ts +0 -94
  211. package/plugins/nuxt/package.json +0 -35
  212. package/plugins/nuxt/runtime/composables/useJtcsv.js +0 -6
  213. package/plugins/nuxt/runtime/composables/useJtcsv.ts +0 -100
  214. package/plugins/nuxt/runtime/plugin.js +0 -6
  215. package/plugins/nuxt/runtime/plugin.ts +0 -71
  216. package/plugins/remix/README.md +0 -26
  217. package/plugins/remix/index.d.ts +0 -16
  218. package/plugins/remix/index.js +0 -62
  219. package/plugins/remix/index.ts +0 -260
  220. package/plugins/remix/package.json +0 -35
  221. package/plugins/sveltekit/README.md +0 -28
  222. package/plugins/sveltekit/index.d.ts +0 -17
  223. package/plugins/sveltekit/index.js +0 -54
  224. package/plugins/sveltekit/index.ts +0 -301
  225. package/plugins/sveltekit/package.json +0 -33
  226. package/plugins/trpc/README.md +0 -25
  227. package/plugins/trpc/index.d.ts +0 -7
  228. package/plugins/trpc/index.js +0 -32
  229. package/plugins/trpc/index.ts +0 -267
  230. package/plugins/trpc/package.json +0 -34
  231. package/src/browser/browser-functions.js +0 -219
  232. package/src/browser/core.js +0 -92
  233. package/src/browser/csv-to-json-browser.js +0 -722
  234. package/src/browser/errors-browser.js +0 -212
  235. package/src/browser/extensions/plugins.js +0 -92
  236. package/src/browser/extensions/workers.js +0 -39
  237. package/src/browser/index.js +0 -113
  238. package/src/browser/json-to-csv-browser.js +0 -319
  239. package/src/browser/streams.js +0 -403
  240. package/src/browser/workers/csv-parser.worker.js +0 -377
  241. package/src/browser/workers/worker-pool.js +0 -527
  242. package/src/core/delimiter-cache.js +0 -200
  243. package/src/core/node-optimizations.js +0 -408
  244. package/src/core/plugin-system.js +0 -494
  245. package/src/core/transform-hooks.js +0 -350
  246. package/src/engines/fast-path-engine-new.js +0 -338
  247. package/src/engines/fast-path-engine.js +0 -844
  248. package/src/errors.js +0 -26
  249. package/src/formats/ndjson-parser.js +0 -467
  250. package/src/formats/tsv-parser.js +0 -339
  251. package/src/index-with-plugins.js +0 -378
  252. package/src/utils/bom-utils.js +0 -259
  253. package/src/utils/encoding-support.js +0 -124
  254. package/src/utils/schema-validator.js +0 -594
  255. package/src/utils/transform-loader.js +0 -205
  256. package/src/utils/zod-adapter.js +0 -170
  257. package/stream-csv-to-json.js +0 -560
  258. package/stream-json-to-csv.js +0 -465
@@ -1,844 +0,0 @@
1
/**
 * Fast-Path Engine for optimised CSV parsing.
 * Automatically selects the most suitable parser based on the structure of the CSV input.
 *
 * Three engine labels are used: 'SIMPLE' (no quotes), 'QUOTE_AWARE' and
 * 'STANDARD'. The last two are both served by the quote-aware state machine;
 * they differ only in which usage counter they increment.
 *
 * @version 1.0.0
 * @date 2026-01-22
 */
class FastPathEngine {
  constructor() {
    // Array-returning parsers, cached by the structure fields that affect parsing.
    this.compilers = new Map();
    // Callback-style row emitters, cached the same way.
    this.rowCompilers = new Map();
    this.stats = this._createEmptyStats();
  }

  /**
   * Builds a zeroed statistics record (used by the constructor and reset()).
   * @returns {{simpleParserCount: number, quoteAwareParserCount: number,
   *   standardParserCount: number, cacheHits: number, cacheMisses: number}}
   */
  _createEmptyStats() {
    return {
      simpleParserCount: 0,
      quoteAwareParserCount: 0,
      standardParserCount: 0,
      cacheHits: 0,
      cacheMisses: 0
    };
  }

  /** @returns {boolean} true when the input contains at least one double quote. */
  _hasQuotes(csv) {
    return csv.includes('"');
  }

  /** @returns {boolean} true when the input contains a doubled double quote (""). */
  _hasEscapedQuotes(csv) {
    return csv.includes('""');
  }

  /** @returns {boolean} true when the input contains at least one backslash. */
  _hasBackslashes(csv) {
    return csv.includes('\\');
  }

  /**
   * Analyses the first 1000 characters of the input and then corrects the
   * quote/backslash flags against the whole input, so that quotes appearing
   * only after the sample still force the quote-aware engine.
   *
   * @param {string} csv - Full CSV text.
   * @param {{delimiter?: string, forceEngine?: string}|null} [options]
   * @returns {object} The analyzeStructure() result extended with
   *   `hasBackslashes` and whole-input `hasQuotes` / `hasEscapedQuotes`.
   */
  _getStructureForParse(csv, options) {
    // Default parameters do not cover null, so normalise explicitly.
    const opts = options || {};
    const sample = csv.substring(0, Math.min(1000, csv.length));
    const structure = this.analyzeStructure(sample, opts);

    const hasBackslashes = this._hasBackslashes(csv);
    const hasQuotes = structure.hasQuotes || this._hasQuotes(csv);
    const hasEscapedQuotes =
      structure.hasEscapedQuotes || (hasQuotes && this._hasEscapedQuotes(csv));

    const normalized = {
      ...structure,
      hasQuotes,
      hasEscapedQuotes,
      hasBackslashes
    };

    // The sample looked quote-free but the rest of the input is not.
    if (structure.recommendedEngine === 'SIMPLE' && hasQuotes) {
      normalized.hasNewlinesInFields = true;
      normalized.recommendedEngine = 'QUOTE_AWARE';
    }

    if (opts.forceEngine) {
      normalized.recommendedEngine = opts.forceEngine;
    }

    return normalized;
  }

  /**
   * Analyses the structure of a CSV sample and determines the optimal parser.
   * Only the first 10 '\n'-separated lines of the sample are inspected.
   *
   * @param {string} sample - Leading portion of the CSV text.
   * @param {{delimiter?: string}|null} [options] - `delimiter` skips auto-detection.
   * @returns {{delimiter: string, hasQuotes: boolean, hasEscapedQuotes: boolean,
   *   hasNewlinesInFields: boolean, fieldConsistency: boolean,
   *   avgFieldsPerLine: number, maxFields: number, recommendedEngine: string}}
   */
  analyzeStructure(sample, options = {}) {
    const opts = options || {};
    const delimiter = opts.delimiter || this._detectDelimiter(sample);
    const lines = sample.split('\n').slice(0, 10);

    let hasQuotes = false;
    let hasNewlinesInFields = false;
    let hasEscapedQuotes = false;
    let maxFields = 0;
    let totalFields = 0;

    for (const line of lines) {
      if (line.includes('"')) {
        hasQuotes = true;
        if (line.includes('""')) {
          hasEscapedQuotes = true;
        }
      }

      // An odd number of quotes on a physical line means a quoted field
      // continues onto the next line.
      const quoteCount = (line.match(/"/g) || []).length;
      if (quoteCount % 2 !== 0) {
        hasNewlinesInFields = true;
      }

      const fieldCount = line.split(delimiter).length;
      totalFields += fieldCount;
      maxFields = Math.max(maxFields, fieldCount);
    }

    const avgFieldsPerLine = totalFields / lines.length;
    const fieldConsistency = maxFields === avgFieldsPerLine;

    return {
      delimiter,
      hasQuotes,
      hasEscapedQuotes,
      hasNewlinesInFields,
      fieldConsistency,
      avgFieldsPerLine,
      maxFields,
      recommendedEngine: this._selectEngine(hasQuotes, hasNewlinesInFields, fieldConsistency)
    };
  }

  /**
   * Automatically detects the delimiter from the first line of the sample.
   * Candidates are ',', ';', tab and '|'. Each candidate present in the line
   * is scored as fieldCount / (variance of field lengths + 1); the highest
   * score wins, the earliest candidate wins ties, and ',' is the fallback.
   *
   * @param {string} sample
   * @returns {string}
   */
  _detectDelimiter(sample) {
    const candidates = [',', ';', '\t', '|'];
    const firstLine = sample.split('\n')[0];

    let bestDelimiter = ',';
    let bestScore = 0;

    for (const candidate of candidates) {
      // Skip delimiters that do not occur in the line at all.
      if (!firstLine.includes(candidate)) {
        continue;
      }

      const fields = firstLine.split(candidate);
      const count = fields.length;
      const avgLength = fields.reduce((sum, field) => sum + field.length, 0) / count;
      const variance = fields.reduce((sum, field) => {
        const diff = field.length - avgLength;
        return sum + diff * diff;
      }, 0) / count;

      const score = count / (variance + 1);
      if (score > bestScore) {
        bestScore = score;
        bestDelimiter = candidate;
      }
    }

    return bestDelimiter;
  }

  /**
   * Selects the optimal parsing engine.
   *
   * @param {boolean} hasQuotes
   * @param {boolean} hasNewlinesInFields
   * @param {boolean} _fieldConsistency - Accepted for compatibility; not used.
   * @returns {'SIMPLE'|'QUOTE_AWARE'|'STANDARD'}
   */
  _selectEngine(hasQuotes, hasNewlinesInFields, _fieldConsistency) {
    if (hasNewlinesInFields) {
      return 'STANDARD';
    }
    return hasQuotes ? 'QUOTE_AWARE' : 'SIMPLE';
  }

  /**
   * Creates a simple parser (delimiter-separated values without quotes).
   *
   * @param {{delimiter: string, hasBackslashes?: boolean}} structure
   * @returns {(csv: string) => string[][]}
   */
  _createSimpleParser(structure) {
    const emitRows = this._createSimpleRowEmitter(structure);

    return (csv) => {
      const rows = [];
      emitRows(csv, (row) => rows.push(row));
      return rows;
    };
  }

  /**
   * Calls `onRow` for every row of a quote-free CSV (no backslash handling).
   * Thin callback wrapper around _simpleRowsGenerator.
   */
  _emitSimpleRows(csv, delimiter, onRow) {
    for (const row of this._simpleRowsGenerator(csv, delimiter)) {
      onRow(row);
    }
  }

  /**
   * Calls `onRow` for every row of a quote-free CSV, resolving backslash escapes.
   * Thin callback wrapper around _simpleEscapedRowsGenerator.
   */
  _emitSimpleRowsEscaped(csv, delimiter, onRow) {
    for (const row of this._simpleEscapedRowsGenerator(csv, delimiter)) {
      onRow(row);
    }
  }

  /**
   * Yields rows of a quote-free CSV by slicing fields straight out of the input.
   * Lines end at '\n', '\r' or '\r\n'. Rows containing only spaces, tabs and
   * line breaks are skipped.
   *
   * @param {string} csv
   * @param {string} delimiter
   * @returns {Generator<string[]>}
   */
  *_simpleRowsGenerator(csv, delimiter) {
    const length = csv.length;
    let currentRow = [];
    let rowHasData = false;
    let fieldStart = 0;

    // Runs one step past the end: a virtual '\n' flushes the last row.
    for (let i = 0; i <= length; i++) {
      const char = i < length ? csv[i] : '\n';
      const isLineBreak = char === '\n' || char === '\r';

      if (!isLineBreak && char !== ' ' && char !== '\t') {
        rowHasData = true;
      }

      if (!isLineBreak && char !== delimiter) {
        continue;
      }

      currentRow.push(csv.slice(fieldStart, i));

      if (isLineBreak) {
        if (rowHasData) {
          yield currentRow;
        }
        currentRow = [];
        rowHasData = false;

        // Treat CRLF as a single line break.
        if (char === '\r' && csv[i + 1] === '\n') {
          i++;
        }
      }

      fieldStart = i + 1;
    }
  }

  /**
   * Yields rows of a quote-free CSV while resolving backslash escapes:
   * `\\` becomes a single backslash, `\x` becomes a literal `x` (so `\,` does
   * not split the field), and a backslash that is the last character or sits
   * directly before a line break is kept as-is.
   *
   * @param {string} csv
   * @param {string} delimiter
   * @returns {Generator<string[]>}
   */
  *_simpleEscapedRowsGenerator(csv, delimiter) {
    const length = csv.length;
    let currentRow = [];
    let currentField = '';
    let rowHasData = false;
    let escapeNext = false;
    let i = 0;

    // Runs one step past the end: a virtual '\n' flushes the last row.
    while (i <= length) {
      const char = i < length ? csv[i] : '\n';
      const nextChar = i + 1 < length ? csv[i + 1] : '';
      const isLineBreak = char === '\n' || char === '\r';

      if (!isLineBreak && char !== ' ' && char !== '\t') {
        rowHasData = true;
      }

      if (escapeNext) {
        currentField += char;
        escapeNext = false;
        i++;
        continue;
      }

      if (char === '\\') {
        if (i + 1 >= length || nextChar === '\n' || nextChar === '\r') {
          currentField += '\\';
          i++;
        } else if (nextChar === '\\') {
          currentField += '\\';
          i += 2;
        } else {
          escapeNext = true;
          i++;
        }
        continue;
      }

      if (isLineBreak || char === delimiter) {
        currentRow.push(currentField);
        currentField = '';

        if (isLineBreak) {
          if (rowHasData) {
            yield currentRow;
          }
          currentRow = [];
          rowHasData = false;

          // Treat CRLF as a single line break.
          if (char === '\r' && csv[i + 1] === '\n') {
            i++;
          }
        }

        i++;
        continue;
      }

      currentField += char;
      i++;
    }
  }

  /**
   * Simple row emitter that avoids storing all rows in memory.
   *
   * @param {{delimiter: string, hasBackslashes?: boolean}} structure
   * @returns {(csv: string, onRow: (row: string[]) => void) => void}
   */
  _createSimpleRowEmitter(structure) {
    const { delimiter, hasBackslashes } = structure;

    return (csv, onRow) => {
      if (hasBackslashes) {
        this._emitSimpleRowsEscaped(csv, delimiter, onRow);
      } else {
        this._emitSimpleRows(csv, delimiter, onRow);
      }
    };
  }

  /**
   * State-machine parser for CSV with quotes (RFC 4180 style).
   *
   * @param {{delimiter: string, hasEscapedQuotes?: boolean, hasBackslashes?: boolean}} structure
   * @returns {(csv: string) => string[][]}
   * The returned parser throws an Error with code 'FAST_PATH_UNCLOSED_QUOTES'
   * when a quoted field is never closed.
   */
  _createQuoteAwareParser(structure) {
    const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;

    return (csv) => [
      ...this._selectQuoteAwareGenerator(csv, delimiter, hasEscapedQuotes, hasBackslashes)
    ];
  }

  /**
   * Quote-aware row emitter that avoids storing all rows in memory.
   *
   * @param {{delimiter: string, hasEscapedQuotes?: boolean, hasBackslashes?: boolean}} structure
   * @returns {(csv: string, onRow: (row: string[]) => void) => void}
   */
  _createQuoteAwareRowEmitter(structure) {
    const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;

    return (csv, onRow) => {
      const rows = this._selectQuoteAwareGenerator(csv, delimiter, hasEscapedQuotes, hasBackslashes);
      for (const row of rows) {
        onRow(row);
      }
    };
  }

  /**
   * Picks the quote-aware generator matching the backslash setting.
   * @returns {Generator<string[]>}
   */
  _selectQuoteAwareGenerator(csv, delimiter, hasEscapedQuotes, hasBackslashes) {
    return hasBackslashes
      ? this._quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes)
      : this._quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes);
  }

  /**
   * Yields rows of a quoted CSV; backslashes are ordinary characters.
   *
   * @param {string} csv
   * @param {string} delimiter
   * @param {boolean} hasEscapedQuotes - Whether `""` inside quotes means a literal quote.
   * @returns {Generator<string[]>}
   * @throws {Error} code 'FAST_PATH_UNCLOSED_QUOTES' when a quote is never closed.
   */
  *_quoteAwareRowsGenerator(csv, delimiter, hasEscapedQuotes) {
    yield* this._scanQuotedRows(csv, delimiter, hasEscapedQuotes, false);
  }

  /**
   * Yields rows of a quoted CSV and additionally resolves backslash escapes
   * (`\\` -> `\`, `\x` -> literal `x`, including `\"`).
   *
   * @param {string} csv
   * @param {string} delimiter
   * @param {boolean} hasEscapedQuotes - Whether `""` inside quotes means a literal quote.
   * @returns {Generator<string[]>}
   * @throws {Error} code 'FAST_PATH_UNCLOSED_QUOTES' when a quote is never closed.
   */
  *_quoteAwareEscapedRowsGenerator(csv, delimiter, hasEscapedQuotes) {
    yield* this._scanQuotedRows(csv, delimiter, hasEscapedQuotes, true);
  }

  /**
   * Reports whether a quote ending just before `from` closes its field: after
   * skipping spaces and tabs, the input must end or continue with the
   * delimiter or a line break.
   *
   * @param {string} csv
   * @param {number} from - Index of the first character after the quote(s).
   * @param {string} delimiter
   * @returns {boolean}
   */
  _closesQuotedField(csv, from, delimiter) {
    let j = from;
    while (j < csv.length && (csv[j] === ' ' || csv[j] === '\t')) {
      j++;
    }
    return j >= csv.length || csv[j] === delimiter || csv[j] === '\n' || csv[j] === '\r';
  }

  /**
   * Shared state machine behind both quote-aware generators.
   *
   * Inside a quoted field, a quote closes the field only when it is followed
   * (ignoring spaces/tabs) by the delimiter, a line break or the end of input;
   * otherwise it is kept as a literal quote. When `hasEscapedQuotes` is set, a
   * doubled quote contributes one literal quote and also closes the field if
   * that same condition holds right after the pair.
   *
   * @param {string} csv
   * @param {string} delimiter
   * @param {boolean} hasEscapedQuotes
   * @param {boolean} backslashEscapes - Resolve backslash escapes when true.
   * @returns {Generator<string[]>}
   * @throws {Error} 'Unclosed quotes in CSV' with `code`
   *   'FAST_PATH_UNCLOSED_QUOTES' and `lineNumber` (line breaks inside quoted
   *   fields are not counted).
   */
  *_scanQuotedRows(csv, delimiter, hasEscapedQuotes, backslashEscapes) {
    const length = csv.length;
    let currentRow = [];
    let currentField = '';
    let rowHasData = false;
    let insideQuotes = false;
    let escapeNext = false;
    let lineNumber = 1;
    let i = 0;

    while (i < length) {
      const char = csv[i];
      const nextChar = csv[i + 1];
      const isLineBreak = char === '\n' || char === '\r';

      if (!isLineBreak && char !== ' ' && char !== '\t') {
        rowHasData = true;
      }

      if (escapeNext) {
        currentField += char;
        escapeNext = false;
        i++;
        continue;
      }

      if (backslashEscapes && char === '\\') {
        const nextIsLineBreak = nextChar === '\n' || nextChar === '\r';
        if (i + 1 >= length || (!insideQuotes && nextIsLineBreak)) {
          // Trailing backslash, or backslash before an unquoted line break: literal.
          currentField += '\\';
          i++;
        } else if (nextChar === '\\') {
          currentField += '\\';
          i += 2;
        } else {
          escapeNext = true;
          i++;
        }
        continue;
      }

      if (char === '"') {
        if (!insideQuotes) {
          insideQuotes = true;
          i++;
        } else if (hasEscapedQuotes && nextChar === '"') {
          currentField += '"';
          i += 2;
          if (this._closesQuotedField(csv, i, delimiter)) {
            insideQuotes = false;
          }
        } else if (this._closesQuotedField(csv, i + 1, delimiter)) {
          insideQuotes = false;
          i++;
        } else {
          currentField += '"';
          i++;
        }
        continue;
      }

      if (!insideQuotes && (isLineBreak || char === delimiter)) {
        currentRow.push(currentField);
        currentField = '';

        if (isLineBreak) {
          if (rowHasData) {
            yield currentRow;
          }
          currentRow = [];
          rowHasData = false;
          lineNumber++;

          // Treat CRLF as a single line break.
          if (char === '\r' && nextChar === '\n') {
            i++;
          }
        }

        i++;
        continue;
      }

      currentField += char;
      i++;
    }

    // Defensive: a pending escape at end of input keeps its backslash.
    if (escapeNext) {
      currentField += '\\';
    }

    if (insideQuotes) {
      const error = new Error('Unclosed quotes in CSV');
      error.code = 'FAST_PATH_UNCLOSED_QUOTES';
      error.lineNumber = lineNumber;
      throw error;
    }

    // Flush the last row when the input does not end with a line break.
    // Keyed on rowHasData so that a final row consisting solely of an empty
    // quoted field ("") is emitted exactly as it is when a newline follows.
    if (rowHasData) {
      currentRow.push(currentField);
      yield currentRow;
    }
  }

  /**
   * Builds the cache key for compiled parsers/emitters. Only fields that
   * select or configure the parser take part, so inputs that differ merely in
   * per-file metrics (field counts, averages) share one cache entry.
   *
   * @param {object} structure
   * @returns {string}
   */
  _structureCacheKey(structure) {
    return JSON.stringify([
      structure.recommendedEngine,
      structure.delimiter,
      Boolean(structure.hasEscapedQuotes),
      Boolean(structure.hasBackslashes)
    ]);
  }

  /**
   * Returns a (cached) parser for the given structure and updates statistics.
   *
   * @param {object} structure - Result of analyzeStructure / _getStructureForParse.
   * @returns {(csv: string) => string[][]}
   */
  compileParser(structure) {
    const cacheKey = this._structureCacheKey(structure);

    // Check the cache first.
    const cached = this.compilers.get(cacheKey);
    if (cached) {
      this.stats.cacheHits++;
      return cached;
    }

    this.stats.cacheMisses++;

    let parser;
    if (structure.recommendedEngine === 'SIMPLE') {
      parser = this._createSimpleParser(structure);
      this.stats.simpleParserCount++;
    } else {
      // 'STANDARD' and unknown engine names use the quote-aware parser too,
      // but are counted as standard parsers.
      parser = this._createQuoteAwareParser(structure);
      if (structure.recommendedEngine === 'QUOTE_AWARE') {
        this.stats.quoteAwareParserCount++;
      } else {
        this.stats.standardParserCount++;
      }
    }

    // Cache the parser.
    this.compilers.set(cacheKey, parser);

    return parser;
  }

  /**
   * Compiles a row-emitter parser for streaming conversion.
   * Emitters are cached separately from parsers and do not affect statistics.
   *
   * @param {object} structure
   * @returns {(csv: string, onRow: (row: string[]) => void) => void}
   */
  compileRowEmitter(structure) {
    const cacheKey = this._structureCacheKey(structure);

    const cached = this.rowCompilers.get(cacheKey);
    if (cached) {
      return cached;
    }

    const emitter = structure.recommendedEngine === 'SIMPLE'
      ? this._createSimpleRowEmitter(structure)
      : this._createQuoteAwareRowEmitter(structure);

    this.rowCompilers.set(cacheKey, emitter);
    return emitter;
  }

  /**
   * Iterates rows without allocating the full result set.
   *
   * @param {string} csv
   * @param {{delimiter?: string, forceEngine?: string}|null} [options]
   * @returns {Generator<string[]>}
   */
  *iterateRows(csv, options = {}) {
    const structure = this._getStructureForParse(csv, options);
    const { delimiter, hasEscapedQuotes, hasBackslashes } = structure;

    if (structure.recommendedEngine === 'SIMPLE') {
      if (hasBackslashes) {
        yield* this._simpleEscapedRowsGenerator(csv, delimiter);
      } else {
        yield* this._simpleRowsGenerator(csv, delimiter);
      }
      return;
    }

    yield* this._selectQuoteAwareGenerator(csv, delimiter, hasEscapedQuotes, hasBackslashes);
  }

  /**
   * Parses CSV using the optimal parser.
   *
   * @param {string} csv
   * @param {{delimiter?: string, forceEngine?: string}|null} [options]
   * @returns {string[][]} One array of field strings per row.
   */
  parse(csv, options = {}) {
    const structure = this._getStructureForParse(csv, options);
    return this.compileParser(structure)(csv);
  }

  /**
   * Parses CSV and emits rows via a callback to reduce memory usage.
   *
   * @param {string} csv
   * @param {{delimiter?: string, forceEngine?: string}|null} options
   * @param {(row: string[]) => void} onRow
   */
  parseRows(csv, options = {}, onRow) {
    for (const row of this.iterateRows(csv, options)) {
      onRow(row);
    }
  }

  /**
   * Returns parser usage statistics.
   *
   * @returns {object} The counters plus `totalParsers` (cached parser count)
   *   and `hitRate` (0 when no lookups have happened yet).
   */
  getStats() {
    const lookups = this.stats.cacheHits + this.stats.cacheMisses;

    return {
      ...this.stats,
      totalParsers: this.compilers.size,
      hitRate: lookups === 0 ? 0 : this.stats.cacheHits / lookups
    };
  }

  /**
   * Resets the statistics and clears both caches.
   */
  reset() {
    this.compilers.clear();
    this.rowCompilers.clear();
    this.stats = this._createEmptyStats();
  }
}
843
-
844
- module.exports = FastPathEngine;