jtcsv 2.2.8 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/README.md +204 -115
  2. package/bin/jtcsv.ts +2612 -0
  3. package/browser.d.ts +142 -0
  4. package/dist/benchmark.js +446 -0
  5. package/dist/benchmark.js.map +1 -0
  6. package/dist/bin/jtcsv.js +1940 -0
  7. package/dist/bin/jtcsv.js.map +1 -0
  8. package/dist/csv-to-json.js +1262 -0
  9. package/dist/csv-to-json.js.map +1 -0
  10. package/dist/errors.js +291 -0
  11. package/dist/errors.js.map +1 -0
  12. package/dist/eslint.config.js +147 -0
  13. package/dist/eslint.config.js.map +1 -0
  14. package/dist/index-core.js +95 -0
  15. package/dist/index-core.js.map +1 -0
  16. package/dist/index.js +93 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/json-save.js +229 -0
  19. package/dist/json-save.js.map +1 -0
  20. package/dist/json-to-csv.js +576 -0
  21. package/dist/json-to-csv.js.map +1 -0
  22. package/dist/jtcsv-core.cjs.js +1736 -0
  23. package/dist/jtcsv-core.cjs.js.map +1 -0
  24. package/dist/jtcsv-core.esm.js +1708 -0
  25. package/dist/jtcsv-core.esm.js.map +1 -0
  26. package/dist/jtcsv-core.umd.js +1742 -0
  27. package/dist/jtcsv-core.umd.js.map +1 -0
  28. package/dist/jtcsv-full.cjs.js +2241 -0
  29. package/dist/jtcsv-full.cjs.js.map +1 -0
  30. package/dist/jtcsv-full.esm.js +2209 -0
  31. package/dist/jtcsv-full.esm.js.map +1 -0
  32. package/dist/jtcsv-full.umd.js +2247 -0
  33. package/dist/jtcsv-full.umd.js.map +1 -0
  34. package/dist/jtcsv-workers.esm.js +768 -0
  35. package/dist/jtcsv-workers.esm.js.map +1 -0
  36. package/dist/jtcsv-workers.umd.js +782 -0
  37. package/dist/jtcsv-workers.umd.js.map +1 -0
  38. package/dist/jtcsv.cjs.js +1996 -2048
  39. package/dist/jtcsv.cjs.js.map +1 -1
  40. package/dist/jtcsv.esm.js +1992 -2048
  41. package/dist/jtcsv.esm.js.map +1 -1
  42. package/dist/jtcsv.umd.js +2157 -2209
  43. package/dist/jtcsv.umd.js.map +1 -1
  44. package/dist/plugins/express-middleware/index.js +350 -0
  45. package/dist/plugins/express-middleware/index.js.map +1 -0
  46. package/dist/plugins/fastify-plugin/index.js +315 -0
  47. package/dist/plugins/fastify-plugin/index.js.map +1 -0
  48. package/dist/plugins/hono/index.js +111 -0
  49. package/dist/plugins/hono/index.js.map +1 -0
  50. package/dist/plugins/nestjs/index.js +112 -0
  51. package/dist/plugins/nestjs/index.js.map +1 -0
  52. package/dist/plugins/nuxt/index.js +53 -0
  53. package/dist/plugins/nuxt/index.js.map +1 -0
  54. package/dist/plugins/remix/index.js +133 -0
  55. package/dist/plugins/remix/index.js.map +1 -0
  56. package/dist/plugins/sveltekit/index.js +155 -0
  57. package/dist/plugins/sveltekit/index.js.map +1 -0
  58. package/dist/plugins/trpc/index.js +136 -0
  59. package/dist/plugins/trpc/index.js.map +1 -0
  60. package/dist/run-demo.js +49 -0
  61. package/dist/run-demo.js.map +1 -0
  62. package/dist/src/browser/browser-functions.js +193 -0
  63. package/dist/src/browser/browser-functions.js.map +1 -0
  64. package/dist/src/browser/core.js +123 -0
  65. package/dist/src/browser/core.js.map +1 -0
  66. package/dist/src/browser/csv-to-json-browser.js +353 -0
  67. package/dist/src/browser/csv-to-json-browser.js.map +1 -0
  68. package/dist/src/browser/errors-browser.js +219 -0
  69. package/dist/src/browser/errors-browser.js.map +1 -0
  70. package/dist/src/browser/extensions/plugins.js +106 -0
  71. package/dist/src/browser/extensions/plugins.js.map +1 -0
  72. package/dist/src/browser/extensions/workers.js +66 -0
  73. package/dist/src/browser/extensions/workers.js.map +1 -0
  74. package/dist/src/browser/index.js +140 -0
  75. package/dist/src/browser/index.js.map +1 -0
  76. package/dist/src/browser/json-to-csv-browser.js +225 -0
  77. package/dist/src/browser/json-to-csv-browser.js.map +1 -0
  78. package/dist/src/browser/streams.js +340 -0
  79. package/dist/src/browser/streams.js.map +1 -0
  80. package/dist/src/browser/workers/csv-parser.worker.js +264 -0
  81. package/dist/src/browser/workers/csv-parser.worker.js.map +1 -0
  82. package/dist/src/browser/workers/worker-pool.js +338 -0
  83. package/dist/src/browser/workers/worker-pool.js.map +1 -0
  84. package/dist/src/core/delimiter-cache.js +196 -0
  85. package/dist/src/core/delimiter-cache.js.map +1 -0
  86. package/dist/src/core/node-optimizations.js +279 -0
  87. package/dist/src/core/node-optimizations.js.map +1 -0
  88. package/dist/src/core/plugin-system.js +399 -0
  89. package/dist/src/core/plugin-system.js.map +1 -0
  90. package/dist/src/core/transform-hooks.js +348 -0
  91. package/dist/src/core/transform-hooks.js.map +1 -0
  92. package/dist/src/engines/fast-path-engine-new.js +262 -0
  93. package/dist/src/engines/fast-path-engine-new.js.map +1 -0
  94. package/dist/src/engines/fast-path-engine.js +671 -0
  95. package/dist/src/engines/fast-path-engine.js.map +1 -0
  96. package/dist/src/errors.js +18 -0
  97. package/dist/src/errors.js.map +1 -0
  98. package/dist/src/formats/ndjson-parser.js +332 -0
  99. package/dist/src/formats/ndjson-parser.js.map +1 -0
  100. package/dist/src/formats/tsv-parser.js +230 -0
  101. package/dist/src/formats/tsv-parser.js.map +1 -0
  102. package/dist/src/index-with-plugins.js +259 -0
  103. package/dist/src/index-with-plugins.js.map +1 -0
  104. package/dist/src/types/index.js +3 -0
  105. package/dist/src/types/index.js.map +1 -0
  106. package/dist/src/utils/bom-utils.js +267 -0
  107. package/dist/src/utils/bom-utils.js.map +1 -0
  108. package/dist/src/utils/encoding-support.js +77 -0
  109. package/dist/src/utils/encoding-support.js.map +1 -0
  110. package/dist/src/utils/schema-validator.js +609 -0
  111. package/dist/src/utils/schema-validator.js.map +1 -0
  112. package/dist/src/utils/transform-loader.js +281 -0
  113. package/dist/src/utils/transform-loader.js.map +1 -0
  114. package/dist/src/utils/validators.js +40 -0
  115. package/dist/src/utils/validators.js.map +1 -0
  116. package/dist/src/utils/zod-adapter.js +144 -0
  117. package/dist/src/utils/zod-adapter.js.map +1 -0
  118. package/dist/src/web-server/index.js +648 -0
  119. package/dist/src/web-server/index.js.map +1 -0
  120. package/dist/src/workers/csv-multithreaded.js +211 -0
  121. package/dist/src/workers/csv-multithreaded.js.map +1 -0
  122. package/dist/src/workers/csv-parser.worker.js +179 -0
  123. package/dist/src/workers/csv-parser.worker.js.map +1 -0
  124. package/dist/src/workers/worker-pool.js +228 -0
  125. package/dist/src/workers/worker-pool.js.map +1 -0
  126. package/dist/stream-csv-to-json.js +665 -0
  127. package/dist/stream-csv-to-json.js.map +1 -0
  128. package/dist/stream-json-to-csv.js +389 -0
  129. package/dist/stream-json-to-csv.js.map +1 -0
  130. package/examples/advanced/conditional-transformations.ts +446 -0
  131. package/examples/advanced/csv-parser.worker.ts +89 -0
  132. package/examples/advanced/nested-objects-example.ts +306 -0
  133. package/examples/advanced/performance-optimization.ts +504 -0
  134. package/examples/advanced/run-demo-server.ts +116 -0
  135. package/examples/advanced/web-worker-usage.html +874 -0
  136. package/examples/async-multithreaded-example.ts +335 -0
  137. package/examples/cli-advanced-usage.md +290 -0
  138. package/examples/{cli-batch-processing.js → cli-batch-processing.ts} +38 -38
  139. package/examples/{cli-tool.js → cli-tool.ts} +5 -8
  140. package/examples/{error-handling.js → error-handling.ts} +356 -324
  141. package/examples/{express-api.js → express-api.ts} +161 -164
  142. package/examples/{large-dataset-example.js → large-dataset-example.ts} +201 -182
  143. package/examples/{ndjson-processing.js → ndjson-processing.ts} +456 -434
  144. package/examples/{plugin-excel-exporter.js → plugin-excel-exporter.ts} +6 -7
  145. package/examples/react-integration.tsx +637 -0
  146. package/examples/{schema-validation.js → schema-validation.ts} +2 -2
  147. package/examples/simple-usage.ts +194 -0
  148. package/examples/{streaming-example.js → streaming-example.ts} +12 -12
  149. package/index.d.ts +187 -18
  150. package/package.json +75 -81
  151. package/plugins.d.ts +37 -0
  152. package/schema.d.ts +103 -0
  153. package/src/browser/browser-functions.ts +402 -0
  154. package/src/browser/core.ts +152 -0
  155. package/src/browser/csv-to-json-browser.d.ts +3 -0
  156. package/src/browser/csv-to-json-browser.ts +494 -0
  157. package/src/browser/{errors-browser.js → errors-browser.ts} +305 -197
  158. package/src/browser/extensions/plugins.ts +93 -0
  159. package/src/browser/extensions/workers.ts +39 -0
  160. package/src/browser/globals.d.ts +5 -0
  161. package/src/browser/index.ts +192 -0
  162. package/src/browser/json-to-csv-browser.d.ts +3 -0
  163. package/src/browser/json-to-csv-browser.ts +338 -0
  164. package/src/browser/streams.ts +403 -0
  165. package/src/browser/workers/{csv-parser.worker.js → csv-parser.worker.ts} +3 -3
  166. package/src/browser/workers/{worker-pool.js → worker-pool.ts} +51 -30
  167. package/src/core/delimiter-cache.ts +320 -0
  168. package/src/core/{node-optimizations.js → node-optimizations.ts} +448 -407
  169. package/src/core/plugin-system.ts +588 -0
  170. package/src/core/transform-hooks.ts +566 -0
  171. package/src/engines/{fast-path-engine-new.js → fast-path-engine-new.ts} +11 -2
  172. package/src/engines/{fast-path-engine.js → fast-path-engine.ts} +79 -53
  173. package/src/errors.ts +1 -0
  174. package/src/formats/{ndjson-parser.js → ndjson-parser.ts} +24 -16
  175. package/src/formats/{tsv-parser.js → tsv-parser.ts} +18 -17
  176. package/src/{index-with-plugins.js → index-with-plugins.ts} +381 -357
  177. package/src/types/index.ts +275 -0
  178. package/src/utils/bom-utils.ts +373 -0
  179. package/src/utils/encoding-support.ts +155 -0
  180. package/src/utils/{schema-validator.js → schema-validator.ts} +814 -589
  181. package/src/utils/transform-loader.ts +389 -0
  182. package/src/utils/validators.ts +35 -0
  183. package/src/utils/zod-adapter.ts +280 -0
  184. package/src/web-server/{index.js → index.ts} +19 -19
  185. package/src/workers/csv-multithreaded.ts +310 -0
  186. package/src/workers/csv-parser.worker.ts +227 -0
  187. package/src/workers/worker-pool.ts +409 -0
  188. package/bin/jtcsv.js +0 -2462
  189. package/csv-to-json.js +0 -688
  190. package/errors.js +0 -208
  191. package/examples/simple-usage.js +0 -282
  192. package/index.js +0 -68
  193. package/json-save.js +0 -254
  194. package/json-to-csv.js +0 -526
  195. package/plugins/README.md +0 -91
  196. package/plugins/express-middleware/README.md +0 -64
  197. package/plugins/express-middleware/example.js +0 -136
  198. package/plugins/express-middleware/index.d.ts +0 -114
  199. package/plugins/express-middleware/index.js +0 -360
  200. package/plugins/express-middleware/package.json +0 -52
  201. package/plugins/fastify-plugin/index.js +0 -406
  202. package/plugins/fastify-plugin/package.json +0 -55
  203. package/plugins/hono/README.md +0 -28
  204. package/plugins/hono/index.d.ts +0 -12
  205. package/plugins/hono/index.js +0 -36
  206. package/plugins/hono/package.json +0 -35
  207. package/plugins/nestjs/README.md +0 -35
  208. package/plugins/nestjs/index.d.ts +0 -25
  209. package/plugins/nestjs/index.js +0 -77
  210. package/plugins/nestjs/package.json +0 -37
  211. package/plugins/nextjs-api/README.md +0 -57
  212. package/plugins/nextjs-api/examples/ConverterComponent.jsx +0 -386
  213. package/plugins/nextjs-api/examples/api-convert.js +0 -69
  214. package/plugins/nextjs-api/index.js +0 -387
  215. package/plugins/nextjs-api/package.json +0 -63
  216. package/plugins/nextjs-api/route.js +0 -371
  217. package/plugins/nuxt/README.md +0 -24
  218. package/plugins/nuxt/index.js +0 -21
  219. package/plugins/nuxt/package.json +0 -35
  220. package/plugins/nuxt/runtime/composables/useJtcsv.js +0 -6
  221. package/plugins/nuxt/runtime/plugin.js +0 -6
  222. package/plugins/remix/README.md +0 -26
  223. package/plugins/remix/index.d.ts +0 -16
  224. package/plugins/remix/index.js +0 -62
  225. package/plugins/remix/package.json +0 -35
  226. package/plugins/sveltekit/README.md +0 -28
  227. package/plugins/sveltekit/index.d.ts +0 -17
  228. package/plugins/sveltekit/index.js +0 -54
  229. package/plugins/sveltekit/package.json +0 -33
  230. package/plugins/trpc/README.md +0 -25
  231. package/plugins/trpc/index.d.ts +0 -7
  232. package/plugins/trpc/index.js +0 -32
  233. package/plugins/trpc/package.json +0 -34
  234. package/src/browser/browser-functions.js +0 -219
  235. package/src/browser/csv-to-json-browser.js +0 -700
  236. package/src/browser/index.js +0 -113
  237. package/src/browser/json-to-csv-browser.js +0 -309
  238. package/src/browser/streams.js +0 -393
  239. package/src/core/delimiter-cache.js +0 -186
  240. package/src/core/plugin-system.js +0 -476
  241. package/src/core/transform-hooks.js +0 -350
  242. package/src/errors.js +0 -26
  243. package/src/utils/transform-loader.js +0 -205
  244. package/stream-csv-to-json.js +0 -542
  245. package/stream-json-to-csv.js +0 -464
  246. /package/examples/{web-workers-advanced.js → web-workers-advanced.ts} +0 -0
@@ -0,0 +1,267 @@
1
+ "use strict";
2
// Compiler-emitted module-interop helpers. Each one reuses an implementation
// already present on `this` (if any) before falling back to the local one.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2
// defaults to k). When Object.create exists it defines a property, using an
// enumerable getter unless m already has a suitable descriptor for k;
// otherwise it falls back to a plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): stores v as the enumerable "default" property of o.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): returns mod unchanged when it is flagged __esModule;
// otherwise builds a new object that re-exposes every own property of mod
// except "default", and sets "default" to mod itself.
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that function is missing.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
// Flags this CommonJS module with __esModule so interop helpers treat it as a
// transpiled ES module.
+ Object.defineProperty(exports, "__esModule", { value: true });
36
// Placeholder; the real BOM_SIGNATURES object is assigned further down.
+ exports.BOM_SIGNATURES = void 0;
37
// The functions below are `function` declarations later in this file, so they
// are hoisted and can be attached to `exports` before their definitions appear.
+ exports.detectBom = detectBom;
38
+ exports.stripBom = stripBom;
39
+ exports.stripBomFromString = stripBomFromString;
40
+ exports.createBomStripStream = createBomStripStream;
41
+ exports.readFileWithBomHandling = readFileWithBomHandling;
42
+ exports.fileHasBomSync = fileHasBomSync;
43
+ exports.normalizeCsvInput = normalizeCsvInput;
44
+ exports.normalizeCsvInputAsync = normalizeCsvInputAsync;
45
+ exports.createBomStrippingIterator = createBomStrippingIterator;
46
+ exports.detectBomAsync = detectBomAsync;
47
+ const stream_1 = require("stream");
48
+ const fs = __importStar(require("fs"));
49
+ const fsPromises = __importStar(require("fs/promises"));
50
// Byte-order-mark signatures keyed by encoding label; each value is the exact
// byte sequence expected at the very start of the data.
// NOTE(review): the 'utf-16le' bytes (FF FE) are a prefix of the 'utf-32le'
// bytes (FF FE 00 00), so a first-match scan over this object in insertion
// order reports a UTF-32LE BOM as 'utf-16le'.
+ exports.BOM_SIGNATURES = {
51
+ 'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),
52
+ 'utf-16le': Buffer.from([0xFF, 0xFE]),
53
+ 'utf-16be': Buffer.from([0xFE, 0xFF]),
54
+ 'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
55
+ 'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF])
56
+ };
57
/**
 * Detects whether a buffer or string starts with a byte order mark (BOM).
 *
 * Strings are inspected through the UTF-8 encoding of their leading
 * characters, so for string input only a UTF-8 BOM (U+FEFF) can match.
 *
 * @param {Buffer|string|null|undefined} input - Data to inspect.
 * @returns {{encoding: string, bomLength: number, hasBom: boolean}|null}
 *   The matching key of `BOM_SIGNATURES`, the signature length in bytes and
 *   `hasBom: true`; `null` when the input is falsy, is neither a string nor
 *   a Buffer, or does not start with a known signature.
 */
function detectBom(input) {
    if (!input) {
        return null;
    }
    let buffer;
    if (typeof input === 'string') {
        // Only the leading characters can form a signature; encoding the
        // whole string would copy arbitrarily large inputs for nothing.
        buffer = Buffer.from(input.slice(0, 4), 'utf8');
    }
    else if (Buffer.isBuffer(input)) {
        buffer = input;
    }
    else {
        return null;
    }
    // Try the longest signatures first. The UTF-16LE signature (FF FE) is a
    // prefix of the UTF-32LE one (FF FE 00 00), so a scan in declaration
    // order could never report UTF-32LE. Consequence: UTF-16LE data whose
    // first character after the BOM is U+0000 is now classified as UTF-32LE.
    // Array.prototype.sort is stable, so equal-length signatures keep their
    // declaration order.
    const candidates = Object.entries(exports.BOM_SIGNATURES)
        .sort(([, left], [, right]) => right.length - left.length);
    for (const [encoding, signature] of candidates) {
        if (buffer.length >= signature.length &&
            buffer.subarray(0, signature.length).equals(signature)) {
            return {
                encoding: encoding,
                bomLength: signature.length,
                hasBom: true
            };
        }
    }
    return null;
}
84
/**
 * Removes a leading byte order mark from a buffer or string.
 *
 * @param {Buffer|string|null|undefined} input - Data to strip.
 * @returns {Buffer|string|null|undefined} For a Buffer, a view of the same
 *   memory starting after the BOM (no copy). For a string, the string
 *   without its leading U+FEFF. Falsy values, unsupported types and inputs
 *   without a BOM are returned unchanged.
 */
function stripBom(input) {
    if (!input) {
        return input;
    }
    if (typeof input === 'string') {
        // In a JavaScript string a BOM is the single code unit U+FEFF. Testing
        // it directly avoids a UTF-8 encode/decode round trip, which copied
        // the whole string twice and replaced lone surrogates with U+FFFD.
        return input.charCodeAt(0) === 0xFEFF ? input.slice(1) : input;
    }
    if (Buffer.isBuffer(input)) {
        const bomInfo = detectBom(input);
        return bomInfo ? input.subarray(bomInfo.bomLength) : input;
    }
    return input;
}
109
/**
 * Removes a leading BOM from a string.
 *
 * Two forms are recognised: the single code unit U+FEFF, and the three
 * code units 0xEF 0xBB 0xBF (the UTF-8 BOM bytes appearing as individual
 * characters).
 *
 * @param {string} str - Text to strip; non-string values are returned as-is.
 * @returns {string} The text without its leading BOM.
 */
function stripBomFromString(str) {
    if (typeof str !== 'string') {
        return str;
    }
    const first = str.charCodeAt(0);
    if (first === 0xFEFF) {
        return str.slice(1);
    }
    const startsWithRawUtf8Bom = str.length >= 3 &&
        first === 0xEF &&
        str.charCodeAt(1) === 0xBB &&
        str.charCodeAt(2) === 0xBF;
    return startsWithRawUtf8Bom ? str.slice(3) : str;
}
124
/**
 * Creates a Transform stream that removes a leading BOM from the data
 * flowing through it and passes everything else on unchanged.
 *
 * The first bytes are held back until enough have arrived to cover the
 * longest known signature (or the stream ends), so a BOM that straddles the
 * first chunk boundary is still recognised.
 *
 * @returns {import('stream').Transform} The BOM-stripping stream.
 */
function createBomStripStream() {
    const maxBomLength = Math.max(...Object.values(exports.BOM_SIGNATURES).map((signature) => signature.length));
    let bomChecked = false;
    let head = Buffer.alloc(0);
    // Runs the one-off BOM check on the buffered head and returns what is
    // left of it after the signature (if any) has been removed.
    const releaseHead = () => {
        bomChecked = true;
        const bomInfo = detectBom(head);
        const body = bomInfo ? head.subarray(bomInfo.bomLength) : head;
        head = Buffer.alloc(0);
        return body;
    };
    return new stream_1.Transform({
        transform(chunk, encoding, callback) {
            if (bomChecked) {
                callback(null, chunk);
                return;
            }
            const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, encoding);
            head = head.length === 0 ? bytes : Buffer.concat([head, bytes]);
            if (head.length < maxBomLength) {
                // Too short to decide yet; wait for more data or for flush().
                callback();
                return;
            }
            const body = releaseHead();
            if (body.length > 0) {
                this.push(body);
            }
            callback();
        },
        flush(callback) {
            // The stream ended before maxBomLength bytes arrived.
            if (!bomChecked) {
                const body = releaseHead();
                if (body.length > 0) {
                    this.push(body);
                }
            }
            callback();
        }
    });
}
143
/**
 * Reads a whole file, removes a leading BOM and decodes the content when an
 * encoding is known.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {{encoding?: string}} [options] - `encoding` forces the Buffer
 *   encoding used for decoding; when omitted, the encoding implied by the
 *   BOM is used.
 * @returns {Promise<{data: Buffer|string, encoding: string, hadBom: boolean, bomInfo: object|null}>}
 *   `data` is a string when an encoding was given or a BOM was found, and
 *   the raw Buffer otherwise. `encoding` is the Buffer encoding that was (or
 *   would be) used. `bomInfo` is the `detectBom` result.
 */
async function readFileWithBomHandling(filePath, options = {}) {
    const buffer = await fsPromises.readFile(filePath);
    const bomInfo = detectBom(buffer);
    let encoding = options.encoding || 'utf8';
    let data = buffer;
    if (bomInfo) {
        data = buffer.subarray(bomInfo.bomLength);
        if (!options.encoding) {
            if (bomInfo.encoding === 'utf-16le') {
                encoding = 'utf16le';
            }
            else if (bomInfo.encoding === 'utf-16be') {
                // Node has no big-endian UTF-16 decoder, so swap each byte
                // pair and decode as little-endian. The buffer was read by
                // this function, so swapping in place is safe. A trailing odd
                // byte is dropped because swap16() rejects odd lengths.
                data = data.subarray(0, data.length - (data.length % 2)).swap16();
                encoding = 'utf16le';
            }
            else {
                // 'utf-8', plus the UTF-32 signatures.
                // NOTE(review): UTF-32 content is still decoded as UTF-8
                // here, which cannot produce correct text.
                encoding = 'utf8';
            }
        }
    }
    if (options.encoding || bomInfo) {
        data = data.toString(encoding);
    }
    return {
        data,
        encoding,
        hadBom: !!bomInfo,
        bomInfo: bomInfo || null
    };
}
179
/**
 * Synchronously checks whether a file starts with a BOM by reading only its
 * first four bytes.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {{encoding: string, bomLength: number, hasBom: boolean}|null}
 *   The `detectBom` result, or `null` when fewer than two bytes could be
 *   read or no BOM is present.
 * @throws Propagates any error from opening or reading the file.
 */
function fileHasBomSync(filePath) {
    const fd = fs.openSync(filePath, 'r');
    const buffer = Buffer.alloc(4);
    let bytesRead;
    try {
        bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
    }
    finally {
        // Close the descriptor even when the read throws.
        fs.closeSync(fd);
    }
    if (bytesRead < 2) {
        return null;
    }
    return detectBom(buffer.subarray(0, bytesRead));
}
189
/**
 * Turns CSV input into a BOM-free string, normalising line endings unless
 * told otherwise.
 *
 * @param {string|Buffer} csvInput - Raw CSV text or bytes.
 * @param {{encoding?: string, normalizeLineEndings?: boolean}} [options]
 *   `encoding` is the Buffer encoding used for buffers without a BOM
 *   (default `'utf8'`). Line endings are converted to `\n` unless
 *   `normalizeLineEndings` is `false`.
 * @returns {string} The normalised text; `''` for falsy input.
 * @throws {Error} When the input is neither a string nor a Buffer.
 */
function normalizeCsvInput(csvInput, options = {}) {
    if (!csvInput) {
        return '';
    }
    // Decodes the bytes that follow a BOM. The BOM labels are not all valid
    // Node Buffer encodings ('utf-16be' and the UTF-32 labels make toString()
    // throw), so each one is mapped explicitly.
    const decodeAfterBom = (body, bomEncoding) => {
        switch (bomEncoding) {
            case 'utf-16le':
                return body.toString('utf16le');
            case 'utf-16be': {
                // Swap byte pairs on a copy so the caller's buffer is not
                // modified; swap16() requires an even length.
                const evenLength = body.length - (body.length % 2);
                return Buffer.from(body.subarray(0, evenLength)).swap16().toString('utf16le');
            }
            case 'utf-32le':
            case 'utf-32be': {
                const littleEndian = bomEncoding === 'utf-32le';
                const pieces = [];
                for (let offset = 0; offset + 4 <= body.length; offset += 4) {
                    const codePoint = littleEndian ? body.readUInt32LE(offset) : body.readUInt32BE(offset);
                    const isScalar = codePoint <= 0x10FFFF && (codePoint < 0xD800 || codePoint > 0xDFFF);
                    // Invalid code points become U+FFFD instead of throwing.
                    pieces.push(isScalar ? String.fromCodePoint(codePoint) : '\uFFFD');
                }
                return pieces.join('');
            }
            default:
                return body.toString('utf8');
        }
    };
    let normalized;
    if (Buffer.isBuffer(csvInput)) {
        const bomInfo = detectBom(csvInput);
        normalized = bomInfo
            ? decodeAfterBom(csvInput.subarray(bomInfo.bomLength), bomInfo.encoding)
            : csvInput.toString(options.encoding || 'utf8');
    }
    else if (typeof csvInput === 'string') {
        normalized = stripBomFromString(csvInput);
    }
    else {
        throw new Error('CSV input must be a string or Buffer');
    }
    if (options.normalizeLineEndings !== false) {
        normalized = normalized.replace(/\r\n|\r/g, '\n');
    }
    return normalized;
}
214
/**
 * Asynchronous variant of `normalizeCsvInput` that also accepts a
 * `{ filePath }` object, in which case the file is read from disk first.
 *
 * @param {string|Buffer|{filePath: string}} csvInput - CSV text, bytes, or a
 *   reference to a file.
 * @param {{encoding?: string, normalizeLineEndings?: boolean}} [options]
 *   Same options as `normalizeCsvInput`. For files, `encoding` overrides the
 *   encoding implied by the BOM when given.
 * @returns {Promise<string>} The normalised CSV text.
 */
async function normalizeCsvInputAsync(csvInput, options = {}) {
    // `typeof null` is 'object' and `'filePath' in null` throws, so null is
    // excluded here and handled by normalizeCsvInput below.
    const isFileReference = csvInput !== null &&
        typeof csvInput === 'object' &&
        'filePath' in csvInput;
    if (!isFileReference) {
        return normalizeCsvInput(csvInput, options);
    }
    // Pass the caller's encoding through untouched. Defaulting it to 'utf8'
    // here would count as an explicit override and suppress decoding based on
    // the file's BOM.
    const result = await readFileWithBomHandling(csvInput.filePath, {
        encoding: options.encoding
    });
    // Without a BOM or an explicit encoding the reader returns a Buffer;
    // toString() then decodes it as UTF-8.
    let normalized = typeof result.data === 'string' ? result.data : result.data.toString();
    if (options.normalizeLineEndings !== false) {
        normalized = normalized.replace(/\r\n|\r/g, '\n');
    }
    return normalized;
}
227
/**
 * Wraps an async iterable of chunks and yields the same chunks with a
 * leading BOM removed from the first one.
 *
 * @param {AsyncIterable<Buffer|string>} stream - Source of chunks, e.g. a
 *   readable stream.
 * @yields {Buffer|string} The first chunk without its BOM, then every later
 *   chunk unchanged.
 */
async function* createBomStrippingIterator(stream) {
    let isFirstChunk = true;
    for await (const chunk of stream) {
        if (!isFirstChunk) {
            yield chunk;
            continue;
        }
        isFirstChunk = false;
        if (typeof chunk === 'string') {
            // In a decoded string the BOM is one code unit (U+FEFF). Slicing
            // by the BOM's byte length would also drop real data.
            yield chunk.charCodeAt(0) === 0xFEFF ? chunk.slice(1) : chunk;
            continue;
        }
        const bomInfo = detectBom(chunk);
        yield bomInfo ? chunk.subarray(bomInfo.bomLength) : chunk;
    }
}
244
/**
 * Asynchronously checks whether a file starts with a BOM by reading only its
 * first four bytes.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {Promise<{encoding: string, bomLength: number, hasBom: boolean}|null>}
 *   The `detectBom` result, or `null` when fewer than two bytes could be
 *   read or no BOM is present.
 * @throws Rejects with any error from opening or reading the file.
 */
async function detectBomAsync(filePath) {
    const handle = await fsPromises.open(filePath, 'r');
    const buffer = Buffer.alloc(4);
    let bytesRead;
    try {
        ({ bytesRead } = await handle.read(buffer, 0, buffer.length, 0));
    }
    finally {
        // Release the file handle even when the read rejects.
        await handle.close();
    }
    if (bytesRead < 2) {
        return null;
    }
    return detectBom(buffer.subarray(0, bytesRead));
}
254
// Default export: one object bundling every function of this module together
// with the BOM_SIGNATURES table, mirroring the named exports.
+ exports.default = {
255
+ detectBom,
256
+ stripBom,
257
+ stripBomFromString,
258
+ createBomStripStream,
259
+ readFileWithBomHandling,
260
+ fileHasBomSync,
261
+ normalizeCsvInput,
262
+ normalizeCsvInputAsync,
263
+ createBomStrippingIterator,
264
+ detectBomAsync,
265
+ BOM_SIGNATURES: exports.BOM_SIGNATURES
266
+ };
267
+ //# sourceMappingURL=bom-utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bom-utils.js","sourceRoot":"","sources":["../../../src/utils/bom-utils.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkDA,8BA4BC;AAQD,4BA+BC;AAQD,gDAmBC;AAOD,oDAoBC;AASD,0DA4CC;AAQD,wCAWC;AASD,8CA6BC;AASD,wDAqBC;AAQD,gEAoBC;AAQD,wCAWC;AA7VD,mCAAmC;AACnC,uCAAyB;AACzB,wDAA0C;AAK7B,QAAA,cAAc,GAAG;IAC5B,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACxC,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrC,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACrC,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IACjD,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;CACzC,CAAC;AA4BX,SAAgB,SAAS,CAAC,KAAyC;IACjE,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,MAAc,CAAC;IACnB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IACtC,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAClC,MAAM,GAAG,KAAK,CAAC;IACjB,CAAC;SAAM,CAAC;QACN,OAAO,IAAI,CAAC;IACd,CAAC;IAGD,KAAK,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,sBAAc,CAAC,EAAE,CAAC;QACnE,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;YACtC,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC;gBACxD,OAAO;oBACL,QAAQ,EAAE,QAAoB;oBAC9B,SAAS,EAAE,SAAS,CAAC,MAAM;oBAC3B,MAAM,EAAE,IAAI;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAQD,SAAgB,QAAQ,CAAC,KAAyC;IAChE,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,KAAY,CAAC;IACtB,CAAC;IAED,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;IACjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAE9B,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAC1C,MAAM,cAAc,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAGvD,IAAI,QAAQ,GAAmB,MAAM,CAAC;QACtC,IAAI
,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YACpC,QAAQ,GAAG,SAAS,CAAC;QACvB,CAAC;aAAM,IAAI,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC3C,QAAQ,GAAG,SAAS,CAAC;QACvB,CAAC;QAED,OAAO,cAAc,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAQD,SAAgB,kBAAkB,CAAC,GAAW;IAC5C,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,OAAO,GAAU,CAAC;IACpB,CAAC;IAGD,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAGD,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC;QACf,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI;QAC1B,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI;QAC1B,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC/B,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAOD,SAAgB,oBAAoB;IAClC,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,OAAO,IAAI,kBAAS,CAAC;QACnB,SAAS,CAAC,KAAa,EAAE,QAAgB,EAAE,QAAuD;YAChG,IAAI,CAAC,WAAW,EAAE,CAAC;gBACjB,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,OAAO,EAAE,CAAC;oBAEZ,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;oBACvC,WAAW,GAAG,IAAI,CAAC;gBACrB,CAAC;qBAAM,CAAC;oBACN,WAAW,GAAG,IAAI,CAAC;gBACrB,CAAC;YACH,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjB,QAAQ,EAAE,CAAC;QACb,CAAC;KACF,CAAC,CAAC;AACL,CAAC;AASM,KAAK,UAAU,uBAAuB,CAC3C,QAAgB,EAChB,UAAyC,EAAE;IAE3C,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEnD,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC;IAEzB,IAAI,IAAqB,CAAC;IAC1B,IAAI,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC;IAE1C,IAAI,OAAO,EAAE,CAAC;QAEZ,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAGvC,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC;YAEtB,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACjC,QAAQ,GAAG,MAAM,CAAC;YACpB,CAAC;iBAAM,IAAI,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;gBAC3C,QAAQ,GAAG,SAAS,CAAC;YACvB,CAAC;iBAAM,IAAI,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;gBAC3C,QAAQ,GAAG,SAAS,CAAC;YACvB,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,MAAM,CAAC;YACpB,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,MAAM,CAAC;IAChB,CAAC;IAGD,IAAI,OAAO,CAAC,QAAQ,IAA
I,OAAO,EAAE,CAAC;QAChC,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;IAED,OAAO;QACL,IAAI;QACJ,QAAQ;QACR,MAAM;QACN,OAAO,EAAE,OAAO,IAAI,IAAI;KACzB,CAAC;AACJ,CAAC;AAQD,SAAgB,cAAc,CAAC,QAAgB;IAC7C,MAAM,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,SAAS,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACnD,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IAEjB,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;AAC/C,CAAC;AASD,SAAgB,iBAAiB,CAC/B,QAAyB,EACzB,UAAoC,EAAE;IAEtC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,UAAkB,CAAC;IAEvB,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;QACpC,IAAI,OAAO,EAAE,CAAC;YACZ,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAA0B,CAAC,CAAC;QAC9F,CAAC;aAAM,CAAC;YACN,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAE,OAAO,CAAC,QAA2B,IAAI,MAAM,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACxC,UAAU,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC5C,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAGD,IAAI,OAAO,CAAC,oBAAoB,KAAK,KAAK,EAAE,CAAC;QAC3C,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IACpD,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AASM,KAAK,UAAU,sBAAsB,CAC1C,QAAgD,EAChD,UAAoC,EAAE;IAEtC,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,UAAU,IAAI,QAAQ,EAAE,CAAC;QAE3D,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,QAAQ,CAAC,QAAQ,EAAE;YAC9D,QAAQ,EAAE,OAAO,CAAC,QAA0B,IAAI,MAAM;SACvD,CAAC,CAAC;QACH,IAAI,UAAU,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAGxF,IAAI,OAAO,CAAC,oBAAoB,KAAK,KAAK,EAAE,CAAC;YAC3C,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QACpD,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAGD,OAAO,iBAAiB,CAAC,QAA2B,EAAE,OAAO,CAAC,CAAC;AACjE,CAAC;AAQM,KAAK,SAAS,CAAC,CAAC,0BAA0B,CAC/C,MAA6B;IAE7B,IAAI,WAAW,
GAAG,KAAK,CAAC;IAExB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QACjC,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,SAAS,CAAC,KAAe,CAAC,CAAC;YAC3C,IAAI,OAAO,EAAE,CAAC;gBAEZ,MAAO,KAAgB,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,WAAW,GAAG,IAAI,CAAC;gBACnB,SAAS;YACX,CAAC;iBAAM,CAAC;gBACN,WAAW,GAAG,IAAI,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,KAAe,CAAC;IACxB,CAAC;AACH,CAAC;AAQM,KAAK,UAAU,cAAc,CAAC,QAAgB;IACnD,MAAM,EAAE,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACrD,MAAM,EAAE,CAAC,KAAK,EAAE,CAAC;IAEjB,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED,kBAAe;IACb,SAAS;IACT,QAAQ;IACR,kBAAkB;IAClB,oBAAoB;IACpB,uBAAuB;IACvB,cAAc;IACd,iBAAiB;IACjB,sBAAsB;IACtB,0BAA0B;IAC1B,cAAc;IACd,cAAc,EAAd,sBAAc;CACf,CAAC","sourcesContent":["/**\r\n * BOM (Byte Order Mark) Utilities for jtcsv\r\n * \r\n * Provides functions to detect and strip BOM characters from UTF-8, UTF-16 LE/BE,\r\n * and UTF-32 encoded strings/buffers.\r\n * \r\n * @module bom-utils\r\n */\r\n\r\nimport { Transform } from 'stream';\r\nimport * as fs from 'fs';\r\nimport * as fsPromises from 'fs/promises';\r\n\r\n/**\r\n * BOM signatures for different encodings\r\n */\r\nexport const BOM_SIGNATURES = {\r\n 'utf-8': Buffer.from([0xEF, 0xBB, 0xBF]),\r\n 'utf-16le': Buffer.from([0xFF, 0xFE]),\r\n 'utf-16be': Buffer.from([0xFE, 0xFF]),\r\n 'utf-32le': Buffer.from([0xFF, 0xFE, 0x00, 0x00]),\r\n 'utf-32be': Buffer.from([0x00, 0x00, 0xFE, 0xFF])\r\n} as const;\r\n\r\nexport type Encoding = keyof typeof BOM_SIGNATURES;\r\n\r\nexport interface BomDetectionResult {\r\n encoding: Encoding;\r\n bomLength: number;\r\n hasBom: boolean;\r\n}\r\n\r\nexport interface ReadFileWithBomResult {\r\n data: Buffer | string;\r\n encoding: string;\r\n hadBom: boolean;\r\n 
bomInfo: BomDetectionResult | null;\r\n}\r\n\r\nexport interface NormalizeCsvInputOptions {\r\n encoding?: string;\r\n normalizeLineEndings?: boolean;\r\n}\r\n\r\n/**\r\n * Detects if a buffer or string starts with a BOM\r\n * \r\n * @param input - Input to check for BOM\r\n * @returns Detection result or null if no BOM found\r\n */\r\nexport function detectBom(input: Buffer | string | null | undefined): BomDetectionResult | null {\r\n if (!input) {\r\n return null;\r\n }\r\n \r\n let buffer: Buffer;\r\n if (typeof input === 'string') {\r\n buffer = Buffer.from(input, 'utf8');\r\n } else if (Buffer.isBuffer(input)) {\r\n buffer = input;\r\n } else {\r\n return null;\r\n }\r\n \r\n // Check each BOM signature\r\n for (const [encoding, signature] of Object.entries(BOM_SIGNATURES)) {\r\n if (buffer.length >= signature.length) {\r\n if (buffer.slice(0, signature.length).equals(signature)) {\r\n return {\r\n encoding: encoding as Encoding,\r\n bomLength: signature.length,\r\n hasBom: true\r\n };\r\n }\r\n }\r\n }\r\n \r\n return null;\r\n}\r\n\r\n/**\r\n * Strips BOM from a buffer or string\r\n * \r\n * @param input - Input to strip BOM from\r\n * @returns Input without BOM\r\n */\r\nexport function stripBom(input: Buffer | string | null | undefined): Buffer | string {\r\n if (!input) {\r\n return input as any;\r\n }\r\n \r\n const bomInfo = detectBom(input);\r\n if (!bomInfo) {\r\n return input;\r\n }\r\n \r\n if (Buffer.isBuffer(input)) {\r\n return input.slice(bomInfo.bomLength);\r\n }\r\n \r\n if (typeof input === 'string') {\r\n // Convert to buffer, strip BOM, then convert back to string\r\n const buffer = Buffer.from(input, 'utf8');\r\n const strippedBuffer = buffer.slice(bomInfo.bomLength);\r\n \r\n // Determine correct encoding for conversion\r\n let encoding: BufferEncoding = 'utf8';\r\n if (bomInfo.encoding === 'utf-16le') {\r\n encoding = 'utf16le';\r\n } else if (bomInfo.encoding === 'utf-16be') {\r\n encoding = 'utf16le'; // Node.js uses utf16le for both 
LE and BE, conversion handled by Buffer\r\n }\r\n \r\n return strippedBuffer.toString(encoding);\r\n }\r\n \r\n return input;\r\n}\r\n\r\n/**\r\n * Strips BOM from a string (optimized for strings)\r\n * \r\n * @param str - String to strip BOM from\r\n * @returns String without BOM\r\n */\r\nexport function stripBomFromString(str: string): string {\r\n if (typeof str !== 'string') {\r\n return str as any;\r\n }\r\n \r\n // Check for UTF-8 BOM (most common)\r\n if (str.charCodeAt(0) === 0xFEFF) {\r\n return str.slice(1);\r\n }\r\n \r\n // Check for UTF-8 BOM bytes as characters\r\n if (str.length >= 3 && \r\n str.charCodeAt(0) === 0xEF && \r\n str.charCodeAt(1) === 0xBB && \r\n str.charCodeAt(2) === 0xBF) {\r\n return str.slice(3);\r\n }\r\n \r\n return str;\r\n}\r\n\r\n/**\r\n * Creates a transform stream that strips BOM from incoming data\r\n * \r\n * @returns Transform stream\r\n */\r\nexport function createBomStripStream(): Transform {\r\n let bomStripped = false;\r\n \r\n return new Transform({\r\n transform(chunk: Buffer, encoding: string, callback: (error?: Error | null, data?: Buffer) => void) {\r\n if (!bomStripped) {\r\n const bomInfo = detectBom(chunk);\r\n if (bomInfo) {\r\n // Strip BOM from first chunk\r\n chunk = chunk.slice(bomInfo.bomLength);\r\n bomStripped = true;\r\n } else {\r\n bomStripped = true; // No BOM found, but we've checked\r\n }\r\n }\r\n \r\n this.push(chunk);\r\n callback();\r\n }\r\n });\r\n}\r\n\r\n/**\r\n * Reads a file and automatically handles BOM\r\n * \r\n * @param filePath - Path to file\r\n * @param options - Read options\r\n * @returns Promise with file data and BOM info\r\n */\r\nexport async function readFileWithBomHandling(\r\n filePath: string, \r\n options: { encoding?: BufferEncoding } = {}\r\n): Promise<ReadFileWithBomResult> {\r\n const buffer = await fsPromises.readFile(filePath);\r\n \r\n const bomInfo = detectBom(buffer);\r\n const hadBom = !!bomInfo;\r\n \r\n let data: Buffer | string;\r\n let encoding = 
options.encoding || 'utf8';\r\n \r\n if (bomInfo) {\r\n // Strip BOM\r\n data = buffer.slice(bomInfo.bomLength);\r\n \r\n // Use detected encoding if not specified\r\n if (!options.encoding) {\r\n // Convert our encoding names to Node.js BufferEncoding\r\n if (bomInfo.encoding === 'utf-8') {\r\n encoding = 'utf8';\r\n } else if (bomInfo.encoding === 'utf-16le') {\r\n encoding = 'utf16le';\r\n } else if (bomInfo.encoding === 'utf-16be') {\r\n encoding = 'utf16le'; // Node.js uses utf16le for both\r\n } else {\r\n encoding = 'utf8'; // fallback\r\n }\r\n }\r\n } else {\r\n data = buffer;\r\n }\r\n \r\n // Convert to string if encoding is specified\r\n if (options.encoding || bomInfo) {\r\n data = data.toString(encoding);\r\n }\r\n \r\n return {\r\n data,\r\n encoding,\r\n hadBom,\r\n bomInfo: bomInfo || null\r\n };\r\n}\r\n\r\n/**\r\n * Checks if a file has BOM (synchronous)\r\n * \r\n * @param filePath - Path to file\r\n * @returns BOM info or null\r\n */\r\nexport function fileHasBomSync(filePath: string): BomDetectionResult | null {\r\n const fd = fs.openSync(filePath, 'r');\r\n const buffer = Buffer.alloc(4);\r\n const bytesRead = fs.readSync(fd, buffer, 0, 4, 0);\r\n fs.closeSync(fd);\r\n \r\n if (bytesRead < 2) {\r\n return null;\r\n }\r\n \r\n return detectBom(buffer.slice(0, bytesRead));\r\n}\r\n\r\n/**\r\n * Normalizes CSV input by stripping BOM and ensuring proper encoding\r\n * \r\n * @param csvInput - CSV input\r\n * @param options - Processing options\r\n * @returns Normalized CSV string\r\n */\r\nexport function normalizeCsvInput(\r\n csvInput: string | Buffer, \r\n options: NormalizeCsvInputOptions = {}\r\n): string {\r\n if (!csvInput) {\r\n return '';\r\n }\r\n \r\n let normalized: string;\r\n \r\n if (Buffer.isBuffer(csvInput)) {\r\n const bomInfo = detectBom(csvInput);\r\n if (bomInfo) {\r\n normalized = csvInput.slice(bomInfo.bomLength).toString(bomInfo.encoding as BufferEncoding);\r\n } else {\r\n normalized = csvInput.toString((options.encoding 
as BufferEncoding) || 'utf8');\r\n }\r\n } else if (typeof csvInput === 'string') {\r\n normalized = stripBomFromString(csvInput);\r\n } else {\r\n throw new Error('CSV input must be a string or Buffer');\r\n }\r\n \r\n // Ensure proper line endings\r\n if (options.normalizeLineEndings !== false) {\r\n normalized = normalized.replace(/\\r\\n|\\r/g, '\\n');\r\n }\r\n \r\n return normalized;\r\n}\r\n\r\n/**\r\n * Async version of normalizeCsvInput that can handle large files\r\n * \r\n * @param csvInput - CSV input as string, Buffer, or file path\r\n * @param options - Processing options\r\n * @returns Promise with normalized CSV string\r\n */\r\nexport async function normalizeCsvInputAsync(\r\n csvInput: string | Buffer | { filePath: string },\r\n options: NormalizeCsvInputOptions = {}\r\n): Promise<string> {\r\n if (typeof csvInput === 'object' && 'filePath' in csvInput) {\r\n // Read file asynchronously\r\n const result = await readFileWithBomHandling(csvInput.filePath, {\r\n encoding: options.encoding as BufferEncoding || 'utf8'\r\n });\r\n let normalized = typeof result.data === 'string' ? 
result.data : result.data.toString();\r\n \r\n // Ensure proper line endings\r\n if (options.normalizeLineEndings !== false) {\r\n normalized = normalized.replace(/\\r\\n|\\r/g, '\\n');\r\n }\r\n \r\n return normalized;\r\n }\r\n \r\n // Handle string or Buffer input\r\n return normalizeCsvInput(csvInput as string | Buffer, options);\r\n}\r\n\r\n/**\r\n * Creates an async iterator that strips BOM from a stream\r\n * \r\n * @param stream - Readable stream\r\n * @returns Async iterator yielding chunks without BOM\r\n */\r\nexport async function* createBomStrippingIterator(\r\n stream: NodeJS.ReadableStream\r\n): AsyncIterableIterator<Buffer> {\r\n let bomStripped = false;\r\n \r\n for await (const chunk of stream) {\r\n if (!bomStripped) {\r\n const bomInfo = detectBom(chunk as Buffer);\r\n if (bomInfo) {\r\n // Strip BOM from first chunk\r\n yield (chunk as Buffer).slice(bomInfo.bomLength);\r\n bomStripped = true;\r\n continue;\r\n } else {\r\n bomStripped = true;\r\n }\r\n }\r\n \r\n yield chunk as Buffer;\r\n }\r\n}\r\n\r\n/**\r\n * Detects BOM asynchronously for large files\r\n * \r\n * @param filePath - Path to file\r\n * @returns Promise with BOM info or null\r\n */\r\nexport async function detectBomAsync(filePath: string): Promise<BomDetectionResult | null> {\r\n const fd = await fsPromises.open(filePath, 'r');\r\n const buffer = Buffer.alloc(4);\r\n const { bytesRead } = await fd.read(buffer, 0, 4, 0);\r\n await fd.close();\r\n \r\n if (bytesRead < 2) {\r\n return null;\r\n }\r\n \r\n return detectBom(buffer.slice(0, bytesRead));\r\n}\r\n\r\nexport default {\r\n detectBom,\r\n stripBom,\r\n stripBomFromString,\r\n createBomStripStream,\r\n readFileWithBomHandling,\r\n fileHasBomSync,\r\n normalizeCsvInput,\r\n normalizeCsvInputAsync,\r\n createBomStrippingIterator,\r\n detectBomAsync,\r\n BOM_SIGNATURES\r\n};"]}
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.detectEncoding = detectEncoding;
4
+ exports.convertToUtf8 = convertToUtf8;
5
+ exports.autoDetectAndConvert = autoDetectAndConvert;
6
+ exports.csvToJsonWithEncoding = csvToJsonWithEncoding;
7
+ exports.csvToJsonWithEncodingAsync = csvToJsonWithEncodingAsync;
8
+ const errors_1 = require("../errors");
9
/**
 * Determines a buffer's text encoding by looking at its byte-order mark.
 *
 * @param buffer - Raw bytes to inspect.
 * @param options - Optional settings; `options.fallback` (default 'utf8') is
 *   the value returned when none of the known BOMs is present.
 * @returns 'utf8', 'utf16be' or 'utf16le' when the matching BOM leads the
 *   buffer, otherwise the fallback value.
 * @throws ValidationError when `buffer` is not a Buffer.
 */
function detectEncoding(buffer, options = {}) {
    const { fallback = 'utf8' } = options;
    if (!Buffer.isBuffer(buffer)) {
        throw new errors_1.ValidationError('Input must be a Buffer');
    }
    // Known BOM signatures, checked in this order.
    const signatures = [
        ['utf8', [0xEF, 0xBB, 0xBF]],
        ['utf16be', [0xFE, 0xFF]],
        ['utf16le', [0xFF, 0xFE]],
    ];
    for (const [name, bom] of signatures) {
        const longEnough = buffer.length >= bom.length;
        if (longEnough && bom.every((byte, index) => buffer[index] === byte)) {
            return name;
        }
    }
    // No BOM matched.
    return fallback;
}
25
/**
 * Decodes a buffer into a JavaScript string, dropping a leading byte-order
 * mark when it matches the requested encoding.
 *
 * Node.js has no 'utf16be' decoder: handing that name to `Buffer#toString`
 * throws ERR_UNKNOWN_ENCODING. Big-endian input is therefore byte-swapped on
 * a private copy and decoded as 'utf16le'.
 *
 * @param buffer - Raw bytes to decode; never modified.
 * @param encoding - Source encoding: 'utf8' (default), 'utf16le', 'utf16be',
 *   or any other name accepted by `Buffer#toString`.
 * @returns The decoded text without the matching BOM.
 * @throws ValidationError when `buffer` is not a Buffer.
 */
function convertToUtf8(buffer, encoding = 'utf8') {
    if (!Buffer.isBuffer(buffer)) {
        throw new errors_1.ValidationError('Input must be a Buffer');
    }
    let offset = 0;
    // Skip the BOM only when it belongs to the encoding the caller asked for.
    if (encoding === 'utf8' && buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) {
        offset = 3;
    }
    else if (encoding === 'utf16be' && buffer.length >= 2 && buffer[0] === 0xFE && buffer[1] === 0xFF) {
        offset = 2;
    }
    else if (encoding === 'utf16le' && buffer.length >= 2 && buffer[0] === 0xFF && buffer[1] === 0xFE) {
        offset = 2;
    }
    // subarray returns the same kind of view as the deprecated Buffer#slice.
    const payload = buffer.subarray(offset);
    if (encoding === 'utf16be') {
        // swap16 mutates in place and rejects odd lengths, so work on an
        // even-length copy; a dangling final byte is dropped, matching how the
        // utf16le decoder treats an incomplete trailing code unit.
        const evenLength = payload.length - (payload.length % 2);
        return Buffer.from(payload.subarray(0, evenLength)).swap16().toString('utf16le');
    }
    return payload.toString(encoding);
}
42
/**
 * Detects the encoding from the buffer's BOM and decodes the buffer with it.
 *
 * @param buffer - Raw bytes to inspect and decode.
 * @param options - Passed unchanged to `detectEncoding`.
 * @returns An object holding the detected `encoding` and the decoded `text`.
 */
function autoDetectAndConvert(buffer, options = {}) {
    const detected = detectEncoding(buffer, options);
    return {
        encoding: detected,
        text: convertToUtf8(buffer, detected),
    };
}
47
/**
 * Validates and decodes CSV input, then always rejects because no CSV parser
 * is wired into this module.
 *
 * Buffers are decoded with the BOM-detected encoding when
 * `parseOptions.encoding` is 'auto' (the default), using
 * `parseOptions.fallbackEncoding` (default 'utf8') when no BOM is found;
 * otherwise they are decoded with the explicit encoding. Strings are accepted
 * as they are.
 *
 * @param input - CSV content as a Buffer or string.
 * @param parseOptions - Encoding options.
 * @returns A promise that never fulfils.
 * @throws ValidationError (as a rejection) when `input` is neither a Buffer
 *   nor a string; Error (as a rejection) in every other case once decoding
 *   has succeeded.
 */
async function csvToJsonWithEncoding(input, parseOptions = {}) {
    const { encoding: requested = 'auto', fallbackEncoding: fallback = 'utf8' } = parseOptions;
    if (Buffer.isBuffer(input)) {
        // The decoded text is not used, but decoding still runs so that its
        // errors surface before the final error below.
        if (requested === 'auto') {
            autoDetectAndConvert(input, { fallback: fallback });
        }
        else {
            convertToUtf8(input, requested);
        }
    }
    else if (typeof input !== 'string') {
        throw new errors_1.ValidationError('Input must be a Buffer or string');
    }
    throw new Error('csvToJson function not available. This function requires csvToJson to be provided.');
}
67
/**
 * Separately named async entry point that forwards both arguments unchanged
 * to `csvToJsonWithEncoding` and settles with its outcome.
 *
 * @param input - CSV content as a Buffer or string.
 * @param parseOptions - Options passed through to `csvToJsonWithEncoding`.
 * @returns The promise outcome of `csvToJsonWithEncoding`.
 */
async function csvToJsonWithEncodingAsync(input, parseOptions = {}) {
    const outcome = csvToJsonWithEncoding(input, parseOptions);
    return outcome;
}
70
+ exports.default = {
71
+ detectEncoding,
72
+ convertToUtf8,
73
+ autoDetectAndConvert,
74
+ csvToJsonWithEncoding,
75
+ csvToJsonWithEncodingAsync
76
+ };
77
+ //# sourceMappingURL=encoding-support.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"encoding-support.js","sourceRoot":"","sources":["../../../src/utils/encoding-support.ts"],"names":[],"mappings":";;AAsCA,wCAoBC;AAUD,sCAkBC;AAUD,oDAOC;AAWD,sDAsBC;AAKD,gEAKC;AApID,sCAA4C;AAwB5C,SAAgB,cAAc,CAAC,MAAc,EAAE,UAAiC,EAAE;IAChF,MAAM,EAAE,QAAQ,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC;IAEtC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,wBAAe,CAAC,wBAAwB,CAAC,CAAC;IACtD,CAAC;IAGD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzF,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnE,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnE,OAAO,SAAS,CAAC;IACnB,CAAC;IAGD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAUD,SAAgB,aAAa,CAAC,MAAc,EAAE,WAAmB,MAAM;IACrE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,wBAAe,CAAC,wBAAwB,CAAC,CAAC;IACtD,CAAC;IAED,IAAI,MAAM,GAAG,CAAC,CAAC;IAGf,IAAI,QAAQ,KAAK,MAAM,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAChH,MAAM,GAAG,CAAC,CAAC;IACb,CAAC;SAAM,IAAI,QAAQ,KAAK,SAAS,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpG,MAAM,GAAG,CAAC,CAAC;IACb,CAAC;SAAM,IAAI,QAAQ,KAAK,SAAS,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpG,MAAM,GAAG,CAAC,CAAC;IACb,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACnC,OAAO,KAAK,CAAC,QAAQ,CAAC,QAA0B,CAAC,CAAC;AACpD,CAAC;AAUD,SAAgB,oBAAoB,CAClC,MAAc,EACd,UAAuC,EAAE;IAEzC,MAAM,QAAQ,GAAG,cAAc,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,aAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAC7C,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;AAC5B,CAAC;AAWM,KAAK,UAAU,qBAAqB,CA
CzC,KAAsB,EACtB,eAA6C,EAAE;IAE/C,MAAM,EAAE,QAAQ,GAAG,MAAM,EAAE,gBAAgB,GAAG,MAAM,EAAE,GAAG,WAAW,EAAE,GAAG,YAAY,CAAC;IAEtF,IAAI,IAAY,CAAC;IACjB,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;YACxB,MAAM,QAAQ,GAAG,oBAAoB,CAAC,KAAK,EAAE,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC,CAAC;YAC7E,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QACvB,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,aAAa,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACrC,IAAI,GAAG,KAAK,CAAC;IACf,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,wBAAe,CAAC,kCAAkC,CAAC,CAAC;IAChE,CAAC;IAGD,MAAM,IAAI,KAAK,CAAC,oFAAoF,CAAC,CAAC;AACxG,CAAC;AAKM,KAAK,UAAU,0BAA0B,CAC9C,KAAsB,EACtB,eAA6C,EAAE;IAE/C,OAAO,qBAAqB,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;AACpD,CAAC;AAED,kBAAe;IACb,cAAc;IACd,aAAa;IACb,oBAAoB;IACpB,qBAAqB;IACrB,0BAA0B;CAC3B,CAAC","sourcesContent":["/**\r\n * Encoding detection and conversion utilities.\r\n * \r\n * Provides basic auto‑detection of UTF‑8, UTF‑16LE, UTF‑16BE with BOM,\r\n * and fallback to a default encoding.\r\n * \r\n * @example\r\n * import { detectEncoding, convertToUtf8 } from './encoding-support';\r\n * \r\n * const buffer = fs.readFileSync('data.csv');\r\n * const encoding = detectEncoding(buffer, { fallback: 'utf8' });\r\n * const utf8Text = convertToUtf8(buffer, encoding);\r\n */\r\n\r\nimport { ValidationError } from '../errors';\r\n\r\nexport interface DetectEncodingOptions {\r\n fallback?: 'utf8' | 'utf16le' | 'utf16be' | string;\r\n}\r\n\r\nexport interface AutoDetectAndConvertOptions {\r\n fallback?: string;\r\n}\r\n\r\nexport interface CsvToJsonWithEncodingOptions {\r\n encoding?: 'auto' | 'utf8' | 'utf16le' | 'utf16be' | string;\r\n fallbackEncoding?: string;\r\n [key: string]: any;\r\n}\r\n\r\n/**\r\n * Detects encoding from buffer based on BOM.\r\n * \r\n * @param buffer - Input buffer\r\n * @param options - Detection options\r\n * @param options.fallback - Fallback encoding if detection fails (default: 'utf8')\r\n * @returns Detected encoding: 'utf8', 
'utf16le', 'utf16be', or fallback\r\n */\r\nexport function detectEncoding(buffer: Buffer, options: DetectEncodingOptions = {}): string {\r\n const { fallback = 'utf8' } = options;\r\n \r\n if (!Buffer.isBuffer(buffer)) {\r\n throw new ValidationError('Input must be a Buffer');\r\n }\r\n \r\n // Check BOM\r\n if (buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) {\r\n return 'utf8';\r\n }\r\n if (buffer.length >= 2 && buffer[0] === 0xFE && buffer[1] === 0xFF) {\r\n return 'utf16be';\r\n }\r\n if (buffer.length >= 2 && buffer[0] === 0xFF && buffer[1] === 0xFE) {\r\n return 'utf16le';\r\n }\r\n \r\n // No BOM detected, use fallback\r\n return fallback;\r\n}\r\n\r\n/**\r\n * Converts buffer to UTF‑8 string using detected encoding.\r\n * Strips BOM if present.\r\n * \r\n * @param buffer - Input buffer\r\n * @param encoding - Source encoding ('utf8', 'utf16le', 'utf16be')\r\n * @returns UTF‑8 string without BOM\r\n */\r\nexport function convertToUtf8(buffer: Buffer, encoding: string = 'utf8'): string {\r\n if (!Buffer.isBuffer(buffer)) {\r\n throw new ValidationError('Input must be a Buffer');\r\n }\r\n \r\n let offset = 0;\r\n \r\n // Skip BOM\r\n if (encoding === 'utf8' && buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) {\r\n offset = 3;\r\n } else if (encoding === 'utf16be' && buffer.length >= 2 && buffer[0] === 0xFE && buffer[1] === 0xFF) {\r\n offset = 2;\r\n } else if (encoding === 'utf16le' && buffer.length >= 2 && buffer[0] === 0xFF && buffer[1] === 0xFE) {\r\n offset = 2;\r\n }\r\n \r\n const slice = buffer.slice(offset);\r\n return slice.toString(encoding as BufferEncoding);\r\n}\r\n\r\n/**\r\n * Auto‑detects encoding and converts buffer to UTF‑8 string.\r\n * \r\n * @param buffer - Input buffer\r\n * @param options - Options\r\n * @param options.fallback - Fallback encoding (default: 'utf8')\r\n * @returns Detected encoding and converted text\r\n */\r\nexport function 
autoDetectAndConvert(\r\n buffer: Buffer, \r\n options: AutoDetectAndConvertOptions = {}\r\n): { encoding: string; text: string } {\r\n const encoding = detectEncoding(buffer, options);\r\n const text = convertToUtf8(buffer, encoding);\r\n return { encoding, text };\r\n}\r\n\r\n/**\r\n * Creates a wrapper around csvToJson that accepts Buffer or string with encoding detection.\r\n * \r\n * @param input - CSV as Buffer or string\r\n * @param parseOptions - Options for csvToJson\r\n * @param parseOptions.encoding - Explicit encoding (default: 'auto')\r\n * @param parseOptions.fallbackEncoding - Fallback if auto detection fails (default: 'utf8')\r\n * @returns Promise with parsed JSON data\r\n */\r\nexport async function csvToJsonWithEncoding(\r\n input: Buffer | string,\r\n parseOptions: CsvToJsonWithEncodingOptions = {}\r\n): Promise<any[]> {\r\n const { encoding = 'auto', fallbackEncoding = 'utf8', ...restOptions } = parseOptions;\r\n \r\n let text: string;\r\n if (Buffer.isBuffer(input)) {\r\n if (encoding === 'auto') {\r\n const detected = autoDetectAndConvert(input, { fallback: fallbackEncoding });\r\n text = detected.text;\r\n } else {\r\n text = convertToUtf8(input, encoding);\r\n }\r\n } else if (typeof input === 'string') {\r\n text = input;\r\n } else {\r\n throw new ValidationError('Input must be a Buffer or string');\r\n }\r\n \r\n // csvToJson will be provided by the caller or imported elsewhere\r\n throw new Error('csvToJson function not available. This function requires csvToJson to be provided.');\r\n}\r\n\r\n/**\r\n * Async version of csvToJsonWithEncoding\r\n */\r\nexport async function csvToJsonWithEncodingAsync(\r\n input: Buffer | string,\r\n parseOptions: CsvToJsonWithEncodingOptions = {}\r\n): Promise<any[]> {\r\n return csvToJsonWithEncoding(input, parseOptions);\r\n}\r\n\r\nexport default {\r\n detectEncoding,\r\n convertToUtf8,\r\n autoDetectAndConvert,\r\n csvToJsonWithEncoding,\r\n csvToJsonWithEncodingAsync\r\n};"]}