typeshi 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/dist/config/dataLoader.d.ts +37 -0
  2. package/dist/config/dataLoader.js +171 -0
  3. package/dist/config/env.d.ts +23 -0
  4. package/dist/config/env.js +55 -0
  5. package/dist/config/index.d.ts +6 -0
  6. package/dist/config/index.js +22 -0
  7. package/dist/config/setupLog.d.ts +39 -0
  8. package/dist/config/setupLog.js +144 -0
  9. package/dist/index.d.ts +8 -0
  10. package/dist/index.js +44 -0
  11. package/dist/utils/argumentValidation.d.ts +192 -0
  12. package/dist/utils/argumentValidation.js +807 -0
  13. package/dist/utils/io/dateTime.d.ts +96 -0
  14. package/dist/utils/io/dateTime.js +202 -0
  15. package/dist/utils/io/index.d.ts +8 -0
  16. package/dist/utils/io/index.js +24 -0
  17. package/dist/utils/io/logging.d.ts +34 -0
  18. package/dist/utils/io/logging.js +260 -0
  19. package/dist/utils/io/reading.d.ts +265 -0
  20. package/dist/utils/io/reading.js +1245 -0
  21. package/dist/utils/io/types/Csv.d.ts +31 -0
  22. package/dist/utils/io/types/Csv.js +29 -0
  23. package/dist/utils/io/types/Io.TypeGuards.d.ts +31 -0
  24. package/dist/utils/io/types/Io.TypeGuards.js +75 -0
  25. package/dist/utils/io/types/Io.d.ts +49 -0
  26. package/dist/utils/io/types/Io.js +2 -0
  27. package/dist/utils/io/types/index.d.ts +6 -0
  28. package/dist/utils/io/types/index.js +22 -0
  29. package/dist/utils/io/writing.d.ts +67 -0
  30. package/dist/utils/io/writing.js +333 -0
  31. package/dist/utils/regex/cleaning.d.ts +65 -0
  32. package/dist/utils/regex/cleaning.js +162 -0
  33. package/dist/utils/regex/configureParameters.d.ts +23 -0
  34. package/dist/utils/regex/configureParameters.js +63 -0
  35. package/dist/utils/regex/email.d.ts +6 -0
  36. package/dist/utils/regex/email.js +37 -0
  37. package/dist/utils/regex/entity.d.ts +59 -0
  38. package/dist/utils/regex/entity.js +168 -0
  39. package/dist/utils/regex/index.d.ts +11 -0
  40. package/dist/utils/regex/index.js +27 -0
  41. package/dist/utils/regex/misc.d.ts +37 -0
  42. package/dist/utils/regex/misc.js +75 -0
  43. package/dist/utils/regex/phone.d.ts +83 -0
  44. package/dist/utils/regex/phone.js +132 -0
  45. package/dist/utils/regex/stringOperations.d.ts +45 -0
  46. package/dist/utils/regex/stringOperations.js +201 -0
  47. package/dist/utils/regex/types/StringOptions.d.ts +87 -0
  48. package/dist/utils/regex/types/StringOptions.js +25 -0
  49. package/dist/utils/regex/types/index.d.ts +5 -0
  50. package/dist/utils/regex/types/index.js +21 -0
  51. package/dist/utils/regex/types/typeGuards.d.ts +12 -0
  52. package/dist/utils/regex/types/typeGuards.js +15 -0
  53. package/dist/utils/typeValidation.d.ts +163 -0
  54. package/dist/utils/typeValidation.js +308 -0
  55. package/package.json +56 -0
@@ -0,0 +1,1245 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.readJsonSync = void 0;
40
+ exports.isDirectory = isDirectory;
41
+ exports.isFile = isFile;
42
+ exports.getDelimiterFromFilePath = getDelimiterFromFilePath;
43
+ exports.readJsonFileAsObject = readJsonFileAsObject;
44
+ exports.coerceFileExtension = coerceFileExtension;
45
+ exports.concatenateFiles = concatenateFiles;
46
+ exports.getRows = getRows;
47
+ exports.getExcelRows = getExcelRows;
48
+ exports.getCsvRows = getCsvRows;
49
+ exports.getOneToOneDictionary = getOneToOneDictionary;
50
+ exports.getColumnValues = getColumnValues;
51
+ exports.getIndexedColumnValues = getIndexedColumnValues;
52
+ exports.handleFileArgument = handleFileArgument;
53
+ exports.getDirectoryFiles = getDirectoryFiles;
54
+ exports.getOneToManyDictionary = getOneToManyDictionary;
55
+ exports.parseExcelForOneToMany = parseExcelForOneToMany;
56
+ exports.parseCsvForOneToMany = parseCsvForOneToMany;
57
+ exports.isValidCsv = isValidCsv;
58
+ exports.isValidCsvSync = isValidCsvSync;
59
+ exports.analyzeCsv = analyzeCsv;
60
+ exports.repairCsv = repairCsv;
61
+ exports.validatePath = validatePath;
62
+ exports.extractTargetRows = extractTargetRows;
63
+ exports.findMissingValues = findMissingValues;
64
+ /**
65
+ * @file src/utils/io/reading.ts
66
+ */
67
+ const node_path_1 = __importDefault(require("node:path"));
68
+ const fs_1 = __importDefault(require("fs"));
69
+ const stream_1 = require("stream");
70
+ const csv_parser_1 = __importDefault(require("csv-parser"));
71
+ const xlsx_1 = __importDefault(require("xlsx"));
72
+ const regex_1 = require("../regex");
73
+ const misc_1 = require("../regex/misc");
74
+ const config_1 = require("../../config");
75
+ const types_1 = require("./types");
76
+ const typeValidation_1 = require("../typeValidation");
77
+ const validate = __importStar(require("../argumentValidation"));
78
+ const logging_1 = require("./logging");
79
+ const F = (0, misc_1.extractFileName)(__filename);
80
/** for testing if `pathString (value)` points to an existing directory */
function isDirectory(value) {
    // anything that is not a non-empty string cannot name a directory
    if (!(0, typeValidation_1.isNonEmptyString)(value)) {
        return false;
    }
    if (!fs_1.default.existsSync(value)) {
        return false;
    }
    return fs_1.default.statSync(value).isDirectory();
}
86
/** for testing if `pathString (value)` points to an existing file */
function isFile(value) {
    // anything that is not a non-empty string cannot name a file
    if (!(0, typeValidation_1.isNonEmptyString)(value)) {
        return false;
    }
    if (!fs_1.default.existsSync(value)) {
        return false;
    }
    return fs_1.default.statSync(value).isFile();
}
92
/**
 * Determines the proper delimiter based on file type or extension
 * @param filePath `string` Path to the file
 * @returns **`delimiter`** `{`{@link DelimiterCharacterEnum}` | string}` The delimiter character
 * @throws an error if the file extension is unsupported
 */
function getDelimiterFromFilePath(filePath) {
    const extension = filePath.split('.').pop()?.toLowerCase();
    switch (extension) {
        case types_1.DelimitedFileTypeEnum.CSV:
            return types_1.DelimiterCharacterEnum.COMMA;
        case types_1.DelimitedFileTypeEnum.TSV:
            return types_1.DelimiterCharacterEnum.TAB;
        default:
            throw new Error(`[reading.getDelimiterFromFilePath()] Unsupported file extension: ${extension}`);
    }
}
110
/**
 * @param filePath `string`
 * @returns **`jsonData`** — `Record<string, any>`
 * - JSON data as an object
 */
exports.readJsonSync = readJsonFileAsObject;
/**
 * Synchronously reads a `.json` file and parses its contents.
 * @param filePath `string` path to the JSON file (the `.json` extension is appended when missing)
 * @returns **`jsonData`** — `Record<string, any>`
 * - JSON data as an object
 * @throws when the file cannot be read or its contents are not valid JSON
 */
function readJsonFileAsObject(filePath) {
    const source = (0, logging_1.getSourceString)(F, readJsonFileAsObject.name);
    try {
        filePath = coerceFileExtension(filePath, 'json');
        const fileText = fs_1.default.readFileSync(filePath, 'utf8');
        return JSON.parse(fileText);
    }
    catch (error) {
        // log with context, then rethrow so the caller sees the failure
        config_1.typeshiLogger.error([`${source} Error reading JSON file`,
            `Given filePath: '${filePath}'`,
            `error: `, JSON.stringify(error, null, 4)
        ].join(config_1.INDENT_LOG_LINE));
        throw new Error(JSON.stringify(error));
    }
}
137
/**
 * Ensures `filePath` ends with `.${expectedExtension}`, appending the extension when absent.
 * @param filePath `string`
 * @param expectedExtension `string` extension with or without a leading dot (e.g. `'json'` or `'.json'`)
 * @returns **`validatedFilePath`** `string`
 */
function coerceFileExtension(filePath, expectedExtension) {
    validate.multipleStringArguments(`reading.coerceFileExtension`, { filePath, expectedExtension });
    // Strip only a LEADING dot. The previous `replace(/\./, '')` removed the first
    // dot anywhere in the string, mangling compound extensions ('tar.gz' -> 'targz').
    expectedExtension = expectedExtension.replace(/^\./, '');
    if (filePath.endsWith(`.${expectedExtension}`)) {
        return filePath;
    }
    return filePath + '.' + expectedExtension;
}
150
/**
 * Reads every file referenced by `arg1` and concatenates their rows into one array,
 * reconciling headers across files.
 * - {@link getDirectoryFiles}
 * @param arg1 `Array<`{@link FileData}` | string> | string`
 * - `files:` {@link FileData}`[]`
 * - `filePaths:` `string[]`
 * - `dirPath:` `string`
 * @param sheetName `string`
 * @param requiredHeaders `string[]` `if` left `undefined`,
 * `requiredHeaders` will be set to the headers of first non empty file from `arg1`
 * @param strictRequirement `boolean`
 * - `Default` = `true`
 * - `if` `true`, then every `row` **must** have headers/keys exactly equal to `requiredHeaders`
 * - `else` `false`, then if a `row` is missing one or more `header` in `requiredHeaders`,
 * for each missing `header`, set `row[header] = ''` (empty string),
 * @param targetExtensions `string[]` try to read rows of all files whose type is in `targetExtensions`
 * @returns **`concatenatedRows`** `Promise<Record<string, any>[]>`
 * @throws when `arg1` is not a recognizable file/dir reference, or (in strict mode)
 * when a row is missing a required header
 */
async function concatenateFiles(arg1, sheetName = 'Sheet1', requiredHeaders = [], strictRequirement = true, targetExtensions = ['.csv', '.tsv', '.xlsx']) {
    const source = (0, logging_1.getSourceString)(F, concatenateFiles.name);
    validate.stringArgument(source, { sheetName });
    validate.arrayArgument(source, { targetExtensions, isNonEmptyString: typeValidation_1.isNonEmptyString });
    // Normalize arg1 into a list of file references (FileData objects or path strings)
    let files;
    if ((0, typeValidation_1.isNonEmptyArray)(arg1)) {
        files = arg1;
    }
    else if (isDirectory(arg1)) {
        files = getDirectoryFiles(arg1, ...targetExtensions);
    }
    else if (isFile(arg1)
        && (0, regex_1.stringEndsWithAnyOf)(arg1, targetExtensions, regex_1.RegExpFlagsEnum.IGNORE_CASE)) {
        files = [arg1];
    }
    else {
        let message = [`${source} Invalid parameter: 'arg1'`,
            `Expected: arg1: (Array<FileData | string> | string) to be one of:`,
            `files: FileData[] | filePaths: string[] | filePath: string | dirPath: string`,
            `Received: ${typeof arg1}`
        ].join(config_1.INDENT_LOG_LINE);
        config_1.typeshiLogger.error(message);
        throw new Error(message);
    }
    if (!(0, typeValidation_1.isNonEmptyArray)(files)) { // i.e. isEmptyArray.... shouldn't get here
        config_1.typeshiLogger.error(`${source} how did this happen, we're smarter than this`);
        return [];
    }
    else if (files.length === 1) {
        // single file: no header reconciliation needed
        return await getRows(files[0], sheetName);
    } // else if files.length > 1, need to make sure each file has same headers
    const concatenatedRows = [];
    // true once requiredHeaders holds a usable (non-empty, all-string) header list
    let haveDefinedRequiredHeaders = ((0, typeValidation_1.isNonEmptyArray)(requiredHeaders)
        && requiredHeaders.every(h => (0, typeValidation_1.isNonEmptyString)(h))
        ? true : false);
    for (const fileRepresentative of files) {
        const rows = await getRows(fileRepresentative, sheetName);
        if (!(0, typeValidation_1.isNonEmptyArray)(rows)) {
            continue;
        }
        if (!haveDefinedRequiredHeaders) {
            // derive requiredHeaders from the first non-null row of the first non-empty file
            let firstValidRow = rows.find(row => !(0, typeValidation_1.isNullLike)(row));
            if (!firstValidRow) {
                continue;
            }
            requiredHeaders = Object.keys(firstValidRow);
            haveDefinedRequiredHeaders = true;
        }
        if (!(0, typeValidation_1.isNonEmptyArray)(requiredHeaders)) {
            config_1.typeshiLogger.warn(`${source} No requiredHeaders defined,`, `skipping file: '${(0, types_1.isFileData)(fileRepresentative)
                ? fileRepresentative.fileName : fileRepresentative}'`);
            continue;
        }
        for (let i = 0; i < rows.length; i++) {
            const row = rows[i];
            if (!(0, typeValidation_1.hasKeys)(row, requiredHeaders)) {
                let missingHeaders = requiredHeaders.filter(h => !(0, typeValidation_1.hasKeys)(row, h));
                if (strictRequirement) {
                    let message = [`${source} Invalid row: missing required header(s)`,
                        `(strictRequirement === true)`,
                        ` file: '${(0, types_1.isFileData)(fileRepresentative)
                            ? fileRepresentative.fileName : fileRepresentative}'`,
                        ` rowIndex: ${i}`,
                        `requiredHeaders: ${JSON.stringify(requiredHeaders)}`,
                        ` missingHeaders: ${JSON.stringify(missingHeaders)}`
                    ].join(config_1.INDENT_LOG_LINE);
                    config_1.typeshiLogger.error(message);
                    throw new Error(message);
                }
                // lenient mode: backfill missing headers with empty strings (mutates row)
                for (const header of missingHeaders) {
                    row[header] = '';
                }
            }
            concatenatedRows.push(row);
        }
    }
    return concatenatedRows;
}
245
/**
 * Dispatches to {@link getExcelRows} or {@link getCsvRows} based on the file extension.
 * @param arg1 {@link FileData}` | string` one of the following:
 * - `fileData:` {@link FileData} = `{ fileName: string; fileContent: string; }`
 * - `filePath:` `string`
 * @param sheetName `string` `optional`
 * - defined/used `if` `arg1` pertains to an excel file and you want to specify which sheet to read
 * - `Default` = `'Sheet1'`
 * @returns **`rows`** `Promise<Record<string, any>[]>`
 * @throws when `arg1` is neither a FileData object nor a non-empty string
 */
async function getRows(arg1, sheetName = 'Sheet1') {
    // excel workbooks are routed to getExcelRows; everything else to getCsvRows
    const isExcelName = (name) => name.endsWith('.xlsx') || name.endsWith('.xls');
    if ((0, types_1.isFileData)(arg1)) {
        return isExcelName(arg1.fileName)
            ? getExcelRows(arg1, sheetName)
            : getCsvRows(arg1);
    }
    if ((0, typeValidation_1.isNonEmptyString)(arg1)) { // assume it's a file path
        return isExcelName(arg1)
            ? getExcelRows(arg1, sheetName)
            : getCsvRows(arg1);
    }
    throw new Error(`[reading.getRows()] Invalid argument: 'arg1' must be a FileData object or a string file path.`);
}
272
/**
 * Reads rows from an Excel workbook (`.xlsx` / `.xls`).
 * @note excludes empty rows
 * @param arg1 {@link FileData}` | string` — in-memory FileData or a path on disk
 * @param sheetName `string` sheet to read; falls back to the workbook's first
 * sheet when `sheetName` is not present. `Default` = `'Sheet1'`
 * @returns **`rows`** `Promise<Record<string, any>[]>` one object per row;
 * returns `[]` (after logging) if the workbook cannot be read or parsed
 * @throws when `arg1` is neither FileData nor a string ending in `.xlsx`/`.xls`
 */
async function getExcelRows(arg1, sheetName = 'Sheet1') {
    const source = '[reading.getExcelRows()]';
    validate.stringArgument(source, { sheetName });
    let filePath;
    let fileContent;
    let buffer;
    if ((0, types_1.isFileData)(arg1) && (0, typeValidation_1.isNonEmptyString)(arg1.fileName)
        && (0, regex_1.stringEndsWithAnyOf)(arg1.fileName, ['.xlsx', '.xls'])) {
        // in-memory FileData; fileContent is decoded as base64 — assumption
        // baked in by Buffer.from(..., 'base64'); confirm with producers of FileData
        filePath = arg1.fileName;
        fileContent = arg1.fileContent;
        buffer = Buffer.from(fileContent, 'base64');
    }
    else if ((0, typeValidation_1.isNonEmptyString)(arg1) && (0, regex_1.stringEndsWithAnyOf)(arg1, ['.xlsx', '.xls'])) {
        // path string: must exist on disk
        filePath = arg1;
        validate.existingPathArgument(`${source}.filePath`, { filePath });
        buffer = fs_1.default.readFileSync(filePath);
    }
    else {
        throw new Error([
            `${source} Invalid argument: 'arg1' (FileData or filePath)`,
            `must be a FileData object or a string file path.`,
            `Received: ${JSON.stringify(arg1)}`
        ].join(config_1.INDENT_LOG_LINE));
    }
    try {
        const workbook = xlsx_1.default.read(buffer, { type: 'buffer' });
        // fall back to the first sheet when the requested one is absent
        sheetName = (workbook.SheetNames.includes(sheetName)
            ? sheetName
            : workbook.SheetNames[0]);
        const sheet = workbook.Sheets[sheetName];
        const jsonData = xlsx_1.default.utils.sheet_to_json(sheet);
        return jsonData;
    }
    catch (error) {
        // deliberate best-effort: log the parse failure and return [] so callers continue
        config_1.typeshiLogger.error([
            `${source} Error reading or parsing the Excel file.`,
            `Received arg1 = ${JSON.stringify(arg1)}, sheetName: '${sheetName}'`,
        ].join(config_1.INDENT_LOG_LINE), JSON.stringify(error, null, 4));
        return [];
    }
}
319
/**
 * Reads rows from a delimited file (`.csv` / `.tsv`), choosing the delimiter
 * from the file extension.
 * @param arg1 {@link FileData}` | string` — in-memory FileData or a path on disk
 * @returns **`rows`** `Promise<Record<string, any>[]>`
 * - an array of objects representing rows from a CSV file.
 * @throws when `arg1` is neither FileData nor a `.csv`/`.tsv` path, when the file
 * cannot be read, or (via promise rejection) when the parser emits an error
 */
async function getCsvRows(arg1) {
    const source = (0, logging_1.getSourceString)(__filename, getCsvRows.name);
    let filePath;
    let fileContent;
    let delimiter = types_1.DelimiterCharacterEnum.COMMA;
    let buffer;
    if ((0, types_1.isFileData)(arg1) && (0, typeValidation_1.isNonEmptyString)(arg1.fileName)
        && (0, regex_1.stringEndsWithAnyOf)(arg1.fileName, ['.csv', '.tsv'])) {
        // in-memory FileData; fileContent is decoded as base64 — assumption baked in
        // by Buffer.from(..., 'base64'); confirm with producers of FileData
        filePath = arg1.fileName;
        fileContent = arg1.fileContent;
        buffer = Buffer.from(fileContent, 'base64');
        delimiter = getDelimiterFromFilePath(filePath);
    }
    else if ((0, typeValidation_1.isNonEmptyString)(arg1) && (0, regex_1.stringEndsWithAnyOf)(arg1, ['.csv', '.tsv'])) {
        // path string: must exist on disk
        filePath = arg1;
        validate.existingPathArgument(`${source}`, { filePath });
        try {
            buffer = fs_1.default.readFileSync(filePath);
        }
        catch (error) {
            throw new Error([
                `${source} Error making buffer when reading file: '${filePath}'`,
                `Error: ${error instanceof Error ? error.message : String(error)}`
            ].join(config_1.INDENT_LOG_LINE));
        }
        delimiter = getDelimiterFromFilePath(filePath);
    }
    else {
        throw new Error([
            `${source} Invalid argument: 'arg1' (FileData or filePath)`,
            `must be a FileData object or a string file path.`,
            `Received: ${JSON.stringify(arg1)}`
        ].join(config_1.INDENT_LOG_LINE));
    }
    const rows = [];
    if (!buffer) {
        throw new Error(`${source} No buffer available to read`);
    }
    // feed the buffer through csv-parser as a stream; resolve with the collected rows
    const stream = stream_1.Readable.from(buffer.toString('utf8'));
    return new Promise((resolve, reject) => {
        stream
            .pipe((0, csv_parser_1.default)({ separator: delimiter }))
            .on('data', (row) => rows.push(row))
            .on('end', () => {
            // success message is queued (SUPPRESSED_LOGS) rather than logged immediately
            config_1.SUPPRESSED_LOGS.push([`${source} Successfully read CSV file.`,
                `filePath: '${filePath}'`,
                `Number of rows read: ${rows.length}`
            ].join(config_1.INDENT_LOG_LINE));
            resolve(rows);
        })
            .on('error', (error) => {
            config_1.typeshiLogger.error(`${source} Error reading CSV file:`, config_1.INDENT_LOG_LINE + `filePath: '${filePath}'`, config_1.NEW_LINE + `Error: ${JSON.stringify(error, null, 4)}`);
            reject(error);
        });
    });
}
380
/**
 * Builds a one-to-one `key -> value` dictionary from two columns of a data source.
 * Duplicate keys are overwritten (last row wins) with a warning.
 * @param arg1 `string | Record<string, any>[]` - the file path to a CSV file or an array of rows.
 * @param keyColumn `string` - the column name whose contents will be keys in the dictionary.
 * @param valueColumn `string` - the column name whose contents will be used as values in the dictionary.
 * @param keyOptions `(optional)` clean-string options applied to each key
 * @param valueOptions `(optional)` clean-string options applied to each value
 * @param requireIncludeAllRows `boolean` - if `true`, a row that cannot contribute
 * a key/value pair throws instead of being skipped with a warning. `Default` = `false`
 * @returns **`dict`** `Record<string, string>`
 */
async function getOneToOneDictionary(arg1, keyColumn, valueColumn, keyOptions, valueOptions, requireIncludeAllRows = false) {
    const source = (0, logging_1.getSourceString)(__filename, getOneToOneDictionary.name);
    validate.multipleStringArguments(source, { keyColumn, valueColumn });
    let rows = await handleFileArgument(arg1, getOneToOneDictionary.name, [keyColumn, valueColumn]);
    const dict = {};
    for (let i = 0; i < rows.length; i++) {
        const row = rows[i];
        // rows missing either column are skipped (or fatal when requireIncludeAllRows)
        if (!(0, typeValidation_1.hasKeys)(row, [keyColumn, valueColumn])) {
            let msg = [`${source} row @ index ${i} missing key(s): '${keyColumn}', '${valueColumn}'`,
                ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row} -> row[keyColumn] = '${row[keyColumn]}'`,
                `valueColumn: '${valueColumn}' in row ? ${valueColumn in row} -> row[valueColumn] = '${row[valueColumn]}'`,
            ].join(config_1.INDENT_LOG_LINE);
            if (requireIncludeAllRows)
                throw new Error(msg);
            config_1.typeshiLogger.warn(msg);
            continue;
        }
        const key = (0, regex_1.clean)(String(row[keyColumn]), keyOptions);
        const value = (0, regex_1.clean)(String(row[valueColumn]), valueOptions);
        // rows whose key or value cleans to an empty/falsy string are also skipped
        if (!key || !value) {
            let msg = [`${source} Row @ index ${i} missing key or value.`,
                ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row}`,
                `-> row[keyColumn] = '${row[keyColumn]}'`,
                ` clean(String(row[keyColumn]), keyOptions): '${key}'`,
                `valueColumn: '${valueColumn}' in row ? ${valueColumn in row}`,
                `-> row[valueColumn] = '${row[valueColumn]}'`,
                `clean(String(row[valueColumn]), valueOptions): '${value}'`,
            ].join(config_1.INDENT_LOG_LINE);
            if (requireIncludeAllRows)
                throw new Error(msg);
            config_1.typeshiLogger.warn(msg);
            continue;
        }
        if (dict[key]) {
            // duplicate key: warn, then overwrite below (last occurrence wins)
            config_1.typeshiLogger.warn([`${source} row @ index ${i} Duplicate key found: '${key}'`,
                `overwriting value '${dict[key]}' with '${value}'`
            ].join(config_1.INDENT_LOG_LINE));
        }
        dict[key] = value;
    }
    return dict;
}
428
/**
 * Collects the (optionally cleaned) values of one column, sorted ascending.
 * @param arg1 `string | FileData | Record<string, any>[]` - the `filePath` to a CSV file or an array of rows.
 * @param columnName `string` - the column name whose values will be returned.
 * @param cleaner `(optional)` async transform applied to each raw string value
 * @param allowDuplicates `boolean` - `optional` if `true`, allows duplicate values in the returned array, otherwise only unique values are returned.
 * - Defaults to `false`.
 * @returns **`values`** `Promise<Array<string>>` - sorted array of values (as strings) from the specified column.
 */
async function getColumnValues(arg1, columnName, cleaner, allowDuplicates = false) {
    const source = `[reading.getColumnValues()]`;
    validate.stringArgument(source, { columnName });
    validate.booleanArgument(source, { allowDuplicates });
    if (cleaner)
        validate.functionArgument(source, { cleaner });
    let rows = await handleFileArgument(arg1, getColumnValues.name, [columnName]);
    const values = [];
    const seen = new Set(); // O(1) duplicate detection instead of O(n) Array.includes per row
    for (const row of rows) {
        const rawValue = row[columnName];
        // Skip missing/blank cells. The previous check wrapped the cell in String()
        // before isNonEmptyString, so undefined/null slipped through as the literal
        // strings "undefined"/"null", and whitespace-only cells produced '' entries.
        if (rawValue === undefined || rawValue === null || String(rawValue).trim() === '')
            continue;
        const value = (cleaner
            ? await cleaner(String(rawValue))
            : String(rawValue)).trim();
        if (allowDuplicates || !seen.has(value)) {
            seen.add(value);
            values.push(value);
        }
    }
    return values.sort();
}
455
/**
 * Maps each distinct (optionally cleaned) column value to the row indices where it occurs.
 * @param arg1 `string | FileData | Record<string, any>[]` - the `filePath` to a CSV file or an array of rows.
 * @param columnName `string` - the column name whose values will be returned.
 * @param cleaner `(optional)` async transform applied to each raw string value
 * @returns **`indexedColumnValues`** `Promise<Record<string, number[]>>`
 */
async function getIndexedColumnValues(arg1, columnName, cleaner) {
    const source = `[reading.getIndexedColumnValues()]`;
    validate.stringArgument(source, { columnName });
    if (cleaner)
        validate.functionArgument(source, { cleaner });
    let rows = await handleFileArgument(arg1, getIndexedColumnValues.name, [columnName]);
    const valueDict = {};
    // numeric index loop instead of for..in, which yields string keys (requiring
    // Number() round-trips) and any enumerable prototype properties
    for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
        const row = rows[rowIndex];
        const rawValue = row[columnName];
        // Skip missing/blank cells. The previous check wrapped the cell in String()
        // before isNonEmptyString, so undefined/null slipped through and were indexed
        // under the literal keys "undefined"/"null".
        if (rawValue === undefined || rawValue === null || String(rawValue).trim() === '')
            continue;
        const value = (cleaner
            ? await cleaner(String(rawValue))
            : String(rawValue)).trim();
        if (!valueDict[value]) {
            valueDict[value] = [];
        }
        valueDict[value].push(rowIndex);
    }
    return valueDict;
}
481
/**
 * Normalizes the polymorphic data-source argument used across this module into
 * an array of row objects, validating CSV paths along the way.
 * @param arg1 `string | FileData | Record<string, any>[]`
 * @param invocationSource `string` caller name, included in error messages
 * @param requiredHeaders `string[]` `optional` headers a CSV path must contain
 * @param sheetName `string` `optional` forwarded to {@link getRows} for excel sources
 * @returns **`rows`** `Promise<Record<string, any>[]>`
 * @throws when a CSV path fails validation, when an element of a row array is
 * not an object, or when `arg1` matches none of the accepted shapes
 */
async function handleFileArgument(arg1, invocationSource, requiredHeaders = [], sheetName) {
    const source = (0, logging_1.getSourceString)(F, handleFileArgument.name);
    validate.stringArgument(source, { invocationSource });
    validate.arrayArgument(source, { requiredHeaders, isNonEmptyString: typeValidation_1.isNonEmptyString }, true);
    let rows = [];
    // Handle file path validation only for string inputs
    if ((0, typeValidation_1.isNonEmptyString)(arg1)
        && (0, regex_1.stringEndsWithAnyOf)(arg1, /(\.tsv|\.csv)/i)
        && !isValidCsvSync(arg1, requiredHeaders)) {
        throw new Error([
            `${source} Invalid CSV filePath provided: '${arg1}'`,
            `invocationSource: ${invocationSource}`,
            `requiredHeaders ? ${(0, typeValidation_1.isNonEmptyArray)(requiredHeaders)
                ? JSON.stringify(requiredHeaders)
                : 'none provided'}`
        ].join(config_1.INDENT_LOG_LINE));
    }
    if (((0, typeValidation_1.isNonEmptyString)(arg1) && isFile(arg1)) // arg1 is file path string
        || (0, types_1.isFileData)(arg1)) { // arg1 is FileData { fileName: string; fileContent: string; }
        rows = await getRows(arg1, sheetName);
    }
    else if ((0, typeValidation_1.isNonEmptyArray)(arg1)) { // arg1 is already array of rows
        // reject arrays containing non-object elements before handing them back
        if (arg1.some(v => !(0, typeValidation_1.isObject)(v))) {
            throw new Error([
                `${source} Error: Invalid 'arg1' (Record<string, any>[]) param:`,
                `There exists an element in the row array that is not an object.`,
                `Source: ${invocationSource}`,
            ].join(config_1.INDENT_LOG_LINE));
        }
        rows = arg1;
    }
    else {
        throw new Error([
            `${source} Invalid parameter: 'arg1' (string | FileData | Record<string, any>[])`,
            `arg1 must be a file path string, FileData object, or an array of rows.`,
            `Source: ${invocationSource}`,
        ].join(config_1.INDENT_LOG_LINE));
    }
    return rows;
}
527
/**
 * @param dir `string` path to target directory
 * @param targetExtensions `string[] optional` - array of file extensions to filter files by.
 * - `If` not provided, all files in the directory will be returned.
 * - `If` provided, only files with extensions matching the array will be returned.
 * @returns **`targetFiles`** `string[]` array of full file paths
 */
function getDirectoryFiles(dir, ...targetExtensions) {
    const source = (0, logging_1.getSourceString)(F, getDirectoryFiles.name);
    validate.existingPathArgument(source, { dir });
    validate.arrayArgument(source, { targetExtensions, isNonEmptyString: typeValidation_1.isNonEmptyString }, true);
    // ensure all target extensions start with period
    for (let i = 0; i < targetExtensions.length; i++) {
        const ext = targetExtensions[i];
        if (!ext.startsWith('.')) {
            targetExtensions[i] = `.${ext}`;
        }
    }
    // BUG FIX: the original ternary was inverted — it returned ALL files whenever
    // targetExtensions was NON-empty and only applied the extension filter when the
    // list was empty (when there was nothing to match). Per the documented contract,
    // filter when extensions are provided; otherwise include every file.
    const targetFiles = fs_1.default.readdirSync(dir)
        .filter(f => (0, typeValidation_1.isNonEmptyArray)(targetExtensions)
            ? (0, regex_1.stringEndsWithAnyOf)(f, targetExtensions, regex_1.RegExpFlagsEnum.IGNORE_CASE)
            : true) // no extensions specified -> all files in dir
        .map(file => node_path_1.default.join(dir, file));
    return targetFiles;
}
550
/**
 * Builds a one-to-many `key -> values[]` dictionary from two columns of a data source.
 * @param dataSource `string | FileData | Record<string, any>[]`
 * @param keyColumn `string`
 * @param valueColumn `string`
 * @param keyOptions {@link CleanStringOptions} `(optional)`
 * @param valueOptions {@link CleanStringOptions}`(optional)`
 * @param sheetName `string`
 * @returns **`dict`** `Promise<Record<string, string[]>>`
 */
async function getOneToManyDictionary(dataSource, keyColumn, valueColumn, keyOptions, valueOptions, sheetName) {
    const source = (0, logging_1.getSourceString)(F, getOneToManyDictionary.name);
    validate.multipleStringArguments(source, { keyColumn, valueColumn });
    if (keyOptions)
        validate.objectArgument(source, { keyOptions, isCleanStringOptions: regex_1.isCleanStringOptions });
    if (valueOptions)
        validate.objectArgument(source, { valueOptions, isCleanStringOptions: regex_1.isCleanStringOptions });
    const rows = await handleFileArgument(dataSource, source, [keyColumn, valueColumn], sheetName);
    const dict = {};
    for (let i = 0; i < rows.length; i++) {
        const row = rows[i];
        // Coerce cell values to strings before clean(), matching getOneToOneDictionary
        // and parseExcelForOneToMany — excel/csv rows can hold numbers, not just strings.
        // (The original passed the raw cell value straight to clean().)
        const key = (0, regex_1.clean)(String(row[keyColumn]), keyOptions).trim().replace(/\.$/, '');
        if (!dict[key]) {
            dict[key] = [];
        }
        const value = (0, regex_1.clean)(String(row[valueColumn]), valueOptions).trim().replace(/\.$/, '');
        // values are deduplicated per key
        if (!dict[key].includes(value)) {
            dict[key].push(value);
        }
    }
    return dict;
}
581
/**
 * @deprecated -> use {@link getOneToManyDictionary}
 * @param filePath `string`
 * @param sheetName `string`
 * @param keyColumn `string`
 * @param valueColumn `string`
 * @param options - {@link ParseOneToManyOptions}
 * = `{ keyStripOptions`?: {@link StringStripOptions}, `valueStripOptions`?: {@link StringStripOptions}, keyCaseOptions`?: {@link StringCaseOptions}, `valueCaseOptions`?: {@link StringCaseOptions}, `keyPadOptions`?: {@link StringPadOptions}, `valuePadOptions`?: {@link StringPadOptions} `}`
 * - {@link StringStripOptions} = `{ char`: `string`, `escape`?: `boolean`, `stripLeftCondition`?: `(s: string, ...args: any[]) => boolean`, `leftArgs`?: `any[]`, `stripRightCondition`?: `(s: string, ...args: any[]) => boolean`, `rightArgs`?: `any[] }`
 * - {@link StringCaseOptions} = `{ toUpper`?: `boolean`, `toLower`?: `boolean`, `toTitle`?: `boolean }`
 * - {@link StringPadOptions} = `{ padLength`: `number`, `padChar`?: `string`, `padLeft`?: `boolean`, `padRight`?: `boolean }`
 * @returns **`dict`** `Record<string, Array<string>>` — key-value pairs where key is from `keyColumn` and value is an array of values from `valueColumn`
 */
function parseExcelForOneToMany(filePath, sheetName, keyColumn, valueColumn, options = {}) {
    filePath = coerceFileExtension(filePath, 'xlsx');
    validate.multipleStringArguments(`reading.parseExcelForOneToMany`, { filePath, sheetName, keyColumn, valueColumn });
    try {
        const { keyStripOptions, valueStripOptions, keyCaseOptions, valueCaseOptions, keyPadOptions, valuePadOptions } = options;
        const workbook = xlsx_1.default.readFile(filePath);
        const sheet = workbook.Sheets[sheetName];
        const jsonData = xlsx_1.default.utils.sheet_to_json(sheet);
        const dict = {};
        jsonData.forEach(row => {
            // keys and values are cleaned, trimmed, and stripped of a trailing period
            let key = (0, regex_1.clean)(String(row[keyColumn]), keyStripOptions, keyCaseOptions, keyPadOptions).trim().replace(/\.$/, '');
            let val = (0, regex_1.clean)(String(row[valueColumn]), valueStripOptions, valueCaseOptions, valuePadOptions).trim().replace(/\.$/, '');
            if (!dict[key]) {
                dict[key] = [];
            }
            // values are deduplicated per key
            if (!dict[key].includes(val)) {
                dict[key].push(val);
            }
        });
        return dict;
    }
    catch (err) {
        // deliberate best-effort: log and return {} rather than propagate
        config_1.typeshiLogger.error('Error reading or parsing the Excel file:', err, config_1.INDENT_LOG_LINE + 'Given File Path:', '"' + filePath + '"');
        return {};
    }
}
620
/**
 * @deprecated -> use {@link getOneToManyDictionary}
 * Builds a one-to-many dictionary from two columns of a delimited text file.
 * @param filePath `string`
 * @param keyColumn `string`
 * @param valueColumn `string`
 * @param delimiter {@link DelimiterCharacters} | `string`
 * @param options {@link ParseOneToManyOptions}
 * = `{ keyCaseOptions`?: {@link StringCaseOptions}, `valueCaseOptions`?: {@link StringCaseOptions}, `keyPadOptions`?: {@link StringPadOptions}, `valuePadOptions`?: {@link StringPadOptions} `}`
 * - {@link StringCaseOptions} = `{ toUpper`?: `boolean`, `toLower`?: `boolean`, `toTitle`?: `boolean }`
 * - {@link StringPadOptions} = `{ padLength`: `number`, `padChar`?: `string`, `padLeft`?: `boolean`, `padRight`?: `boolean }`
 * @returns `Record<string, Array<string>>` - key-value pairs where key is from `keyColumn` and value is an array of values from `valueColumn`
 */
function parseCsvForOneToMany(filePath, keyColumn, valueColumn, delimiter = types_1.DelimiterCharacterEnum.COMMA, options = {}) {
    filePath = coerceFileExtension(filePath, (delimiter === types_1.DelimiterCharacterEnum.TAB) ? 'tsv' : 'csv');
    const source = `[reading.parseCsvForOneToMany()]`;
    validate.existingFileArgument(source, ['.tsv', '.csv'], { filePath });
    validate.multipleStringArguments(source, { keyColumn, valueColumn });
    try {
        const { keyStripOptions, valueStripOptions, keyCaseOptions, valueCaseOptions, keyPadOptions, valuePadOptions } = options;
        const data = fs_1.default.readFileSync(filePath, 'utf8');
        // NOTE(review): splitting on '\n' first means quoted fields containing
        // embedded newlines are still not supported by this deprecated function.
        const lines = data.split('\n');
        const dict = {};
        // FIX: use the quote-aware parseCsvLine instead of a naive
        // split(delimiter) so quoted fields that contain the delimiter
        // (e.g. "Smith, John") are not broken into two columns.
        const header = parseCsvLine(lines[0], delimiter);
        const keyIndex = header.indexOf(keyColumn);
        const valueIndex = header.indexOf(valueColumn);
        if (keyIndex === -1 || valueIndex === -1) {
            throw new Error(`Key or value column not found in CSV file.`);
        }
        for (let i = 1; i < lines.length; i++) {
            const line = parseCsvLine(lines[i], delimiter);
            // Skip blank/degenerate lines (fewer than 2 parsed fields).
            if (line.length > 1) {
                let key = (0, regex_1.clean)(line[keyIndex], keyStripOptions, keyCaseOptions, keyPadOptions);
                let val = (0, regex_1.clean)(line[valueIndex], valueStripOptions, valueCaseOptions, valuePadOptions);
                if (!dict[key]) {
                    dict[key] = [];
                }
                // De-duplicate values per key.
                if (!dict[key].includes(val)) {
                    dict[key].push(val);
                }
            }
        }
        return dict;
    }
    catch (err) {
        // Log and degrade to an empty mapping on failure, matching parseExcelForOneToMany.
        config_1.typeshiLogger.error('Error reading or parsing the CSV file:', err, config_1.INDENT_LOG_LINE + 'Given File Path:', '"' + filePath + '"');
        return {};
    }
}
668
/**
 * Default options object for the CSV validation functions.
 * NOTE(review): `allowInconsistentColumns` is `true` here, but
 * `isValidCsvSync` documents (and uses as its destructuring fallback)
 * a default of `false` — confirm which default is intended.
 */
const DEFAULT_CSV_VALIDATION_RULES = {
    allowEmptyRows: true,
    allowInconsistentColumns: true,
    maxRowsToCheck: Infinity,
};
673
/**
 * `async` CSV validation entry point.
 * Previously an unimplemented stub that always returned `false`; it now
 * delegates string file paths to the synchronous implementation.
 * @param arg1 `string` - file path to validate; any non-string input is
 * still unsupported and yields `false`.
 * @param requiredHeaders `string[]` - optional headers that must be present
 * @param options validation rules (see {@link DEFAULT_CSV_VALIDATION_RULES})
 * @returns **`isValidCsv`** `Promise<boolean>`
 */
async function isValidCsv(arg1, requiredHeaders, options = DEFAULT_CSV_VALIDATION_RULES) {
    if (typeof arg1 === 'string') {
        // Reuse the fully-implemented synchronous validator.
        return isValidCsvSync(arg1, requiredHeaders, options);
    }
    // Non-string sources (e.g. pre-parsed rows) are not yet handled.
    return false;
}
684
/**
 * Validates the structure of a delimited text file (header presence,
 * required headers, per-row column-count consistency).
 *
 * @problem has trouble handling case where column value contains a single double quote;
 * e.g. when it's used as the inches unit after a number
 *
 * `sync`
 * @param filePath `string` - must be a string to an existing file, otherwise return `false`.
 * @param requiredHeaders `string[]` - `optional` array of headers that must be present in the CSV file.
 * - If provided, the function checks if all required headers are present in the CSV header row
 * @param options `object` - optional configuration
 * - `allowEmptyRows`: `boolean` - if true, allows rows with all empty fields (default: true)
 * - `allowInconsistentColumns`: `boolean` - if true, allows rows with different column counts (default: false)
 * - `maxRowsToCheck`: `number` - maximum number of rows to validate (default: all rows)
 * @returns **`isValidCsv`** `boolean`
 * - **`true`** `if` the CSV file at `filePath` is valid (proper structure and formatting),
 * - **`false`** `otherwise`.
 */
function isValidCsvSync(filePath, requiredHeaders, options = DEFAULT_CSV_VALIDATION_RULES) {
    // NOTE(review): when the caller omits `options`, DEFAULT_CSV_VALIDATION_RULES
    // supplies allowInconsistentColumns: true, so the destructuring fallback
    // `= false` below never applies — the effective default is `true`,
    // contrary to the JSDoc above. Confirm which is intended.
    const { allowEmptyRows = true, allowInconsistentColumns = false, maxRowsToCheck = Infinity } = options;
    validate.existingPathArgument(`reading.isValidCsv`, { filePath });
    try {
        // Delimiter is inferred from the file extension (.tsv -> tab, etc.).
        const delimiter = getDelimiterFromFilePath(filePath);
        const data = fs_1.default.readFileSync(filePath, 'utf8');
        // Handle different line endings
        const normalizedData = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        // Split into lines, but be careful about quoted fields with newlines
        let lines = [];
        let currentLine = '';
        let inQuotes = false;
        let i = 0;
        // Character-by-character scan: tracks quote state so that '\n'
        // inside a quoted field does NOT terminate the logical line.
        while (i < normalizedData.length) {
            const char = normalizedData[i];
            const nextChar = normalizedData[i + 1];
            if (char === '"') {
                if (inQuotes && nextChar === '"') {
                    // Escaped quote
                    currentLine += '""';
                    i++; // Skip next quote
                }
                else {
                    // Toggle quote state
                    inQuotes = !inQuotes;
                    currentLine += char;
                }
            }
            else if (char === '\n' && !inQuotes) {
                // End of line (not within quotes)
                if (currentLine.trim() !== '' || allowEmptyRows) {
                    lines.push(currentLine);
                }
                currentLine = '';
            }
            else {
                currentLine += char;
            }
            i++;
        }
        // Add the last line if it exists
        // NOTE(review): if the file ends with a newline and allowEmptyRows is
        // true, this appends a trailing empty line — counted as a data row.
        if (currentLine.trim() !== '' || allowEmptyRows) {
            lines.push(currentLine);
        }
        if (lines.length < 1) {
            config_1.typeshiLogger.error(`[ERROR isValidCsv()]: file has no valid lines: ${filePath}`);
            return false;
        }
        const headerRow = parseCsvLine(lines[0], delimiter);
        if (headerRow.length < 1) {
            config_1.typeshiLogger.error(`[ERROR isValidCsv()]: no header found in file: ${filePath}`);
            return false;
        }
        // Check for empty headers
        if (headerRow.some(header => header === '')) {
            config_1.typeshiLogger.warn(`[isValidCsv()]: Found empty header(s) in file: ${filePath}`);
            if (!allowInconsistentColumns) {
                return false;
            }
        }
        // Validate required headers
        if ((0, typeValidation_1.isNonEmptyArray)(requiredHeaders)) {
            const hasRequiredHeaders = requiredHeaders.every(header => {
                if (!(0, typeValidation_1.isNonEmptyString)(header)) {
                    config_1.typeshiLogger.warn([
                        `[reading.isValidCsv]: Invalid parameter: 'requiredHeaders'`,
                        `requiredHeaders must be of type: Array<string>`,
                        `found array element of type: '${typeof header}' (skipping)`
                    ].join(config_1.INDENT_LOG_LINE));
                    return true; // skip headers if they are not strings
                }
                return headerRow.includes(header);
            });
            if (!hasRequiredHeaders) {
                config_1.typeshiLogger.warn([
                    `[isValidCsv()]: Required headers missing from headerRow`,
                    `filePath: '${filePath}'`,
                    `requiredHeaders: ${JSON.stringify(requiredHeaders)}`,
                    `csvFileHeaders: ${JSON.stringify(headerRow)}`
                ].join(config_1.INDENT_LOG_LINE));
                return false;
            }
        }
        // Check consistency of data rows
        const maxRows = Math.min(lines.length, maxRowsToCheck + 1); // +1 for header
        const expectedColumnCount = headerRow.length;
        for (let i = 1; i < maxRows; i++) {
            const line = lines[i];
            // Skip completely empty lines if allowed
            if (allowEmptyRows && line.trim() === '') {
                continue;
            }
            const rowValues = parseCsvLine(line, delimiter);
            // Check if row is empty (all fields are empty)
            const isEmptyRow = rowValues.every(val => val === '');
            if (isEmptyRow && allowEmptyRows) {
                continue;
            }
            // Check column count consistency
            if (rowValues.length !== expectedColumnCount && !allowInconsistentColumns) {
                config_1.typeshiLogger.warn([
                    `[isValidCsv()]: Invalid row found: header.length !== rowValues.length`,
                    ` header.length: ${expectedColumnCount}`,
                    `rowValues.length: ${rowValues.length}`,
                    ` -> Difference = ${expectedColumnCount - rowValues.length}`,
                    ` header: ${JSON.stringify(headerRow)}`,
                    // `rowValues: ${JSON.stringify(rowValues)}`,
                    ` rowIndex: ${i}`,
                    ` filePath: '${filePath}'`,
                    `delimiter: '${delimiter}'`
                ].join(config_1.INDENT_LOG_LINE));
                return false;
            }
        }
        return true;
    }
    catch (error) {
        // Any I/O or parsing failure is treated as "not a valid CSV".
        config_1.typeshiLogger.error([
            `[isValidCsv()]: Error reading or parsing CSV file: ${filePath}`,
            `Error: ${error instanceof Error ? error.message : String(error)}`
        ].join(config_1.INDENT_LOG_LINE));
        return false;
    }
}
824
/**
 * Splits a single CSV/TSV line into its fields, honoring RFC-4180-style
 * quoting: a quoted field may contain the delimiter, and a doubled quote
 * (`""`) inside a quoted field decodes to one literal `"`.
 * Each field is trimmed of surrounding whitespace.
 * @param line `string` - the CSV line to parse
 * @param delimiter `string` - the delimiter character
 * @returns **`fields`** `string[]` - array of field values
 */
function parseCsvLine(line, delimiter) {
    const fields = [];
    let buffer = '';
    let insideQuotes = false;
    for (let pos = 0; pos < line.length; pos++) {
        const ch = line[pos];
        if (insideQuotes) {
            if (ch === '"') {
                if (line[pos + 1] === '"') {
                    // Doubled quote inside a quoted field -> literal quote.
                    buffer += '"';
                    pos++; // consume the second quote
                }
                else {
                    // Closing quote: back to unquoted mode.
                    insideQuotes = false;
                }
            }
            else {
                buffer += ch;
            }
        }
        else if (ch === '"') {
            // Opening quote: delimiters are literal until it closes.
            insideQuotes = true;
        }
        else if (ch === delimiter) {
            // Field boundary: emit the accumulated (trimmed) field.
            fields.push(buffer.trim());
            buffer = '';
        }
        else {
            buffer += ch;
        }
    }
    // Emit the final field (a line always yields at least one field).
    fields.push(buffer.trim());
    return fields;
}
872
/**
 * Analyzes a CSV file and returns detailed validation information:
 * issues (fatal), warnings (non-fatal), row/column statistics, and headers.
 * @param filePath `string` - path to the CSV file
 * @param options `object` - validation options
 * - `sampleSize`: `number` - max number of data rows to inspect (default 1000)
 * - `checkEncoding`: `boolean` - detect a UTF-8 BOM (default false)
 * - `detectDelimiter`: `boolean` - guess the delimiter by frequency (default false)
 * @returns **`analysis`** `object` - detailed analysis of the CSV file
 * `{ isValid, issues, warnings, stats, headers }`
 */
function analyzeCsv(filePath, options = {}) {
    const { sampleSize = 1000, checkEncoding = false, detectDelimiter = false } = options;
    const issues = [];
    const warnings = [];
    // Accumulated file statistics; minRowLength starts at Infinity so the
    // first observed row count always replaces it.
    const stats = {
        totalRows: 0,
        headerCount: 0,
        maxRowLength: 0,
        minRowLength: Infinity,
        emptyRows: 0,
        encoding: null,
        detectedDelimiter: null
    };
    let headers = [];
    try {
        validate.existingPathArgument(`reading.analyzeCsv`, { filePath });
        const data = fs_1.default.readFileSync(filePath, 'utf8');
        const normalizedData = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        // Detect delimiter if requested
        let delimiter;
        if (detectDelimiter) {
            // Pick the candidate with the highest raw occurrence count.
            // NOTE(review): counts occurrences anywhere, including inside
            // quoted fields — a heavy use of commas in quoted text could
            // skew detection.
            const commonDelimiters = [',', '\t', ';', '|'];
            const delimiterCounts = commonDelimiters.map(delim => ({
                delimiter: delim,
                // `\\${delim}` escapes the candidate (needed for '|'); harmless for the others.
                count: (data.match(new RegExp(`\\${delim}`, 'g')) || []).length
            }));
            const mostLikely = delimiterCounts.sort((a, b) => b.count - a.count)[0];
            delimiter = mostLikely.count > 0 ? mostLikely.delimiter : getDelimiterFromFilePath(filePath);
            stats.detectedDelimiter = delimiter;
        }
        else {
            delimiter = getDelimiterFromFilePath(filePath);
        }
        // Parse the file properly
        // Quote-aware line splitter: '\n' inside a quoted field does not
        // terminate the logical line (same scan as isValidCsvSync).
        let lines = [];
        let currentLine = '';
        let inQuotes = false;
        let i = 0;
        while (i < normalizedData.length) {
            const char = normalizedData[i];
            const nextChar = normalizedData[i + 1];
            if (char === '"') {
                if (inQuotes && nextChar === '"') {
                    currentLine += '""';
                    i++;
                }
                else {
                    inQuotes = !inQuotes;
                    currentLine += char;
                }
            }
            else if (char === '\n' && !inQuotes) {
                lines.push(currentLine);
                currentLine = '';
            }
            else {
                currentLine += char;
            }
            i++;
        }
        if (currentLine) {
            lines.push(currentLine);
        }
        // NOTE(review): totalRows includes the header line.
        stats.totalRows = lines.length;
        if (lines.length === 0) {
            issues.push('File is empty');
            return { isValid: false, issues, warnings, stats, headers };
        }
        headers = parseCsvLine(lines[0], delimiter);
        stats.headerCount = headers.length;
        stats.maxRowLength = headers.length;
        stats.minRowLength = headers.length;
        // Check for duplicate headers
        const headerSet = new Set(headers);
        if (headerSet.size !== headers.length) {
            warnings.push('Duplicate header names found');
        }
        // Check for empty headers
        if (headers.some(h => h.trim() === '')) {
            warnings.push('Empty header names found');
        }
        // Analyze data rows (sample if necessary)
        // When there are more data rows than sampleSize, stride through the
        // file so roughly sampleSize rows are inspected.
        const rowsToCheck = Math.min(lines.length - 1, sampleSize);
        const step = rowsToCheck < lines.length - 1 ? Math.floor((lines.length - 1) / rowsToCheck) : 1;
        let inconsistentRows = 0;
        for (let i = 1; i < lines.length; i += step) {
            const line = lines[i];
            if (line.trim() === '') {
                stats.emptyRows++;
                continue;
            }
            const fields = parseCsvLine(line, delimiter);
            stats.maxRowLength = Math.max(stats.maxRowLength, fields.length);
            stats.minRowLength = Math.min(stats.minRowLength, fields.length);
            if (fields.length !== headers.length) {
                inconsistentRows++;
            }
        }
        if (inconsistentRows > 0) {
            warnings.push(`${inconsistentRows} rows have inconsistent column counts`);
        }
        if (stats.emptyRows > 0) {
            warnings.push(`${stats.emptyRows} empty rows found`);
        }
        // Encoding detection (basic)
        // Only distinguishes "UTF-8 with BOM" from plain UTF-8 by checking
        // the first three bytes; no other encodings are detected.
        if (checkEncoding) {
            try {
                const buffer = fs_1.default.readFileSync(filePath);
                const hasUtf8Bom = buffer.length >= 3 &&
                    buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF;
                stats.encoding = hasUtf8Bom ? 'UTF-8 with BOM' : 'UTF-8';
            }
            catch (error) {
                warnings.push('Could not detect file encoding');
            }
        }
        // Only `issues` (not warnings) affect validity.
        const isValid = issues.length === 0;
        return { isValid, issues, warnings, stats, headers };
    }
    catch (error) {
        issues.push(`Error analyzing file: ${error instanceof Error ? error.message : String(error)}`);
        return { isValid: false, issues, warnings, stats, headers };
    }
}
1002
/**
 * Attempts to repair common CSV formatting issues: mixed line endings,
 * empty rows, rows with fewer columns than the header, and field quoting.
 * Writes the repaired content to `outputPath`; the input file is untouched.
 * @param filePath `string` - path to the CSV file to repair
 * @param outputPath `string` - path where the repaired CSV will be saved
 * @param options `object` - repair options
 * - `fixQuoting`: `boolean` (default true) — NOTE(review): currently unused;
 *   re-quoting below happens unconditionally. Confirm intent.
 * - `removeEmptyRows`: `boolean` (default true)
 * - `standardizeLineEndings`: `boolean` (default true)
 * - `fillMissingColumns`: `boolean` (default true)
 * - `fillValue`: `string` (default '') - value used to pad short rows
 * @returns **`repairResult`** `object` - `{ success, repairsMade, errors }`
 */
function repairCsv(filePath, outputPath, options = {}) {
    const { fixQuoting = true, removeEmptyRows = true, standardizeLineEndings = true, fillMissingColumns = true, fillValue = '' } = options;
    const repairsMade = [];
    const errors = [];
    try {
        validate.existingPathArgument(`reading.repairCsv`, { filePath });
        validate.stringArgument(`reading.repairCsv`, { outputPath });
        const delimiter = getDelimiterFromFilePath(filePath);
        let data = fs_1.default.readFileSync(filePath, 'utf8');
        // Standardize line endings
        if (standardizeLineEndings) {
            const originalData = data;
            data = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
            if (originalData !== data) {
                repairsMade.push('Standardized line endings');
            }
        }
        // Parse lines properly
        // Quote-aware line splitter (same scan as isValidCsvSync/analyzeCsv):
        // '\n' inside a quoted field stays part of the logical line.
        let lines = [];
        let currentLine = '';
        let inQuotes = false;
        let i = 0;
        while (i < data.length) {
            const char = data[i];
            const nextChar = data[i + 1];
            if (char === '"') {
                if (inQuotes && nextChar === '"') {
                    currentLine += '""';
                    i++;
                }
                else {
                    inQuotes = !inQuotes;
                    currentLine += char;
                }
            }
            else if (char === '\n' && !inQuotes) {
                lines.push(currentLine);
                currentLine = '';
            }
            else {
                currentLine += char;
            }
            i++;
        }
        if (currentLine) {
            lines.push(currentLine);
        }
        if (lines.length === 0) {
            errors.push('File is empty');
            return { success: false, repairsMade, errors };
        }
        // Get expected column count from header
        const headerFields = parseCsvLine(lines[0], delimiter);
        const expectedColumnCount = headerFields.length;
        // Process each line
        const repairedLines = [];
        let emptyRowsRemoved = 0;
        let rowsWithMissingColumns = 0;
        for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
            const line = lines[lineIndex];
            // Skip empty rows if requested
            if (removeEmptyRows && line.trim() === '') {
                emptyRowsRemoved++;
                continue;
            }
            let fields = parseCsvLine(line, delimiter);
            // Fill missing columns
            // NOTE(review): rows with MORE columns than the header are left
            // as-is; only short rows are padded.
            if (fillMissingColumns && fields.length < expectedColumnCount) {
                while (fields.length < expectedColumnCount) {
                    fields.push(fillValue);
                }
                rowsWithMissingColumns++;
            }
            // Reconstruct line with proper quoting
            const repairedLine = fields.map(field => {
                // Escape quotes and wrap in quotes if needed
                if (field.includes(delimiter) || field.includes('\n') || field.includes('"')) {
                    const escapedField = field.replace(/"/g, '""');
                    return `"${escapedField}"`;
                }
                return field;
            }).join(delimiter);
            repairedLines.push(repairedLine);
        }
        // Record repairs made
        if (emptyRowsRemoved > 0) {
            repairsMade.push(`Removed ${emptyRowsRemoved} empty rows`);
        }
        if (rowsWithMissingColumns > 0) {
            repairsMade.push(`Fixed ${rowsWithMissingColumns} rows with missing columns`);
        }
        // Write repaired file
        const repairedData = repairedLines.join('\n');
        fs_1.default.writeFileSync(outputPath, repairedData, 'utf8');
        return { success: true, repairsMade, errors };
    }
    catch (error) {
        errors.push(`Error repairing CSV: ${error instanceof Error ? error.message : String(error)}`);
        return { success: false, repairsMade, errors };
    }
}
1110
/**
 * Asserts that every given path (file or folder) exists on disk.
 * @param paths `string[]` - paths to folders or files
 * @throws `Error` naming the first path that does not exist
 */
async function validatePath(...paths) {
    for (let idx = 0; idx < paths.length; idx++) {
        const target = paths[idx];
        if (!fs_1.default.existsSync(target)) {
            throw new Error(`[ERROR reading.validatePath()]: path does not exist: ${target}`);
        }
    }
}
1118
/**
 * Collects every row whose `targetColumn` value — either as-is or after
 * running it through `extractor` — appears in `targetValues`.
 * @param rowSource `string | Record<string, any>[]`
 * @param targetColumn `string`
 * @param targetValues `string[]`
 * @param extractor `function (columnValue: string, ...args: any[]) => string`
 * @param extractorArgs `any[]` - extra arguments spread into `extractor`
 * @returns **`targetRows`** `Promise<{ rows: Record<string, any>[], remainingValues: string[] }>`
 * - array of all rows where either `row[targetColumn]` or `extractor(row[targetColumn])` is in `targetValues`,
 *   plus the target values for which no row was found
 */
async function extractTargetRows(
/**
 * - `string` -> filePath to a csv file
 * - `Record<string, any>[]` -> array of rows
 * */
rowSource, targetColumn, targetValues, extractor, extractorArgs) {
    const source = (0, logging_1.getSourceString)(F, extractTargetRows.name);
    if (!(0, typeValidation_1.isNonEmptyString)(rowSource) && !(0, typeValidation_1.isNonEmptyArray)(rowSource)) {
        throw new Error([`${source} Invalid param 'rowSource'`,
            `Expected rowSource: string | Record<string, any>[]`,
            `Received rowSource: '${typeof rowSource}'`
        ].join(config_1.INDENT_LOG_LINE));
    }
    validate.stringArgument(source, { targetColumn });
    if (extractor !== undefined)
        validate.functionArgument(source, { extractor });
    validate.arrayArgument(source, { targetValues, isNonEmptyString: typeValidation_1.isNonEmptyString });
    const sourceRows = await handleFileArgument(rowSource, extractTargetRows.name, [targetColumn]);
    const remainingValues = [];
    // potentials: targetValue -> row indices whose extracted value is a prefix match
    let potentials = {};
    let valuesFound = [];
    const targetRows = [];
    for (let i = 0; i < sourceRows.length; i++) {
        const row = sourceRows[i];
        if (!(0, typeValidation_1.hasKeys)(row, targetColumn)) {
            config_1.typeshiLogger.warn([`${source} row does not have provided targetColumn`,
                ` targetColumn: '${targetColumn}'`,
                `Object.keys(row): ${JSON.stringify(Object.keys(row))}`,
            ].join(config_1.INDENT_LOG_LINE));
            continue;
        }
        const originalValue = String(row[targetColumn]);
        // 1) exact match on the raw column value
        if (targetValues.includes(originalValue)) {
            targetRows.push(row);
            if (!valuesFound.includes(originalValue))
                valuesFound.push(originalValue);
            continue;
        }
        if (!extractor) {
            continue;
        }
        // FIX: spread extractorArgs so each element becomes its own argument,
        // matching the documented `(columnValue, ...args)` contract and the
        // sibling findMissingValues(); previously the whole array was passed
        // as a single second argument.
        const extractedValue = await extractor(originalValue, ...(extractorArgs ?? []));
        if (!(0, typeValidation_1.isNonEmptyString)(extractedValue)) {
            continue;
        }
        // 2) exact match on the extracted value
        if (targetValues.includes(extractedValue)) {
            targetRows.push(row);
            if (!valuesFound.includes(extractedValue))
                valuesFound.push(extractedValue);
            continue;
        }
        // 3) record case-insensitive prefix matches as potentials (diagnostic only)
        let targetMatch = targetValues.find(v => {
            v = v.toUpperCase();
            return v.startsWith(extractedValue.toUpperCase());
        });
        if (targetMatch) {
            if (!potentials[targetMatch]) {
                potentials[targetMatch] = [i];
            }
            else {
                potentials[targetMatch].push(i);
            }
        }
    }
    // Any target value never matched by a row is reported back to the caller.
    remainingValues.push(...targetValues.filter(v => !valuesFound.includes(v)));
    return { rows: targetRows, remainingValues };
}
1210
/**
 * For each row source, extracts values from `column` and reports those not
 * present in `extantValues`. Values the extractor cannot normalize are
 * reported in their original form.
 * @param extantValues `string[]`
 * @param csvFiles `string[] | FileData[] | Record<string, any>[][]`
 * @param column `string`
 * @param extractor `(columnValue: string, ...args: any[]) => string | Promise<string>`
 * @param extractorArgs `any[]`
 * @returns **`missingValues`** `Promise<string[][]>`
 * where `missingValues[i]` holds the values found in `csvFiles[i][column]`
 * but absent from `extantValues`
 */
async function findMissingValues(extantValues, csvFiles, column, extractor, extractorArgs = []) {
    const source = (0, logging_1.getSourceString)(__filename, findMissingValues.name);
    const missingValues = [];
    for (const rowSource of csvFiles) {
        // One result bucket per input source, de-duplicated as we go.
        const missing = [];
        missingValues.push(missing);
        const columnValues = await getColumnValues(rowSource, column);
        for (const originalValue of columnValues) {
            const extractedValue = await extractor(originalValue, ...extractorArgs);
            if (!(0, typeValidation_1.isNonEmptyString)(extractedValue)) {
                // Extraction failed: warn and fall back to the raw value.
                config_1.typeshiSimpleLogger.warn([`${source} extractor(value) returned invalid string`,
                    `originalValue: '${originalValue}'`,
                ].join(config_1.INDENT_LOG_LINE));
                if (!missing.includes(originalValue)) {
                    missing.push(originalValue);
                }
                continue;
            }
            const isMissing = !extantValues.includes(extractedValue);
            if (isMissing && !missing.includes(extractedValue)) {
                missing.push(extractedValue);
            }
        }
    }
    return missingValues;
}