typeshi 2.0.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,11 +36,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
36
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.readJsonSync = void 0;
40
39
  exports.isDirectory = isDirectory;
41
40
  exports.isFile = isFile;
42
41
  exports.getDelimiterFromFilePath = getDelimiterFromFilePath;
43
- exports.readJsonFileAsObject = readJsonFileAsObject;
42
+ exports.readJsonSync = readJsonSync;
44
43
  exports.readJsonSyncOrThrow = readJsonSyncOrThrow;
45
44
  exports.readFileToArraySync = readFileToArraySync;
46
45
  exports.coerceFileExtension = coerceFileExtension;
@@ -54,13 +53,6 @@ exports.getIndexedColumnValues = getIndexedColumnValues;
54
53
  exports.handleFileArgument = handleFileArgument;
55
54
  exports.getDirectoryFiles = getDirectoryFiles;
56
55
  exports.getOneToManyDictionary = getOneToManyDictionary;
57
- exports.parseExcelForOneToMany = parseExcelForOneToMany;
58
- exports.parseCsvForOneToMany = parseCsvForOneToMany;
59
- exports.isValidCsv = isValidCsv;
60
- exports.isValidCsvSync = isValidCsvSync;
61
- exports.analyzeCsv = analyzeCsv;
62
- exports.repairCsv = repairCsv;
63
- exports.validatePath = validatePath;
64
56
  exports.extractTargetRows = extractTargetRows;
65
57
  exports.findMissingValues = findMissingValues;
66
58
  /**
@@ -107,13 +99,6 @@ function getDelimiterFromFilePath(filePath) {
107
99
  throw new Error(`[reading.getDelimiterFromFilePath()] Unsupported file extension: ${extension}`);
108
100
  }
109
101
  }
110
- /**
111
- * @param filePath `string`
112
- * @returns **`jsonData`** — `T extends Record<string, any>` - JSON data as an object
113
- * @note returns empty object if error occurred while reading `filepath` or parsing json
114
- * - use {@link readJsonSyncOrThrow} if throwing error is desired behavior
115
- */
116
- exports.readJsonSync = readJsonFileAsObject;
117
102
  /**
118
103
  * a.k.a. `readJsonSync`
119
104
  * @param filePath `string`
@@ -121,8 +106,8 @@ exports.readJsonSync = readJsonFileAsObject;
121
106
  * @note returns empty object if error occurred while reading `filepath` or parsing json
122
107
  * - use {@link readJsonSyncOrThrow} if throwing error is desired behavior
123
108
  */
124
- function readJsonFileAsObject(filePath) {
125
- const source = (0, logging_1.getSourceString)(__filename, readJsonFileAsObject.name);
109
+ function readJsonSync(filePath) {
110
+ const source = (0, logging_1.getSourceString)(__filename, readJsonSync.name);
126
111
  try {
127
112
  filePath = coerceFileExtension(filePath, 'json');
128
113
  const data = fs_1.default.readFileSync(filePath, 'utf8');
@@ -186,7 +171,6 @@ function readFileToArraySync(filePath, separator = /\r?\n/, encoding = 'utf8') {
186
171
  * @returns **`validatedFilePath`** `string`
187
172
  */
188
173
  function coerceFileExtension(filePath, expectedExtension) {
189
- validate.multipleStringArguments((0, logging_1.getSourceString)(__filename, coerceFileExtension.name), { filePath, expectedExtension });
190
174
  expectedExtension = expectedExtension.replace(/\./, '');
191
175
  if (filePath.endsWith(`.${expectedExtension}`)) {
192
176
  return filePath;
@@ -251,7 +235,7 @@ async function concatenateFiles(arg1, sheetName = 'Sheet1', requiredHeaders = []
251
235
  continue;
252
236
  }
253
237
  if (!haveDefinedRequiredHeaders) {
254
- let firstValidRow = rows.find(row => !(0, typeValidation_1.isNullLike)(row));
238
+ let firstValidRow = rows.find(row => !(0, typeValidation_1.isEmpty)(row));
255
239
  if (!firstValidRow) {
256
240
  continue;
257
241
  }
@@ -280,7 +264,7 @@ async function concatenateFiles(arg1, sheetName = 'Sheet1', requiredHeaders = []
280
264
  throw new Error(message);
281
265
  }
282
266
  for (const header of missingHeaders) {
283
- row[header] = '';
267
+ (row)[header] = '';
284
268
  }
285
269
  }
286
270
  concatenatedRows.push(row);
@@ -431,45 +415,53 @@ async function getCsvRows(arg1) {
431
415
  */
432
416
  async function getOneToOneDictionary(arg1, keyColumn, valueColumn, keyOptions, valueOptions, requireIncludeAllRows = false) {
433
417
  const source = (0, logging_1.getSourceString)(__filename, getOneToOneDictionary.name);
434
- validate.multipleStringArguments(source, { keyColumn, valueColumn });
435
- let rows = await handleFileArgument(arg1, getOneToOneDictionary.name, [keyColumn, valueColumn]);
436
- const dict = {};
437
- for (let i = 0; i < rows.length; i++) {
438
- const row = rows[i];
439
- if (!(0, typeValidation_1.hasKeys)(row, [keyColumn, valueColumn])) {
440
- let msg = [`${source} row @ index ${i} missing key(s): '${keyColumn}', '${valueColumn}'`,
441
- ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row} -> row[keyColumn] = '${row[keyColumn]}'`,
442
- `valueColumn: '${valueColumn}' in row ? ${valueColumn in row} -> row[valueColumn] = '${row[valueColumn]}'`,
443
- ].join(config_1.INDENT_LOG_LINE);
444
- if (requireIncludeAllRows)
445
- throw new Error(msg);
446
- config_1.typeshiLogger.warn(msg);
447
- continue;
448
- }
449
- const key = (0, regex_1.DEP_clean)(String(row[keyColumn]), keyOptions);
450
- const value = (0, regex_1.DEP_clean)(String(row[valueColumn]), valueOptions);
451
- if (!key || !value) {
452
- let msg = [`${source} Row @ index ${i} missing key or value.`,
453
- ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row}`,
454
- `-> row[keyColumn] = '${row[keyColumn]}'`,
455
- ` clean(String(row[keyColumn]), keyOptions): '${key}'`,
456
- `valueColumn: '${valueColumn}' in row ? ${valueColumn in row}`,
457
- `-> row[valueColumn] = '${row[valueColumn]}'`,
458
- `clean(String(row[valueColumn]), valueOptions): '${value}'`,
459
- ].join(config_1.INDENT_LOG_LINE);
460
- if (requireIncludeAllRows)
461
- throw new Error(msg);
462
- config_1.typeshiLogger.warn(msg);
463
- continue;
464
- }
465
- if (dict[key]) {
466
- config_1.typeshiLogger.warn([`${source} row @ index ${i} Duplicate key found: '${key}'`,
467
- `overwriting value '${dict[key]}' with '${value}'`
468
- ].join(config_1.INDENT_LOG_LINE));
418
+ try {
419
+ validate.multipleStringArguments(source, { keyColumn, valueColumn });
420
+ let rows = await handleFileArgument(arg1, getOneToOneDictionary.name, [keyColumn, valueColumn]);
421
+ const dict = {};
422
+ for (let i = 0; i < rows.length; i++) {
423
+ const row = rows[i];
424
+ if (!(0, typeValidation_1.hasKeys)(row, [keyColumn, valueColumn])) {
425
+ let msg = [`${source} row @ index ${i} missing key(s): '${keyColumn}', '${valueColumn}'`,
426
+ ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row} -> row[keyColumn] = '${row[keyColumn]}'`,
427
+ `valueColumn: '${valueColumn}' in row ? ${valueColumn in row} -> row[valueColumn] = '${row[valueColumn]}'`,
428
+ ].join(config_1.INDENT_LOG_LINE);
429
+ if (requireIncludeAllRows)
430
+ throw new Error(msg);
431
+ config_1.typeshiLogger.warn(msg);
432
+ continue;
433
+ }
434
+ const key = (0, regex_1.clean)(String(row[keyColumn]), keyOptions);
435
+ const value = (0, regex_1.clean)(String(row[valueColumn]), valueOptions);
436
+ if (!key || !value) {
437
+ let msg = [`${source} Row @ index ${i} missing key or value.`,
438
+ ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row}`,
439
+ `-> row[keyColumn] = '${row[keyColumn]}'`,
440
+ ` clean(String(row[keyColumn]), keyOptions): '${key}'`,
441
+ `valueColumn: '${valueColumn}' in row ? ${valueColumn in row}`,
442
+ `-> row[valueColumn] = '${row[valueColumn]}'`,
443
+ `clean(String(row[valueColumn]), valueOptions): '${value}'`,
444
+ ].join(config_1.INDENT_LOG_LINE);
445
+ if (requireIncludeAllRows)
446
+ throw new Error(msg);
447
+ config_1.typeshiLogger.warn(msg);
448
+ continue;
449
+ }
450
+ if (dict[key]) {
451
+ config_1.typeshiLogger.warn([`${source} row @ index ${i} Duplicate key found: '${key}'`,
452
+ `overwriting value '${dict[key]}' with '${value}'`
453
+ ].join(config_1.INDENT_LOG_LINE));
454
+ }
455
+ dict[key] = value;
469
456
  }
470
- dict[key] = value;
457
+ return dict;
458
+ }
459
+ catch (error) {
460
+ config_1.typeshiLogger.error([`${source} An unexpected error occurred, returning empty dictionary.`,
461
+ `caught: ${error}`
462
+ ].join(config_1.NEW_LINE));
463
+ return {};
471
464
  }
472
- return dict;
473
465
  }
474
466
  /**
475
467
  * @param arg1 `string | FileData | Record<string, any>[]` - the `filePath` to a CSV file or an array of rows.
@@ -530,45 +522,52 @@ async function getIndexedColumnValues(arg1, columnName, cleaner) {
530
522
  * @param requiredHeaders `string[]` `optional`
531
523
  * @returns **`rows`** `Promise<Record<string, any>[]>`
532
524
  */
533
- async function handleFileArgument(arg1, invocationSource, requiredHeaders = [], sheetName) {
525
+ async function handleFileArgument(arg1, invocationSource, requiredHeaders, sheetName) {
534
526
  const source = (0, logging_1.getSourceString)(__filename, handleFileArgument.name);
535
- validate.stringArgument(source, { invocationSource });
536
- validate.arrayArgument(source, { requiredHeaders, isNonEmptyString: typeValidation_1.isNonEmptyString }, true);
537
- let rows = [];
538
- // Handle file path validation only for string inputs
539
- if ((0, typeValidation_1.isNonEmptyString)(arg1)
540
- && (0, regex_1.stringEndsWithAnyOf)(arg1, /(\.tsv|\.csv)/i)
541
- && !isValidCsvSync(arg1, requiredHeaders)) {
542
- throw new Error([
543
- `${source} Invalid CSV filePath provided: '${arg1}'`,
544
- `invocationSource: ${invocationSource}`,
545
- `requiredHeaders ? ${(0, typeValidation_1.isNonEmptyArray)(requiredHeaders)
546
- ? JSON.stringify(requiredHeaders)
547
- : 'none provided'}`
548
- ].join(config_1.INDENT_LOG_LINE));
549
- }
550
- if (((0, typeValidation_1.isNonEmptyString)(arg1) && isFile(arg1)) // arg1 is file path string
551
- || (0, types_1.isFileData)(arg1)) { // arg1 is FileData { fileName: string; fileContent: string; }
552
- rows = await getRows(arg1, sheetName);
553
- }
554
- else if ((0, typeValidation_1.isNonEmptyArray)(arg1)) { // arg1 is already array of rows
555
- if (arg1.some(v => !(0, typeValidation_1.isObject)(v))) {
527
+ try {
528
+ let rows = [];
529
+ if (((0, typeValidation_1.isNonEmptyString)(arg1) && isFile(arg1)) // arg1 is file path
530
+ || (0, types_1.isFileData)(arg1)) {
531
+ rows = await getRows(arg1, sheetName);
532
+ }
533
+ else if ((0, typeValidation_1.isNonEmptyArray)(arg1)) { // arg1 is already array of rows
534
+ if (arg1.some(v => !(0, typeValidation_1.isObject)(v))) {
535
+ throw new Error([
536
+ `${source} Error: Invalid 'arg1' (Record<string, any>[]) param:`,
537
+ `There exists an element in the row array that is not an object.`,
538
+ `Source: ${invocationSource}`,
539
+ ].join(config_1.INDENT_LOG_LINE));
540
+ }
541
+ rows = arg1;
542
+ }
543
+ else {
556
544
  throw new Error([
557
- `${source} Error: Invalid 'arg1' (Record<string, any>[]) param:`,
558
- `There exists an element in the row array that is not an object.`,
545
+ `${source} Invalid parameter: 'arg1' (string | FileData | Record<string, any>[])`,
546
+ `arg1 must be a file path string, FileData object, or an array of rows.`,
559
547
  `Source: ${invocationSource}`,
560
548
  ].join(config_1.INDENT_LOG_LINE));
561
549
  }
562
- rows = arg1;
550
+ if ((0, typeValidation_1.isEmpty)(requiredHeaders))
551
+ return rows;
552
+ for (let i = 0; i < rows.length; i++) {
553
+ const rowHeaders = new Set(Object.keys(rows[i]));
554
+ if (requiredHeaders.some(h => !rowHeaders.has(h))) {
555
+ throw new Error([
556
+ `${source} Invalid row @ index ${i}: Missing required header(s)`,
557
+ `requiredHeaders: ${JSON.stringify(requiredHeaders)}`,
558
+ `row${i} headers: ${JSON.stringify(Array.from(rowHeaders))}`,
559
+ `Source: ${invocationSource}`,
560
+ ].join(config_1.INDENT_LOG_LINE));
561
+ }
562
+ }
563
+ return rows;
563
564
  }
564
- else {
565
- throw new Error([
566
- `${source} Invalid parameter: 'arg1' (string | FileData | Record<string, any>[])`,
567
- `arg1 must be a file path string, FileData object, or an array of rows.`,
568
- `Source: ${invocationSource}`,
569
- ].join(config_1.INDENT_LOG_LINE));
565
+ catch (error) {
566
+ config_1.typeshiLogger.error([`${source} An unexpected error occurred. Returning empty array.`,
567
+ `caught: ${error}`
568
+ ].join(config_1.NEW_LINE));
569
+ return [];
570
570
  }
571
- return rows;
572
571
  }
573
572
  /**
574
573
  * `sync`
@@ -639,568 +638,39 @@ function getDirectoryFiles(dir, arg2, ...targetExtensions) {
639
638
  * @param dataSource `string | FileData | Record<string, any>[]`
640
639
  * @param keyColumn `string`
641
640
  * @param valueColumn `string`
642
- * @param keyOptions {@link DEP_CleanStringOptions} `(optional)`
643
- * @param valueOptions {@link DEP_CleanStringOptions}`(optional)`
644
- * @param sheetName `string`
641
+ * @param keyOptions {@link StringCleanOptions} `(optional)`
642
+ * @param valueOptions {@link StringCleanOptions} `(optional)`
643
+ * @param sheetName `string` `(optional)`
645
644
  * @returns **`dict`** `Promise<Record<string, string[]>>`
646
645
  */
647
646
  async function getOneToManyDictionary(dataSource, keyColumn, valueColumn, keyOptions, valueOptions, sheetName) {
648
647
  const source = (0, logging_1.getSourceString)(__filename, getOneToManyDictionary.name);
649
- validate.multipleStringArguments(source, { keyColumn, valueColumn });
650
- if (keyOptions)
651
- validate.objectArgument(source, { keyOptions, DEP_isCleanStringOptions: regex_1.DEP_isCleanStringOptions });
652
- if (valueOptions)
653
- validate.objectArgument(source, { valueOptions, DEP_isCleanStringOptions: regex_1.DEP_isCleanStringOptions });
654
- const rows = await handleFileArgument(dataSource, source, [keyColumn, valueColumn], sheetName);
655
- const dict = {};
656
- for (let i = 0; i < rows.length; i++) {
657
- let row = rows[i];
658
- let key = (0, regex_1.DEP_clean)(row[keyColumn], keyOptions).trim().replace(/\.$/, '');
659
- if (!dict[key]) {
660
- dict[key] = [];
661
- }
662
- let value = (0, regex_1.DEP_clean)(row[valueColumn], valueOptions).trim().replace(/\.$/, '');
663
- if (!dict[key].includes(value)) {
664
- dict[key].push(value);
665
- }
666
- }
667
- return dict;
668
- }
669
- /**
670
- * @deprecated `use `{@link getOneToManyDictionary}
671
- * @param filePath `string`
672
- * @param sheetName `string`
673
- * @param keyColumn `string`
674
- * @param valueColumn `string`
675
- * @param options - {@link ParseOneToManyOptions}
676
- * = `{ keyStripOptions`?: {@link DEP_StringStripOptions}, `valueStripOptions`?: {@link DEP_StringStripOptions}, keyCaseOptions`?: {@link StringCaseOptions}, `valueCaseOptions`?: {@link StringCaseOptions}, `keyPadOptions`?: {@link StringPadOptions}, `valuePadOptions`?: {@link StringPadOptions} `}`
677
- * - {@link DEP_StringStripOptions} = `{ char`: `string`, `escape`?: `boolean`, `stripLeftCondition`?: `(s: string, ...args: any[]) => boolean`, `leftArgs`?: `any[]`, `stripRightCondition`?: `(s: string, ...args: any[]) => boolean`, `rightArgs`?: `any[] }`
678
- * - {@link DEP_StringCaseOptions} = `{ toUpper`?: `boolean`, `toLower`?: `boolean`, `toTitle`?: `boolean }`
679
- * - {@link StringPadOptions} = `{ padLength`: `number`, `padChar`?: `string`, `padLeft`?: `boolean`, `padRight`?: `boolean }`
680
- * @returns **`dict`** `Record<string, Array<string>>` — key-value pairs where key is from `keyColumn` and value is an array of values from `valueColumn`
681
- */
682
- function parseExcelForOneToMany(filePath, sheetName, keyColumn, valueColumn, options = {}) {
683
- filePath = coerceFileExtension(filePath, 'xlsx');
684
- validate.multipleStringArguments(`reading.parseExcelForOneToMany`, { filePath, sheetName, keyColumn, valueColumn });
685
648
  try {
686
- const { keyStripOptions, valueStripOptions, keyCaseOptions, valueCaseOptions, keyPadOptions, valuePadOptions } = options;
687
- const workbook = xlsx_1.default.readFile(filePath);
688
- const sheet = workbook.Sheets[sheetName];
689
- const jsonData = xlsx_1.default.utils.sheet_to_json(sheet);
690
649
  const dict = {};
691
- jsonData.forEach(row => {
692
- let key = (0, regex_1.DEP_clean)(String(row[keyColumn]), keyStripOptions, keyCaseOptions, keyPadOptions).trim().replace(/\.$/, '');
693
- let val = (0, regex_1.DEP_clean)(String(row[valueColumn]), valueStripOptions, valueCaseOptions, valuePadOptions).trim().replace(/\.$/, '');
694
- if (!dict[key]) {
695
- dict[key] = [];
696
- }
697
- if (!dict[key].includes(val)) {
698
- dict[key].push(val);
699
- }
700
- });
701
- return dict;
702
- }
703
- catch (err) {
704
- config_1.typeshiLogger.error('Error reading or parsing the Excel file:', err, config_1.INDENT_LOG_LINE + 'Given File Path:', '"' + filePath + '"');
705
- return {};
706
- }
707
- }
708
- /**
709
- * @deprecated -> use {@link getOneToManyDictionary}
710
- * @param filePath `string`
711
- * @param keyColumn `string`
712
- * @param valueColumn `string`
713
- * @param delimiter {@link DelimiterCharacters} | `string`
714
- * @param options {@link ParseOneToManyOptions}
715
- * = `{ keyCaseOptions`?: {@link DEP_StringCaseOptions}, `valueCaseOptions`?: {@link DEP_StringCaseOptions}, `keyPadOptions`?: {@link StringPadOptions}, `valuePadOptions`?: {@link StringPadOptions} `}`
716
- * - {@link DEP_StringCaseOptions} = `{ toUpper`?: `boolean`, `toLower`?: `boolean`, `toTitle`?: `boolean }`
717
- * - {@link StringPadOptions} = `{ padLength`: `number`, `padChar`?: `string`, `padLeft`?: `boolean`, `padRight`?: `boolean }`
718
- * @returns `Record<string, Array<string>>` - key-value pairs where key is from `keyColumn` and value is an array of values from `valueColumn`
719
- */
720
- function parseCsvForOneToMany(filePath, keyColumn, valueColumn, delimiter = types_1.DelimiterCharacterEnum.COMMA, options = {}) {
721
- filePath = coerceFileExtension(filePath, (delimiter === types_1.DelimiterCharacterEnum.TAB) ? 'tsv' : 'csv');
722
- const source = `[reading.parseCsvForOneToMany()]`;
723
- try {
724
- validate.existingFileArgument(source, ['.tsv', '.csv'], { filePath });
725
650
  validate.multipleStringArguments(source, { keyColumn, valueColumn });
726
- const { keyStripOptions, valueStripOptions, keyCaseOptions, valueCaseOptions, keyPadOptions, valuePadOptions } = options;
727
- const data = fs_1.default.readFileSync(filePath, 'utf8');
728
- const lines = data.split('\n');
729
- const dict = {};
730
- const header = lines[0].split(delimiter).map(col => col.trim());
731
- const keyIndex = header.indexOf(keyColumn);
732
- const valueIndex = header.indexOf(valueColumn);
733
- if (keyIndex === -1 || valueIndex === -1) {
734
- throw new Error(`Key or value column not found in CSV file.`);
735
- }
736
- for (let i = 1; i < lines.length; i++) {
737
- const line = lines[i].split(delimiter).map(col => col.trim());
738
- if (line.length > 1) {
739
- let key = (0, regex_1.DEP_clean)(line[keyIndex], keyStripOptions, keyCaseOptions, keyPadOptions);
740
- let val = (0, regex_1.DEP_clean)(line[valueIndex], valueStripOptions, valueCaseOptions, valuePadOptions);
741
- if (!dict[key]) {
742
- dict[key] = [];
743
- }
744
- if (!dict[key].includes(val)) {
745
- dict[key].push(val);
746
- }
747
- }
651
+ if (keyOptions)
652
+ validate.objectArgument(source, { keyOptions, isStringCleanOptions: regex_1.isStringCleanOptions });
653
+ if (valueOptions)
654
+ validate.objectArgument(source, { valueOptions, isStringCleanOptions: regex_1.isStringCleanOptions });
655
+ const rows = await handleFileArgument(dataSource, source, [keyColumn, valueColumn], sheetName);
656
+ for (let i = 0; i < rows.length; i++) {
657
+ let row = rows[i];
658
+ let key = (0, regex_1.clean)(row[keyColumn], keyOptions)
659
+ .trim().replace(/\.$/, '');
660
+ if (!dict[key])
661
+ dict[key] = [];
662
+ let value = (0, regex_1.clean)(row[valueColumn], valueOptions)
663
+ .trim().replace(/\.$/, '');
664
+ if (!dict[key].includes(value))
665
+ dict[key].push(value);
748
666
  }
749
667
  return dict;
750
668
  }
751
- catch (err) {
752
- config_1.typeshiLogger.error('Error reading or parsing the CSV file:', err, config_1.INDENT_LOG_LINE + 'Given File Path:', '"' + filePath + '"');
753
- return {};
754
- }
755
- }
756
- const DEFAULT_CSV_VALIDATION_RULES = {
757
- allowEmptyRows: true,
758
- allowInconsistentColumns: true,
759
- maxRowsToCheck: Infinity,
760
- };
761
- /**
762
- * @notimplemented
763
- * @TODO
764
- * @param arg1
765
- * @param requiredHeaders
766
- * @param options
767
- * @returns
768
- */
769
- async function isValidCsv(arg1, requiredHeaders, options = DEFAULT_CSV_VALIDATION_RULES) {
770
- return false;
771
- }
772
- /**
773
- * @problem has trouble handling case where column value contains a single double quote;
774
- * e.g. when it's used as the inches unit after a number
775
- *
776
- * `sync`
777
- * @param filePath `string` - must be a string to an existing file, otherwise return `false`.
778
- * @param requiredHeaders `string[]` - `optional` array of headers that must be present in the CSV file.
779
- * - If provided, the function checks if all required headers are present in the CSV header row
780
- * @param options `object` - optional configuration
781
- * - `allowEmptyRows`: `boolean` - if true, allows rows with all empty fields (default: true)
782
- * - `allowInconsistentColumns`: `boolean` - if true, allows rows with different column counts (default: false)
783
- * - `maxRowsToCheck`: `number` - maximum number of rows to validate (default: all rows)
784
- * @returns **`isValidCsv`** `boolean`
785
- * - **`true`** `if` the CSV file at `filePath` is valid (proper structure and formatting),
786
- * - **`false`** `otherwise`.
787
- */
788
- function isValidCsvSync(filePath, requiredHeaders, options = DEFAULT_CSV_VALIDATION_RULES) {
789
- const { allowEmptyRows = true, allowInconsistentColumns = false, maxRowsToCheck = Infinity } = options;
790
- validate.existingPathArgument(`reading.isValidCsv`, { filePath });
791
- try {
792
- const delimiter = getDelimiterFromFilePath(filePath);
793
- const data = fs_1.default.readFileSync(filePath, 'utf8');
794
- // Handle different line endings
795
- const normalizedData = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
796
- // Split into lines, but be careful about quoted fields with newlines
797
- let lines = [];
798
- let currentLine = '';
799
- let inQuotes = false;
800
- let i = 0;
801
- while (i < normalizedData.length) {
802
- const char = normalizedData[i];
803
- const nextChar = normalizedData[i + 1];
804
- if (char === '"') {
805
- if (inQuotes && nextChar === '"') {
806
- // Escaped quote
807
- currentLine += '""';
808
- i++; // Skip next quote
809
- }
810
- else {
811
- // Toggle quote state
812
- inQuotes = !inQuotes;
813
- currentLine += char;
814
- }
815
- }
816
- else if (char === '\n' && !inQuotes) {
817
- // End of line (not within quotes)
818
- if (currentLine.trim() !== '' || allowEmptyRows) {
819
- lines.push(currentLine);
820
- }
821
- currentLine = '';
822
- }
823
- else {
824
- currentLine += char;
825
- }
826
- i++;
827
- }
828
- // Add the last line if it exists
829
- if (currentLine.trim() !== '' || allowEmptyRows) {
830
- lines.push(currentLine);
831
- }
832
- if (lines.length < 1) {
833
- config_1.typeshiLogger.error(`[ERROR isValidCsv()]: file has no valid lines: ${filePath}`);
834
- return false;
835
- }
836
- const headerRow = parseCsvLine(lines[0], delimiter);
837
- if (headerRow.length < 1) {
838
- config_1.typeshiLogger.error(`[ERROR isValidCsv()]: no header found in file: ${filePath}`);
839
- return false;
840
- }
841
- // Check for empty headers
842
- if (headerRow.some(header => header === '')) {
843
- config_1.typeshiLogger.warn(`[isValidCsv()]: Found empty header(s) in file: ${filePath}`);
844
- if (!allowInconsistentColumns) {
845
- return false;
846
- }
847
- }
848
- // Validate required headers
849
- if ((0, typeValidation_1.isNonEmptyArray)(requiredHeaders)) {
850
- const hasRequiredHeaders = requiredHeaders.every(header => {
851
- if (!(0, typeValidation_1.isNonEmptyString)(header)) {
852
- config_1.typeshiLogger.warn([
853
- `[reading.isValidCsv]: Invalid parameter: 'requiredHeaders'`,
854
- `requiredHeaders must be of type: Array<string>`,
855
- `found array element of type: '${typeof header}' (skipping)`
856
- ].join(config_1.INDENT_LOG_LINE));
857
- return true; // skip headers if they are not strings
858
- }
859
- return headerRow.includes(header);
860
- });
861
- if (!hasRequiredHeaders) {
862
- config_1.typeshiLogger.warn([
863
- `[isValidCsv()]: Required headers missing from headerRow`,
864
- `filePath: '${filePath}'`,
865
- `requiredHeaders: ${JSON.stringify(requiredHeaders)}`,
866
- `csvFileHeaders: ${JSON.stringify(headerRow)}`
867
- ].join(config_1.INDENT_LOG_LINE));
868
- return false;
869
- }
870
- }
871
- // Check consistency of data rows
872
- const maxRows = Math.min(lines.length, maxRowsToCheck + 1); // +1 for header
873
- const expectedColumnCount = headerRow.length;
874
- for (let i = 1; i < maxRows; i++) {
875
- const line = lines[i];
876
- // Skip completely empty lines if allowed
877
- if (allowEmptyRows && line.trim() === '') {
878
- continue;
879
- }
880
- const rowValues = parseCsvLine(line, delimiter);
881
- // Check if row is empty (all fields are empty)
882
- const isEmptyRow = rowValues.every(val => val === '');
883
- if (isEmptyRow && allowEmptyRows) {
884
- continue;
885
- }
886
- // Check column count consistency
887
- if (rowValues.length !== expectedColumnCount && !allowInconsistentColumns) {
888
- config_1.typeshiLogger.warn([
889
- `[isValidCsv()]: Invalid row found: header.length !== rowValues.length`,
890
- ` header.length: ${expectedColumnCount}`,
891
- `rowValues.length: ${rowValues.length}`,
892
- ` -> Difference = ${expectedColumnCount - rowValues.length}`,
893
- ` header: ${JSON.stringify(headerRow)}`,
894
- // `rowValues: ${JSON.stringify(rowValues)}`,
895
- ` rowIndex: ${i}`,
896
- ` filePath: '${filePath}'`,
897
- `delimiter: '${delimiter}'`
898
- ].join(config_1.INDENT_LOG_LINE));
899
- return false;
900
- }
901
- }
902
- return true;
903
- }
904
- catch (error) {
905
- config_1.typeshiLogger.error([
906
- `[isValidCsv()]: Error reading or parsing CSV file: ${filePath}`,
907
- `Error: ${error instanceof Error ? error.message : String(error)}`
908
- ].join(config_1.INDENT_LOG_LINE));
909
- return false;
910
- }
911
- }
912
- /**
913
- * Parses a CSV line into fields, properly handling quoted fields with embedded delimiters, quotes, and newlines
914
- * @param line `string` - the CSV line to parse
915
- * @param delimiter `string` - the delimiter character
916
- * @returns **`fields`** `string[]` - array of field values
917
- */
918
- function parseCsvLine(line, delimiter) {
919
- const fields = [];
920
- let current = '';
921
- let inQuotes = false;
922
- let i = 0;
923
- while (i < line.length) {
924
- const char = line[i];
925
- const nextChar = line[i + 1];
926
- if (!inQuotes) {
927
- if (char === '"') {
928
- inQuotes = true;
929
- }
930
- else if (char === delimiter) {
931
- fields.push(current.trim());
932
- current = '';
933
- }
934
- else {
935
- current += char;
936
- }
937
- }
938
- else {
939
- if (char === '"') {
940
- if (nextChar === '"') {
941
- // Escaped quote within quoted field
942
- current += '"';
943
- i++; // Skip the next quote
944
- }
945
- else {
946
- // End of quoted field
947
- inQuotes = false;
948
- }
949
- }
950
- else {
951
- current += char;
952
- }
953
- }
954
- i++;
955
- }
956
- // Add the last field
957
- fields.push(current.trim());
958
- return fields;
959
- }
960
- /**
961
- * Analyzes a CSV file and returns detailed validation information
962
- * @param filePath `string` - path to the CSV file
963
- * @param options `object` - validation options
964
- * @returns **`analysis`** `object` - detailed analysis of the CSV file
965
- */
966
- function analyzeCsv(filePath, options = {}) {
967
- const { sampleSize = 1000, checkEncoding = false, detectDelimiter = false } = options;
968
- const issues = [];
969
- const warnings = [];
970
- const stats = {
971
- totalRows: 0,
972
- headerCount: 0,
973
- maxRowLength: 0,
974
- minRowLength: Infinity,
975
- emptyRows: 0,
976
- encoding: null,
977
- detectedDelimiter: null
978
- };
979
- let headers = [];
980
- try {
981
- validate.existingPathArgument(`reading.analyzeCsv`, { filePath });
982
- const data = fs_1.default.readFileSync(filePath, 'utf8');
983
- const normalizedData = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
984
- // Detect delimiter if requested
985
- let delimiter;
986
- if (detectDelimiter) {
987
- const commonDelimiters = [',', '\t', ';', '|'];
988
- const delimiterCounts = commonDelimiters.map(delim => ({
989
- delimiter: delim,
990
- count: (data.match(new RegExp(`\\${delim}`, 'g')) || []).length
991
- }));
992
- const mostLikely = delimiterCounts.sort((a, b) => b.count - a.count)[0];
993
- delimiter = mostLikely.count > 0 ? mostLikely.delimiter : getDelimiterFromFilePath(filePath);
994
- stats.detectedDelimiter = delimiter;
995
- }
996
- else {
997
- delimiter = getDelimiterFromFilePath(filePath);
998
- }
999
- // Parse the file properly
1000
- let lines = [];
1001
- let currentLine = '';
1002
- let inQuotes = false;
1003
- let i = 0;
1004
- while (i < normalizedData.length) {
1005
- const char = normalizedData[i];
1006
- const nextChar = normalizedData[i + 1];
1007
- if (char === '"') {
1008
- if (inQuotes && nextChar === '"') {
1009
- currentLine += '""';
1010
- i++;
1011
- }
1012
- else {
1013
- inQuotes = !inQuotes;
1014
- currentLine += char;
1015
- }
1016
- }
1017
- else if (char === '\n' && !inQuotes) {
1018
- lines.push(currentLine);
1019
- currentLine = '';
1020
- }
1021
- else {
1022
- currentLine += char;
1023
- }
1024
- i++;
1025
- }
1026
- if (currentLine) {
1027
- lines.push(currentLine);
1028
- }
1029
- stats.totalRows = lines.length;
1030
- if (lines.length === 0) {
1031
- issues.push('File is empty');
1032
- return { isValid: false, issues, warnings, stats, headers };
1033
- }
1034
- headers = parseCsvLine(lines[0], delimiter);
1035
- stats.headerCount = headers.length;
1036
- stats.maxRowLength = headers.length;
1037
- stats.minRowLength = headers.length;
1038
- // Check for duplicate headers
1039
- const headerSet = new Set(headers);
1040
- if (headerSet.size !== headers.length) {
1041
- warnings.push('Duplicate header names found');
1042
- }
1043
- // Check for empty headers
1044
- if (headers.some(h => h.trim() === '')) {
1045
- warnings.push('Empty header names found');
1046
- }
1047
- // Analyze data rows (sample if necessary)
1048
- const rowsToCheck = Math.min(lines.length - 1, sampleSize);
1049
- const step = rowsToCheck < lines.length - 1 ? Math.floor((lines.length - 1) / rowsToCheck) : 1;
1050
- let inconsistentRows = 0;
1051
- for (let i = 1; i < lines.length; i += step) {
1052
- const line = lines[i];
1053
- if (line.trim() === '') {
1054
- stats.emptyRows++;
1055
- continue;
1056
- }
1057
- const fields = parseCsvLine(line, delimiter);
1058
- stats.maxRowLength = Math.max(stats.maxRowLength, fields.length);
1059
- stats.minRowLength = Math.min(stats.minRowLength, fields.length);
1060
- if (fields.length !== headers.length) {
1061
- inconsistentRows++;
1062
- }
1063
- }
1064
- if (inconsistentRows > 0) {
1065
- warnings.push(`${inconsistentRows} rows have inconsistent column counts`);
1066
- }
1067
- if (stats.emptyRows > 0) {
1068
- warnings.push(`${stats.emptyRows} empty rows found`);
1069
- }
1070
- // Encoding detection (basic)
1071
- if (checkEncoding) {
1072
- try {
1073
- const buffer = fs_1.default.readFileSync(filePath);
1074
- const hasUtf8Bom = buffer.length >= 3 &&
1075
- buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF;
1076
- stats.encoding = hasUtf8Bom ? 'UTF-8 with BOM' : 'UTF-8';
1077
- }
1078
- catch (error) {
1079
- warnings.push('Could not detect file encoding');
1080
- }
1081
- }
1082
- const isValid = issues.length === 0;
1083
- return { isValid, issues, warnings, stats, headers };
1084
- }
1085
- catch (error) {
1086
- issues.push(`Error analyzing file: ${error instanceof Error ? error.message : String(error)}`);
1087
- return { isValid: false, issues, warnings, stats, headers };
1088
- }
1089
- }
1090
- /**
1091
- * Attempts to repair common CSV formatting issues
1092
- * @param filePath `string` - path to the CSV file to repair
1093
- * @param outputPath `string` - path where the repaired CSV will be saved
1094
- * @param options `object` - repair options
1095
- * @returns **`repairResult`** `object` - result of the repair operation
1096
- */
1097
- function repairCsv(filePath, outputPath, options = {}) {
1098
- const { fixQuoting = true, removeEmptyRows = true, standardizeLineEndings = true, fillMissingColumns = true, fillValue = '' } = options;
1099
- const repairsMade = [];
1100
- const errors = [];
1101
- try {
1102
- validate.existingPathArgument(`reading.repairCsv`, { filePath });
1103
- validate.stringArgument(`reading.repairCsv`, { outputPath });
1104
- const delimiter = getDelimiterFromFilePath(filePath);
1105
- let data = fs_1.default.readFileSync(filePath, 'utf8');
1106
- // Standardize line endings
1107
- if (standardizeLineEndings) {
1108
- const originalData = data;
1109
- data = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
1110
- if (originalData !== data) {
1111
- repairsMade.push('Standardized line endings');
1112
- }
1113
- }
1114
- // Parse lines properly
1115
- let lines = [];
1116
- let currentLine = '';
1117
- let inQuotes = false;
1118
- let i = 0;
1119
- while (i < data.length) {
1120
- const char = data[i];
1121
- const nextChar = data[i + 1];
1122
- if (char === '"') {
1123
- if (inQuotes && nextChar === '"') {
1124
- currentLine += '""';
1125
- i++;
1126
- }
1127
- else {
1128
- inQuotes = !inQuotes;
1129
- currentLine += char;
1130
- }
1131
- }
1132
- else if (char === '\n' && !inQuotes) {
1133
- lines.push(currentLine);
1134
- currentLine = '';
1135
- }
1136
- else {
1137
- currentLine += char;
1138
- }
1139
- i++;
1140
- }
1141
- if (currentLine) {
1142
- lines.push(currentLine);
1143
- }
1144
- if (lines.length === 0) {
1145
- errors.push('File is empty');
1146
- return { success: false, repairsMade, errors };
1147
- }
1148
- // Get expected column count from header
1149
- const headerFields = parseCsvLine(lines[0], delimiter);
1150
- const expectedColumnCount = headerFields.length;
1151
- // Process each line
1152
- const repairedLines = [];
1153
- let emptyRowsRemoved = 0;
1154
- let rowsWithMissingColumns = 0;
1155
- for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
1156
- const line = lines[lineIndex];
1157
- // Skip empty rows if requested
1158
- if (removeEmptyRows && line.trim() === '') {
1159
- emptyRowsRemoved++;
1160
- continue;
1161
- }
1162
- let fields = parseCsvLine(line, delimiter);
1163
- // Fill missing columns
1164
- if (fillMissingColumns && fields.length < expectedColumnCount) {
1165
- while (fields.length < expectedColumnCount) {
1166
- fields.push(fillValue);
1167
- }
1168
- rowsWithMissingColumns++;
1169
- }
1170
- // Reconstruct line with proper quoting
1171
- const repairedLine = fields.map(field => {
1172
- // Escape quotes and wrap in quotes if needed
1173
- if (field.includes(delimiter) || field.includes('\n') || field.includes('"')) {
1174
- const escapedField = field.replace(/"/g, '""');
1175
- return `"${escapedField}"`;
1176
- }
1177
- return field;
1178
- }).join(delimiter);
1179
- repairedLines.push(repairedLine);
1180
- }
1181
- // Record repairs made
1182
- if (emptyRowsRemoved > 0) {
1183
- repairsMade.push(`Removed ${emptyRowsRemoved} empty rows`);
1184
- }
1185
- if (rowsWithMissingColumns > 0) {
1186
- repairsMade.push(`Fixed ${rowsWithMissingColumns} rows with missing columns`);
1187
- }
1188
- // Write repaired file
1189
- const repairedData = repairedLines.join('\n');
1190
- fs_1.default.writeFileSync(outputPath, repairedData, 'utf8');
1191
- return { success: true, repairsMade, errors };
1192
- }
1193
669
  catch (error) {
1194
- errors.push(`Error repairing CSV: ${error instanceof Error ? error.message : String(error)}`);
1195
- return { success: false, repairsMade, errors };
1196
- }
1197
- }
1198
- /** paths to folders or files */
1199
- async function validatePath(...paths) {
1200
- for (const path of paths) {
1201
- if (!fs_1.default.existsSync(path)) {
1202
- throw new Error(`[ERROR reading.validatePath()]: path does not exist: ${path}`);
1203
- }
670
+ config_1.typeshiLogger.error([`${source} An unexpected error occurred, returning empty dictionary.`,
671
+ `caught: ${error}`
672
+ ].join(config_1.NEW_LINE));
673
+ return {};
1204
674
  }
1205
675
  }
1206
676
  /**