typeshi 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/dataLoader.d.ts +37 -0
- package/dist/config/dataLoader.js +171 -0
- package/dist/config/env.d.ts +23 -0
- package/dist/config/env.js +55 -0
- package/dist/config/index.d.ts +6 -0
- package/dist/config/index.js +22 -0
- package/dist/config/setupLog.d.ts +39 -0
- package/dist/config/setupLog.js +144 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +44 -0
- package/dist/utils/argumentValidation.d.ts +192 -0
- package/dist/utils/argumentValidation.js +807 -0
- package/dist/utils/io/dateTime.d.ts +96 -0
- package/dist/utils/io/dateTime.js +202 -0
- package/dist/utils/io/index.d.ts +8 -0
- package/dist/utils/io/index.js +24 -0
- package/dist/utils/io/logging.d.ts +34 -0
- package/dist/utils/io/logging.js +260 -0
- package/dist/utils/io/reading.d.ts +265 -0
- package/dist/utils/io/reading.js +1245 -0
- package/dist/utils/io/types/Csv.d.ts +31 -0
- package/dist/utils/io/types/Csv.js +29 -0
- package/dist/utils/io/types/Io.TypeGuards.d.ts +31 -0
- package/dist/utils/io/types/Io.TypeGuards.js +75 -0
- package/dist/utils/io/types/Io.d.ts +49 -0
- package/dist/utils/io/types/Io.js +2 -0
- package/dist/utils/io/types/index.d.ts +6 -0
- package/dist/utils/io/types/index.js +22 -0
- package/dist/utils/io/writing.d.ts +67 -0
- package/dist/utils/io/writing.js +333 -0
- package/dist/utils/regex/cleaning.d.ts +65 -0
- package/dist/utils/regex/cleaning.js +162 -0
- package/dist/utils/regex/configureParameters.d.ts +23 -0
- package/dist/utils/regex/configureParameters.js +63 -0
- package/dist/utils/regex/email.d.ts +6 -0
- package/dist/utils/regex/email.js +37 -0
- package/dist/utils/regex/entity.d.ts +59 -0
- package/dist/utils/regex/entity.js +168 -0
- package/dist/utils/regex/index.d.ts +11 -0
- package/dist/utils/regex/index.js +27 -0
- package/dist/utils/regex/misc.d.ts +37 -0
- package/dist/utils/regex/misc.js +75 -0
- package/dist/utils/regex/phone.d.ts +83 -0
- package/dist/utils/regex/phone.js +132 -0
- package/dist/utils/regex/stringOperations.d.ts +45 -0
- package/dist/utils/regex/stringOperations.js +201 -0
- package/dist/utils/regex/types/StringOptions.d.ts +87 -0
- package/dist/utils/regex/types/StringOptions.js +25 -0
- package/dist/utils/regex/types/index.d.ts +5 -0
- package/dist/utils/regex/types/index.js +21 -0
- package/dist/utils/regex/types/typeGuards.d.ts +12 -0
- package/dist/utils/regex/types/typeGuards.js +15 -0
- package/dist/utils/typeValidation.d.ts +163 -0
- package/dist/utils/typeValidation.js +308 -0
- package/package.json +56 -0
|
@@ -0,0 +1,1245 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.readJsonSync = void 0;
|
|
40
|
+
exports.isDirectory = isDirectory;
|
|
41
|
+
exports.isFile = isFile;
|
|
42
|
+
exports.getDelimiterFromFilePath = getDelimiterFromFilePath;
|
|
43
|
+
exports.readJsonFileAsObject = readJsonFileAsObject;
|
|
44
|
+
exports.coerceFileExtension = coerceFileExtension;
|
|
45
|
+
exports.concatenateFiles = concatenateFiles;
|
|
46
|
+
exports.getRows = getRows;
|
|
47
|
+
exports.getExcelRows = getExcelRows;
|
|
48
|
+
exports.getCsvRows = getCsvRows;
|
|
49
|
+
exports.getOneToOneDictionary = getOneToOneDictionary;
|
|
50
|
+
exports.getColumnValues = getColumnValues;
|
|
51
|
+
exports.getIndexedColumnValues = getIndexedColumnValues;
|
|
52
|
+
exports.handleFileArgument = handleFileArgument;
|
|
53
|
+
exports.getDirectoryFiles = getDirectoryFiles;
|
|
54
|
+
exports.getOneToManyDictionary = getOneToManyDictionary;
|
|
55
|
+
exports.parseExcelForOneToMany = parseExcelForOneToMany;
|
|
56
|
+
exports.parseCsvForOneToMany = parseCsvForOneToMany;
|
|
57
|
+
exports.isValidCsv = isValidCsv;
|
|
58
|
+
exports.isValidCsvSync = isValidCsvSync;
|
|
59
|
+
exports.analyzeCsv = analyzeCsv;
|
|
60
|
+
exports.repairCsv = repairCsv;
|
|
61
|
+
exports.validatePath = validatePath;
|
|
62
|
+
exports.extractTargetRows = extractTargetRows;
|
|
63
|
+
exports.findMissingValues = findMissingValues;
|
|
64
|
+
/**
|
|
65
|
+
* @file src/utils/io/reading.ts
|
|
66
|
+
*/
|
|
67
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
68
|
+
const fs_1 = __importDefault(require("fs"));
|
|
69
|
+
const stream_1 = require("stream");
|
|
70
|
+
const csv_parser_1 = __importDefault(require("csv-parser"));
|
|
71
|
+
const xlsx_1 = __importDefault(require("xlsx"));
|
|
72
|
+
const regex_1 = require("../regex");
|
|
73
|
+
const misc_1 = require("../regex/misc");
|
|
74
|
+
const config_1 = require("../../config");
|
|
75
|
+
const types_1 = require("./types");
|
|
76
|
+
const typeValidation_1 = require("../typeValidation");
|
|
77
|
+
const validate = __importStar(require("../argumentValidation"));
|
|
78
|
+
const logging_1 = require("./logging");
|
|
79
|
+
const F = (0, misc_1.extractFileName)(__filename);
|
|
80
|
+
/** for testing if `pathString (value)` points to an existing directory */
function isDirectory(value) {
    // Guard: only a non-empty string can name a directory.
    if (!(0, typeValidation_1.isNonEmptyString)(value)) {
        return false;
    }
    // existsSync first so statSync never throws on a missing path.
    if (!fs_1.default.existsSync(value)) {
        return false;
    }
    return fs_1.default.statSync(value).isDirectory();
}
|
|
86
|
+
/** for testing if `pathString (value)` points to an existing file */
function isFile(value) {
    // Guard: only a non-empty string can name a file.
    if (!(0, typeValidation_1.isNonEmptyString)(value)) {
        return false;
    }
    // existsSync first so statSync never throws on a missing path.
    if (!fs_1.default.existsSync(value)) {
        return false;
    }
    return fs_1.default.statSync(value).isFile();
}
|
|
92
|
+
/**
 * Determines the proper delimiter based on file type or extension
 * @param filePath `string` Path to the file
 * @returns **`delimiter`** `{`{@link DelimiterCharacterEnum}` | string}` The delimiter character
 * @throws an error if the file extension is unsupported
 */
function getDelimiterFromFilePath(filePath) {
    const extension = filePath.split('.').pop()?.toLowerCase();
    switch (extension) {
        case types_1.DelimitedFileTypeEnum.CSV:
            return types_1.DelimiterCharacterEnum.COMMA;
        case types_1.DelimitedFileTypeEnum.TSV:
            return types_1.DelimiterCharacterEnum.TAB;
        default:
            throw new Error(`[reading.getDelimiterFromFilePath()] Unsupported file extension: ${extension}`);
    }
}
|
|
110
|
+
/**
 * @param filePath `string`
 * @returns **`jsonData`** — `Record<string, any>`
 * - JSON data as an object
 */
exports.readJsonSync = readJsonFileAsObject;
/**
 * Synchronously read a `.json` file and parse it into an object.
 * @param filePath `string`
 * @returns **`jsonData`** — `Record<string, any>`
 * - JSON data as an object
 * @throws when the file cannot be read or its content is not valid JSON
 */
function readJsonFileAsObject(filePath) {
    const source = (0, logging_1.getSourceString)(F, readJsonFileAsObject.name);
    try {
        // Accept paths both with and without a '.json' suffix.
        filePath = coerceFileExtension(filePath, 'json');
        const raw = fs_1.default.readFileSync(filePath, 'utf8');
        return JSON.parse(raw);
    }
    catch (error) {
        config_1.typeshiLogger.error([`${source} Error reading JSON file`,
            `Given filePath: '${filePath}'`,
            `error: `, JSON.stringify(error, null, 4)
        ].join(config_1.INDENT_LOG_LINE));
        throw new Error(JSON.stringify(error));
    }
}
|
|
137
|
+
/**
 * Ensure `filePath` ends with `.{expectedExtension}`, appending the
 * extension when it is absent.
 * @param filePath `string`
 * @param expectedExtension `string` with or without a leading dot (e.g. `'json'` or `'.json'`)
 * @returns **`validatedFilePath`** `string`
 */
function coerceFileExtension(filePath, expectedExtension) {
    validate.multipleStringArguments(`reading.coerceFileExtension`, { filePath, expectedExtension });
    // FIX: anchor to leading dot(s). The previous /\./ (unanchored, first match)
    // stripped the first dot ANYWHERE, mangling compound extensions:
    // 'tar.gz' -> 'targz'. Now only a leading '.' (or '..') is removed.
    expectedExtension = expectedExtension.replace(/^\.+/, '');
    if (filePath.endsWith(`.${expectedExtension}`)) {
        return filePath;
    }
    return filePath + '.' + expectedExtension;
}
|
|
150
|
+
/**
 * Read and concatenate rows from one or more delimited/excel files.
 * - {@link getDirectoryFiles}
 * @param arg1 `Array<`{@link FileData}` | string> | string`
 * - `files:` {@link FileData}`[]`
 * - `filePaths:` `string[]`
 * - `dirPath:` `string`
 * @param sheetName `string`
 * @param requiredHeaders `string[]` `if` left `undefined`,
 * `requiredHeaders` will be set to the headers of first non empty file from `arg1`
 * @param strictRequirement `boolean`
 * - `Default` = `true`
 * - `if` `true`, then every `row` **must** have headers/keys exactly equal to `requiredHeaders`
 * - `else` `false`, then if a `row` is missing one or more `header` in `requiredHeaders`,
 * for each missing `header`, set `row[header] = ''` (empty string),
 * @param targetExtensions `string[]` try to read rows of all files whose type is in `targetExtensions`
 * @returns **`concatenatedRows`** `Promise<Record<string, any>[]>`
 * @throws when `arg1` is not a recognized shape, or (strict mode) when a row
 * is missing a required header.
 */
async function concatenateFiles(arg1, sheetName = 'Sheet1', requiredHeaders = [], strictRequirement = true, targetExtensions = ['.csv', '.tsv', '.xlsx']) {
    const source = (0, logging_1.getSourceString)(F, concatenateFiles.name);
    validate.stringArgument(source, { sheetName });
    validate.arrayArgument(source, { targetExtensions, isNonEmptyString: typeValidation_1.isNonEmptyString });
    // Normalize arg1 into a list of file representatives (FileData or path).
    let files;
    if ((0, typeValidation_1.isNonEmptyArray)(arg1)) {
        files = arg1;
    }
    else if (isDirectory(arg1)) {
        // Directory path: expand to every file matching targetExtensions.
        files = getDirectoryFiles(arg1, ...targetExtensions);
    }
    else if (isFile(arg1)
        && (0, regex_1.stringEndsWithAnyOf)(arg1, targetExtensions, regex_1.RegExpFlagsEnum.IGNORE_CASE)) {
        // Single file path: wrap in a one-element list.
        files = [arg1];
    }
    else {
        let message = [`${source} Invalid parameter: 'arg1'`,
            `Expected: arg1: (Array<FileData | string> | string) to be one of:`,
            `files: FileData[] | filePaths: string[] | filePath: string | dirPath: string`,
            `Received: ${typeof arg1}`
        ].join(config_1.INDENT_LOG_LINE);
        config_1.typeshiLogger.error(message);
        throw new Error(message);
    }
    if (!(0, typeValidation_1.isNonEmptyArray)(files)) { // i.e. isEmptyArray.... shouldn't get here
        config_1.typeshiLogger.error(`${source} how did this happen, we're smarter than this`);
        return [];
    }
    else if (files.length === 1) {
        // Single file: no header reconciliation needed.
        return await getRows(files[0], sheetName);
    } // else if files.length > 1, need to make sure each file has same headers
    const concatenatedRows = [];
    // True once requiredHeaders holds a usable (non-empty, all-string) list,
    // either from the caller or inferred from the first non-empty file below.
    let haveDefinedRequiredHeaders = ((0, typeValidation_1.isNonEmptyArray)(requiredHeaders)
        && requiredHeaders.every(h => (0, typeValidation_1.isNonEmptyString)(h))
        ? true : false);
    for (const fileRepresentative of files) {
        const rows = await getRows(fileRepresentative, sheetName);
        if (!(0, typeValidation_1.isNonEmptyArray)(rows)) {
            continue; // skip empty files
        }
        if (!haveDefinedRequiredHeaders) {
            // Infer headers from the first non-null-like row encountered.
            let firstValidRow = rows.find(row => !(0, typeValidation_1.isNullLike)(row));
            if (!firstValidRow) {
                continue;
            }
            requiredHeaders = Object.keys(firstValidRow);
            haveDefinedRequiredHeaders = true;
        }
        if (!(0, typeValidation_1.isNonEmptyArray)(requiredHeaders)) {
            config_1.typeshiLogger.warn(`${source} No requiredHeaders defined,`, `skipping file: '${(0, types_1.isFileData)(fileRepresentative)
                ? fileRepresentative.fileName : fileRepresentative}'`);
            continue;
        }
        for (let i = 0; i < rows.length; i++) {
            const row = rows[i];
            if (!(0, typeValidation_1.hasKeys)(row, requiredHeaders)) {
                let missingHeaders = requiredHeaders.filter(h => !(0, typeValidation_1.hasKeys)(row, h));
                if (strictRequirement) {
                    let message = [`${source} Invalid row: missing required header(s)`,
                        `(strictRequirement === true)`,
                        ` file: '${(0, types_1.isFileData)(fileRepresentative)
                            ? fileRepresentative.fileName : fileRepresentative}'`,
                        ` rowIndex: ${i}`,
                        `requiredHeaders: ${JSON.stringify(requiredHeaders)}`,
                        ` missingHeaders: ${JSON.stringify(missingHeaders)}`
                    ].join(config_1.INDENT_LOG_LINE);
                    config_1.typeshiLogger.error(message);
                    throw new Error(message);
                }
                // Lenient mode: backfill missing columns with empty strings
                // (mutates the row object in place).
                for (const header of missingHeaders) {
                    row[header] = '';
                }
            }
            concatenatedRows.push(row);
        }
    }
    return concatenatedRows;
}
|
|
245
|
+
/**
 * Dispatch to {@link getExcelRows} or {@link getCsvRows} based on the
 * argument's file extension.
 * @param arg1 {@link FileData}` | string` one of the following:
 * - `fileData:` {@link FileData} = `{ fileName: string; fileContent: string; }`
 * - `filePath:` `string`
 * @param sheetName `string` `optional`
 * - defined/used `if` `arg1` pertains to an excel file and you want to specify which sheet to read
 * - `Default` = `'Sheet1'`
 * @returns **`rows`** `Promise<Record<string, any>[]>`
 */
async function getRows(arg1, sheetName = 'Sheet1') {
    // Excel files are routed to the xlsx reader; everything else to the CSV reader.
    const looksLikeExcel = (name) => name.endsWith('.xlsx') || name.endsWith('.xls');
    if ((0, types_1.isFileData)(arg1)) {
        return looksLikeExcel(arg1.fileName)
            ? getExcelRows(arg1, sheetName)
            : getCsvRows(arg1);
    }
    if ((0, typeValidation_1.isNonEmptyString)(arg1)) { // assume it's a file path
        return looksLikeExcel(arg1)
            ? getExcelRows(arg1, sheetName)
            : getCsvRows(arg1);
    }
    throw new Error(`[reading.getRows()] Invalid argument: 'arg1' must be a FileData object or a string file path.`);
}
|
|
272
|
+
/**
 * Read rows from an excel workbook sheet.
 * @note excludes empty rows
 * @param arg1 {@link FileData}` | string` — FileData (base64 `fileContent`) or a path to an `.xlsx`/`.xls` file
 * @param sheetName `string` sheet to read; falls back to the workbook's first sheet when absent
 * @returns **`rows`** `Promise<Record<string, any>[]>` — empty array on read/parse failure
 */
async function getExcelRows(arg1, sheetName = 'Sheet1') {
    const source = '[reading.getExcelRows()]';
    validate.stringArgument(source, { sheetName });
    let filePath;
    let fileContent;
    let buffer;
    if ((0, types_1.isFileData)(arg1) && (0, typeValidation_1.isNonEmptyString)(arg1.fileName)
        && (0, regex_1.stringEndsWithAnyOf)(arg1.fileName, ['.xlsx', '.xls'])) {
        // FileData branch: fileContent is assumed to be base64-encoded.
        filePath = arg1.fileName;
        fileContent = arg1.fileContent;
        buffer = Buffer.from(fileContent, 'base64');
    }
    else if ((0, typeValidation_1.isNonEmptyString)(arg1) && (0, regex_1.stringEndsWithAnyOf)(arg1, ['.xlsx', '.xls'])) {
        // Path branch: read the workbook bytes from disk.
        filePath = arg1;
        validate.existingPathArgument(`${source}.filePath`, { filePath });
        buffer = fs_1.default.readFileSync(filePath);
    }
    else {
        throw new Error([
            `${source} Invalid argument: 'arg1' (FileData or filePath)`,
            `must be a FileData object or a string file path.`,
            `Received: ${JSON.stringify(arg1)}`
        ].join(config_1.INDENT_LOG_LINE));
    }
    try {
        const workbook = xlsx_1.default.read(buffer, { type: 'buffer' });
        // Fall back to the first sheet when the requested name doesn't exist.
        sheetName = (workbook.SheetNames.includes(sheetName)
            ? sheetName
            : workbook.SheetNames[0]);
        const sheet = workbook.Sheets[sheetName];
        const jsonData = xlsx_1.default.utils.sheet_to_json(sheet);
        return jsonData;
    }
    catch (error) {
        // Best-effort contract: log and return [] rather than propagate.
        config_1.typeshiLogger.error([
            `${source} Error reading or parsing the Excel file.`,
            `Received arg1 = ${JSON.stringify(arg1)}, sheetName: '${sheetName}'`,
        ].join(config_1.INDENT_LOG_LINE), JSON.stringify(error, null, 4));
        return [];
    }
}
|
|
319
|
+
/**
 * Read rows from a delimited (`.csv`/`.tsv`) file via csv-parser.
 * @param arg1 {@link FileData}` | string` — FileData (base64 `fileContent`) or a path to a `.csv`/`.tsv` file
 * @returns **`rows`** `Promise<Record<string, any>[]>`
 * - an array of objects representing rows from a CSV file.
 */
async function getCsvRows(arg1) {
    const source = (0, logging_1.getSourceString)(__filename, getCsvRows.name);
    let filePath;
    let fileContent;
    let delimiter = types_1.DelimiterCharacterEnum.COMMA;
    let buffer;
    if ((0, types_1.isFileData)(arg1) && (0, typeValidation_1.isNonEmptyString)(arg1.fileName)
        && (0, regex_1.stringEndsWithAnyOf)(arg1.fileName, ['.csv', '.tsv'])) {
        // FileData branch: fileContent is assumed to be base64-encoded.
        filePath = arg1.fileName;
        fileContent = arg1.fileContent;
        buffer = Buffer.from(fileContent, 'base64');
        delimiter = getDelimiterFromFilePath(filePath);
    }
    else if ((0, typeValidation_1.isNonEmptyString)(arg1) && (0, regex_1.stringEndsWithAnyOf)(arg1, ['.csv', '.tsv'])) {
        filePath = arg1;
        validate.existingPathArgument(`${source}`, { filePath });
        try {
            buffer = fs_1.default.readFileSync(filePath);
        }
        catch (error) {
            throw new Error([
                `${source} Error making buffer when reading file: '${filePath}'`,
                `Error: ${error instanceof Error ? error.message : String(error)}`
            ].join(config_1.INDENT_LOG_LINE));
        }
        delimiter = getDelimiterFromFilePath(filePath);
    }
    else {
        throw new Error([
            `${source} Invalid argument: 'arg1' (FileData or filePath)`,
            `must be a FileData object or a string file path.`,
            `Received: ${JSON.stringify(arg1)}`
        ].join(config_1.INDENT_LOG_LINE));
    }
    const rows = [];
    if (!buffer) {
        throw new Error(`${source} No buffer available to read`);
    }
    // Feed the decoded text through csv-parser as a Readable stream and
    // collect one object per data row; the Promise settles on 'end'/'error'.
    const stream = stream_1.Readable.from(buffer.toString('utf8'));
    return new Promise((resolve, reject) => {
        stream
            .pipe((0, csv_parser_1.default)({ separator: delimiter }))
            .on('data', (row) => rows.push(row))
            .on('end', () => {
            // Success message is suppressed (queued) rather than logged directly.
            config_1.SUPPRESSED_LOGS.push([`${source} Successfully read CSV file.`,
                `filePath: '${filePath}'`,
                `Number of rows read: ${rows.length}`
            ].join(config_1.INDENT_LOG_LINE));
            resolve(rows);
        })
            .on('error', (error) => {
            config_1.typeshiLogger.error(`${source} Error reading CSV file:`, config_1.INDENT_LOG_LINE + `filePath: '${filePath}'`, config_1.NEW_LINE + `Error: ${JSON.stringify(error, null, 4)}`);
            reject(error);
        });
    });
}
|
|
380
|
+
/**
 * Build a one-to-one `key -> value` dictionary from two columns.
 * Later rows with a duplicate key overwrite earlier values (with a warning).
 * @param arg1 `string | Record<string, any>[]` - the file path to a CSV file or an array of rows.
 * @param keyColumn `string` - the column name whose contents will be keys in the dictionary.
 * @param valueColumn `string` - the column name whose contents will be used as values in the dictionary.
 * @param keyOptions clean-string options applied to each key — TODO confirm shape (CleanStringOptions)
 * @param valueOptions clean-string options applied to each value
 * @param requireIncludeAllRows `boolean` when `true`, a row missing either column (or cleaning to empty) throws instead of being skipped with a warning.
 * @returns **`dict`** `Record<string, string>`
 */
async function getOneToOneDictionary(arg1, keyColumn, valueColumn, keyOptions, valueOptions, requireIncludeAllRows = false) {
    const source = (0, logging_1.getSourceString)(__filename, getOneToOneDictionary.name);
    validate.multipleStringArguments(source, { keyColumn, valueColumn });
    let rows = await handleFileArgument(arg1, getOneToOneDictionary.name, [keyColumn, valueColumn]);
    const dict = {};
    for (let i = 0; i < rows.length; i++) {
        const row = rows[i];
        // Rows lacking either column are skipped (or fatal in strict mode).
        if (!(0, typeValidation_1.hasKeys)(row, [keyColumn, valueColumn])) {
            let msg = [`${source} row @ index ${i} missing key(s): '${keyColumn}', '${valueColumn}'`,
                ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row} -> row[keyColumn] = '${row[keyColumn]}'`,
                `valueColumn: '${valueColumn}' in row ? ${valueColumn in row} -> row[valueColumn] = '${row[valueColumn]}'`,
            ].join(config_1.INDENT_LOG_LINE);
            if (requireIncludeAllRows)
                throw new Error(msg);
            config_1.typeshiLogger.warn(msg);
            continue;
        }
        // Coerce cells to strings, then apply the caller's cleaning options.
        const key = (0, regex_1.clean)(String(row[keyColumn]), keyOptions);
        const value = (0, regex_1.clean)(String(row[valueColumn]), valueOptions);
        // Either side cleaning to a falsy string disqualifies the pair.
        if (!key || !value) {
            let msg = [`${source} Row @ index ${i} missing key or value.`,
                ` keyColumn: '${keyColumn}' in row ? ${keyColumn in row}`,
                `-> row[keyColumn] = '${row[keyColumn]}'`,
                ` clean(String(row[keyColumn]), keyOptions): '${key}'`,
                `valueColumn: '${valueColumn}' in row ? ${valueColumn in row}`,
                `-> row[valueColumn] = '${row[valueColumn]}'`,
                `clean(String(row[valueColumn]), valueOptions): '${value}'`,
            ].join(config_1.INDENT_LOG_LINE);
            if (requireIncludeAllRows)
                throw new Error(msg);
            config_1.typeshiLogger.warn(msg);
            continue;
        }
        // Duplicate keys: warn, then last-write-wins.
        if (dict[key]) {
            config_1.typeshiLogger.warn([`${source} row @ index ${i} Duplicate key found: '${key}'`,
                `overwriting value '${dict[key]}' with '${value}'`
            ].join(config_1.INDENT_LOG_LINE));
        }
        dict[key] = value;
    }
    return dict;
}
|
|
428
|
+
/**
 * Collect the (optionally cleaned) string values of a single column.
 * @param arg1 `string | FileData | Record<string, any>[]` - the `filePath` to a CSV file or an array of rows.
 * @param columnName `string` - the column name whose values will be returned.
 * @param cleaner `optional` async transform applied to each raw cell string before trimming.
 * @param allowDuplicates `boolean` - `optional` if `true`, allows duplicate values in the returned array, otherwise only unique values are returned.
 * - Defaults to `false`.
 * @returns **`values`** `Promise<Array<string>>` - sorted array of values (as strings) from the specified column.
 */
async function getColumnValues(arg1, columnName, cleaner, allowDuplicates = false) {
    const source = `[reading.getColumnValues()]`;
    validate.stringArgument(source, { columnName });
    validate.booleanArgument(source, { allowDuplicates });
    if (cleaner)
        validate.functionArgument(source, { cleaner });
    const rows = await handleFileArgument(arg1, getColumnValues.name, [columnName]);
    const values = [];
    for (const row of rows) {
        const rawCell = String(row[columnName]);
        if (!(0, typeValidation_1.isNonEmptyString)(rawCell)) {
            continue; // skip blank cells
        }
        const cleaned = cleaner ? await cleaner(rawCell) : rawCell;
        const value = cleaned.trim();
        const alreadySeen = values.includes(value);
        if (allowDuplicates || !alreadySeen) {
            values.push(value);
        }
    }
    return values.sort();
}
|
|
455
|
+
/**
 * Map each distinct (cleaned) value of a column to the list of row indices
 * where it occurs.
 * @param arg1 `string | FileData | Record<string, any>[]` - the `filePath` to a CSV file or an array of rows.
 * @param columnName `string` - the column name whose values will be returned.
 * @param cleaner `optional` async transform applied to each raw cell string before trimming.
 * @returns **`indexedColumnValues`** `Promise<Record<string, number[]>>`
 */
async function getIndexedColumnValues(arg1, columnName, cleaner) {
    const source = `[reading.getIndexedColumnValues()]`;
    validate.stringArgument(source, { columnName });
    if (cleaner)
        validate.functionArgument(source, { cleaner });
    let rows = await handleFileArgument(arg1, getIndexedColumnValues.name, [columnName]);
    const valueDict = {};
    // FIX (idiom): replaced `for...in` over an array — which yields string
    // indices and required a Number() round-trip — with an indexed loop.
    for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
        const row = rows[rowIndex];
        if (!(0, typeValidation_1.isNonEmptyString)(String(row[columnName])))
            continue; // skip blank cells
        const value = (cleaner
            ? await cleaner(String(row[columnName]))
            : String(row[columnName])).trim();
        if (!valueDict[value]) {
            valueDict[value] = [];
        }
        valueDict[value].push(rowIndex);
    }
    return valueDict;
}
|
|
481
|
+
/**
 * Normalize a polymorphic "rows source" argument (file path, FileData, or an
 * already-parsed row array) into a row array.
 * @param arg1 `string | FileData | Record<string, any>[]`
 * @param invocationSource `string` caller name, used in error messages
 * @param requiredHeaders `string[]` `optional` headers a CSV path must contain
 * @param sheetName `string` `optional` forwarded to {@link getRows} for excel inputs
 * @returns **`rows`** `Promise<Record<string, any>[]>`
 * @throws when a CSV path fails validation or `arg1` is not a recognized shape
 */
async function handleFileArgument(arg1, invocationSource, requiredHeaders = [], sheetName) {
    const source = (0, logging_1.getSourceString)(F, handleFileArgument.name);
    validate.stringArgument(source, { invocationSource });
    validate.arrayArgument(source, { requiredHeaders, isNonEmptyString: typeValidation_1.isNonEmptyString }, true);
    let rows = [];
    // Handle file path validation only for string inputs
    if ((0, typeValidation_1.isNonEmptyString)(arg1)
        && (0, regex_1.stringEndsWithAnyOf)(arg1, /(\.tsv|\.csv)/i)
        && !isValidCsvSync(arg1, requiredHeaders)) {
        throw new Error([
            `${source} Invalid CSV filePath provided: '${arg1}'`,
            `invocationSource: ${invocationSource}`,
            `requiredHeaders ? ${(0, typeValidation_1.isNonEmptyArray)(requiredHeaders)
                ? JSON.stringify(requiredHeaders)
                : 'none provided'}`
        ].join(config_1.INDENT_LOG_LINE));
    }
    if (((0, typeValidation_1.isNonEmptyString)(arg1) && isFile(arg1)) // arg1 is file path string
        || (0, types_1.isFileData)(arg1)) { // arg1 is FileData { fileName: string; fileContent: string; }
        rows = await getRows(arg1, sheetName);
    }
    else if ((0, typeValidation_1.isNonEmptyArray)(arg1)) { // arg1 is already array of rows
        // Reject arrays containing non-object elements before accepting them.
        if (arg1.some(v => !(0, typeValidation_1.isObject)(v))) {
            throw new Error([
                `${source} Error: Invalid 'arg1' (Record<string, any>[]) param:`,
                `There exists an element in the row array that is not an object.`,
                `Source: ${invocationSource}`,
            ].join(config_1.INDENT_LOG_LINE));
        }
        rows = arg1;
    }
    else {
        throw new Error([
            `${source} Invalid parameter: 'arg1' (string | FileData | Record<string, any>[])`,
            `arg1 must be a file path string, FileData object, or an array of rows.`,
            `Source: ${invocationSource}`,
        ].join(config_1.INDENT_LOG_LINE));
    }
    return rows;
}
|
|
527
|
+
/**
 * @param dir `string` path to target directory
 * @param targetExtensions `string[] optional` - array of file extensions to filter files by.
 * - `If` not provided, all files in the directory will be returned.
 * - `If` provided, only files with extensions matching the array will be returned.
 * @returns **`targetFiles`** `string[]` array of full file paths
 */
function getDirectoryFiles(dir, ...targetExtensions) {
    const source = (0, logging_1.getSourceString)(F, getDirectoryFiles.name);
    validate.existingPathArgument(source, { dir });
    validate.arrayArgument(source, { targetExtensions, isNonEmptyString: typeValidation_1.isNonEmptyString }, true);
    // ensure all target extensions start with period
    for (let i = 0; i < targetExtensions.length; i++) {
        const ext = targetExtensions[i];
        if (!ext.startsWith('.')) {
            targetExtensions[i] = `.${ext}`;
        }
    }
    // FIX: the ternary condition was inverted. Previously a NON-empty
    // targetExtensions list returned every file (the `true` branch) and the
    // extension filter only ran when the list was empty — the opposite of the
    // documented contract. Now: empty list -> all files; non-empty list ->
    // only files with a matching extension (case-insensitive).
    const targetFiles = fs_1.default.readdirSync(dir).filter(f => !(0, typeValidation_1.isNonEmptyArray)(targetExtensions)
        ? true // no extensions specified -> include every file in dir
        : (0, regex_1.stringEndsWithAnyOf)(f, targetExtensions, regex_1.RegExpFlagsEnum.IGNORE_CASE)).map(file => node_path_1.default.join(dir, file));
    return targetFiles;
}
|
|
550
|
+
/**
 * Build a one-to-many `key -> values[]` dictionary from two columns.
 * Values are de-duplicated per key; keys/values are cleaned, trimmed, and
 * stripped of a trailing period.
 * @param dataSource `string | FileData | Record<string, any>[]`
 * @param keyColumn `string`
 * @param valueColumn `string`
 * @param keyOptions {@link CleanStringOptions} `(optional)`
 * @param valueOptions {@link CleanStringOptions}`(optional)`
 * @param sheetName `string`
 * @returns **`dict`** `Promise<Record<string, string[]>>`
 */
async function getOneToManyDictionary(dataSource, keyColumn, valueColumn, keyOptions, valueOptions, sheetName) {
    const source = (0, logging_1.getSourceString)(F, getOneToManyDictionary.name);
    validate.multipleStringArguments(source, { keyColumn, valueColumn });
    if (keyOptions)
        validate.objectArgument(source, { keyOptions, isCleanStringOptions: regex_1.isCleanStringOptions });
    if (valueOptions)
        validate.objectArgument(source, { valueOptions, isCleanStringOptions: regex_1.isCleanStringOptions });
    const rows = await handleFileArgument(dataSource, source, [keyColumn, valueColumn], sheetName);
    const dict = {};
    for (let i = 0; i < rows.length; i++) {
        let row = rows[i];
        // FIX (consistency/robustness): coerce cells with String(...) before
        // clean(...), matching getOneToOneDictionary — guards against
        // non-string cells (e.g. numbers parsed from excel). No-op for strings.
        let key = (0, regex_1.clean)(String(row[keyColumn]), keyOptions).trim().replace(/\.$/, '');
        if (!dict[key]) {
            dict[key] = [];
        }
        let value = (0, regex_1.clean)(String(row[valueColumn]), valueOptions).trim().replace(/\.$/, '');
        // de-duplicate values per key
        if (!dict[key].includes(value)) {
            dict[key].push(value);
        }
    }
    return dict;
}
|
|
581
|
+
/**
 * @deprecated -> use {@link getOneToManyDictionary}
 * Reads one sheet of an Excel workbook and groups cleaned `valueColumn` cells
 * under their cleaned `keyColumn` cell (duplicates per key are skipped).
 * @param filePath `string` - coerced to an `.xlsx` extension before reading
 * @param sheetName `string` - workbook sheet to parse
 * @param keyColumn `string`
 * @param valueColumn `string`
 * @param options - {@link ParseOneToManyOptions} - optional strip/case/pad
 * cleaning applied separately to keys and values
 * @returns **`dict`** `Record<string, Array<string>>` — empty object on read/parse failure
 */
function parseExcelForOneToMany(filePath, sheetName, keyColumn, valueColumn, options = {}) {
    filePath = coerceFileExtension(filePath, 'xlsx');
    validate.multipleStringArguments(`reading.parseExcelForOneToMany`, { filePath, sheetName, keyColumn, valueColumn });
    try {
        const { keyStripOptions, valueStripOptions, keyCaseOptions, valueCaseOptions, keyPadOptions, valuePadOptions } = options;
        const workbook = xlsx_1.default.readFile(filePath);
        const sheet = workbook.Sheets[sheetName];
        const sheetRows = xlsx_1.default.utils.sheet_to_json(sheet);
        const dict = {};
        for (const sheetRow of sheetRows) {
            // Clean each cell, then strip surrounding whitespace and a trailing period.
            const dictKey = (0, regex_1.clean)(String(sheetRow[keyColumn]), keyStripOptions, keyCaseOptions, keyPadOptions).trim().replace(/\.$/, '');
            const dictValue = (0, regex_1.clean)(String(sheetRow[valueColumn]), valueStripOptions, valueCaseOptions, valuePadOptions).trim().replace(/\.$/, '');
            if (!dict[dictKey]) {
                dict[dictKey] = [];
            }
            if (!dict[dictKey].includes(dictValue)) {
                dict[dictKey].push(dictValue);
            }
        }
        return dict;
    }
    catch (err) {
        config_1.typeshiLogger.error('Error reading or parsing the Excel file:', err, config_1.INDENT_LOG_LINE + 'Given File Path:', '"' + filePath + '"');
        return {};
    }
}
/**
 * @deprecated -> use {@link getOneToManyDictionary}
 * Naively parses a delimited text file (simple `split` on newline/delimiter —
 * quoted fields containing the delimiter are NOT handled) and groups cleaned
 * `valueColumn` cells under their cleaned `keyColumn` cell.
 * @param filePath `string` - coerced to `.tsv` when `delimiter` is TAB, else `.csv`
 * @param keyColumn `string`
 * @param valueColumn `string`
 * @param delimiter {@link DelimiterCharacters} | `string` - defaults to comma
 * @param options {@link ParseOneToManyOptions} - optional strip/case/pad cleaning
 * @returns `Record<string, Array<string>>` — empty object on read/parse failure
 */
function parseCsvForOneToMany(filePath, keyColumn, valueColumn, delimiter = types_1.DelimiterCharacterEnum.COMMA, options = {}) {
    filePath = coerceFileExtension(filePath, (delimiter === types_1.DelimiterCharacterEnum.TAB) ? 'tsv' : 'csv');
    const source = `[reading.parseCsvForOneToMany()]`;
    validate.existingFileArgument(source, ['.tsv', '.csv'], { filePath });
    validate.multipleStringArguments(source, { keyColumn, valueColumn });
    try {
        const { keyStripOptions, valueStripOptions, keyCaseOptions, valueCaseOptions, keyPadOptions, valuePadOptions } = options;
        const fileContents = fs_1.default.readFileSync(filePath, 'utf8');
        const fileLines = fileContents.split('\n');
        const dict = {};
        const headerCells = fileLines[0].split(delimiter).map(cell => cell.trim());
        const keyIndex = headerCells.indexOf(keyColumn);
        const valueIndex = headerCells.indexOf(valueColumn);
        if (keyIndex === -1 || valueIndex === -1) {
            throw new Error(`Key or value column not found in CSV file.`);
        }
        for (let rowIndex = 1; rowIndex < fileLines.length; rowIndex++) {
            const cells = fileLines[rowIndex].split(delimiter).map(cell => cell.trim());
            // Guard clause: skip blank/degenerate lines (fewer than two cells).
            if (cells.length <= 1) {
                continue;
            }
            const dictKey = (0, regex_1.clean)(cells[keyIndex], keyStripOptions, keyCaseOptions, keyPadOptions);
            const dictValue = (0, regex_1.clean)(cells[valueIndex], valueStripOptions, valueCaseOptions, valuePadOptions);
            if (!dict[dictKey]) {
                dict[dictKey] = [];
            }
            if (!dict[dictKey].includes(dictValue)) {
                dict[dictKey].push(dictValue);
            }
        }
        return dict;
    }
    catch (err) {
        config_1.typeshiLogger.error('Error reading or parsing the CSV file:', err, config_1.INDENT_LOG_LINE + 'Given File Path:', '"' + filePath + '"');
        return {};
    }
}
/**
 * Default validation rules used by {@link isValidCsv} and {@link isValidCsvSync}
 * when the caller omits the options argument.
 * - `allowEmptyRows: true` — rows whose fields are all empty do not fail validation
 * - `allowInconsistentColumns: true` — rows may have a column count different from the header
 * - `maxRowsToCheck: Infinity` — validate every row
 */
const DEFAULT_CSV_VALIDATION_RULES = {
    allowEmptyRows: true,
    allowInconsistentColumns: true,
    maxRowsToCheck: Infinity,
};
/**
 * @notimplemented — stub; always resolves to `false`. See {@link isValidCsvSync}
 * for the working synchronous implementation.
 * @TODO implement async CSV validation
 * @param arg1 intended: `string | FileData` CSV source
 * @param requiredHeaders intended: `string[]` headers that must be present
 * @param options intended: validation rules (defaults to {@link DEFAULT_CSV_VALIDATION_RULES})
 * @returns `Promise<boolean>` — currently always `false`
 */
async function isValidCsv(arg1, requiredHeaders, options = DEFAULT_CSV_VALIDATION_RULES) {
    return false;
}
/**
 * Synchronously validates the structure of a delimited text file: header row
 * presence, optional required headers, and per-row column-count consistency.
 * @problem has trouble handling case where column value contains a single double quote;
 * e.g. when it's used as the inches unit after a number
 *
 * `sync`
 * @param filePath `string` - must be a string to an existing file, otherwise return `false`.
 * @param requiredHeaders `string[]` - `optional` array of headers that must be present in the CSV file.
 * - If provided, the function checks if all required headers are present in the CSV header row
 * @param options `object` - optional configuration
 * - `allowEmptyRows`: `boolean` - if true, allows rows with all empty fields (default: true)
 * - `allowInconsistentColumns`: `boolean` - if true, allows rows with different column counts
 *   (destructuring default: false — NOTE(review): this conflicts with
 *   DEFAULT_CSV_VALIDATION_RULES.allowInconsistentColumns = true, which wins when
 *   the options argument is omitted entirely; confirm which default is intended)
 * - `maxRowsToCheck`: `number` - maximum number of rows to validate (default: all rows)
 * @returns **`isValidCsv`** `boolean`
 * - **`true`** `if` the CSV file at `filePath` is valid (proper structure and formatting),
 * - **`false`** `otherwise`.
 */
function isValidCsvSync(filePath, requiredHeaders, options = DEFAULT_CSV_VALIDATION_RULES) {
    const { allowEmptyRows = true, allowInconsistentColumns = false, maxRowsToCheck = Infinity } = options;
    validate.existingPathArgument(`reading.isValidCsv`, { filePath });
    try {
        // Delimiter is inferred from the file extension, not the content.
        const delimiter = getDelimiterFromFilePath(filePath);
        const data = fs_1.default.readFileSync(filePath, 'utf8');
        // Handle different line endings (CRLF and bare CR both become LF)
        const normalizedData = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        // Split into lines, but be careful about quoted fields with newlines:
        // a newline inside double quotes is part of the field, not a row break.
        let lines = [];
        let currentLine = '';
        let inQuotes = false;
        let i = 0;
        while (i < normalizedData.length) {
            const char = normalizedData[i];
            const nextChar = normalizedData[i + 1];
            if (char === '"') {
                if (inQuotes && nextChar === '"') {
                    // Escaped quote ("" inside a quoted field) — keep both characters
                    // so parseCsvLine can un-escape them later.
                    currentLine += '""';
                    i++; // Skip next quote
                }
                else {
                    // Toggle quote state
                    inQuotes = !inQuotes;
                    currentLine += char;
                }
            }
            else if (char === '\n' && !inQuotes) {
                // End of line (not within quotes)
                if (currentLine.trim() !== '' || allowEmptyRows) {
                    lines.push(currentLine);
                }
                currentLine = '';
            }
            else {
                currentLine += char;
            }
            i++;
        }
        // Add the last line if it exists (file may not end with a newline).
        // NOTE(review): when allowEmptyRows is true this also appends a trailing
        // empty line for files that DO end with a newline — confirm intended.
        if (currentLine.trim() !== '' || allowEmptyRows) {
            lines.push(currentLine);
        }
        if (lines.length < 1) {
            config_1.typeshiLogger.error(`[ERROR isValidCsv()]: file has no valid lines: ${filePath}`);
            return false;
        }
        const headerRow = parseCsvLine(lines[0], delimiter);
        if (headerRow.length < 1) {
            config_1.typeshiLogger.error(`[ERROR isValidCsv()]: no header found in file: ${filePath}`);
            return false;
        }
        // Check for empty headers — only fatal when inconsistent columns are disallowed.
        if (headerRow.some(header => header === '')) {
            config_1.typeshiLogger.warn(`[isValidCsv()]: Found empty header(s) in file: ${filePath}`);
            if (!allowInconsistentColumns) {
                return false;
            }
        }
        // Validate required headers (non-string entries are logged and skipped,
        // i.e. they never cause a failure).
        if ((0, typeValidation_1.isNonEmptyArray)(requiredHeaders)) {
            const hasRequiredHeaders = requiredHeaders.every(header => {
                if (!(0, typeValidation_1.isNonEmptyString)(header)) {
                    config_1.typeshiLogger.warn([
                        `[reading.isValidCsv]: Invalid parameter: 'requiredHeaders'`,
                        `requiredHeaders must be of type: Array<string>`,
                        `found array element of type: '${typeof header}' (skipping)`
                    ].join(config_1.INDENT_LOG_LINE));
                    return true; // skip headers if they are not strings
                }
                return headerRow.includes(header);
            });
            if (!hasRequiredHeaders) {
                config_1.typeshiLogger.warn([
                    `[isValidCsv()]: Required headers missing from headerRow`,
                    `filePath: '${filePath}'`,
                    `requiredHeaders: ${JSON.stringify(requiredHeaders)}`,
                    `csvFileHeaders: ${JSON.stringify(headerRow)}`
                ].join(config_1.INDENT_LOG_LINE));
                return false;
            }
        }
        // Check consistency of data rows
        const maxRows = Math.min(lines.length, maxRowsToCheck + 1); // +1 for header
        const expectedColumnCount = headerRow.length;
        for (let i = 1; i < maxRows; i++) {
            const line = lines[i];
            // Skip completely empty lines if allowed
            if (allowEmptyRows && line.trim() === '') {
                continue;
            }
            const rowValues = parseCsvLine(line, delimiter);
            // Check if row is empty (all fields are empty)
            const isEmptyRow = rowValues.every(val => val === '');
            if (isEmptyRow && allowEmptyRows) {
                continue;
            }
            // Check column count consistency against the header
            if (rowValues.length !== expectedColumnCount && !allowInconsistentColumns) {
                config_1.typeshiLogger.warn([
                    `[isValidCsv()]: Invalid row found: header.length !== rowValues.length`,
                    ` header.length: ${expectedColumnCount}`,
                    `rowValues.length: ${rowValues.length}`,
                    ` -> Difference = ${expectedColumnCount - rowValues.length}`,
                    ` header: ${JSON.stringify(headerRow)}`,
                    // `rowValues: ${JSON.stringify(rowValues)}`,
                    ` rowIndex: ${i}`,
                    ` filePath: '${filePath}'`,
                    `delimiter: '${delimiter}'`
                ].join(config_1.INDENT_LOG_LINE));
                return false;
            }
        }
        return true;
    }
    catch (error) {
        // Any I/O or parsing failure is treated as "not a valid CSV".
        config_1.typeshiLogger.error([
            `[isValidCsv()]: Error reading or parsing CSV file: ${filePath}`,
            `Error: ${error instanceof Error ? error.message : String(error)}`
        ].join(config_1.INDENT_LOG_LINE));
        return false;
    }
}
/**
 * Parses a single CSV line into trimmed fields, honoring double-quoted fields
 * with embedded delimiters and `""` escape sequences.
 * @param line `string` - the CSV line to parse
 * @param delimiter `string` - the delimiter character
 * @returns **`fields`** `string[]` - array of trimmed field values
 */
function parseCsvLine(line, delimiter) {
    const fields = [];
    let buffer = '';
    let quoted = false;
    for (let pos = 0; pos < line.length; pos++) {
        const ch = line[pos];
        if (quoted) {
            if (ch !== '"') {
                buffer += ch;
            }
            else if (line[pos + 1] === '"') {
                // "" inside a quoted field decodes to a literal quote
                buffer += '"';
                pos++; // consume the second quote
            }
            else {
                // closing quote ends the quoted section
                quoted = false;
            }
        }
        else if (ch === '"') {
            quoted = true;
        }
        else if (ch === delimiter) {
            fields.push(buffer.trim());
            buffer = '';
        }
        else {
            buffer += ch;
        }
    }
    // Flush the final field (every line yields at least one field).
    fields.push(buffer.trim());
    return fields;
}
/**
 * Analyzes a CSV file and returns detailed validation information:
 * `{ isValid, issues, warnings, stats, headers }`. `issues` are fatal
 * (currently only "File is empty" / read errors); `warnings` are advisory
 * (duplicate/empty headers, inconsistent or empty rows).
 * @param filePath `string` - path to the CSV file
 * @param options `object` - validation options:
 * - `sampleSize` `number` - max data rows to examine (default 1000; rows are sampled evenly)
 * - `checkEncoding` `boolean` - detect a UTF-8 BOM (default false)
 * - `detectDelimiter` `boolean` - guess delimiter by frequency instead of file extension (default false)
 * @returns **`analysis`** `object` - detailed analysis of the CSV file
 */
function analyzeCsv(filePath, options = {}) {
    const { sampleSize = 1000, checkEncoding = false, detectDelimiter = false } = options;
    const issues = [];
    const warnings = [];
    const stats = {
        totalRows: 0,
        headerCount: 0,
        maxRowLength: 0,
        minRowLength: Infinity,
        emptyRows: 0,
        encoding: null,
        detectedDelimiter: null
    };
    let headers = [];
    try {
        validate.existingPathArgument(`reading.analyzeCsv`, { filePath });
        const data = fs_1.default.readFileSync(filePath, 'utf8');
        // Normalize CRLF / bare CR line endings to LF before scanning.
        const normalizedData = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        // Detect delimiter if requested: count occurrences of each candidate in
        // the raw data and take the most frequent; fall back to the extension-based
        // delimiter when no candidate appears at all.
        let delimiter;
        if (detectDelimiter) {
            const commonDelimiters = [',', '\t', ';', '|'];
            const delimiterCounts = commonDelimiters.map(delim => ({
                delimiter: delim,
                count: (data.match(new RegExp(`\\${delim}`, 'g')) || []).length
            }));
            const mostLikely = delimiterCounts.sort((a, b) => b.count - a.count)[0];
            delimiter = mostLikely.count > 0 ? mostLikely.delimiter : getDelimiterFromFilePath(filePath);
            stats.detectedDelimiter = delimiter;
        }
        else {
            delimiter = getDelimiterFromFilePath(filePath);
        }
        // Parse the file into logical lines; a newline inside double quotes is
        // part of the field, not a row break (same scanner as isValidCsvSync).
        let lines = [];
        let currentLine = '';
        let inQuotes = false;
        let i = 0;
        while (i < normalizedData.length) {
            const char = normalizedData[i];
            const nextChar = normalizedData[i + 1];
            if (char === '"') {
                if (inQuotes && nextChar === '"') {
                    // Escaped quote: keep both characters for later un-escaping.
                    currentLine += '""';
                    i++;
                }
                else {
                    inQuotes = !inQuotes;
                    currentLine += char;
                }
            }
            else if (char === '\n' && !inQuotes) {
                lines.push(currentLine);
                currentLine = '';
            }
            else {
                currentLine += char;
            }
            i++;
        }
        // File may not end with a newline — flush the remainder.
        if (currentLine) {
            lines.push(currentLine);
        }
        stats.totalRows = lines.length;
        if (lines.length === 0) {
            issues.push('File is empty');
            return { isValid: false, issues, warnings, stats, headers };
        }
        headers = parseCsvLine(lines[0], delimiter);
        stats.headerCount = headers.length;
        stats.maxRowLength = headers.length;
        stats.minRowLength = headers.length;
        // Check for duplicate headers
        const headerSet = new Set(headers);
        if (headerSet.size !== headers.length) {
            warnings.push('Duplicate header names found');
        }
        // Check for empty headers
        if (headers.some(h => h.trim() === '')) {
            warnings.push('Empty header names found');
        }
        // Analyze data rows (sample evenly via `step` when there are more data
        // rows than sampleSize; counts below reflect only the sampled rows).
        const rowsToCheck = Math.min(lines.length - 1, sampleSize);
        const step = rowsToCheck < lines.length - 1 ? Math.floor((lines.length - 1) / rowsToCheck) : 1;
        let inconsistentRows = 0;
        for (let i = 1; i < lines.length; i += step) {
            const line = lines[i];
            if (line.trim() === '') {
                stats.emptyRows++;
                continue;
            }
            const fields = parseCsvLine(line, delimiter);
            stats.maxRowLength = Math.max(stats.maxRowLength, fields.length);
            stats.minRowLength = Math.min(stats.minRowLength, fields.length);
            if (fields.length !== headers.length) {
                inconsistentRows++;
            }
        }
        if (inconsistentRows > 0) {
            warnings.push(`${inconsistentRows} rows have inconsistent column counts`);
        }
        if (stats.emptyRows > 0) {
            warnings.push(`${stats.emptyRows} empty rows found`);
        }
        // Encoding detection (basic): only distinguishes "UTF-8 with BOM" from
        // "UTF-8" by checking the first three bytes — no other encodings are detected.
        if (checkEncoding) {
            try {
                const buffer = fs_1.default.readFileSync(filePath);
                const hasUtf8Bom = buffer.length >= 3 &&
                    buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF;
                stats.encoding = hasUtf8Bom ? 'UTF-8 with BOM' : 'UTF-8';
            }
            catch (error) {
                warnings.push('Could not detect file encoding');
            }
        }
        // Warnings never invalidate the file; only `issues` do.
        const isValid = issues.length === 0;
        return { isValid, issues, warnings, stats, headers };
    }
    catch (error) {
        issues.push(`Error analyzing file: ${error instanceof Error ? error.message : String(error)}`);
        return { isValid: false, issues, warnings, stats, headers };
    }
}
/**
 * Attempts to repair common CSV formatting issues and writes the result to
 * `outputPath`: standardizes line endings, drops empty rows, pads short rows
 * to the header's column count, and re-quotes fields containing the delimiter,
 * newlines, or quotes.
 * @param filePath `string` - path to the CSV file to repair (must exist)
 * @param outputPath `string` - path where the repaired CSV will be saved
 * @param options `object` - repair options:
 * - `fixQuoting` `boolean` (default true) — NOTE(review): destructured but
 *   currently unused; re-quoting always happens during reconstruction
 * - `removeEmptyRows` `boolean` (default true)
 * - `standardizeLineEndings` `boolean` (default true)
 * - `fillMissingColumns` `boolean` (default true)
 * - `fillValue` `string` (default '') — value used to pad short rows
 * @returns **`repairResult`** `{ success: boolean, repairsMade: string[], errors: string[] }`
 */
function repairCsv(filePath, outputPath, options = {}) {
    const { fixQuoting = true, removeEmptyRows = true, standardizeLineEndings = true, fillMissingColumns = true, fillValue = '' } = options;
    const repairsMade = [];
    const errors = [];
    try {
        validate.existingPathArgument(`reading.repairCsv`, { filePath });
        validate.stringArgument(`reading.repairCsv`, { outputPath });
        // Delimiter is inferred from the input file's extension.
        const delimiter = getDelimiterFromFilePath(filePath);
        let data = fs_1.default.readFileSync(filePath, 'utf8');
        // Standardize line endings (CRLF and bare CR -> LF)
        if (standardizeLineEndings) {
            const originalData = data;
            data = data.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
            if (originalData !== data) {
                repairsMade.push('Standardized line endings');
            }
        }
        // Parse into logical lines; newlines inside double quotes stay within
        // the field (same scanner as isValidCsvSync / analyzeCsv).
        let lines = [];
        let currentLine = '';
        let inQuotes = false;
        let i = 0;
        while (i < data.length) {
            const char = data[i];
            const nextChar = data[i + 1];
            if (char === '"') {
                if (inQuotes && nextChar === '"') {
                    // Escaped quote — keep both characters for parseCsvLine.
                    currentLine += '""';
                    i++;
                }
                else {
                    inQuotes = !inQuotes;
                    currentLine += char;
                }
            }
            else if (char === '\n' && !inQuotes) {
                lines.push(currentLine);
                currentLine = '';
            }
            else {
                currentLine += char;
            }
            i++;
        }
        // Flush trailing content when the file lacks a final newline.
        if (currentLine) {
            lines.push(currentLine);
        }
        if (lines.length === 0) {
            errors.push('File is empty');
            return { success: false, repairsMade, errors };
        }
        // Get expected column count from header
        const headerFields = parseCsvLine(lines[0], delimiter);
        const expectedColumnCount = headerFields.length;
        // Process each line (the header is processed like any other row)
        const repairedLines = [];
        let emptyRowsRemoved = 0;
        let rowsWithMissingColumns = 0;
        for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
            const line = lines[lineIndex];
            // Skip empty rows if requested
            if (removeEmptyRows && line.trim() === '') {
                emptyRowsRemoved++;
                continue;
            }
            let fields = parseCsvLine(line, delimiter);
            // Fill missing columns up to the header's count with fillValue.
            if (fillMissingColumns && fields.length < expectedColumnCount) {
                while (fields.length < expectedColumnCount) {
                    fields.push(fillValue);
                }
                rowsWithMissingColumns++;
            }
            // Reconstruct line with proper quoting: any field containing the
            // delimiter, a newline, or a quote is quoted with "" escapes.
            const repairedLine = fields.map(field => {
                if (field.includes(delimiter) || field.includes('\n') || field.includes('"')) {
                    const escapedField = field.replace(/"/g, '""');
                    return `"${escapedField}"`;
                }
                return field;
            }).join(delimiter);
            repairedLines.push(repairedLine);
        }
        // Record repairs made
        if (emptyRowsRemoved > 0) {
            repairsMade.push(`Removed ${emptyRowsRemoved} empty rows`);
        }
        if (rowsWithMissingColumns > 0) {
            repairsMade.push(`Fixed ${rowsWithMissingColumns} rows with missing columns`);
        }
        // Write repaired file (LF-joined, no trailing newline)
        const repairedData = repairedLines.join('\n');
        fs_1.default.writeFileSync(outputPath, repairedData, 'utf8');
        return { success: true, repairsMade, errors };
    }
    catch (error) {
        errors.push(`Error repairing CSV: ${error instanceof Error ? error.message : String(error)}`);
        return { success: false, repairsMade, errors };
    }
}
/**
 * Asserts that every given path (folder or file) exists on disk.
 * @param paths `string[]` - paths to folders or files
 * @throws `Error` naming the first path that does not exist
 */
async function validatePath(...paths) {
    const missing = paths.find(p => !fs_1.default.existsSync(p));
    if (missing !== undefined) {
        throw new Error(`[ERROR reading.validatePath()]: path does not exist: ${missing}`);
    }
}
/**
 * Collects every row whose `row[targetColumn]` — or, when that raw value does
 * not match, whose `extractor(row[targetColumn], ...extractorArgs)` — appears
 * in `targetValues`.
 * @param rowSource `string | Record<string, any>[]`
 * - `string` -> filePath to a csv file
 * - `Record<string, any>[]` -> array of rows
 * @param targetColumn `string` - column whose value is matched against `targetValues`
 * @param targetValues `string[]` - values to look for
 * @param extractor `function (columnValue: string, ...args: any[]) => string` `(optional)`
 * @param extractorArgs `any[]` `(optional)` - extra arguments spread into `extractor`
 * @returns **`targetRows`** `Promise<{ rows: Record<string, any>[]; remainingValues: string[] }>`
 * - `rows`: all rows where either `row[targetColumn]` or `extractor(row[targetColumn])` is in `targetValues`
 * - `remainingValues`: elements of `targetValues` that no row matched
 */
async function extractTargetRows(rowSource, targetColumn, targetValues, extractor, extractorArgs) {
    const source = (0, logging_1.getSourceString)(F, extractTargetRows.name);
    if (!(0, typeValidation_1.isNonEmptyString)(rowSource) && !(0, typeValidation_1.isNonEmptyArray)(rowSource)) {
        throw new Error([`${source} Invalid param 'rowSource'`,
            `Expected rowSource: string | Record<string, any>[]`,
            `Received rowSource: '${typeof rowSource}'`
        ].join(config_1.INDENT_LOG_LINE));
    }
    validate.stringArgument(source, { targetColumn });
    if (extractor !== undefined)
        validate.functionArgument(source, { extractor });
    validate.arrayArgument(source, { targetValues, isNonEmptyString: typeValidation_1.isNonEmptyString });
    const sourceRows = await handleFileArgument(rowSource, extractTargetRows.name, [targetColumn]);
    const remainingValues = [];
    // targetValue -> indices of rows whose extracted value is a case-insensitive
    // prefix of that targetValue (recorded but not returned; kept for diagnostics).
    let potentials = {};
    let valuesFound = [];
    const targetRows = [];
    for (let i = 0; i < sourceRows.length; i++) {
        const row = sourceRows[i];
        if (!(0, typeValidation_1.hasKeys)(row, targetColumn)) {
            config_1.typeshiLogger.warn([`${source} row does not have provided targetColumn`,
                ` targetColumn: '${targetColumn}'`,
                `Object.keys(row): ${JSON.stringify(Object.keys(row))}`,
            ].join(config_1.INDENT_LOG_LINE));
            continue;
        }
        const originalValue = String(row[targetColumn]);
        // Direct match on the raw column value.
        if (targetValues.includes(originalValue)) {
            targetRows.push(row);
            if (!valuesFound.includes(originalValue))
                valuesFound.push(originalValue);
            continue;
        }
        if (!extractor) {
            continue;
        }
        // FIX: spread extractorArgs so each element arrives as its own argument,
        // matching the documented `(columnValue, ...args)` signature and the
        // sibling findMissingValues; previously the whole array was passed as a
        // single second argument. `|| []` guards the optional parameter.
        const extractedValue = await extractor(originalValue, ...(extractorArgs || []));
        if (!(0, typeValidation_1.isNonEmptyString)(extractedValue)) {
            // extractor produced nothing usable for this row; skip it silently.
            continue;
        }
        // Match on the extracted value.
        if (targetValues.includes(extractedValue)) {
            targetRows.push(row);
            if (!valuesFound.includes(extractedValue))
                valuesFound.push(extractedValue);
            continue;
        }
        // Near-miss: some targetValue starts with the extracted value (case-insensitive).
        let targetMatch = targetValues.find(v => {
            v = v.toUpperCase();
            return v.startsWith(extractedValue.toUpperCase());
        });
        if (targetMatch) {
            if (!potentials[targetMatch]) {
                potentials[targetMatch] = [i];
            }
            else {
                potentials[targetMatch].push(i);
            }
        }
    }
    // Any targetValue never matched (directly or via extractor) is reported back.
    remainingValues.push(...targetValues.filter(v => !valuesFound.includes(v)));
    return { rows: targetRows, remainingValues };
}
/**
 * For each CSV source, reports which of its column values (after running
 * `extractor`) are absent from `extantValues`. Values for which `extractor`
 * returns a non-string are logged and reported under their original form.
 * @param extantValues `string[]` - known values to compare against
 * @param csvFiles `string[] | FileData[] | Record<string, any>[][]` - one entry per source
 * @param column `string` - column to read from each source
 * @param extractor `(columnValue: string, ...args: any[]) => string | Promise<string>`
 * @param extractorArgs `any[]` - extra arguments spread into `extractor`
 * @returns **`missingValues`** `Promise<string[][]>`
 * where `missingValues[i]` holds the values found in `csvFiles[i][column]`
 * but not in `extantValues` (de-duplicated per source)
 */
async function findMissingValues(extantValues, csvFiles, column, extractor, extractorArgs = []) {
    const source = (0, logging_1.getSourceString)(__filename, findMissingValues.name);
    const missingValues = [];
    for (const rowSource of csvFiles) {
        const missingForFile = [];
        missingValues.push(missingForFile);
        const columnValues = await getColumnValues(rowSource, column);
        for (const originalValue of columnValues) {
            const extractedValue = await extractor(originalValue, ...extractorArgs);
            if (!(0, typeValidation_1.isNonEmptyString)(extractedValue)) {
                // Extraction failed — log it and report the raw value as missing.
                config_1.typeshiSimpleLogger.warn([`${source} extractor(value) returned invalid string`,
                    `originalValue: '${originalValue}'`,
                ].join(config_1.INDENT_LOG_LINE));
                if (!missingForFile.includes(originalValue)) {
                    missingForFile.push(originalValue);
                }
                continue;
            }
            if (!extantValues.includes(extractedValue)
                && !missingForFile.includes(extractedValue)) {
                missingForFile.push(extractedValue);
            }
        }
    }
    return missingValues;
}