@teselagen/file-utils 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,13 +1,15 @@
1
1
  {
2
2
  "name": "@teselagen/file-utils",
3
- "version": "0.3.0",
4
- "type": "module",
3
+ "version": "0.3.2",
4
+ "type": "commonjs",
5
5
  "dependencies": {
6
6
  "bluebird": "^3.7.2",
7
7
  "jszip": "^3.10.1",
8
8
  "lodash": "^4.17.21",
9
9
  "papaparse": "^5.4.1"
10
10
  },
11
- "module": "index.js",
12
- "main": "index.js"
13
- }
11
+ "devDependencies": {
12
+ "mock-fs": "^5.2.0"
13
+ },
14
+ "license": "MIT"
15
+ }
@@ -0,0 +1,354 @@
1
+ /* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
2
+ import { camelCase, flatMap, remove, startsWith, snakeCase } from "lodash";
3
+ import { loadAsync } from "jszip";
4
+ import Promise from "bluebird";
5
+ import { parse, unparse } from "papaparse";
6
+
7
// When true, logDebug prints its arguments; it is hard-coded off here.
const debug = false;

/**
 * Debug logger: forwards every argument to console.log while `debug` is on
 * and does nothing otherwise.
 * @param {...*} args values to print
 */
const logDebug = (...args) => {
  if (!debug) return;
  // eslint-disable-next-line no-console
  console.log(...args);
};

/** Dot-prefixed file extensions listed as allowed CSV file types. */
export const allowedCsvFileTypes = [".csv", ".txt", ".xlsx"];
16
+
17
/**
 * Returns the text after the last "." in `file.name`.
 * Yields `undefined` when the file or its name is missing, and the whole name
 * when the name contains no dot. The original casing is preserved.
 * @param {{name?: string}} [file]
 * @returns {string|undefined}
 */
export const getExt = (file) => file?.name?.split(".").pop();

// Case-insensitive extension check; false when the file has no usable name.
const hasExtension = (file, ...extensions) => {
  const ext = getExt(file);
  return typeof ext === "string" && extensions.includes(ext.toLowerCase());
};

/**
 * Tells whether `file` is a zip archive, judged first by extension and then
 * by MIME type (`mimetype` or `type`).
 * A ".geneious" file is never reported as a zip, whatever its MIME type.
 * A missing file yields false instead of throwing.
 * @param {{name?: string, mimetype?: string, type?: string}} [file]
 * @returns {boolean}
 */
export const isZipFile = (file) => {
  if (hasExtension(file, "zip")) return true;
  if (hasExtension(file, "geneious")) return false;
  const type = file?.mimetype || file?.type;
  return type === "application/zip" || type === "application/x-zip-compressed";
};

/** True when the file name ends in ".xlsx" (any letter case). */
export const isExcelFile = (file) => hasExtension(file, "xlsx");

/** True when the file name ends in ".csv" (any letter case). */
export const isCsvFile = (file) => hasExtension(file, "csv");

/** True when the file name ends in ".text" or ".txt" (any letter case). */
export const isTextFile = (file) => hasExtension(file, "text", "txt");

/** True when the file is either a CSV or an Excel (.xlsx) file. */
export const isCsvOrExcelFile = (file) => isCsvFile(file) || isExcelFile(file);
30
+
31
/**
 * Replaces every zip archive in `allFiles` with the files it contains.
 *
 * Non-zip entries are returned untouched, in their original order; the
 * members of the archives are appended after them as
 * `{ name, originFileObj, originalFileObj }` wrappers around a new `File`.
 * Entries whose name contains "__MACOSX" or ".DS_Store", and entries with a
 * size of 0, are dropped.
 *
 * @param {Object|Object[]} allFiles a single file or an array of files
 * @returns {Promise<Object[]>} a new array; the input array is not mutated
 */
export const extractZipFiles = async (allFiles) => {
  // Copy first so the caller's array (e.g. a form value) is never mutated.
  const remaining = Array.isArray(allFiles) ? [...allFiles] : [allFiles];
  // lodash `remove` pulls the matching items out of `remaining` and returns them.
  const archives = remove(remaining, isZipFile);
  if (!archives.length) return remaining;

  const loadedArchives = await Promise.map(archives, (archive) => {
    const source = archive instanceof Blob ? archive : archive.originFileObj;
    return loadAsync(source);
  });
  const entries = flatMap(loadedArchives, (archive) =>
    Object.values(archive.files)
  );

  const extracted = await Promise.map(entries, async (entry) => {
    // Read the entry's content as a Blob and wrap it in a File.
    const content = await entry.async("blob");
    const fileObj = new File([content], entry.name);
    return {
      name: entry.name,
      originFileObj: fileObj,
      originalFileObj: fileObj
    };
  });
  if (!extracted.length) return remaining;

  const isUsable = ({ name, originFileObj }) =>
    !name.includes("__MACOSX") &&
    !name.includes(".DS_Store") &&
    originFileObj.size !== 0;
  return remaining.concat(extracted.filter(isUsable));
};
68
+
69
// Baseline options merged under caller-supplied ones by the CSV parse helpers.
const defaultCsvParserOptions = {
  header: true,
  skipEmptyLines: "greedy",
  trimHeaders: true
};

/**
 * Turns this module's parser options into options for papaparse.
 *
 * `camelCaseHeaders` and `lowerCaseHeaders` are consumed here and replaced by
 * a `transformHeader` function; every other option is passed through as is.
 * `camelCaseHeaders` wins when both flags are set. Headers starting with
 * "ext-" keep their text, and every non-empty result is trimmed.
 *
 * @param {Object} [parserOptions]
 * @param {boolean} [parserOptions.camelCaseHeaders=false]
 * @param {boolean} [parserOptions.lowerCaseHeaders=false]
 * @returns {Object} a new options object
 */
export const setupCsvParserOptions = (parserOptions = {}) => {
  const {
    camelCaseHeaders = false,
    lowerCaseHeaders = false,
    ...papaParseOpts
  } = parserOptions;

  // Shared tail of both transforms: log the outcome, trim non-empty results.
  const finishHeader = (header, transHeader) => {
    if (!transHeader) {
      logDebug(`[CSV-PARSER] Not transforming header: ${header}`);
      return transHeader;
    }
    logDebug(
      `[CSV-PARSER] Transformed header from: ${header} to: ${transHeader}`
    );
    return transHeader.trim();
  };

  if (camelCaseHeaders) {
    logDebug("[CSV-PARSER] camelCasing headers");
    papaParseOpts.transformHeader = (header) => {
      // The "ext-" check here ignores surrounding whitespace.
      const isExt = startsWith(header.trim(), "ext-");
      return finishHeader(header, isExt ? header : camelCase(header));
    };
  } else if (lowerCaseHeaders) {
    papaParseOpts.transformHeader = (header) => {
      // The "ext-" check here is made on the untrimmed header.
      const isExt = startsWith(header, "ext-");
      return finishHeader(header, isExt ? header : header.toLowerCase());
    };
  }

  return papaParseOpts;
};
123
+
124
// Upper-cases the header, snake-cases it with lodash, and upper-cases the result.
const normalizeCsvHeaderHelper = (h) => {
  const snaked = snakeCase(h.toUpperCase());
  return snaked.toUpperCase();
};

/**
 * Normalizes a CSV header to upper snake case.
 * Headers beginning with "ext-" or "EXT-" are returned unchanged.
 * @param {string} header
 * @returns {string}
 */
export function normalizeCsvHeader(header) {
  const isExtended = ["ext-", "EXT-"].some((prefix) =>
    header.startsWith(prefix)
  );
  return isExtended ? header : normalizeCsvHeaderHelper(header);
}
133
+
134
/**
 * Parses a CSV file with papaparse and returns a promise for the results.
 *
 * `csvFile` may be an upload wrapper carrying the real file in
 * `originFileObj`, or the file itself; in the latter case it is handed to
 * papaparse directly.
 *
 * The promise resolves with the papaparse results object. It rejects with the
 * string `"Error in csv: <JSON of errors>"` when papaparse reports errors.
 * The one exception is a lone `UndetectableDelimiter` error on a parse that
 * still produced rows, which is treated as success. Errors raised through
 * papaparse's `error` callback are passed to the rejection unchanged.
 *
 * @param {Object} csvFile wrapper with `originFileObj`, or the file itself
 * @param {Object} [parserOptions] see `setupCsvParserOptions`
 * @returns {Promise<Object>} papaparse results (`data`, `errors`, `meta`)
 */
export const parseCsvFile = (csvFile, parserOptions = {}) => {
  return new Promise((resolve, reject) => {
    const opts = {
      ...defaultCsvParserOptions,
      ...setupCsvParserOptions(parserOptions),
      complete: (results) => {
        const errors = results?.errors ?? [];
        // A single-column file has no delimiter to detect; that alone is not
        // a failure as long as rows were produced.
        const onlyUndetectableDelimiter =
          errors.length === 1 &&
          errors[0].code === "UndetectableDelimiter" &&
          Boolean(results.data?.length);
        if (errors.length && !onlyUndetectableDelimiter) {
          // Rejection value stays a string so existing callers keep working.
          return reject("Error in csv: " + JSON.stringify(errors));
        }
        return resolve(results);
      },
      error: (error) => {
        reject(error);
      }
    };
    logDebug(`[CSV-PARSER] parseCsvFile opts:`, opts);
    // Accept both the upload wrapper and a bare file.
    parse(csvFile.originFileObj || csvFile, opts);
  });
};
161
+
162
/**
 * Converts properly formatted JSON data into a CSV string using papaparse's
 * `unparse` (https://www.papaparse.com/docs#json-to-csv).
 *
 * Example of the options papaparse documents for this call:
 * const options = {
 *   quotes: false, // or array of booleans
 *   quoteChar: '"',
 *   escapeChar: '"',
 *   delimiter: ",",
 *   header: true,
 *   newline: "\r\n",
 *   skipEmptyLines: false, // other option is 'greedy', meaning skip delimiters, quotes, and whitespace.
 *   columns: null // or array of strings
 * }
 *
 * @param {*} jsonData data to serialize
 * @param {Object} [options] options forwarded to `unparse`
 * @returns csv as string
 */
export const jsonToCsv = (jsonData, options = {}) => unparse(jsonData, options);
181
+
182
/**
 * Parses CSV text with papaparse and returns its result directly.
 * The module defaults are applied first, then the options derived from
 * `parserOptions` via `setupCsvParserOptions`.
 * @param {string} csvString CSV text
 * @param {Object} [parserOptions] see `setupCsvParserOptions`
 * @returns {Object} whatever papaparse `parse` returns for the string
 */
export const parseCsvString = (csvString, parserOptions = {}) => {
  const callerOpts = setupCsvParserOptions(parserOptions);
  const opts = Object.assign({}, defaultCsvParserOptions, callerOpts);
  logDebug(`[CSV-PARSER] parseCsvString opts:`, opts);
  return parse(csvString, opts);
};
190
+
191
/**
 * Picks a parsable file out of `fileOrFiles` and parses it as CSV.
 *
 * Precedence is: a CSV file, then an Excel (.xlsx) file, then a text file.
 * An Excel file is only converted, through `window.parseExcelToCsv`, when no
 * CSV file was supplied. A usable CSV is therefore parsed even if an Excel
 * file sits next to it and no Excel converter is installed.
 *
 * @param {Object|Object[]} fileOrFiles one file or an array of files
 * @param {Object} [options]
 * @param {Object} [options.csvParserOptions] forwarded to `parseCsvFile`
 * @returns {Promise<Object>} the `parseCsvFile` results, with the parsed file
 *   attached as `originalFile`
 * @throws {Error} "No csv or excel files found" when nothing matches
 * @throws {Error} "Excel Parser not initialized on the window" when an Excel
 *   file must be converted but `window.parseExcelToCsv` is unavailable
 * @throws {Error} the converter's `error` value when conversion fails
 */
export async function parseCsvOrExcelFile(
  fileOrFiles,
  { csvParserOptions } = {}
) {
  const candidates = Array.isArray(fileOrFiles) ? fileOrFiles : [fileOrFiles];
  const excelFile = candidates.find(isExcelFile);
  const txtFile = candidates.find(isTextFile);
  let csvFile = candidates.find(isCsvFile);

  if (!csvFile && !excelFile && !txtFile) {
    throw new Error("No csv or excel files found");
  }

  if (!csvFile && excelFile) {
    // Guard the global lookup so non-browser environments get the same
    // descriptive error instead of a ReferenceError.
    const parseExcelToCsv =
      typeof window !== "undefined" ? window.parseExcelToCsv : undefined;
    if (!parseExcelToCsv) {
      throw new Error("Excel Parser not initialized on the window");
    }
    csvFile = await window.parseExcelToCsv(
      excelFile.originFileObj || excelFile
    );
    if (csvFile.error) {
      throw new Error(csvFile.error);
    }
  } else if (!csvFile) {
    // Neither CSV nor Excel: fall back to the text file.
    csvFile = txtFile;
  }

  const parsedCsv = await parseCsvFile(csvFile, csvParserOptions);
  parsedCsv.originalFile = csvFile;
  return parsedCsv;
}
225
+
226
/**
 * Checks that every required header is present in `fields`.
 * @param {string[]} fields headers found in the file
 * @param {string[]} requiredHeaders headers that must be present
 * @param {string} [filename] used in the message when provided
 * @returns {string|undefined} an error message listing the missing headers,
 *   or undefined when none are missing
 */
export const validateCSVRequiredHeaders = (
  fields,
  requiredHeaders,
  filename
) => {
  const missing = requiredHeaders.filter((header) => !fields.includes(header));
  if (!missing.length) return undefined;

  const subject = filename ? `The file ${filename}` : "CSV file";
  const missingList = missing.join(", ");
  return `${subject} is missing required headers. (${missingList})`;
};
241
+
242
/**
 * Checks that a parsed row has a truthy value for every required header.
 * @param {Object} row parsed CSV row keyed by header
 * @param {string[]} requiredHeaders headers whose value must be truthy
 * @param {number} index zero-based row index; reported as `index + 1`
 * @returns {string|undefined} an error message, or undefined when the row is
 *   complete
 */
export const validateCSVRow = (row, requiredHeaders, index) => {
  const missing = requiredHeaders.filter((field) => !row[field]);
  if (!missing.length) return undefined;

  const rowNumber = index + 1;
  return missing.length === 1
    ? `Row ${rowNumber} is missing the required field "${missing[0]}"`
    : `Row ${rowNumber} is missing these required fields: ${missing.join(", ")}`;
};
256
+
257
/**
 * Splits a comma-separated cell into its trimmed, non-empty parts.
 * A falsy cell is treated as an empty string and yields an empty array.
 * @param {string} [cellData]
 * @returns {string[]}
 */
export const cleanCommaSeparatedCell = (cellData) => {
  const parts = [];
  for (const piece of (cellData || "").split(",")) {
    const trimmed = piece.trim();
    if (trimmed) parts.push(trimmed);
  }
  return parts;
};
262
+
263
/**
 * Because the csv rows might not have the same header keys in some cases
 * (extended properties), this function makes sure that each row has every
 * header seen in any row, so that the export does not drop fields.
 *
 * Missing (undefined or null) values are filled with an empty string. Values
 * such as `0` and `false` are legitimate data and are left untouched.
 * The rows are updated in place and the same array is returned.
 *
 * @param {Object[]} rows rows keyed by header
 * @returns {Object[]} the same `rows` array
 */
export const cleanCsvExport = (rows) => {
  // A Set keeps first-seen order while making the membership check O(1).
  const allHeaders = new Set();
  for (const row of rows) {
    for (const header of Object.keys(row)) {
      allHeaders.add(header);
    }
  }
  for (const row of rows) {
    for (const header of allHeaders) {
      // `??` rather than `||`: only absent values become "".
      row[header] = row[header] ?? "";
    }
  }
  return rows;
};
285
+
286
/**
 * Extracts `file` with `extractZipFiles` and keeps only the files whose
 * extension is in `accepted`.
 *
 * Each accepted entry is compared after removing its first "." so both
 * ".csv" and "csv" match. A toastr warning is shown when some, or all, of
 * the extracted files are rejected.
 *
 * @param {Object|Object[]} file file(s) handed to `extractZipFiles`
 * @param {string[]} accepted accepted extensions, with or without the dot
 * @returns {Promise<Object[]>} the files with an accepted extension
 */
export const filterFilesInZip = async (file, accepted) => {
  const extracted = await extractZipFiles(file);
  const acceptedFiles = extracted.filter((candidate) =>
    accepted.some((ext) => ext?.replace(".", "") === getExt(candidate))
  );

  if (!acceptedFiles.length) {
    window.toastr.warning("No files with the proper extension were found.");
  } else if (acceptedFiles.length < extracted.length) {
    window.toastr.warning("Some files don't have the proper file extension.");
  }

  return acceptedFiles;
};
303
+
304
/**
 * Strips the last extension from a file name.
 * Names without a dot, and falsy values, are returned unchanged.
 * @param {string} filename
 * @returns {string} everything before the last "."
 */
export function removeExt(filename) {
  const hasDot = filename && filename.includes(".");
  if (!hasDot) return filename;
  return filename.slice(0, filename.lastIndexOf("."));
}
311
+
312
/**
 * Posts the given files to "/user_uploads/" through `window.api` as
 * multipart form data (one "file" entry per `originFileObj`).
 * @param {Object[]} [files] upload wrappers carrying `originFileObj`
 * @returns {Promise<Object[]|null>} null when `files` is empty; otherwise one
 *   `{ encoding, mimetype, originalname, path, size }` object per item of the
 *   response's `data`
 */
export async function uploadAndProcessFiles(files = []) {
  if (!files.length) return null;

  const formData = new FormData();
  for (const { originFileObj } of files) {
    formData.append("file", originFileObj);
  }

  const { data } = await window.api.post("/user_uploads/", formData);

  // Keep only the documented fields of each upload record.
  return data.map(({ encoding, mimetype, originalname, path, size }) => ({
    encoding,
    mimetype,
    originalname,
    path,
    size
  }));
}
328
+
329
/**
 * Base64-encodes each file so it can be embedded in a request body.
 *
 * The content is taken from `originalFileObj` when present, otherwise from
 * `originFileObj` (the key used by the other helpers in this module), and
 * otherwise from the item itself. Files are read one at a time.
 *
 * @param {Object[]} files upload wrappers (or bare files) with `name`/`type`
 * @returns {Promise<Array<{type: string, base64Data: string, name: string}>>}
 *   `base64Data` is the part of the data URL after the first comma
 */
export async function encodeFilesForRequest(files) {
  const encodedFiles = [];
  for (const file of files) {
    const source = file.originalFileObj || file.originFileObj || file;
    const dataUrl = await fileToBase64(source);
    // A data URL looks like "data:<mime>;base64,<payload>"; keep the payload.
    const [, base64Data] = dataUrl.split(",");
    encodedFiles.push({
      type: file.type,
      base64Data,
      name: file.name
    });
  }
  return encodedFiles;
}
342
+
343
/**
 * Reads a file with FileReader and resolves with its data URL
 * ("data:<mime>;base64,<payload>").
 * Rejects with the reader's error when the read fails, so callers are never
 * left waiting on a promise that cannot settle.
 * @param {Blob} file file or blob to read
 * @returns {Promise<string>} the data URL
 */
const fileToBase64 = (file) => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Resolve with the data URL once the file has been loaded.
    reader.onload = (event) => {
      resolve(event.target.result);
    };
    reader.onerror = () => {
      reject(reader.error || new Error("Failed to read file"));
    };

    // Convert data to base64
    reader.readAsDataURL(file);
  });
};
@@ -0,0 +1,186 @@
1
+ import {
2
+ isZipFile,
3
+ isExcelFile,
4
+ isCsvFile,
5
+ isTextFile,
6
+ normalizeCsvHeader,
7
+ validateCSVRequiredHeaders,
8
+ validateCSVRow,
9
+ removeExt,
10
+ filterFilesInZip,
11
+ parseCsvFile
12
+ } from "./file-utils"; // replace 'yourFile' with the path of your actual file
13
+
14
+ import * as JSZip from "jszip";
15
+ import * as mock from "mock-fs";
16
+
17
// parseCsvFile: a one-column CSV (header + two rows) wrapped as an upload object.
describe("parseCsvFile", () => {
  it("resolves with results when parsing is successful", async () => {
    const csvText = `Material name
mat 1
mat 2`;
    const originFileObj = new File([csvText], "dummyFile");

    const { data } = await parseCsvFile({ originFileObj });

    expect(data).toEqual([
      { "Material name": "mat 1" },
      { "Material name": "mat 2" }
    ]);
  });
});
35
+
36
// Currently skipped. Builds a two-entry zip in a mocked file system and checks
// that only the .csv entry survives, with and without a leading dot in `accepted`.
describe.skip("filterFilesInZip", () => {
  beforeAll(async () => {
    const archive = new JSZip();
    archive.file("test1.txt", "Hello World");
    archive.file("test2.csv", "id,name\n1,John");

    const archiveBuffer = await archive.generateAsync({ type: "nodebuffer" });

    mock({
      "/path/to": {
        "myzipfile.zip": archiveBuffer
      }
    });
  });

  afterAll(() => {
    mock.restore();
  });

  it("should filter and return only .csv files", async () => {
    const zipUpload = {
      path: "/path/to/myzipfile.zip",
      originalname: "myzipfile.zip",
      mimetype: "application/zip"
    };

    // First with the dot, then without it.
    for (const accepted of [[".csv"], ["csv"]]) {
      const matched = await filterFilesInZip(zipUpload, accepted);

      expect(matched.length).toBe(1);
      expect(matched[0].name).toBe("test2.csv");
    }
  });
});
75
+
76
// Unit tests for the file-type predicates, header normalization, CSV
// validation messages, and extension stripping.
describe("CSV and Excel file tests", () => {
  describe("isZipFile", () => {
    it("should return true if file type is zip", () => {
      expect(isZipFile({ mimetype: "application/zip" })).toBeTruthy();
    });

    it("should return false if file type is not zip", () => {
      expect(isZipFile({ mimetype: "application/pdf" })).toBeFalsy();
    });
  });

  describe("isExcelFile", () => {
    it("should return true if file type is excel", () => {
      expect(isExcelFile({ name: "test.xlsx" })).toBeTruthy();
    });

    it("should return false if file type is not excel", () => {
      expect(isExcelFile({ name: "test.pdf" })).toBeFalsy();
    });
  });

  describe("isCsvFile", () => {
    it("should return true if file type is csv", () => {
      expect(isCsvFile({ name: "test.csv" })).toBeTruthy();
    });

    it("should return false if file type is not csv", () => {
      expect(isCsvFile({ name: "test.pdf" })).toBeFalsy();
    });
  });

  describe("isTextFile", () => {
    it("should return true if file type is txt", () => {
      expect(isTextFile({ name: "test.txt" })).toBeTruthy();
    });

    it("should return false if file type is not txt", () => {
      expect(isTextFile({ name: "test.pdf" })).toBeFalsy();
    });
  });

  describe("normalizeCsvHeader", () => {
    it('should return the same header if it starts with "ext-"', () => {
      const extHeader = "ext-name";
      expect(normalizeCsvHeader(extHeader)).toBe(extHeader);
    });

    it('should return a normalized header if it does not start with "ext-"', () => {
      expect(normalizeCsvHeader("name")).toBe("NAME");
    });
  });

  describe("validateCSVRequiredHeaders", () => {
    const required = ["name", "email"];
    const csvName = "test.csv";

    it("should return error message if required headers are missing", () => {
      const message = validateCSVRequiredHeaders(
        ["name", "address"],
        required,
        csvName
      );
      expect(message).toBe(
        "The file test.csv is missing required headers. (email)"
      );
    });

    it("should return undefined if no required headers are missing", () => {
      const message = validateCSVRequiredHeaders(
        ["name", "email"],
        required,
        csvName
      );
      expect(message).toBeUndefined();
    });
  });

  describe("validateCSVRow", () => {
    const required = ["name", "email"];

    it("should return error message if required fields are missing", () => {
      const incompleteRow = { name: "John", email: "" };
      expect(validateCSVRow(incompleteRow, required, 0)).toBe(
        'Row 1 is missing the required field "email"'
      );
    });

    it("should return undefined if no required fields are missing", () => {
      const completeRow = { name: "John", email: "john@example.com" };
      expect(validateCSVRow(completeRow, required, 0)).toBeUndefined();
    });
  });

  describe("removeExt", () => {
    it("should remove extension from filename", () => {
      expect(removeExt("test.csv")).toBe("test");
    });

    it("should return the same filename if there is no extension", () => {
      expect(removeExt("test")).toBe("test");
    });
  });
});
package/src/index.js ADDED
@@ -0,0 +1 @@
1
+ export * from './file-utils';
package/CHANGELOG.md DELETED
@@ -1,12 +0,0 @@
1
- # Changelog
2
-
3
- This file was generated using [@jscutlery/semver](https://github.com/jscutlery/semver).
4
-
5
- ## 0.1.0 (2023-05-30)
6
-
7
-
8
- ### Features
9
-
10
- * more work adding more repos ([f320d76](https://github.com/TeselaGen/tg-oss/commit/f320d76a7a2e0db34d68d1a51803efa24f6831df))
11
- * wip adding a bunch of repos to tg-oss ([6040094](https://github.com/TeselaGen/tg-oss/commit/60400941f0d7f72c02bf19b90896d9a35d32634d))
12
- * wip setting up nx ([24a1952](https://github.com/TeselaGen/tg-oss/commit/24a1952fdbadc2c733223109d620cbc508c94a28))
package/README.md DELETED
@@ -1,11 +0,0 @@
1
- # file-utils
2
-
3
- This library was generated with [Nx](https://nx.dev).
4
-
5
- ## Building
6
-
7
- Run `nx build file-utils` to build the library.
8
-
9
- ## Running unit tests
10
-
11
- Run `nx test file-utils` to execute the unit tests via [Jest](https://jestjs.io).