npm - read-excel-file - Versions diffs - 8.0.2 → 9.0.0 - Mend

read-excel-file 8.0.2 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/CHANGELOG.md +10 -1
package/README.md +213 -74
package/browser/index.d.ts +17 -4
package/commonjs/parseData/parseData.js +423 -200
package/commonjs/parseData/parseData.js.map +1 -1
package/commonjs/parseData/parseData.test.js.map +1 -1
package/commonjs/xml/xml.js +1 -1
package/commonjs/xml/xml.js.map +1 -1
package/modules/parseData/parseData.js +422 -200
package/modules/parseData/parseData.js.map +1 -1
package/modules/parseData/parseData.test.js.map +1 -1
package/modules/xml/xml.js +1 -1
package/modules/xml/xml.js.map +1 -1
package/node/index.d.ts +17 -4
package/package.json +2 -2
package/types/parseData/parseData.d.ts +7 -8
package/types/parseData/parseDataError.d.ts +126 -73
package/types/parseData/parseDataSchema.d.ts +2 -2
package/types/parseData/parseDataValueType.d.ts +17 -12
package/types/types.d.ts +4 -2
package/universal/index.d.ts +17 -4
package/web-worker/index.d.ts +17 -4

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,11 @@
+9.0.0 / 18.04.2026
+==================
+* Refactored `parseData()` function.
+* The result of the `parseData()` function is now `{ errors, objects }`. If there are no errors, `errors` will be `undefined`. Otherwise, `errors` will be a non-empty array and `objects` will be `undefined`.
+  * Previously the result of `parseData()` function was `[{ errors, object }, ...]`, i.e. the `errors` were split between each particular data row. Now the `errors` are combined for all data rows. The rationale is that it's simpler to handle the result of the function this way.
+* In a schema, a nested object is no longer allowed to be `required: true`. If it were allowed, the corresponding `"required"` error would have to include a specific `column` title, but a nested object simply doesn't have one.
 8.0.0 / 11.03.2026
 ==================
@@ -26,11 +34,12 @@
 	* `getEmptyArrayValue` → `transformEmptyArray`
     * The leading `.` character is now removed from the `path` parameter.
 * Previously, when parsing comma-separated values, it used to ignore any commas that are surrounded by quotes, similar to how it's done in `.csv` files. Now it no longer does that.
+* Previously, when parsing comma-separated values, it used to allow empty-string elements. Now it no longer does that and such empty-string elements will now result in an error with properties: `{ error: "invalid", reason: "syntax" }`.
 * Previously, when parsing using a schema, it used to force-convert all `type: Date` schema properties from any numeric cell value to a `Date` with a given timestamp. Now it demands the cell values for all such `type: Date` schema properties to already be correctly recognized as `Date`s when they're returned from `readSheet()` or `readExcelFile()` function. And I'd personally assume that in any sane (non-contrived) real-world usage scenario that would be the case, so it doesn't really seem like a "breaking change". And if, for some strange reason, that happens not to be the case, `parseData()` function will throw an error: `not_a_date`.
 * Previously, when parsing using a schema, it used to skip `required` validation for completely-empty rows. It no longer does that.
 * Removed exported function `parseExcelDate()` because there seems to be no need to have it exported.
 * (TypeScript) Renamed exported types:
-  * `Type` → `ParseDataValueType`
+  * `Type` → `ParseDataCustomType`
   * `Error` or `SchemaParseCellValueError` → `ParseDataError`
   * `CellValueRequiredError` → `ParseDataValueRequiredError`
   * `ParsedObjectsResult` → `ParseDataResult`

package/README.md CHANGED Viewed

@@ -37,6 +37,7 @@ Also check out [`write-excel-file`](https://www.npmjs.com/package/write-excel-fi
     * The `result` of the function is an array where each element represents a "data row" and has shape `{ object, errors }`.
       * Depending on whether there were any errors when parsing a given "data row", either `object` or `errors` property will be `undefined`.
       * The `errors` don't have a `row` property anymore because it could be derived from "data row" number.
+      * In version `9.x`, the returned result of `parseData()` has been changed back to `{ errors, objects }`, so consider migrating straight to `9.x`. In that case, if there are no errors, `errors` will be `undefined`; otherwise, `errors` will be a non-empty array and `objects` will be `undefined`.
 * Removed `transformData` parameter because `schema` parameter was removed. A developer could transform the `data` themselves and then pass it to the `parseData()` function.
 * Removed `isColumnOriented` parameter.
 * Removed `ignoreEmptyRows` parameter. Empty rows somewhere in the middle are not ignored now.
@@ -49,16 +50,28 @@ Also check out [`write-excel-file`](https://www.npmjs.com/package/write-excel-fi
 	* `getEmptyArrayValue` → `transformEmptyArray`
     * The leading `.` character is now removed from the `path` parameter.
 * Previously, when parsing comma-separated values, it used to ignore any commas that are surrounded by quotes, similar to how it's done in `.csv` files. Now it no longer does that.
+* Previously, when parsing comma-separated values, it used to allow empty-string elements. Now it no longer does that and such empty-string elements will now result in an error with properties: `{ error: "invalid", reason: "syntax" }`.
 * Previously, when parsing using a schema, it used to force-convert all `type: Date` schema properties from any numeric cell value to a `Date` with a given timestamp. Now it demands the cell values for all such `type: Date` schema properties to already be correctly recognized as `Date`s when they're returned from `readSheet()` or `readExcelFile()` function. And I'd personally assume that in any sane (non-contrived) real-world usage scenario that would be the case, so it doesn't really seem like a "breaking change". And if, for some strange reason, that happens not to be the case, `parseData()` function will throw an error: `not_a_date`.
 * Previously, when parsing using a schema, it used to skip `required` validation for completely-empty rows. It no longer does that.
 * Removed exported function `parseExcelDate()` because there seems to be no need to have it exported.
 * (TypeScript) Renamed exported types:
-  * `Type` → `ParseDataValueType`
+  * `Type` → `ParseDataCustomType`
   * `Error` or `SchemaParseCellValueError` → `ParseDataError`
   * `CellValueRequiredError` → `ParseDataValueRequiredError`
   * `ParsedObjectsResult` → `ParseDataResult`
 </details>
+<details>
+<summary>Migrating from <code>8.x</code> to <code>9.x</code></summary>
+######
+* Refactored `parseData()` function.
+* The result of the `parseData()` function is now `{ errors, objects }`. If there are no errors, `errors` will be `undefined`. Otherwise, `errors` will be a non-empty array and `objects` will be `undefined`.
+  * Previously the result of `parseData()` function was `[{ errors, object }, ...]`, i.e. the `errors` were split between each particular data row. Now the `errors` are combined for all data rows. The rationale is that it's simpler to handle the result of the function this way.
+* In a schema, a nested object is no longer allowed to be `required: true`. If it were allowed, the corresponding `"required"` error would have to include a specific `column` title, but a nested object simply doesn't have one.
+</details>
 ## Install
 ```js
@@ -71,6 +84,11 @@ Alternatively, it could be included on a web page [directly](#cdn) via a `<scrip
 If your `.xlsx` file only has a single "sheet", or if you only care for a single "sheet", or if you don't know or care what a "sheet" is, use `readSheet()` function.
+| Name       | Date of Birth | Married | Kids |
+| ---------- | ------------- | ------- | ---- |
+| John Smith | 1/1/1995      | TRUE    | 3    |
+| Kate Brown | 3/1/2010      | FALSE   | 0    |
 ```js
 import { readSheet } from 'read-excel-file/node'
@@ -78,9 +96,9 @@ await readSheet(file)
 // Returns
 [
-  ['John Smith',35,true,...],
-  ['Kate Brown',28,false,...],
-  ...
+  ['Name', 'Date of Birth', 'Married', 'Kids'],
+  ['John Smith', 1995-01-01T00:00:00.000Z, true, 3],
+  ['Kate Brown', 2010-03-01T00:00:00.000Z, false, 0]
 ]
 ```
@@ -101,9 +119,9 @@ await readExcelFile(file)
 [{
   sheet: 'Sheet1',
   data: [
-    ['John Smith',35,true,...],
-    ['Kate Brown',28,false,...],
-    ...
+    ['Name', 'Age'],
+    ['John Smith', 30],
+    ['Kate Brown', 15]
   ]
 }, {
   sheet: 'Sheet2',
@@ -115,7 +133,9 @@ At least one "sheet" always exists. Each "sheet" is an object with properties:
 * `sheet` — Sheet name.
   * Example: `"Sheet1"`
 * `data` — Sheet data. An array of rows. Each row is an array of values — `string`, `number`, `boolean` or `Date`.
-  * Example: `[ ['John Smith',35,true,...], ['Kate Brown',28,false,...], ... ]`
+  * Example: `[ ['Name','Age'], ['John Smith',30], ['Kate Brown',15] ]`
+## API
 This package provides a separate `import` path for each different environment, as described below.
@@ -280,16 +300,15 @@ import { readSheet, parseData } from "read-excel-file/browser"
 const data = await readSheet(file)
 const schema = { ... }
-for (const { object, errors } of parseData(data, schema)) {
-  if (errors) {
-    console.error(errors)
-  } else {
-    console.log(object)
-  }
+const { objects, errors } = parseData(data, schema)
+if (errors) {
+  console.error(errors)
+} else {
+  console.log(objects)
 }
 ```
-The `parseData()` function returns an array where each element represents a "data row" and has shape `{ object, errors }`. Depending on whether there were any errors when parsing a given "data row", either `object` or `errors` property will be `undefined`.
+The `parseData()` function returns an object — `{ objects, errors }`. Depending on whether there were any errors when parsing the data, either `objects` or `errors` property will be `undefined`.
 The sheet data that is being parsed should adhere to a simple structure: the first row should be a header row with just column titles, and each following row should specify the values for those columns.
@@ -351,107 +370,227 @@ Example:
 ```js
 // An example .xlsx document:
-// -----------------------------------------------------------------------------------------
-// | START DATE | NUMBER OF STUDENTS | IS FREE | COURSE TITLE |    CONTACT     |  STATUS   |
-// -----------------------------------------------------------------------------------------
-// | 03/24/2018 |         10         |   true  |  Chemistry   | (123) 456-7890 | SCHEDULED |
-// -----------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------
+// | START DATE | SEATS |   STATUS  |    CONTACT     | COURSE TITLE  | COURSE CATEGORY   | COURSE IS FREE |
+// --------------------------------------------------------------------------------------------------------
+// | 03/24/2018 |   10  | SCHEDULED | (123) 456-7890 | Basic Algebra | Math, Arithmetic  |     TRUE       |
+// --------------------------------------------------------------------------------------------------------
 const schema = {
-  date: {
+  startDate: {
     column: 'START DATE',
     type: Date
   },
-  numberOfStudents: {
-    column: 'NUMBER OF STUDENTS',
+  seats: {
+    column: 'SEATS',
     type: Number,
     required: true
   },
-  // Nested object example.
-  course: {
-    schema: {
-      isFree: {
-        column: 'IS FREE',
-        type: Boolean
-      },
-      title: {
-        column: 'COURSE TITLE',
-        type: String
-      }
-    }
-    // required: true/false
-  },
-  contact: {
-    column: 'CONTACT',
-    required: true,
-    // A custom `type` parsing function can be specified.
-    // It will parse the cell value if it's not empty.
-    type: (value) => {
-      const number = parsePhoneNumber(value)
-      if (!number) {
-        throw new Error('invalid')
-      }
-      return number
-    }
-  },
   status: {
     column: 'STATUS',
     type: String,
+    // An example of using `oneOf`
     oneOf: [
       'SCHEDULED',
       'STARTED',
       'FINISHED'
     ]
+  },
+  contact: {
+    column: 'CONTACT',
+    required: true,
+    // An example of using a custom `type`
+    type: PhoneNumber
+  },
+  // Nested object example
+  course: {
+    // A nested object could be declared as completely optional by specifying `required: false`.
+    // In that case, when all of its properties are missing from the input data, it wouldn't throw any error
+    // regardless of whether some of its properties are declared as `required: true` or not.
+    required: false,
+    schema: {
+      title: {
+        column: 'COURSE TITLE',
+        type: String,
+        // When course data is present, the course title must be specified.
+        required: true
+      },
+      categories: {
+        column: 'COURSE CATEGORY',
+        // An example of parsing comma-separated values.
+        type: [String]
+      },
+      isFree: {
+        column: 'COURSE IS FREE',
+        type: Boolean
+      }
+    }
   }
 }
+// If this code was written in TypeScript, `schema` would've been declared as:
+// const schema: Schema<Object, ColumnTitle> = { ... }
+// Read `data` from an `.xlsx` file
 const data = await readSheet(file)
+// Parse `data` using the `schema`
 const results = parseData(data, schema)
+// There's one data row in the `.xlsx` file.
 results.length === 1
-// `errors` items have shape: `{ column, error, reason?, value?, type? }`.
+// There have been no errors when parsing the first data row, so `errors` is `undefined`.
+// Should there have been any errors when parsing the row, `errors` would've been an array
+// with items having shape: `{ column, error, reason?, value?, type? }`.
 results[0].errors === undefined
 results[0].object === {
-  date: new Date(2018, 3 - 1, 24),
-  numberOfStudents: 10,
-  course: {
-    isFree: true,
-    title: 'Chemistry'
-  },
+  startDate: new Date(Date.UTC(2018, 3 - 1, 24)),
+  seats: 10,
+  status: 'SCHEDULED',
   contact: '+11234567890',
-  status: 'SCHEDULED'
+  course: {
+    title: 'Basic Algebra',
+    categories: ['Math', 'Arithmetic'],
+    isFree: true
+  }
+}
+// An example of a custom `type` parser function.
+// It will parse the cell value when it's not empty.
+function PhoneNumber(value) {
+  const number = parsePhoneNumber(value)
+  if (!number) {
+    throw new Error('invalid')
+  }
+  return number
 }
 ```
-<!-- #### Schema: Tips and Features -->
+An example of how an application could handle the `results`:
-<!-- If no `type` is specified then the cell value is returned "as is": as a string, number, date or boolean. -->
+```js
+const errors = []
+const objects = []
+// If this code was written in TypeScript, `errors` and `objects` would've been declared as:
+// const errors: { error: ParseDataError, row: number }[] = []
+// const objects: Object[] = []
+let row = 1
+for (const { errors: errorsInRow, object } of results) {
+  if (errorsInRow) {
+    for (const error of errorsInRow) {
+      errors.push({ error, row })
+    }
+  } else {
+    objects.push(object)
+  }
+  row++
+}
-<!-- There are also some additional exported `type`s available: -->
+if (errors.length > 0) {
+  for (const { error, row } of errors) {
+    console.error('Error in data row', row, 'column', error.column, ':', error.error, error.reason || '')
+  }
+} else {
+  console.log('Objects', objects)
+}
+```
 <details>
-<summary>An example of a <strong>custom <code>type</code></strong></summary>
+<summary>An example of defining a <strong>custom <code>type</code></strong> in <strong>TypeScript</strong></summary>
 #####
-Here's an example of a basic custom `type`. It calls a custom `parseValue()` function to parse a cell value, and produces an `"invalid"` error if the value couldn't be parsed. If a cell is empty, it will not be parsed.
+```ts
+import type {
+  Schema,
+  CellValue,
+  ParseDataError,
+  ParseDataCustomType,
+  ParseDataCustomTypeErrorMessage
+} from 'read-excel-file/node'
-```js
-{
-  property: {
-    column: 'COLUMN TITLE',
-    type: (value) => {
-      try {
-        return parseValue(value)
-      } catch (error) {
-        console.error(error)
-        throw new Error('invalid')
-      }
+type ColumnTitle = 'COLUMN TITLE 1' | 'COLUMN TITLE 2'
+type CustomTypeValue = string
+function CustomType(value: CellValue): CustomTypeValue {
+  if (typeof value !== 'string') {
+    throw new Error('not_a_string')
+  }
+  return '~' + value + '~'
+}
+type CustomTypeErrorMessage<Type extends ParseDataCustomType<unknown>> =
+  Type extends typeof CustomType
+    ? 'not_a_string'
+    : never
+// type CustomTypeErrorReason<
+//   Type extends ParseDataCustomType<unknown>,
+//   ErrorMessage extends ParseDataCustomTypeErrorMessage<Type>
+// > =
+//   Type extends typeof CustomType
+//     ? (ErrorMessage extends 'not_a_string' ? undefined : never)
+//     : never
+type PossibleError = ParseDataError<
+  ColumnTitle,
+  typeof CustomType,
+  CustomTypeErrorMessage<typeof CustomType>
+  // CustomTypeErrorReason<typeof CustomType, CustomTypeErrorMessage<typeof CustomType>>
+>
+interface Object {
+  property1: CustomTypeValue;
+  property2?: string;
+}
+const schema: Schema<Object, ColumnTitle> = {
+  property1: {
+    column: 'COLUMN TITLE 1',
+    type: CustomType,
+    required: true
+  },
+  property2: {
+    column: 'COLUMN TITLE 2',
+    type: String
+  }
+}
+const results = parseData<Object, ColumnTitle, PossibleError>([
+  ['COLUMN TITLE 1', 'COLUMN TITLE 2'],
+  ['Value 1', 'Value 2']
+], schema)
+const errors: {
+  error: PossibleError,
+  row: number
+}[] = []
+const objects: Object[] = []
+let row = 1
+for (const { errors: errorsInRow, object } of results) {
+  if (errorsInRow) {
+    for (const error of errorsInRow) {
+      errors.push({ error, row })
     }
+  } else {
+    objects.push(object)
+  }
+  row++
+}
+if (errors.length > 0) {
+  for (const { error, row } of errors) {
+    console.error('Error in data row', row, 'column', error.column, ':', error.error, error.reason || '')
   }
+} else {
+  console.log('Objects', objects)
 }
 ```
 </details>

package/browser/index.d.ts CHANGED Viewed

@@ -18,14 +18,24 @@ import {
 	Schema
 } from '../types/parseData/parseDataSchema.d.js';
+import {
+	ParseDataError
+} from '../types/parseData/parseDataError.d.js';
 export {
 	CellValue,
 	Row,
-	SheetData
+	SheetData,
+	Sheet
 } from '../types/types.d.js';
 export {
-	ParseDataValueCustomType as ParseDataValueType,
+	ParseDataCustomType,
+	// Base `type`s when parsing data.
+	StringType as String,
+	DateType as Date,
+	NumberType as Number,
+	BooleanType as Boolean,
 	// Additional built-in `type`s when parsing data.
 	Integer,
 	Email,
@@ -33,6 +43,8 @@ export {
 } from '../types/parseData/parseDataValueType.d.js';
 export {
+	ParseDataCustomTypeErrorMessage,
+	ParseDataCustomTypeErrorReason,
 	ParseDataError,
 	ParseDataValueRequiredError
 } from '../types/parseData/parseDataError.d.js';
@@ -63,9 +75,10 @@ export function readSheet<ParsedNumber = number>(
 export function parseData<
 	Object extends object,
-	ColumnTitle extends string
+	ColumnTitle extends string,
+	Error extends ParseDataError
 >(
 	data: SheetData,
 	schema: Schema<Object, ColumnTitle>,
 	options?: ParseDataOptions
-): ParseDataResult<Object>;
+): ParseDataResult<Object, Error>;