@js-ak/excel-toolbox 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cjs/lib/merge-sheets-to-base-file-process-sync.js +105 -0
- package/build/cjs/lib/merge-sheets-to-base-file-process.js +3 -3
- package/build/cjs/lib/merge-sheets-to-base-file-sync.js +2 -2
- package/build/cjs/lib/merge-sheets-to-base-file.js +1 -1
- package/build/cjs/lib/template/template-fs.js +8 -8
- package/build/cjs/lib/template/template-memory.js +21 -21
- package/build/cjs/lib/xml/extract-rows-from-sheet-sync.js +67 -0
- package/build/cjs/lib/xml/extract-rows-from-sheet.js +4 -2
- package/build/cjs/lib/xml/extract-xml-from-sheet-sync.js +43 -0
- package/build/cjs/lib/xml/extract-xml-from-sheet.js +15 -15
- package/build/cjs/lib/xml/index.js +2 -1
- package/build/esm/lib/merge-sheets-to-base-file-process-sync.js +69 -0
- package/build/esm/lib/merge-sheets-to-base-file-process.js +3 -3
- package/build/esm/lib/merge-sheets-to-base-file-sync.js +2 -2
- package/build/esm/lib/merge-sheets-to-base-file.js +1 -1
- package/build/esm/lib/template/template-fs.js +8 -8
- package/build/esm/lib/template/template-memory.js +21 -21
- package/build/esm/lib/xml/extract-rows-from-sheet-sync.js +64 -0
- package/build/esm/lib/xml/extract-rows-from-sheet.js +4 -2
- package/build/esm/lib/xml/extract-xml-from-sheet-sync.js +40 -0
- package/build/esm/lib/xml/extract-xml-from-sheet.js +12 -15
- package/build/esm/lib/xml/index.js +2 -1
- package/build/types/lib/merge-sheets-to-base-file-process-sync.d.ts +27 -0
- package/build/types/lib/merge-sheets-to-base-file-process.d.ts +1 -1
- package/build/types/lib/xml/extract-rows-from-sheet-sync.d.ts +28 -0
- package/build/types/lib/xml/extract-rows-from-sheet.d.ts +2 -2
- package/build/types/lib/xml/extract-xml-from-sheet-sync.d.ts +14 -0
- package/build/types/lib/xml/extract-xml-from-sheet.d.ts +2 -2
- package/build/types/lib/xml/index.d.ts +2 -1
- package/package.json +1 -5
- package/build/cjs/lib/xml/extract-xml-from-system-content.js +0 -53
- package/build/esm/lib/xml/extract-xml-from-system-content.js +0 -49
- package/build/types/lib/xml/extract-xml-from-system-content.d.ts +0 -15
@@ -16,13 +16,13 @@ import * as Xml from "./xml/index.js";
|
|
16
16
|
*
|
17
17
|
* The function returns a dictionary of file paths to their corresponding XML content.
|
18
18
|
*/
|
19
|
-
export function mergeSheetsToBaseFileProcess(data) {
|
19
|
+
export async function mergeSheetsToBaseFileProcess(data) {
|
20
20
|
const { additions, baseFiles, baseSheetIndex, gap, sheetNamesToRemove, sheetsToRemove, } = data;
|
21
21
|
const basePath = `xl/worksheets/sheet${baseSheetIndex}.xml`;
|
22
22
|
if (!baseFiles[basePath]) {
|
23
23
|
throw new Error(`Base file does not contain ${basePath}`);
|
24
24
|
}
|
25
|
-
const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = Xml.extractRowsFromSheet(baseFiles[basePath]);
|
25
|
+
const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = await Xml.extractRowsFromSheet(baseFiles[basePath]);
|
26
26
|
const allRows = [...baseRows];
|
27
27
|
const allMergeCells = [...baseMergeCells];
|
28
28
|
let currentRowOffset = lastRowNumber + gap;
|
@@ -32,7 +32,7 @@ export function mergeSheetsToBaseFileProcess(data) {
|
|
32
32
|
if (!files[sheetPath]) {
|
33
33
|
throw new Error(`File does not contain ${sheetPath}`);
|
34
34
|
}
|
35
|
-
const { mergeCells, rows } = Xml.extractRowsFromSheet(files[sheetPath]);
|
35
|
+
const { mergeCells, rows } = await Xml.extractRowsFromSheet(files[sheetPath]);
|
36
36
|
const shiftedRows = Xml.shiftRowIndices(rows, currentRowOffset);
|
37
37
|
const shiftedMergeCells = mergeCells.map(cell => {
|
38
38
|
const [start, end] = cell.ref.split(":");
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import * as Utils from "./utils/index.js";
|
2
2
|
import * as Zip from "./zip/index.js";
|
3
|
-
import { mergeSheetsToBaseFileProcess } from "./merge-sheets-to-base-file-process.js";
|
3
|
+
import { mergeSheetsToBaseFileProcessSync } from "./merge-sheets-to-base-file-process-sync.js";
|
4
4
|
/**
|
5
5
|
* Merge rows from other Excel files into a base Excel file.
|
6
6
|
* The output is a new Excel file with the merged content.
|
@@ -29,7 +29,7 @@ export function mergeSheetsToBaseFileSync(data) {
|
|
29
29
|
sheetIndexes,
|
30
30
|
});
|
31
31
|
}
|
32
|
-
|
32
|
+
mergeSheetsToBaseFileProcessSync({
|
33
33
|
additions: additionsUpdated,
|
34
34
|
baseFiles,
|
35
35
|
baseSheetIndex,
|
@@ -126,13 +126,13 @@ export class TemplateFs {
|
|
126
126
|
*/
|
127
127
|
async #getSheetPathByName(sheetName) {
|
128
128
|
// Read XML workbook to find sheet name and path
|
129
|
-
const workbookXml = Xml.extractXmlFromSheet(await this.#readFile(this.#excelKeys.workbook));
|
129
|
+
const workbookXml = await Xml.extractXmlFromSheet(await this.#readFile(this.#excelKeys.workbook));
|
130
130
|
const sheetMatch = workbookXml.match(Utils.sheetMatch(sheetName));
|
131
131
|
if (!sheetMatch || !sheetMatch[1]) {
|
132
132
|
throw new Error(`Sheet "${sheetName}" not found`);
|
133
133
|
}
|
134
134
|
const rId = sheetMatch[1];
|
135
|
-
const relsXml = Xml.extractXmlFromSheet(await this.#readFile(this.#excelKeys.workbookRels));
|
135
|
+
const relsXml = await Xml.extractXmlFromSheet(await this.#readFile(this.#excelKeys.workbookRels));
|
136
136
|
const relMatch = relsXml.match(Utils.relationshipMatch(rId));
|
137
137
|
if (!relMatch || !relMatch[1]) {
|
138
138
|
throw new Error(`Relationship "${rId}" not found`);
|
@@ -191,10 +191,10 @@ export class TemplateFs {
|
|
191
191
|
let sharedStringsContent = "";
|
192
192
|
let sheetContent = "";
|
193
193
|
if (this.fileKeys.has(sharedStringsPath)) {
|
194
|
-
sharedStringsContent = Xml.extractXmlFromSheet(await this.#readFile(sharedStringsPath));
|
194
|
+
sharedStringsContent = await Xml.extractXmlFromSheet(await this.#readFile(sharedStringsPath));
|
195
195
|
}
|
196
196
|
if (this.fileKeys.has(sheetPath)) {
|
197
|
-
sheetContent = Xml.extractXmlFromSheet(await this.#readFile(sheetPath));
|
197
|
+
sheetContent = await Xml.extractXmlFromSheet(await this.#readFile(sheetPath));
|
198
198
|
const TABLE_REGEX = /\$\{table:([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\}/g;
|
199
199
|
const hasTablePlaceholders = TABLE_REGEX.test(sharedStringsContent) || TABLE_REGEX.test(sheetContent);
|
200
200
|
if (hasTablePlaceholders) {
|
@@ -251,7 +251,7 @@ export class TemplateFs {
|
|
251
251
|
}
|
252
252
|
// Read workbook.xml and find the source sheet
|
253
253
|
const workbookXmlPath = this.#excelKeys.workbook;
|
254
|
-
const workbookXml = Xml.extractXmlFromSheet(await this.#readFile(workbookXmlPath));
|
254
|
+
const workbookXml = await Xml.extractXmlFromSheet(await this.#readFile(workbookXmlPath));
|
255
255
|
// Find the source sheet
|
256
256
|
const sheetMatch = workbookXml.match(Utils.sheetMatch(sourceName));
|
257
257
|
if (!sheetMatch || !sheetMatch[1]) {
|
@@ -265,7 +265,7 @@ export class TemplateFs {
|
|
265
265
|
// Find the source sheet path by rId
|
266
266
|
const rId = sheetMatch[1];
|
267
267
|
const relsXmlPath = this.#excelKeys.workbookRels;
|
268
|
-
const relsXml = Xml.extractXmlFromSheet(await this.#readFile(relsXmlPath));
|
268
|
+
const relsXml = await Xml.extractXmlFromSheet(await this.#readFile(relsXmlPath));
|
269
269
|
const relMatch = relsXml.match(Utils.relationshipMatch(rId));
|
270
270
|
if (!relMatch || !relMatch[1]) {
|
271
271
|
throw new Error(`Relationship "${rId}" not found`);
|
@@ -300,7 +300,7 @@ export class TemplateFs {
|
|
300
300
|
// Read [Content_Types].xml
|
301
301
|
// Update [Content_Types].xml
|
302
302
|
const contentTypesPath = this.#excelKeys.contentTypes;
|
303
|
-
const contentTypesXml = Xml.extractXmlFromSheet(await this.#readFile(contentTypesPath));
|
303
|
+
const contentTypesXml = await Xml.extractXmlFromSheet(await this.#readFile(contentTypesPath));
|
304
304
|
const overrideTag = `<Override PartName="/xl/worksheets/${newSheetFilename}" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>`;
|
305
305
|
const updatedContentTypesXml = contentTypesXml.replace("</Types>", overrideTag + "</Types>");
|
306
306
|
await this.#set(contentTypesPath, updatedContentTypesXml);
|
@@ -359,7 +359,7 @@ export class TemplateFs {
|
|
359
359
|
// Find the sheet
|
360
360
|
const sheetPath = await this.#getSheetPathByName(sheetName);
|
361
361
|
const sheetXmlRaw = await this.#readFile(sheetPath);
|
362
|
-
const sheetXml = Xml.extractXmlFromSheet(sheetXmlRaw);
|
362
|
+
const sheetXml = await Xml.extractXmlFromSheet(sheetXmlRaw);
|
363
363
|
let nextRow = 0;
|
364
364
|
if (!startRowNumber) {
|
365
365
|
// Find the last row
|
@@ -104,7 +104,7 @@ export class TemplateMemory {
|
|
104
104
|
* @throws {Error} If the file key is not found.
|
105
105
|
* @experimental This API is experimental and might change in future versions.
|
106
106
|
*/
|
107
|
-
#extractXmlFromSheet(fileKey) {
|
107
|
+
async #extractXmlFromSheet(fileKey) {
|
108
108
|
if (!this.files[fileKey]) {
|
109
109
|
throw new Error(`${fileKey} not found`);
|
110
110
|
}
|
@@ -122,7 +122,7 @@ export class TemplateMemory {
|
|
122
122
|
* @throws {Error} If the file key is not found
|
123
123
|
* @experimental This API is experimental and might change in future versions.
|
124
124
|
*/
|
125
|
-
#extractRowsFromSheet(fileKey) {
|
125
|
+
async #extractRowsFromSheet(fileKey) {
|
126
126
|
if (!this.files[fileKey]) {
|
127
127
|
throw new Error(`${fileKey} not found`);
|
128
128
|
}
|
@@ -136,15 +136,15 @@ export class TemplateMemory {
|
|
136
136
|
* @throws {Error} If the sheet with the given name does not exist.
|
137
137
|
* @experimental This API is experimental and might change in future versions.
|
138
138
|
*/
|
139
|
-
#getSheetPathByName(sheetName) {
|
139
|
+
async #getSheetPathByName(sheetName) {
|
140
140
|
// Find the sheet
|
141
|
-
const workbookXml = this.#extractXmlFromSheet(this.#excelKeys.workbook);
|
141
|
+
const workbookXml = await this.#extractXmlFromSheet(this.#excelKeys.workbook);
|
142
142
|
const sheetMatch = workbookXml.match(Utils.sheetMatch(sheetName));
|
143
143
|
if (!sheetMatch || !sheetMatch[1]) {
|
144
144
|
throw new Error(`Sheet "${sheetName}" not found`);
|
145
145
|
}
|
146
146
|
const rId = sheetMatch[1];
|
147
|
-
const relsXml = this.#extractXmlFromSheet(this.#excelKeys.workbookRels);
|
147
|
+
const relsXml = await this.#extractXmlFromSheet(this.#excelKeys.workbookRels);
|
148
148
|
const relMatch = relsXml.match(Utils.relationshipMatch(rId));
|
149
149
|
if (!relMatch || !relMatch[1]) {
|
150
150
|
throw new Error(`Relationship "${rId}" not found`);
|
@@ -197,11 +197,11 @@ export class TemplateMemory {
|
|
197
197
|
let sharedStringsContent = "";
|
198
198
|
let sheetContent = "";
|
199
199
|
if (this.files[sharedStringsPath]) {
|
200
|
-
sharedStringsContent = this.#extractXmlFromSheet(sharedStringsPath);
|
200
|
+
sharedStringsContent = await this.#extractXmlFromSheet(sharedStringsPath);
|
201
201
|
}
|
202
|
-
const sheetPath = this.#getSheetPathByName(sheetName);
|
202
|
+
const sheetPath = await this.#getSheetPathByName(sheetName);
|
203
203
|
if (this.files[sheetPath]) {
|
204
|
-
sheetContent = this.#extractXmlFromSheet(sheetPath);
|
204
|
+
sheetContent = await this.#extractXmlFromSheet(sheetPath);
|
205
205
|
const TABLE_REGEX = /\$\{table:([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\}/g;
|
206
206
|
const hasTablePlaceholders = TABLE_REGEX.test(sharedStringsContent) || TABLE_REGEX.test(sheetContent);
|
207
207
|
if (hasTablePlaceholders) {
|
@@ -236,11 +236,11 @@ export class TemplateMemory {
|
|
236
236
|
* @throws {Error} If no sheets are found to merge.
|
237
237
|
* @experimental This API is experimental and might change in future versions.
|
238
238
|
*/
|
239
|
-
#mergeSheets(data) {
|
239
|
+
async #mergeSheets(data) {
|
240
240
|
const { additions, baseSheetIndex = 1, baseSheetName, gap = 0, } = data;
|
241
241
|
let fileKey = "";
|
242
242
|
if (baseSheetName) {
|
243
|
-
fileKey = this.#getSheetPathByName(baseSheetName);
|
243
|
+
fileKey = await this.#getSheetPathByName(baseSheetName);
|
244
244
|
}
|
245
245
|
if (baseSheetIndex && !fileKey) {
|
246
246
|
if (baseSheetIndex < 1) {
|
@@ -251,16 +251,16 @@ export class TemplateMemory {
|
|
251
251
|
if (!fileKey) {
|
252
252
|
throw new Error("Base sheet not found");
|
253
253
|
}
|
254
|
-
const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = this.#extractRowsFromSheet(fileKey);
|
254
|
+
const { lastRowNumber, mergeCells: baseMergeCells, rows: baseRows, xml, } = await this.#extractRowsFromSheet(fileKey);
|
255
255
|
const allRows = [...baseRows];
|
256
256
|
const allMergeCells = [...baseMergeCells];
|
257
257
|
let currentRowOffset = lastRowNumber + gap;
|
258
258
|
const sheetPaths = [];
|
259
259
|
if (additions.sheetIndexes) {
|
260
|
-
sheetPaths.push(...(additions.sheetIndexes
|
260
|
+
sheetPaths.push(...(await Promise.all(additions.sheetIndexes.map(e => this.#getSheetPathById(e)))));
|
261
261
|
}
|
262
262
|
if (additions.sheetNames) {
|
263
|
-
sheetPaths.push(...(additions.sheetNames
|
263
|
+
sheetPaths.push(...(await Promise.all(additions.sheetNames.map(e => this.#getSheetPathByName(e)))));
|
264
264
|
}
|
265
265
|
if (sheetPaths.length === 0) {
|
266
266
|
throw new Error("No sheets found to merge");
|
@@ -269,7 +269,7 @@ export class TemplateMemory {
|
|
269
269
|
if (!this.files[sheetPath]) {
|
270
270
|
throw new Error(`Sheet "${sheetPath}" not found`);
|
271
271
|
}
|
272
|
-
const { mergeCells, rows } = Xml.extractRowsFromSheet(this.files[sheetPath]);
|
272
|
+
const { mergeCells, rows } = await Xml.extractRowsFromSheet(this.files[sheetPath]);
|
273
273
|
const shiftedRows = Xml.shiftRowIndices(rows, currentRowOffset);
|
274
274
|
const shiftedMergeCells = mergeCells.map(cell => {
|
275
275
|
const [start, end] = cell.ref.split(":");
|
@@ -341,7 +341,7 @@ export class TemplateMemory {
|
|
341
341
|
}
|
342
342
|
// Read workbook.xml and find the source sheet
|
343
343
|
const workbookXmlPath = this.#excelKeys.workbook;
|
344
|
-
const workbookXml = this.#extractXmlFromSheet(this.#excelKeys.workbook);
|
344
|
+
const workbookXml = await this.#extractXmlFromSheet(this.#excelKeys.workbook);
|
345
345
|
// Find the source sheet
|
346
346
|
const sheetMatch = workbookXml.match(Utils.sheetMatch(sourceName));
|
347
347
|
if (!sheetMatch || !sheetMatch[1]) {
|
@@ -355,7 +355,7 @@ export class TemplateMemory {
|
|
355
355
|
// Find the source sheet path by rId
|
356
356
|
const rId = sheetMatch[1];
|
357
357
|
const relsXmlPath = this.#excelKeys.workbookRels;
|
358
|
-
const relsXml = this.#extractXmlFromSheet(this.#excelKeys.workbookRels);
|
358
|
+
const relsXml = await this.#extractXmlFromSheet(this.#excelKeys.workbookRels);
|
359
359
|
const relMatch = relsXml.match(Utils.relationshipMatch(rId));
|
360
360
|
if (!relMatch || !relMatch[1]) {
|
361
361
|
throw new Error(`Relationship "${rId}" not found`);
|
@@ -397,7 +397,7 @@ export class TemplateMemory {
|
|
397
397
|
// Read [Content_Types].xml
|
398
398
|
// Update [Content_Types].xml
|
399
399
|
const contentTypesPath = "[Content_Types].xml";
|
400
|
-
const contentTypesXml = this.#extractXmlFromSheet(contentTypesPath);
|
400
|
+
const contentTypesXml = await this.#extractXmlFromSheet(contentTypesPath);
|
401
401
|
const overrideTag = `<Override PartName="/xl/worksheets/${newSheetFilename}" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>`;
|
402
402
|
const updatedContentTypesXml = contentTypesXml.replace("</Types>", overrideTag + "</Types>");
|
403
403
|
await this.#set(contentTypesPath, Buffer.from(updatedContentTypesXml));
|
@@ -454,8 +454,8 @@ export class TemplateMemory {
|
|
454
454
|
Utils.checkStartRow(startRowNumber);
|
455
455
|
Utils.checkRows(preparedRows);
|
456
456
|
// Find the sheet
|
457
|
-
const sheetPath = this.#getSheetPathByName(sheetName);
|
458
|
-
const sheetXml = this.#extractXmlFromSheet(sheetPath);
|
457
|
+
const sheetPath = await this.#getSheetPathByName(sheetName);
|
458
|
+
const sheetXml = await this.#extractXmlFromSheet(sheetPath);
|
459
459
|
let nextRow = 0;
|
460
460
|
if (!startRowNumber) {
|
461
461
|
// Find the last row
|
@@ -518,8 +518,8 @@ export class TemplateMemory {
|
|
518
518
|
if (!sheetName)
|
519
519
|
throw new Error("Sheet name is required");
|
520
520
|
// Read XML workbook to find sheet name and path
|
521
|
-
const sheetPath = this.#getSheetPathByName(sheetName);
|
522
|
-
const sheetXml = this.#extractXmlFromSheet(sheetPath);
|
521
|
+
const sheetPath = await this.#getSheetPathByName(sheetName);
|
522
|
+
const sheetXml = await this.#extractXmlFromSheet(sheetPath);
|
523
523
|
const output = new MemoryWriteStream();
|
524
524
|
let inserted = false;
|
525
525
|
// --- Case 1: <sheetData>...</sheetData> on one line ---
|
@@ -0,0 +1,64 @@
|
|
1
|
+
import { extractXmlFromSheetSync } from "./extract-xml-from-sheet-sync.js";
|
2
|
+
/**
|
3
|
+
* Parses a worksheet (either as Buffer or string) to extract row data,
|
4
|
+
* last row number, and merge cell information from Excel XML format.
|
5
|
+
*
|
6
|
+
* This function is particularly useful for processing Excel files in
|
7
|
+
* Open XML Spreadsheet format (.xlsx).
|
8
|
+
*
|
9
|
+
* @param {Buffer|string} sheet - The worksheet content to parse, either as:
|
10
|
+
* - Buffer (binary Excel sheet)
|
11
|
+
* - string (raw XML content)
|
12
|
+
* @returns {{
|
13
|
+
* rows: string[],
|
14
|
+
* lastRowNumber: number,
|
15
|
+
* mergeCells: {ref: string}[]
|
16
|
+
* }} An object containing:
|
17
|
+
* - rows: Array of raw XML strings for each <row> element
|
18
|
+
* - lastRowNumber: Highest row number found in the sheet (1-based)
|
19
|
+
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
20
|
+
* @throws {Error} If the sheetData section is not found in the XML
|
21
|
+
*/
|
22
|
+
export function extractRowsFromSheetSync(sheet) {
|
23
|
+
// Convert Buffer input to XML string if needed
|
24
|
+
const xml = typeof sheet === "string"
|
25
|
+
? sheet
|
26
|
+
: extractXmlFromSheetSync(sheet);
|
27
|
+
// Extract the sheetData section containing all rows
|
28
|
+
const sheetDataMatch = xml.match(/<sheetData[^>]*>([\s\S]*?)<\/sheetData>/);
|
29
|
+
if (!sheetDataMatch) {
|
30
|
+
throw new Error("sheetData not found in worksheet XML");
|
31
|
+
}
|
32
|
+
const sheetDataContent = sheetDataMatch[1] || "";
|
33
|
+
// Extract all <row> elements using regex
|
34
|
+
const rowMatches = [...sheetDataContent.matchAll(/<row\b[^>]*\/>|<row\b[^>]*>[\s\S]*?<\/row>/g)];
|
35
|
+
const rows = rowMatches.map(match => match[0]);
|
36
|
+
// Calculate the highest row number present in the sheet
|
37
|
+
const lastRowNumber = rowMatches
|
38
|
+
.map(match => {
|
39
|
+
// Extract row number from r="..." attribute (1-based)
|
40
|
+
const rowNumMatch = match[0].match(/r="(\d+)"/);
|
41
|
+
return rowNumMatch?.[1] ? parseInt(rowNumMatch[1], 10) : null;
|
42
|
+
})
|
43
|
+
.filter((row) => row !== null) // Type guard to filter out nulls
|
44
|
+
.reduce((max, current) => Math.max(max, current), 0); // Find maximum row number
|
45
|
+
// Extract all merged cell ranges from the worksheet
|
46
|
+
const mergeCells = [];
|
47
|
+
const mergeCellsMatch = xml.match(/<mergeCells[^>]*>([\s\S]*?)<\/mergeCells>/);
|
48
|
+
if (mergeCellsMatch) {
|
49
|
+
// Find all mergeCell entries with ref attributes
|
50
|
+
const mergeCellMatches = mergeCellsMatch[1]?.match(/<mergeCell[^>]+ref="([^"]+)"[^>]*>/g) || [];
|
51
|
+
mergeCellMatches.forEach(match => {
|
52
|
+
const refMatch = match.match(/ref="([^"]+)"/);
|
53
|
+
if (refMatch?.[1]) {
|
54
|
+
mergeCells.push({ ref: refMatch[1] }); // Store the cell range (e.g., "A1:B2")
|
55
|
+
}
|
56
|
+
});
|
57
|
+
}
|
58
|
+
return {
|
59
|
+
lastRowNumber,
|
60
|
+
mergeCells,
|
61
|
+
rows,
|
62
|
+
xml,
|
63
|
+
};
|
64
|
+
}
|
@@ -19,9 +19,11 @@ import { extractXmlFromSheet } from "./extract-xml-from-sheet.js";
|
|
19
19
|
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
20
20
|
* @throws {Error} If the sheetData section is not found in the XML
|
21
21
|
*/
|
22
|
-
export function extractRowsFromSheet(sheet) {
|
22
|
+
export async function extractRowsFromSheet(sheet) {
|
23
23
|
// Convert Buffer input to XML string if needed
|
24
|
-
const xml = typeof sheet === "string"
|
24
|
+
const xml = typeof sheet === "string"
|
25
|
+
? sheet
|
26
|
+
: await extractXmlFromSheet(sheet);
|
25
27
|
// Extract the sheetData section containing all rows
|
26
28
|
const sheetDataMatch = xml.match(/<sheetData[^>]*>([\s\S]*?)<\/sheetData>/);
|
27
29
|
if (!sheetDataMatch) {
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import { inflateRawSync } from "node:zlib";
|
2
|
+
/**
|
3
|
+
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
4
|
+
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
5
|
+
*
|
6
|
+
* This function is designed to work with Excel Open XML (.xlsx) worksheet files,
|
7
|
+
* which may be stored in either compressed or uncompressed format within the ZIP container.
|
8
|
+
*
|
9
|
+
* @param {Buffer} buffer - The file content to process, which may be:
|
10
|
+
* - Raw XML text
|
11
|
+
* - Deflate-compressed XML data (without zlib headers)
|
12
|
+
* @returns {string} - The extracted XML content as a UTF-8 string
|
13
|
+
* @throws {Error} - If the buffer is empty or cannot be processed
|
14
|
+
*/
|
15
|
+
export function extractXmlFromSheetSync(buffer) {
|
16
|
+
if (!buffer || buffer.length === 0) {
|
17
|
+
throw new Error("Empty buffer provided");
|
18
|
+
}
|
19
|
+
let xml;
|
20
|
+
// Check if the buffer starts with an XML declaration (<?xml)
|
21
|
+
const head = buffer.subarray(0, 1024).toString("utf8").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "").trim();
|
22
|
+
const isXml = /^<\?xml[\s\S]+<\w+[\s>]/.test(head);
|
23
|
+
if (isXml) {
|
24
|
+
// Case 1: Already uncompressed XML - convert directly to string
|
25
|
+
xml = buffer.toString("utf8");
|
26
|
+
}
|
27
|
+
else {
|
28
|
+
// Case 2: Attempt to decompress as raw deflate data
|
29
|
+
try {
|
30
|
+
xml = inflateRawSync(buffer).toString("utf8");
|
31
|
+
}
|
32
|
+
catch (err) {
|
33
|
+
throw new Error("Failed to decompress sheet XML: " + (err instanceof Error ? err.message : String(err)));
|
34
|
+
}
|
35
|
+
}
|
36
|
+
// Sanitize XML by removing control characters (except tab, newline, carriage return)
|
37
|
+
// This handles potential corruption from binary data or encoding issues
|
38
|
+
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
39
|
+
return xml;
|
40
|
+
}
|
@@ -1,4 +1,6 @@
|
|
1
|
-
import { inflateRaw } from "pako";
|
1
|
+
import util from "node:util";
|
2
|
+
import zlib from "node:zlib";
|
3
|
+
const inflateRaw = util.promisify(zlib.inflateRaw);
|
2
4
|
/**
|
3
5
|
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
4
6
|
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
@@ -9,35 +11,30 @@ import { inflateRaw } from "pako";
|
|
9
11
|
* @param {Buffer} buffer - The file content to process, which may be:
|
10
12
|
* - Raw XML text
|
11
13
|
* - Deflate-compressed XML data (without zlib headers)
|
12
|
-
* @returns {string} - The extracted XML content as a UTF-8 string
|
14
|
+
* @returns {Promise<string>} - The extracted XML content as a UTF-8 string
|
13
15
|
* @throws {Error} - If the buffer is empty or cannot be processed
|
14
16
|
*/
|
15
|
-
export function extractXmlFromSheet(buffer) {
|
17
|
+
export async function extractXmlFromSheet(buffer) {
|
16
18
|
if (!buffer || buffer.length === 0) {
|
17
19
|
throw new Error("Empty buffer provided");
|
18
20
|
}
|
19
21
|
let xml;
|
20
22
|
// Check if the buffer starts with an XML declaration (<?xml)
|
21
|
-
const
|
22
|
-
|
23
|
+
const head = buffer.subarray(0, 1024).toString("utf8").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "").trim();
|
24
|
+
const isXml = /^<\?xml[\s\S]+<\w+[\s>]/.test(head);
|
25
|
+
if (isXml) {
|
23
26
|
// Case 1: Already uncompressed XML - convert directly to string
|
24
27
|
xml = buffer.toString("utf8");
|
25
28
|
}
|
26
29
|
else {
|
27
30
|
// Case 2: Attempt to decompress as raw deflate data
|
28
|
-
|
29
|
-
|
30
|
-
if (inflated && inflated.includes("<sheetData")) {
|
31
|
-
xml = inflated;
|
31
|
+
try {
|
32
|
+
xml = (await inflateRaw(buffer)).toString("utf8");
|
32
33
|
}
|
33
|
-
|
34
|
-
throw new Error("
|
34
|
+
catch (err) {
|
35
|
+
throw new Error("Failed to decompress sheet XML: " + (err instanceof Error ? err.message : String(err)));
|
35
36
|
}
|
36
37
|
}
|
37
|
-
// Fallback: If no XML obtained yet, try direct UTF-8 conversion
|
38
|
-
if (!xml) {
|
39
|
-
xml = buffer.toString("utf8");
|
40
|
-
}
|
41
38
|
// Sanitize XML by removing control characters (except tab, newline, carriage return)
|
42
39
|
// This handles potential corruption from binary data or encoding issues
|
43
40
|
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
@@ -1,5 +1,6 @@
|
|
1
1
|
export * from "./build-merged-sheet.js";
|
2
|
+
export * from "./extract-rows-from-sheet-sync.js";
|
2
3
|
export * from "./extract-rows-from-sheet.js";
|
4
|
+
export * from "./extract-xml-from-sheet-sync.js";
|
3
5
|
export * from "./extract-xml-from-sheet.js";
|
4
|
-
export * from "./extract-xml-from-system-content.js";
|
5
6
|
export * from "./shift-row-indices.js";
|
@@ -0,0 +1,27 @@
|
|
1
|
+
/**
|
2
|
+
* Merges rows from other Excel files into a base Excel file.
|
3
|
+
*
|
4
|
+
* This function is a process-friendly version of mergeSheetsToBaseFile.
|
5
|
+
* It takes a single object with the following properties:
|
6
|
+
* - additions: An array of objects with two properties:
|
7
|
+
* - files: A dictionary of file paths to their corresponding XML content
|
8
|
+
* - sheetIndexes: The 1-based indexes of the sheet to extract rows from
|
9
|
+
* - baseFiles: A dictionary of file paths to their corresponding XML content
|
10
|
+
* - baseSheetIndex: The 1-based index of the sheet in the base file to add rows to
|
11
|
+
* - gap: The number of empty rows to insert between each added section
|
12
|
+
* - sheetNamesToRemove: The names of sheets to remove from the output file
|
13
|
+
* - sheetsToRemove: The 1-based indices of sheets to remove from the output file
|
14
|
+
*
|
15
|
+
* The function returns a dictionary of file paths to their corresponding XML content.
|
16
|
+
*/
|
17
|
+
export declare function mergeSheetsToBaseFileProcessSync(data: {
|
18
|
+
additions: {
|
19
|
+
files: Record<string, Buffer>;
|
20
|
+
sheetIndexes: number[];
|
21
|
+
}[];
|
22
|
+
baseFiles: Record<string, Buffer>;
|
23
|
+
baseSheetIndex: number;
|
24
|
+
gap: number;
|
25
|
+
sheetNamesToRemove: string[];
|
26
|
+
sheetsToRemove: number[];
|
27
|
+
}): void;
|
@@ -0,0 +1,28 @@
|
|
1
|
+
/**
|
2
|
+
* Parses a worksheet (either as Buffer or string) to extract row data,
|
3
|
+
* last row number, and merge cell information from Excel XML format.
|
4
|
+
*
|
5
|
+
* This function is particularly useful for processing Excel files in
|
6
|
+
* Open XML Spreadsheet format (.xlsx).
|
7
|
+
*
|
8
|
+
* @param {Buffer|string} sheet - The worksheet content to parse, either as:
|
9
|
+
* - Buffer (binary Excel sheet)
|
10
|
+
* - string (raw XML content)
|
11
|
+
* @returns {{
|
12
|
+
* rows: string[],
|
13
|
+
* lastRowNumber: number,
|
14
|
+
* mergeCells: {ref: string}[]
|
15
|
+
* }} An object containing:
|
16
|
+
* - rows: Array of raw XML strings for each <row> element
|
17
|
+
* - lastRowNumber: Highest row number found in the sheet (1-based)
|
18
|
+
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
19
|
+
* @throws {Error} If the sheetData section is not found in the XML
|
20
|
+
*/
|
21
|
+
export declare function extractRowsFromSheetSync(sheet: Buffer | string): {
|
22
|
+
rows: string[];
|
23
|
+
lastRowNumber: number;
|
24
|
+
mergeCells: {
|
25
|
+
ref: string;
|
26
|
+
}[];
|
27
|
+
xml: string;
|
28
|
+
};
|
@@ -18,11 +18,11 @@
|
|
18
18
|
* - mergeCells: Array of merged cell ranges (e.g., [{ref: "A1:B2"}])
|
19
19
|
* @throws {Error} If the sheetData section is not found in the XML
|
20
20
|
*/
|
21
|
-
export declare function extractRowsFromSheet(sheet: Buffer | string): {
|
21
|
+
export declare function extractRowsFromSheet(sheet: Buffer | string): Promise<{
|
22
22
|
rows: string[];
|
23
23
|
lastRowNumber: number;
|
24
24
|
mergeCells: {
|
25
25
|
ref: string;
|
26
26
|
}[];
|
27
27
|
xml: string;
|
28
|
-
};
|
28
|
+
}>;
|
@@ -0,0 +1,14 @@
|
|
1
|
+
/**
|
2
|
+
* Extracts and parses XML content from an Excel worksheet file (e.g., xl/worksheets/sheet1.xml).
|
3
|
+
* Handles both compressed (raw deflate) and uncompressed (plain XML) formats.
|
4
|
+
*
|
5
|
+
* This function is designed to work with Excel Open XML (.xlsx) worksheet files,
|
6
|
+
* which may be stored in either compressed or uncompressed format within the ZIP container.
|
7
|
+
*
|
8
|
+
* @param {Buffer} buffer - The file content to process, which may be:
|
9
|
+
* - Raw XML text
|
10
|
+
* - Deflate-compressed XML data (without zlib headers)
|
11
|
+
* @returns {string} - The extracted XML content as a UTF-8 string
|
12
|
+
* @throws {Error} - If the buffer is empty or cannot be processed
|
13
|
+
*/
|
14
|
+
export declare function extractXmlFromSheetSync(buffer: Buffer): string;
|
@@ -8,7 +8,7 @@
|
|
8
8
|
* @param {Buffer} buffer - The file content to process, which may be:
|
9
9
|
* - Raw XML text
|
10
10
|
* - Deflate-compressed XML data (without zlib headers)
|
11
|
-
* @returns {string} - The extracted XML content as a UTF-8 string
|
11
|
+
* @returns {Promise<string>} - The extracted XML content as a UTF-8 string
|
12
12
|
* @throws {Error} - If the buffer is empty or cannot be processed
|
13
13
|
*/
|
14
|
-
export declare function extractXmlFromSheet(buffer: Buffer): string;
|
14
|
+
export declare function extractXmlFromSheet(buffer: Buffer): Promise<string>;
|
@@ -1,5 +1,6 @@
|
|
1
1
|
export * from "./build-merged-sheet.js";
|
2
|
+
export * from "./extract-rows-from-sheet-sync.js";
|
2
3
|
export * from "./extract-rows-from-sheet.js";
|
4
|
+
export * from "./extract-xml-from-sheet-sync.js";
|
3
5
|
export * from "./extract-xml-from-sheet.js";
|
4
|
-
export * from "./extract-xml-from-system-content.js";
|
5
6
|
export * from "./shift-row-indices.js";
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@js-ak/excel-toolbox",
|
3
|
-
"version": "1.6.0",
|
3
|
+
"version": "1.7.0",
|
4
4
|
"description": "excel-toolbox",
|
5
5
|
"publishConfig": {
|
6
6
|
"access": "public",
|
@@ -70,7 +70,6 @@
|
|
70
70
|
"@semantic-release/release-notes-generator": "14.0.0",
|
71
71
|
"@stylistic/eslint-plugin-ts": "4.2.0",
|
72
72
|
"@types/node": "22.14.0",
|
73
|
-
"@types/pako": "2.0.3",
|
74
73
|
"@vitest/coverage-v8": "3.1.2",
|
75
74
|
"eslint": "9.24.0",
|
76
75
|
"eslint-plugin-sort-destructure-keys": "2.0.0",
|
@@ -80,8 +79,5 @@
|
|
80
79
|
"typescript": "5.8.3",
|
81
80
|
"typescript-eslint": "8.29.0",
|
82
81
|
"vitest": "3.1.2"
|
83
|
-
},
|
84
|
-
"dependencies": {
|
85
|
-
"pako": "2.1.0"
|
86
82
|
}
|
87
83
|
}
|
@@ -1,53 +0,0 @@
|
|
1
|
-
"use strict";
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
-
exports.extractXmlFromSystemContent = void 0;
|
4
|
-
const pako_1 = require("pako");
|
5
|
-
/**
|
6
|
-
* Extracts and decompresses XML content from Excel system files (e.g., workbook.xml, [Content_Types].xml).
|
7
|
-
* Handles both compressed (raw DEFLATE) and uncompressed (plain XML) formats with comprehensive error handling.
|
8
|
-
*
|
9
|
-
* @param {Buffer} buffer - The file content to process, which may be:
|
10
|
-
* - Raw XML text
|
11
|
-
* - DEFLATE-compressed XML data (without zlib headers)
|
12
|
-
* @param {string} name - The filename being processed (for error reporting)
|
13
|
-
* @returns {string} - The extracted XML content as a sanitized UTF-8 string
|
14
|
-
* @throws {Error} - With descriptive messages for various failure scenarios:
|
15
|
-
* - Empty buffer
|
16
|
-
* - Decompression failures
|
17
|
-
* - Invalid XML content
|
18
|
-
*/
|
19
|
-
const extractXmlFromSystemContent = (buffer, name) => {
|
20
|
-
// Validate input buffer
|
21
|
-
if (!buffer || buffer.length === 0) {
|
22
|
-
throw new Error(`Empty data buffer provided for file ${name}`);
|
23
|
-
}
|
24
|
-
let xml;
|
25
|
-
// Check for XML declaration in first 5 bytes (<?xml)
|
26
|
-
const startsWithXml = buffer.subarray(0, 5).toString("utf8").trim().startsWith("<?xml");
|
27
|
-
if (startsWithXml) {
|
28
|
-
// Case 1: Already uncompressed XML - convert directly to string
|
29
|
-
xml = buffer.toString("utf8");
|
30
|
-
}
|
31
|
-
else {
|
32
|
-
// Case 2: Attempt DEFLATE decompression
|
33
|
-
try {
|
34
|
-
const inflated = (0, pako_1.inflateRaw)(buffer, { to: "string" });
|
35
|
-
// Validate decompressed content contains XML declaration
|
36
|
-
if (inflated && inflated.includes("<?xml")) {
|
37
|
-
xml = inflated;
|
38
|
-
}
|
39
|
-
else {
|
40
|
-
throw new Error(`Decompressed data doesn't contain valid XML in ${name}`);
|
41
|
-
}
|
42
|
-
}
|
43
|
-
catch (error) {
|
44
|
-
const message = error instanceof Error ? error.message : "Unknown error";
|
45
|
-
throw new Error(`Failed to decompress ${name}: ${message}`);
|
46
|
-
}
|
47
|
-
}
|
48
|
-
// Sanitize XML by removing illegal control characters (per XML 1.0 spec)
|
49
|
-
// Preserves tabs (0x09), newlines (0x0A), and carriage returns (0x0D)
|
50
|
-
xml = xml.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "");
|
51
|
-
return xml;
|
52
|
-
};
|
53
|
-
exports.extractXmlFromSystemContent = extractXmlFromSystemContent;
|